├── k8s
├── pvc.yaml
├── secrets.yaml
├── configmap.yaml
└── deployment.yaml
├── internal
├── sync
│ ├── interface.go
│ └── manager_simple_test.go
├── openwebui
│ ├── interface.go
│ └── client_test.go
├── adapter
│ ├── adapter.go
│ ├── jira_comment.go
│ ├── github_test.go
│ ├── confluence_test.go
│ ├── local_test.go
│ ├── local.go
│ ├── github.go
│ └── slack_test.go
├── scheduler
│ ├── scheduler.go
│ └── scheduler_simple_test.go
├── health
│ ├── health.go
│ └── health_test.go
├── utils
│ └── retry.go
├── mocks
│ └── mocks.go
└── config
│ ├── config_test.go
│ └── config.go
├── .gitignore
├── Dockerfile
├── go.mod
├── .github
└── workflows
│ └── docker-build.yml
├── Makefile
├── main.go
├── adapter_readme
├── GITHUB_ADAPTER.md
├── JIRA_ADAPTER.md
├── CONFLUENCE_ADAPTER.md
├── LOCAL_ADAPTER.md
└── SLACK_ADAPTER.md
├── ARCHITECTURE.md
├── config.example.yaml
├── go.sum
├── main_test.go
└── LICENSE
/k8s/pvc.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: PersistentVolumeClaim
3 | metadata:
4 | name: connector-data-pvc
5 | labels:
6 | app: openwebui-content-sync
7 | spec:
8 | accessModes:
9 | - ReadWriteOnce
10 | resources:
11 | requests:
12 | storage: 10Gi
13 | storageClassName: ebs-sc
14 |
--------------------------------------------------------------------------------
/internal/sync/interface.go:
--------------------------------------------------------------------------------
1 | package sync
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/openwebui-content-sync/internal/adapter"
7 | )
8 |
9 | // ManagerInterface defines the interface for sync manager operations
10 | type ManagerInterface interface {
11 | SyncFiles(ctx context.Context, adapters []adapter.Adapter) error
12 | SetKnowledgeID(knowledgeID string)
13 | InitializeFileIndex(ctx context.Context, adapters []adapter.Adapter) error
14 | }
15 |
--------------------------------------------------------------------------------
/internal/openwebui/interface.go:
--------------------------------------------------------------------------------
1 | package openwebui
2 |
3 | import (
4 | "context"
5 | )
6 |
7 | // ClientInterface defines the interface for OpenWebUI client operations
8 | type ClientInterface interface {
9 | UploadFile(ctx context.Context, filename string, content []byte) (*File, error)
10 | GetFile(ctx context.Context, fileID string) (*File, error)
11 | ListKnowledge(ctx context.Context) ([]*Knowledge, error)
12 | AddFileToKnowledge(ctx context.Context, knowledgeID, fileID string) error
13 | RemoveFileFromKnowledge(ctx context.Context, knowledgeID, fileID string) error
14 | GetKnowledgeFiles(ctx context.Context, knowledgeID string) ([]*File, error)
15 | DeleteFile(ctx context.Context, fileID string) error
16 | }
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 | connector
8 |
9 | # Test binary, built with `go test -c`
10 | *.test
11 |
12 | # Output of the go coverage tool, specifically when used with LiteIDE
13 | *.out
14 | coverage.html
15 |
16 | # Dependency directories (remove the comment below to include it)
17 | # vendor/
18 |
19 | # Go workspace file
20 | go.work
21 |
22 | # IDE files
23 | .vscode/
24 | .idea/
25 | *.swp
26 | *.swo
27 |
28 | # OS generated files
29 | .DS_Store
30 | .DS_Store?
31 | ._*
32 | .Spotlight-V100
33 | .Trashes
34 | ehthumbs.db
35 | Thumbs.db
36 |
37 | # Local data directory
38 | /data/
39 |
40 | # Log files
41 | *.log
42 |
43 | # Environment files
44 | .env
45 | .env.local
46 | .env.production
47 | .env.development
48 | .env.test
49 | config.yaml
50 | k8s-local/
51 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage
2 | FROM --platform=$BUILDPLATFORM golang:1.23.0-alpine AS builder
3 |
4 | WORKDIR /app
5 |
6 | # Install git for go modules
7 | RUN apk add --no-cache git
8 |
9 | # Copy go mod files
10 | COPY go.mod go.sum ./
11 |
12 | # Download dependencies
13 | RUN go mod download
14 |
15 | # Copy source code
16 | COPY . .
17 |
18 | # Build the application for the target platform
19 | ARG TARGETOS
20 | ARG TARGETARCH
21 | RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -a -installsuffix cgo -o main .
22 |
23 | # Final stage
24 | FROM --platform=$TARGETPLATFORM alpine:latest
25 |
26 | # Install ca-certificates for HTTPS requests
27 | RUN apk --no-cache add ca-certificates
28 |
29 | WORKDIR /root/
30 |
31 | # Copy the binary from builder stage
32 | COPY --from=builder /app/main .
33 |
34 | # Create data directory
35 | RUN mkdir -p /data
36 |
37 | # Expose port (if needed for health checks)
38 | EXPOSE 8080
39 |
40 | # Run the application
41 | CMD ["./main"]
42 |
--------------------------------------------------------------------------------
/internal/adapter/adapter.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "context"
5 | "time"
6 | )
7 |
// File is one item produced by an adapter. It carries the raw content along
// with the metadata fields below; the struct tags fix the JSON field names.
type File struct {
	// Path is serialized as "path".
	Path string `json:"path"`
	// Content holds the raw bytes of the file.
	Content []byte `json:"content"`
	// Hash is serialized as "hash".
	// NOTE(review): the hashing algorithm is not visible here - confirm.
	Hash string `json:"hash"`
	// Modified is serialized as "modified".
	Modified time.Time `json:"modified"`
	// Size is serialized as "size".
	Size int64 `json:"size"`
	// Source is serialized as "source".
	Source string `json:"source"`
	// KnowledgeID optionally names a specific knowledge base for this file.
	// It is left out of the JSON encoding when empty.
	KnowledgeID string `json:"knowledge_id,omitempty"`
}

// Adapter is the contract every data source implementation fulfils.
type Adapter interface {
	// Name reports the adapter's name.
	Name() string

	// FetchFiles pulls the files from the data source.
	FetchFiles(ctx context.Context) ([]*File, error)

	// GetLastSync reports the timestamp of the last sync.
	GetLastSync() time.Time

	// SetLastSync records t as the timestamp of the last sync.
	SetLastSync(t time.Time)
}
33 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/openwebui-content-sync
2 |
3 | go 1.23.0
4 |
5 | require (
6 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0
7 | github.com/google/go-github/v56 v56.0.0
8 | github.com/robfig/cron/v3 v3.0.1
9 | github.com/sirupsen/logrus v1.9.3
10 | golang.org/x/net v0.43.0
11 | golang.org/x/oauth2 v0.15.0
12 | gopkg.in/yaml.v3 v3.0.1
13 | )
14 |
15 | require (
16 | github.com/JohannesKaufmann/dom v0.2.0 // indirect
17 | github.com/golang/protobuf v1.5.3 // indirect
18 | github.com/google/go-cmp v0.6.0 // indirect
19 | github.com/google/go-querystring v1.1.0 // indirect
20 | github.com/gorilla/websocket v1.5.3 // indirect
21 | github.com/kr/pretty v0.3.1 // indirect
22 | github.com/rogpeppe/go-internal v1.10.0 // indirect
23 | github.com/slack-go/slack v0.17.3 // indirect
24 | github.com/stretchr/testify v1.10.0 // indirect
25 | golang.org/x/sys v0.35.0 // indirect
26 | google.golang.org/appengine v1.6.7 // indirect
27 | google.golang.org/protobuf v1.31.0 // indirect
28 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
29 | )
30 |
--------------------------------------------------------------------------------
/.github/workflows/docker-build.yml:
--------------------------------------------------------------------------------
1 | name: Build and Push Docker Image
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 |
7 | jobs:
8 | build:
9 | runs-on: ubuntu-latest
10 | steps:
11 | -
12 | name: Checkout
13 | uses: actions/checkout@v4
14 | - name: Lowercase repository name
15 | run: echo "LOWER_REPO=${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
16 | -
17 | name: Set up Docker Buildx
18 | uses: docker/setup-buildx-action@v3
19 | -
20 | name: Login to GitHub Container Registry
21 | if: github.event_name != 'pull_request'
22 | uses: docker/login-action@v3
23 | with:
24 | registry: ghcr.io
25 | username: ${{ github.actor }}
26 | password: ${{ secrets.GITHUB_TOKEN }}
27 | -
28 | name: Build and push
29 | uses: docker/build-push-action@v5
30 | with:
31 | context: .
32 | platforms: linux/amd64
33 | registry: ghcr.io
34 | push: ${{ github.event_name != 'pull_request' }}
35 | tags: ghcr.io/${{ env.LOWER_REPO }}:latest
36 | cache-from: type=gha
37 |
--------------------------------------------------------------------------------
/k8s/secrets.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: openwebui-secrets
5 | labels:
6 | app: openwebui-content-sync
7 | type: Opaque
8 | data:
9 | # Base64 encoded API key
10 | # echo -n "your-openwebui-api-key" | base64
11 | api-key: eW91ci1vcGVud2VidWktYXBpLWtleQ==
12 |
13 | ---
14 | apiVersion: v1
15 | kind: Secret
16 | metadata:
17 | name: github-secrets
18 | labels:
19 | app: openwebui-content-sync
20 | type: Opaque
21 | data:
22 | # Base64 encoded GitHub token
23 | # echo -n "your-github-token" | base64
24 | token: eW91ci1naXRodWItdG9rZW4=
25 |
26 | ---
27 | apiVersion: v1
28 | kind: Secret
29 | metadata:
30 | name: confluence-secrets
31 | labels:
32 | app: openwebui-content-sync
33 | type: Opaque
34 | data:
35 | # Base64 encoded Confluence API key
36 | # echo -n "your-confluence-api-key" | base64
37 | api-key: eW91ci1jb25mbHVlbmNlLWFwaS1rZXk=
38 |
39 | ---
40 | apiVersion: v1
41 | kind: Secret
42 | metadata:
43 | name: slack-secrets
44 | labels:
45 | app: openwebui-content-sync
46 | type: Opaque
47 | data:
48 | # Base64 encoded Slack token
49 | # echo -n "your-slack-token" | base64
50 | token: eW91ci1zbGFjay10b2tlbg==
51 |
--------------------------------------------------------------------------------
/internal/scheduler/scheduler.go:
--------------------------------------------------------------------------------
1 | package scheduler
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "time"
7 |
8 | "github.com/openwebui-content-sync/internal/adapter"
9 | "github.com/openwebui-content-sync/internal/sync"
10 | "github.com/robfig/cron/v3"
11 | "github.com/sirupsen/logrus"
12 | )
13 |
14 | // Scheduler manages periodic synchronization
15 | type Scheduler struct {
16 | cron *cron.Cron
17 | interval time.Duration
18 | adapters []adapter.Adapter
19 | syncManager sync.ManagerInterface
20 | }
21 |
22 | // New creates a new scheduler
23 | func New(interval time.Duration, adapters []adapter.Adapter, syncManager sync.ManagerInterface) *Scheduler {
24 | return &Scheduler{
25 | cron: cron.New(cron.WithSeconds()),
26 | interval: interval,
27 | adapters: adapters,
28 | syncManager: syncManager,
29 | }
30 | }
31 |
32 | // Start starts the scheduler
33 | func (s *Scheduler) Start(ctx context.Context) {
34 | logrus.Infof("Starting scheduler with interval: %v", s.interval)
35 |
36 | // Schedule the sync job
37 | cronSpec := fmt.Sprintf("@every %v", s.interval)
38 | _, err := s.cron.AddFunc(cronSpec, func() {
39 | logrus.Info("Running scheduled sync")
40 | if err := s.RunSyncWithContext(ctx); err != nil {
41 | logrus.Errorf("Scheduled sync failed: %v", err)
42 | }
43 | })
44 | if err != nil {
45 | logrus.Errorf("Failed to schedule sync job: %v", err)
46 | return
47 | }
48 |
49 | s.cron.Start()
50 |
51 | // Wait for context cancellation
52 | <-ctx.Done()
53 | logrus.Info("Stopping scheduler...")
54 | s.cron.Stop()
55 | }
56 |
57 | // RunSyncWithContext runs a synchronization cycle with the provided context
58 | func (s *Scheduler) RunSyncWithContext(ctx context.Context) error {
59 | // Create a timeout context, but make it respect the parent context cancellation
60 | syncCtx, cancel := context.WithTimeout(ctx, 30*time.Minute)
61 | defer cancel()
62 |
63 | return s.syncManager.SyncFiles(syncCtx, s.adapters)
64 | }
65 |
--------------------------------------------------------------------------------
/internal/health/health.go:
--------------------------------------------------------------------------------
1 | package health
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "net/http"
8 | "time"
9 | )
10 |
const (
	// Timeouts applied to the health-check HTTP server. Without them a
	// client that opens a connection and never finishes its request would
	// hold the connection (and its goroutine) indefinitely.
	healthReadHeaderTimeout = 5 * time.Second
	healthReadTimeout       = 10 * time.Second
	healthWriteTimeout      = 10 * time.Second
	healthIdleTimeout       = 60 * time.Second

	// healthVersion is the version string reported by every endpoint.
	healthVersion = "1.0.0"
)

// Server provides health check endpoints.
type Server struct {
	server *http.Server
}

// HealthResponse represents the JSON body returned by the health endpoints.
type HealthResponse struct {
	Status    string    `json:"status"`
	Timestamp time.Time `json:"timestamp"`
	Version   string    `json:"version"`
}

// NewServer creates a health check server that will listen on the given port
// on all interfaces. It serves "/health" and "/ready". The server is not
// started until Start is called.
func NewServer(port int) *Server {
	mux := http.NewServeMux()

	healthServer := &Server{
		server: &http.Server{
			Addr:              fmt.Sprintf(":%d", port),
			Handler:           mux,
			ReadHeaderTimeout: healthReadHeaderTimeout,
			ReadTimeout:       healthReadTimeout,
			WriteTimeout:      healthWriteTimeout,
			IdleTimeout:       healthIdleTimeout,
		},
	}

	// Register health check endpoints.
	mux.HandleFunc("/health", healthServer.healthHandler)
	mux.HandleFunc("/ready", healthServer.readyHandler)

	return healthServer
}

// Start serves requests and blocks until the server stops. The returned error
// is always non-nil; after a call to Stop it is http.ErrServerClosed.
func (s *Server) Start() error {
	return s.server.ListenAndServe()
}

// Stop gracefully shuts the server down, waiting for in-flight requests until
// ctx is done.
func (s *Server) Stop(ctx context.Context) error {
	return s.server.Shutdown(ctx)
}

// healthHandler handles liveness check requests.
func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
	writeStatus(w, "healthy")
}

// readyHandler handles readiness check requests.
func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
	writeStatus(w, "ready")
}

// writeStatus replies with a 200 JSON HealthResponse carrying the given
// status. The body is encoded before any header is written so that an
// encoding failure can still be reported as a 500.
func writeStatus(w http.ResponseWriter, status string) {
	payload, err := json.Marshal(HealthResponse{
		Status:    status,
		Timestamp: time.Now(),
		Version:   healthVersion,
	})
	if err != nil {
		http.Error(w, "failed to encode health response", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// Trailing newline keeps the body identical to json.Encoder output.
	// A write error means the client went away; there is nothing to recover.
	_, _ = w.Write(append(payload, '\n'))
}
78 |
--------------------------------------------------------------------------------
/k8s/configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: connector-config
5 | labels:
6 | app: openwebui-content-sync
7 | data:
8 | config.yaml: |
9 | log_level: info
10 | schedule:
11 | interval: 1h
12 | storage:
13 | path: /data
14 | openwebui:
15 | base_url: "http://open-webui:8080"
16 | github:
17 | enabled: true
18 | token: ""
19 | mappings:
20 | - repository: "owner/repo1"
21 | knowledge_id: "repo1-knowledge-base"
22 | - repository: "owner/repo2"
23 | knowledge_id: "repo2-knowledge-base"
24 | confluence:
25 | enabled: false
26 | base_url: "https://your-domain.atlassian.net"
27 | username: "your-email@example.com"
28 | api_key: ""
29 | space_mappings:
30 | - space_key: "SPACEKEY1"
31 | knowledge_id: "space1-knowledge-base"
32 | - space_key: "SPACEKEY2"
33 | knowledge_id: "space2-knowledge-base"
34 | parent_page_mappings:
35 | - parent_page_id: "1234567890"
36 | knowledge_id: "parent-page-knowledge-base"
37 | page_limit: 100
38 | include_attachments: true
39 | local_folders:
40 | enabled: false
41 | mappings:
42 | - folder_path: "/data/docs"
43 | knowledge_id: "docs-knowledge-base"
44 | - folder_path: "/data/guides"
45 | knowledge_id: "guides-knowledge-base"
46 | slack:
47 | enabled: false
48 | token: ""
49 | channel_mappings:
50 | - channel_id: "C1234567890"
51 | channel_name: "general"
52 | knowledge_id: "general-knowledge-base"
53 | - channel_id: "C0987654321"
54 | channel_name: "dev-team"
55 | knowledge_id: "dev-knowledge-base"
56 | regex_patterns:
57 | - pattern: "^sales-.*-internal$"
58 | knowledge_id: "sales-knowledge-base"
59 | auto_join: true
60 | - pattern: "^dev-.*"
61 | knowledge_id: "dev-knowledge-base"
62 | auto_join: true
63 | - pattern: "^support-.*"
64 | knowledge_id: "support-knowledge-base"
65 | auto_join: false
66 | days_to_fetch: 30
67 | maintain_history: false
68 | message_limit: 1000
69 | include_threads: true
70 | include_reactions: false
71 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: build test clean docker-build docker-build-multi docker-build-multi-local docker-build-amd64 docker-build-arm64 docker-build-local docker-push deploy undeploy setup-buildx show-platforms
2 |
3 | # Build the application
4 | build:
5 | go build -o connector .
6 |
7 | # Run tests
8 | test:
9 | go test ./...
10 |
11 | # Clean build artifacts
12 | clean:
13 | rm -f connector
14 |
15 | # Build Docker image (single platform)
16 | docker-build:
17 | docker build -t openwebui-content-sync:latest .
18 |
19 | # Build multi-architecture Docker image (builds manifest, no local load)
20 | docker-build-multi-local:
21 | docker buildx build --platform linux/amd64,linux/arm64 -t openwebui-content-sync:latest .
22 |
23 | # Build multi-architecture Docker image and push to registry
24 | docker-build-multi:
25 | docker buildx build --platform linux/amd64,linux/arm64 -t castaiphil/openwebui-content-sync:latest --push .
26 |
27 | # Build for specific platform and load locally (useful for testing)
28 | docker-build-amd64:
29 | docker buildx build --platform linux/amd64 -t openwebui-content-sync:amd64 --load .
30 |
31 | docker-build-arm64:
32 | docker buildx build --platform linux/arm64 -t openwebui-content-sync:arm64 --load .
33 |
34 | # Build for current platform and load locally (useful for testing)
35 | docker-build-local:
36 | docker buildx build --platform linux/amd64,linux/arm64 -t openwebui-content-sync:latest --load --builder desktop-linux .
37 |
38 | # Push Docker image (update registry as needed)
39 | docker-push:
40 | docker push openwebui-content-sync:latest
41 |
42 | # Deploy to Kubernetes
43 | deploy:
44 | kubectl apply -f k8s/
45 |
46 | # Undeploy from Kubernetes
47 | undeploy:
48 | kubectl delete -f k8s/
49 |
50 | # Run locally with config
51 | run:
52 | ./connector -config config.yaml
53 |
54 | # Run tests with coverage
55 | test-coverage:
56 | go test -coverprofile=coverage.out ./...
57 | go tool cover -html=coverage.out -o coverage.html
58 |
59 | # Format code
60 | fmt:
61 | go fmt ./...
62 |
63 | # Lint code
64 | lint:
65 | golangci-lint run
66 |
67 | # Install dependencies
68 | deps:
69 | go mod download
70 | go mod tidy
71 |
72 | # Setup Docker buildx for multi-arch builds
73 | setup-buildx:
74 | docker buildx create --name multiarch --driver docker-container --use
75 | docker buildx inspect --bootstrap
76 |
77 | # Show available platforms
78 | show-platforms:
79 | docker buildx inspect
80 |
--------------------------------------------------------------------------------
/k8s/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: openwebui-content-sync
5 | labels:
6 | app: openwebui-content-sync
7 | spec:
8 | replicas: 1
9 | selector:
10 | matchLabels:
11 | app: openwebui-content-sync
12 | template:
13 | metadata:
14 | labels:
15 | app: openwebui-content-sync
16 | spec:
17 | containers:
18 | - name: connector
19 | image: castaiphil/openwebui-content-sync:latest
20 | imagePullPolicy: Always
21 | env:
22 | - name: OPENWEBUI_API_KEY
23 | valueFrom:
24 | secretKeyRef:
25 | name: openwebui-secrets
26 | key: api-key
27 | - name: GITHUB_TOKEN
28 | valueFrom:
29 | secretKeyRef:
30 | name: github-secrets
31 | key: token
32 | - name: CONFLUENCE_API_KEY
33 | valueFrom:
34 | secretKeyRef:
35 | name: confluence-secrets
36 | key: api-key
37 | - name: SLACK_TOKEN
38 | valueFrom:
39 | secretKeyRef:
40 | name: slack-secrets
41 | key: token
42 | - name: STORAGE_PATH
43 | value: "/data"
44 | - name: LOG_LEVEL
45 | value: "info"
46 | volumeMounts:
47 | - name: data-storage
48 | mountPath: /data
49 | - name: config
50 | mountPath: /root/config.yaml
51 | subPath: config.yaml
52 | resources:
53 | requests:
54 | memory: "128Mi"
55 | cpu: "100m"
56 | limits:
57 | memory: "512Mi"
58 | cpu: "500m"
59 | livenessProbe:
60 | httpGet:
61 | path: /health
62 | port: 8080
63 | initialDelaySeconds: 30
64 | periodSeconds: 10
65 | timeoutSeconds: 5
66 | failureThreshold: 3
67 | readinessProbe:
68 | httpGet:
69 | path: /ready
70 | port: 8080
71 | initialDelaySeconds: 5
72 | periodSeconds: 5
73 | timeoutSeconds: 3
74 | failureThreshold: 3
75 | volumes:
76 | - name: data-storage
77 | persistentVolumeClaim:
78 | claimName: connector-data-pvc
79 | - name: config
80 | configMap:
81 | name: connector-config
82 | - name: confluence-secrets
83 | secret:
84 | secretName: confluence-secrets
85 | restartPolicy: Always
86 |
--------------------------------------------------------------------------------
/internal/adapter/jira_comment.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "net/http"
8 |
9 | "github.com/sirupsen/logrus"
10 | )
11 |
// CommentData is the trimmed-down view of a Jira comment that the adapter
// keeps after fetching it.
type CommentData struct {
	// RenderedBody is the comment text, serialized as "renderedBody".
	RenderedBody string `json:"renderedBody"`
	// AuthorName is the author's display name, serialized as "authorName".
	AuthorName string `json:"authorName"`
	// Created is the creation timestamp as a string, serialized as "created".
	Created string `json:"created"`
}
18 |
19 | // fetchComment fetches a single comment by URL and returns only the renderedBody and author displayName
20 | func (j *JiraAdapter) fetchComment(ctx context.Context, commentURL string) (*CommentData, error) {
21 | // Build URL for individual comment fetch
22 | url := commentURL
23 | url += "?expand=renderedBody&name&fields=summary,description,parent,issuetype,reporter,status"
24 |
25 | req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
26 | if err != nil {
27 | return nil, fmt.Errorf("failed to create request: %w", err)
28 | }
29 |
30 | // Set authentication
31 | req.SetBasicAuth(j.config.Username, j.config.APIKey)
32 | req.Header.Set("Accept", "application/json")
33 |
34 | logrus.Debugf("Jira comment API URL: %s", url)
35 |
36 | resp, err := j.client.Do(req)
37 | if err != nil {
38 | return nil, fmt.Errorf("failed to make request: %w", err)
39 | }
40 |
41 | if resp.StatusCode != http.StatusOK {
42 | resp.Body.Close()
43 | return nil, fmt.Errorf("API request failed with status %d: response body omitted", resp.StatusCode)
44 | }
45 |
46 | var comment struct {
47 | Self string `json:"self"`
48 | ID string `json:"id"`
49 | Author JiraUser `json:"author"`
50 | RenderedBody string `json:"renderedBody"`
51 | UpdateAuthor JiraUser `json:"updateAuthor"`
52 | Created string `json:"created"`
53 | Updated string `json:"updated"`
54 | JsdPublic bool `json:"jsdPublic"`
55 | }
56 |
57 | if err := json.NewDecoder(resp.Body).Decode(&comment); err != nil {
58 | resp.Body.Close()
59 | return nil, fmt.Errorf("failed to decode response: %w", err)
60 | }
61 | resp.Body.Close()
62 |
63 | return &CommentData{
64 | RenderedBody: comment.RenderedBody,
65 | AuthorName: comment.Author.DisplayName,
66 | }, nil
67 | }
68 |
69 | // fetchCommentsForIssue fetches all comments for a specific issue and returns only the renderedBody and author displayName
70 | func (j *JiraAdapter) fetchCommentsForIssue(ctx context.Context, issueID string) ([]CommentData, error) {
71 | var comments []CommentData
72 |
73 | // First fetch the issue to get the comments
74 | issue, err := j.fetchIssue(ctx, issueID)
75 | if err != nil {
76 | return nil, fmt.Errorf("failed to fetch issue %s: %w", issueID, err)
77 | }
78 |
79 | // Extract comments from the issue
80 | for _, comment := range issue.Fields.Comment.Comments {
81 | // Extract rendered body from the comment's body field
82 | fetchedComment, err2 := j.fetchComment(ctx, comment.Self)
83 | if err2 != nil {
84 | return comments, fmt.Errorf("failed to Fetch Comment %w", err)
85 | }
86 |
87 | renderedBody := j.HtmlToMarkdown(fetchedComment.RenderedBody)
88 | logrus.Debugf("FetchedComment: %s,renderedBody %s ", fetchedComment, renderedBody)
89 | comments = append(comments, CommentData{
90 | RenderedBody: j.HtmlToMarkdown(renderedBody),
91 | AuthorName: comment.Author.DisplayName,
92 | Created: comment.Created,
93 | })
94 | }
95 |
96 | return comments, nil
97 | }
98 |
--------------------------------------------------------------------------------
/internal/scheduler/scheduler_simple_test.go:
--------------------------------------------------------------------------------
1 | package scheduler
2 |
3 | import (
4 | "context"
5 | "sync"
6 | "testing"
7 | "time"
8 |
9 | "github.com/openwebui-content-sync/internal/adapter"
10 | "github.com/openwebui-content-sync/internal/mocks"
11 | )
12 |
13 | // MockSyncManager is a simple mock for testing
14 | type MockSyncManager struct{}
15 |
16 | func (m *MockSyncManager) SyncFiles(ctx context.Context, adapters []adapter.Adapter) error {
17 | return nil
18 | }
19 |
20 | func (m *MockSyncManager) SetKnowledgeID(knowledgeID string) {
21 | // Mock implementation
22 | }
23 |
24 | func (m *MockSyncManager) InitializeFileIndex(ctx context.Context, adapters []adapter.Adapter) error {
25 | // Mock implementation
26 | return nil
27 | }
28 |
29 | func TestNew(t *testing.T) {
30 | interval := 1 * time.Hour
31 | adapters := []adapter.Adapter{}
32 | syncManager := &MockSyncManager{}
33 |
34 | scheduler := New(interval, adapters, syncManager)
35 | if scheduler == nil {
36 | t.Fatal("Expected scheduler to be created")
37 | }
38 | if scheduler.interval != interval {
39 | t.Errorf("Expected interval %v, got %v", interval, scheduler.interval)
40 | }
41 | if len(scheduler.adapters) != len(adapters) {
42 | t.Errorf("Expected %d adapters, got %d", len(adapters), len(scheduler.adapters))
43 | }
44 | }
45 |
46 | func TestScheduler_RunSync(t *testing.T) {
47 | // Create mock sync manager
48 | syncManager := &MockSyncManager{}
49 |
50 | // Create mock adapters
51 | adapters := []adapter.Adapter{
52 | &mocks.MockAdapter{},
53 | &mocks.MockAdapter{},
54 | }
55 |
56 | scheduler := New(1*time.Hour, adapters, syncManager)
57 |
58 | // Test RunSyncWithContext
59 | ctx := context.Background()
60 | err := scheduler.RunSyncWithContext(ctx)
61 | if err != nil {
62 | t.Errorf("RunSyncWithContext failed: %v", err)
63 | }
64 | }
65 |
66 | func TestScheduler_Start(t *testing.T) {
67 | // Create mock sync manager
68 | syncManager := &MockSyncManager{}
69 |
70 | // Create mock adapters
71 | adapters := []adapter.Adapter{
72 | &mocks.MockAdapter{},
73 | }
74 |
75 | scheduler := New(100*time.Millisecond, adapters, syncManager)
76 |
77 | ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
78 | defer cancel()
79 |
80 | // Start scheduler in goroutine
81 | var wg sync.WaitGroup
82 | wg.Add(1)
83 | go func() {
84 | defer wg.Done()
85 | scheduler.Start(ctx)
86 | }()
87 |
88 | // Wait for context to be cancelled
89 | <-ctx.Done()
90 | wg.Wait()
91 | }
92 |
93 | func TestScheduler_Interval(t *testing.T) {
94 | interval := 2 * time.Hour
95 | scheduler := New(interval, []adapter.Adapter{}, &MockSyncManager{})
96 |
97 | if scheduler.interval != interval {
98 | t.Errorf("Expected interval %v, got %v", interval, scheduler.interval)
99 | }
100 | }
101 |
102 | func TestScheduler_Adapters(t *testing.T) {
103 | adapters := []adapter.Adapter{
104 | &mocks.MockAdapter{},
105 | &mocks.MockAdapter{},
106 | }
107 | scheduler := New(1*time.Hour, adapters, &MockSyncManager{})
108 |
109 | if len(scheduler.adapters) != len(adapters) {
110 | t.Errorf("Expected %d adapters, got %d", len(adapters), len(scheduler.adapters))
111 | }
112 |
113 | for i, expected := range adapters {
114 | if scheduler.adapters[i] != expected {
115 | t.Errorf("Expected adapter %d to be %v, got %v", i, expected, scheduler.adapters[i])
116 | }
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/internal/adapter/github_test.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "context"
5 | "testing"
6 | "time"
7 |
8 | "github.com/openwebui-content-sync/internal/config"
9 | )
10 |
11 | func TestGitHubAdapter_Name(t *testing.T) {
12 | adapter := &GitHubAdapter{}
13 | if adapter.Name() != "github" {
14 | t.Errorf("Expected name 'github', got '%s'", adapter.Name())
15 | }
16 | }
17 |
18 | func TestGitHubAdapter_GetSetLastSync(t *testing.T) {
19 | adapter := &GitHubAdapter{}
20 | now := time.Now()
21 |
22 | adapter.SetLastSync(now)
23 | if !adapter.GetLastSync().Equal(now) {
24 | t.Errorf("Expected last sync time %v, got %v", now, adapter.GetLastSync())
25 | }
26 | }
27 |
28 | func TestNewGitHubAdapter(t *testing.T) {
29 | tests := []struct {
30 | name string
31 | config config.GitHubConfig
32 | expectError bool
33 | }{
34 | {
35 | name: "valid config",
36 | config: config.GitHubConfig{
37 | Token: "test-token",
38 | Mappings: []config.RepositoryMapping{
39 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"},
40 | },
41 | },
42 | expectError: false,
43 | },
44 | {
45 | name: "missing token",
46 | config: config.GitHubConfig{
47 | Token: "",
48 | Mappings: []config.RepositoryMapping{
49 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"},
50 | },
51 | },
52 | expectError: true,
53 | },
54 | {
55 | name: "no mappings",
56 | config: config.GitHubConfig{
57 | Token: "test-token",
58 | Mappings: []config.RepositoryMapping{},
59 | },
60 | expectError: true,
61 | },
62 | {
63 | name: "invalid repository format",
64 | config: config.GitHubConfig{
65 | Token: "test-token",
66 | Mappings: []config.RepositoryMapping{
67 | {Repository: "invalid-repo", KnowledgeID: "knowledge-id"},
68 | },
69 | },
70 | expectError: false, // This will fail later during fetch
71 | },
72 | }
73 |
74 | for _, tt := range tests {
75 | t.Run(tt.name, func(t *testing.T) {
76 | adapter, err := NewGitHubAdapter(tt.config)
77 | if tt.expectError {
78 | if err == nil {
79 | t.Errorf("Expected error but got none")
80 | }
81 | return
82 | }
83 | if err != nil {
84 | t.Errorf("Unexpected error: %v", err)
85 | return
86 | }
87 | if adapter == nil {
88 | t.Errorf("Expected adapter but got nil")
89 | }
90 | })
91 | }
92 | }
93 |
94 | func TestIsTextFile(t *testing.T) {
95 | tests := []struct {
96 | filename string
97 | expected bool
98 | }{
99 | {"test.md", true},
100 | {"test.txt", true},
101 | {"test.go", true},
102 | {"test.py", true},
103 | {"test.js", true},
104 | {"test.ts", true},
105 | {"test.json", true},
106 | {"test.yaml", true},
107 | {"test.yml", true},
108 | {"test.xml", true},
109 | {"test.html", true},
110 | {"test.css", true},
111 | {"test.sh", true},
112 | {"test.dockerfile", true},
113 | {"test.gitignore", true},
114 | {"test.env", true},
115 | {"test.png", false},
116 | {"test.jpg", false},
117 | {"test.jpeg", false},
118 | {"test.gif", false},
119 | {"test.exe", false},
120 | {"test.dll", false},
121 | {"test.so", false},
122 | {"test.dylib", false},
123 | {"test", true}, // No extension should be considered text
124 | {"test.TXT", true}, // Case insensitive
125 | {"test.MD", true}, // Case insensitive
126 | }
127 |
128 | for _, test := range tests {
129 | t.Run(test.filename, func(t *testing.T) {
130 | result := isTextFile(test.filename)
131 | if result != test.expected {
132 | t.Errorf("isTextFile(%s) = %v, expected %v", test.filename, result, test.expected)
133 | }
134 | })
135 | }
136 | }
137 |
138 | func TestGitHubAdapter_FetchFiles(t *testing.T) {
139 | // This test would require mocking the GitHub API
140 | // For now, we'll test the error cases
141 | config := config.GitHubConfig{
142 | Token: "invalid-token",
143 | Mappings: []config.RepositoryMapping{
144 | {Repository: "nonexistent/owner", KnowledgeID: "knowledge-id"},
145 | },
146 | }
147 |
148 | adapter, err := NewGitHubAdapter(config)
149 | if err != nil {
150 | t.Fatalf("Failed to create adapter: %v", err)
151 | }
152 |
153 | ctx := context.Background()
154 | _, err = adapter.FetchFiles(ctx)
155 | if err == nil {
156 | t.Errorf("Expected error for invalid repository, got none")
157 | }
158 | }
159 |
160 | func TestFile_String(t *testing.T) {
161 | file := &File{
162 | Path: "test.md",
163 | Hash: "abc123",
164 | Size: 100,
165 | Source: "github",
166 | Modified: time.Now(),
167 | }
168 |
169 | // Test that File struct can be created and accessed
170 | if file.Path != "test.md" {
171 | t.Errorf("Expected path 'test.md', got '%s'", file.Path)
172 | }
173 | if file.Hash != "abc123" {
174 | t.Errorf("Expected hash 'abc123', got '%s'", file.Hash)
175 | }
176 | if file.Size != 100 {
177 | t.Errorf("Expected size 100, got %d", file.Size)
178 | }
179 | if file.Source != "github" {
180 | t.Errorf("Expected source 'github', got '%s'", file.Source)
181 | }
182 | }
183 |
--------------------------------------------------------------------------------
/internal/utils/retry.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
import (
	"context"
	"errors"
	"fmt"
	"math"
	"math/rand"
	"net"
	"strings"
	"time"

	"github.com/sirupsen/logrus"
)
14 |
// RetryConfig carries the tunables consumed by RetryWithBackoff.
type RetryConfig struct {
	MaxRetries int           // Number of retries allowed after the first attempt
	BaseDelay  time.Duration // Starting delay handed to the backoff calculation
	MaxDelay   time.Duration // Upper bound applied to every computed delay (before jitter)
	Multiplier float64       // Exponential backoff multiplier
}

// DefaultRetryConfig returns the stock settings: three retries, a one-second
// base delay, a one-minute delay ceiling and a backoff multiplier of 2.
func DefaultRetryConfig() RetryConfig {
	var defaults RetryConfig
	defaults.MaxRetries = 3
	defaults.BaseDelay = time.Second
	defaults.MaxDelay = time.Minute
	defaults.Multiplier = 2.0
	return defaults
}
32 |
// IsRetryableError reports whether err describes a transient condition that
// is worth retrying.
//
// The decision is made in three steps:
//  1. A net.Error anywhere in err's chain that reports Timeout or Temporary
//     is retryable.
//  2. Otherwise, if the lower-cased message contains a known permanent
//     marker (for example "invalid_auth"), the error is not retryable.
//  3. Otherwise, the error is retryable if the lower-cased message contains
//     a known transient marker (for example "connection refused" or "429").
//
// A nil error is never retryable.
func IsRetryableError(err error) bool {
	if err == nil {
		return false
	}

	// errors.As also finds net errors that callers wrapped with %w. A net
	// error that is neither a timeout nor temporary is NOT rejected here: it
	// falls through to the message checks so that e.g. a "connection refused"
	// dial error is still recognised.
	var netErr net.Error
	if errors.As(err, &netErr) {
		// Temporary is deprecated in the standard library but is kept so that
		// every error retried before this change is still retried.
		if netErr.Timeout() || netErr.Temporary() {
			return true
		}
	}

	// Lower-case once; all markers below are lower-case.
	msg := strings.ToLower(err.Error())

	// Permanent errors that should NOT be retried. They take precedence over
	// the transient markers.
	permanentErrors := []string{
		"is_archived",
		"not_in_channel",
		"channel_not_found",
		"cant_invite_self",
		"invalid_auth",
		"account_inactive",
		"token_revoked",
	}
	for _, marker := range permanentErrors {
		if strings.Contains(msg, marker) {
			return false
		}
	}

	// Message fragments that indicate a retryable condition.
	retryableErrors := []string{
		"timeout",
		"connection refused",
		"connection reset",
		"temporary failure",
		"rate limit",
		"too many requests",
		"service unavailable",
		"internal server error",
		"bad gateway",
		"gateway timeout",
		"network is unreachable",
		"slack rate limit",
		"rate_limited",
		"ratelimited",
		"429",
		"too_many_requests",
	}
	for _, marker := range retryableErrors {
		if strings.Contains(msg, marker) {
			return true
		}
	}

	return false
}
90 |
// GetRetryDelay returns how long to wait before the next retry, based on the
// kind of failure described by err.
//
//   - nil error: baseDelay is returned unchanged.
//   - rate-limit errors: 5s per attempt, capped at 5 minutes.
//   - timeout / connection errors: 2s per attempt, capped at 30 seconds.
//   - anything else: baseDelay * 2^attempt, capped at 2 minutes.
//
// For the two linear cases an attempt below 1 is treated as 1, so the first
// retry (RetryWithBackoff passes attempt 0 for it) never gets a zero delay.
func GetRetryDelay(err error, attempt int, baseDelay time.Duration) time.Duration {
	if err == nil {
		return baseDelay
	}

	msg := strings.ToLower(err.Error())

	// Rate limiting - use longer delays. The marker list mirrors the
	// rate-limit entries recognised by IsRetryableError.
	if hasAnyMarker(msg, "rate limit", "rate_limited", "ratelimited", "429",
		"too many requests", "too_many_requests") {
		return cappedLinearDelay(attempt, 5*time.Second, 5*time.Minute)
	}

	// Network errors - moderate delays.
	if hasAnyMarker(msg, "timeout", "connection") {
		return cappedLinearDelay(attempt, 2*time.Second, 30*time.Second)
	}

	// Default exponential backoff. The cap is applied while the value is
	// still a float so that a large attempt cannot overflow the conversion
	// to time.Duration and produce a negative delay.
	const maxBackoff = 2 * time.Minute
	scaled := float64(baseDelay) * math.Pow(2, float64(attempt))
	if scaled > float64(maxBackoff) {
		return maxBackoff
	}
	return time.Duration(scaled)
}

// hasAnyMarker reports whether msg contains at least one of the markers.
func hasAnyMarker(msg string, markers ...string) bool {
	for _, marker := range markers {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}

// cappedLinearDelay returns attempt*unit limited to limit, treating attempts
// below 1 as 1 and avoiding integer overflow for very large attempts.
func cappedLinearDelay(attempt int, unit, limit time.Duration) time.Duration {
	if attempt < 1 {
		attempt = 1
	}
	if int64(attempt) >= int64(limit/unit) {
		return limit
	}
	return time.Duration(attempt) * unit
}
126 |
127 | // RetryWithBackoff executes a function with exponential backoff retry logic
128 | func RetryWithBackoff(ctx context.Context, config RetryConfig, operation func() error) error {
129 | var lastErr error
130 |
131 | for attempt := 0; attempt <= config.MaxRetries; attempt++ {
132 | if attempt > 0 {
133 | // Calculate delay based on error type and attempt number
134 | delay := GetRetryDelay(lastErr, attempt-1, config.BaseDelay)
135 | if delay > config.MaxDelay {
136 | delay = config.MaxDelay
137 | }
138 |
139 | // Add jitter to prevent thundering herd
140 | jitter := time.Duration(rand.Float64() * float64(delay) * 0.1)
141 | delay += jitter
142 |
143 | logrus.Debugf("Retry attempt %d/%d after %v (last error: %v)",
144 | attempt+1, config.MaxRetries+1, delay, lastErr)
145 |
146 | select {
147 | case <-ctx.Done():
148 | return ctx.Err()
149 | case <-time.After(delay):
150 | }
151 | }
152 |
153 | err := operation()
154 | if err == nil {
155 | if attempt > 0 {
156 | logrus.Debugf("Operation succeeded on attempt %d", attempt+1)
157 | }
158 | return nil
159 | }
160 |
161 | lastErr = err
162 |
163 | // Check if error is retryable
164 | if !IsRetryableError(err) {
165 | logrus.Debugf("Error is not retryable: %v", err)
166 | return err
167 | }
168 |
169 | if attempt == config.MaxRetries {
170 | logrus.Warnf("Max retries (%d) exceeded, giving up. Last error: %v", config.MaxRetries, err)
171 | break
172 | }
173 |
174 | logrus.Debugf("Attempt %d failed with retryable error: %v", attempt+1, err)
175 | }
176 |
177 | return fmt.Errorf("operation failed after %d retries: %w", config.MaxRetries+1, lastErr)
178 | }
179 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | // OpenWebUI Content Sync
2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors
3 | //
4 | // This program is free software: you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation, either version 3 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | package main
18 |
19 | import (
20 | "context"
21 | "flag"
22 | "os"
23 | "os/signal"
24 | "syscall"
25 | "time"
26 |
27 | "github.com/openwebui-content-sync/internal/adapter"
28 | "github.com/openwebui-content-sync/internal/config"
29 | "github.com/openwebui-content-sync/internal/health"
30 | "github.com/openwebui-content-sync/internal/scheduler"
31 | "github.com/openwebui-content-sync/internal/sync"
32 | "github.com/sirupsen/logrus"
33 | )
34 |
35 | func main() {
36 | var configPath = flag.String("config", "config.yaml", "Path to configuration file")
37 | flag.Parse()
38 |
39 | // Load configuration
40 | cfg, err := config.Load(*configPath)
41 | if err != nil {
42 | logrus.Fatalf("Failed to load configuration: %v", err)
43 | }
44 |
45 | // Set log level
46 | level, err := logrus.ParseLevel(cfg.LogLevel)
47 | if err != nil {
48 | logrus.Fatalf("Invalid log level: %v", err)
49 | }
50 | logrus.SetLevel(level)
51 |
52 | logrus.Info("Starting OpenWebUI Content Sync")
53 |
54 | // Initialize adapters
55 | adapters := make([]adapter.Adapter, 0)
56 |
57 | // Add GitHub adapter if configured
58 | if cfg.GitHub.Enabled {
59 | githubAdapter, err := adapter.NewGitHubAdapter(cfg.GitHub)
60 | if err != nil {
61 | logrus.Fatalf("Failed to create GitHub adapter: %v", err)
62 | }
63 | adapters = append(adapters, githubAdapter)
64 | }
65 |
66 | // Add Confluence adapter if configured
67 | if cfg.Confluence.Enabled {
68 | confluenceAdapter, err := adapter.NewConfluenceAdapter(cfg.Confluence)
69 | if err != nil {
70 | logrus.Fatalf("Failed to create Confluence adapter: %v", err)
71 | }
72 | adapters = append(adapters, confluenceAdapter)
73 | }
74 |
75 | // Add Local Folders adapter if configured
76 | if cfg.LocalFolders.Enabled {
77 | localAdapter, err := adapter.NewLocalFolderAdapter(cfg.LocalFolders)
78 | if err != nil {
79 | logrus.Fatalf("Failed to create Local Folders adapter: %v", err)
80 | }
81 | adapters = append(adapters, localAdapter)
82 | }
83 |
84 | // Add Slack adapter if configured
85 | if cfg.Slack.Enabled {
86 | slackAdapter, err := adapter.NewSlackAdapter(cfg.Slack, cfg.Storage.Path)
87 | if err != nil {
88 | logrus.Fatalf("Failed to create Slack adapter: %v", err)
89 | }
90 | adapters = append(adapters, slackAdapter)
91 | }
92 | // Add Jira adapter if configured
93 | if cfg.Jira.Enabled {
94 | jiraAdapter, err := adapter.NewJiraAdapter(cfg.Jira)
95 | if err != nil {
96 | logrus.Fatalf("Failed to create Jira adapter: %v", err)
97 | }
98 | adapters = append(adapters, jiraAdapter)
99 | }
100 |
101 | // Initialize sync manager
102 | syncManager, err := sync.NewManager(cfg.OpenWebUI, cfg.Storage)
103 | if err != nil {
104 | logrus.Fatalf("Failed to create sync manager: %v", err)
105 | }
106 |
107 | // Note: With the mapping system, individual files will have their own knowledge IDs
108 | logrus.Infof("Using mapping-based knowledge ID assignment - files will use their individual knowledge IDs from mappings")
109 |
110 | // Initialize scheduler
111 | sched := scheduler.New(cfg.Schedule.Interval, adapters, syncManager)
112 |
113 | // Start health check server
114 | healthServer := health.NewServer(8080)
115 | go func() {
116 | if err := healthServer.Start(); err != nil {
117 | logrus.Errorf("Health server error: %v", err)
118 | }
119 | }()
120 |
121 | // Create context for graceful shutdown
122 | ctx, cancel := context.WithCancel(context.Background())
123 | defer cancel()
124 |
125 | // Start scheduler
126 | go sched.Start(ctx)
127 |
128 | // Wait for interrupt signal
129 | sigChan := make(chan os.Signal, 1)
130 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
131 |
132 | // Initialize file index from OpenWebUI
133 | logrus.Info("Initializing file index from OpenWebUI...")
134 | if err := syncManager.InitializeFileIndex(ctx, adapters); err != nil {
135 | logrus.Errorf("Failed to initialize file index: %v", err)
136 | // Continue even if initialization fails
137 | }
138 |
139 | // Run initial sync
140 | logrus.Info("Running initial sync...")
141 | if err := sched.RunSyncWithContext(ctx); err != nil {
142 | logrus.Errorf("Initial sync failed: %v", err)
143 | }
144 |
145 | // Wait for shutdown signal
146 | <-sigChan
147 | logrus.Info("Shutting down gracefully... (press CTRL+C again to force)")
148 | cancel()
149 |
150 | // Create a channel for forced shutdown
151 | forceChan := make(chan os.Signal, 1)
152 | signal.Notify(forceChan, syscall.SIGINT, syscall.SIGTERM)
153 |
154 | // Stop health server with timeout
155 | healthCtx, healthCancel := context.WithTimeout(context.Background(), 5*time.Second)
156 | defer healthCancel()
157 |
158 | // Run shutdown in a goroutine so we can detect double CTRL+C
159 | shutdownDone := make(chan bool, 1)
160 | go func() {
161 | healthServer.Stop(healthCtx)
162 | // Give some time for graceful shutdown
163 | time.Sleep(5 * time.Second)
164 | shutdownDone <- true
165 | }()
166 |
167 | // Wait for either shutdown completion or forced termination
168 | select {
169 | case <-shutdownDone:
170 | logrus.Info("Graceful shutdown completed")
171 | case <-forceChan:
172 | logrus.Warn("Force shutdown requested, exiting immediately")
173 | os.Exit(1)
174 | }
175 | }
176 |
--------------------------------------------------------------------------------
/internal/adapter/confluence_test.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "testing"
5 | "time"
6 |
7 | "github.com/openwebui-content-sync/internal/config"
8 | )
9 |
10 | func TestNewConfluenceAdapter(t *testing.T) {
11 | tests := []struct {
12 | name string
13 | config config.ConfluenceConfig
14 | wantErr bool
15 | }{
16 | {
17 | name: "valid config",
18 | config: config.ConfluenceConfig{
19 | BaseURL: "https://test.atlassian.net",
20 | Username: "test@example.com",
21 | APIKey: "test-key",
22 | SpaceMappings: []config.SpaceMapping{
23 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
24 | },
25 | },
26 | wantErr: false,
27 | },
28 | {
29 | name: "missing base URL",
30 | config: config.ConfluenceConfig{
31 | Username: "test@example.com",
32 | APIKey: "test-key",
33 | SpaceMappings: []config.SpaceMapping{
34 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
35 | },
36 | },
37 | wantErr: true,
38 | },
39 | {
40 | name: "missing username",
41 | config: config.ConfluenceConfig{
42 | BaseURL: "https://test.atlassian.net",
43 | APIKey: "test-key",
44 | SpaceMappings: []config.SpaceMapping{
45 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
46 | },
47 | },
48 | wantErr: true,
49 | },
50 | {
51 | name: "missing API key",
52 | config: config.ConfluenceConfig{
53 | BaseURL: "https://test.atlassian.net",
54 | Username: "test@example.com",
55 | SpaceMappings: []config.SpaceMapping{
56 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
57 | },
58 | },
59 | wantErr: true,
60 | },
61 | {
62 | name: "missing mappings",
63 | config: config.ConfluenceConfig{
64 | BaseURL: "https://test.atlassian.net",
65 | Username: "test@example.com",
66 | APIKey: "test-key",
67 | SpaceMappings: []config.SpaceMapping{},
68 | },
69 | wantErr: true,
70 | },
71 | }
72 |
73 | for _, tt := range tests {
74 | t.Run(tt.name, func(t *testing.T) {
75 | adapter, err := NewConfluenceAdapter(tt.config)
76 | if (err != nil) != tt.wantErr {
77 | t.Errorf("NewConfluenceAdapter() error = %v, wantErr %v", err, tt.wantErr)
78 | return
79 | }
80 | if !tt.wantErr && adapter == nil {
81 | t.Error("NewConfluenceAdapter() returned nil adapter when no error expected")
82 | }
83 | })
84 | }
85 | }
86 |
87 | func TestConfluenceAdapter_Name(t *testing.T) {
88 | config := config.ConfluenceConfig{
89 | BaseURL: "https://test.atlassian.net",
90 | Username: "test@example.com",
91 | APIKey: "test-key",
92 | SpaceMappings: []config.SpaceMapping{
93 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
94 | },
95 | }
96 |
97 | adapter, err := NewConfluenceAdapter(config)
98 | if err != nil {
99 | t.Fatalf("NewConfluenceAdapter() error = %v", err)
100 | }
101 |
102 | if adapter.Name() != "confluence" {
103 | t.Errorf("Name() = %v, want %v", adapter.Name(), "confluence")
104 | }
105 | }
106 |
107 | func TestConfluenceAdapter_GetSetLastSync(t *testing.T) {
108 | config := config.ConfluenceConfig{
109 | BaseURL: "https://test.atlassian.net",
110 | Username: "test@example.com",
111 | APIKey: "test-key",
112 | SpaceMappings: []config.SpaceMapping{
113 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"},
114 | },
115 | }
116 |
117 | adapter, err := NewConfluenceAdapter(config)
118 | if err != nil {
119 | t.Fatalf("NewConfluenceAdapter() error = %v", err)
120 | }
121 |
122 | // Test initial last sync
123 | initialSync := adapter.GetLastSync()
124 | if initialSync.IsZero() {
125 | t.Error("GetLastSync() returned zero time")
126 | }
127 |
128 | // Test setting last sync
129 | newTime := time.Now()
130 | adapter.SetLastSync(newTime)
131 | if !adapter.GetLastSync().Equal(newTime) {
132 | t.Errorf("SetLastSync() did not update last sync time")
133 | }
134 | }
135 |
136 | func TestSanitizeFilename(t *testing.T) {
137 | adapter := &ConfluenceAdapter{}
138 |
139 | tests := []struct {
140 | input string
141 | expected string
142 | }{
143 | {"normal-file.txt", "normal-file.txt"},
144 | {"file/with/slashes.txt", "file_with_slashes.txt"},
145 | {"file:with:colons.txt", "file_with_colons.txt"},
146 | {"file*with*asterisks.txt", "file_with_asterisks.txt"},
147 | {"file?with?questions.txt", "file_with_questions.txt"},
148 | {"file\"with\"quotes.txt", "file_with_quotes.txt"},
149 | {"filebrackets.txt", "file_with_brackets.txt"},
150 | {"file|with|pipes.txt", "file_with_pipes.txt"},
151 | {"very-long-filename-that-should-be-truncated-because-it-exceeds-the-maximum-length-limit-of-one-hundred-characters.txt", "very-long-filename-that-should-be-truncated-because-it-exceeds-the-maximum-length-limit-of-one-hundr"},
152 | }
153 |
154 | for _, tt := range tests {
155 | t.Run(tt.input, func(t *testing.T) {
156 | result := adapter.SanitizeFilename(tt.input)
157 | if result != tt.expected {
158 | t.Errorf("SanitizeFilename(%q) = %q, want %q", tt.input, result, tt.expected)
159 | }
160 | })
161 | }
162 | }
163 |
164 | func TestHtmlToText(t *testing.T) {
165 | adapter := &ConfluenceAdapter{}
166 |
167 | tests := []struct {
168 | input string
169 | expected string
170 | }{
171 | {"Hello world
", "Hello world"},
172 | {"Line 1
Line 2
", "Line 1\nLine 2"},
173 | {"Paragraph 1
Paragraph 2
", "Paragraph 1\n\nParagraph 2"},
174 | {"Title
Content
", "Title\nContent"},
175 | {"Bold text", "Bold text"},
176 | {"Italic text", "Italic text"},
177 | {"", ""},
178 | {"Plain text without HTML", "Plain text without HTML"},
179 | }
180 |
181 | for _, tt := range tests {
182 | t.Run(tt.input, func(t *testing.T) {
183 | result := adapter.HtmlToText(tt.input)
184 | if result != tt.expected {
185 | t.Errorf("HtmlToText(%q) = %q, want %q", tt.input, result, tt.expected)
186 | }
187 | })
188 | }
189 | }
190 |
191 | // Note: FetchFiles test would require mocking HTTP requests
192 | // This would be more complex and would typically use a library like httptest
193 | // or a mocking framework like gomock
194 |
--------------------------------------------------------------------------------
/adapter_readme/GITHUB_ADAPTER.md:
--------------------------------------------------------------------------------
1 | # GitHub Adapter
2 |
3 | The GitHub adapter allows you to sync content from GitHub repositories into OpenWebUI knowledge bases. It supports multiple repositories and can map each repository to a different knowledge base.
4 |
5 | ## Features
6 |
7 | - **Multi-repository support**: Sync from multiple GitHub repositories
8 | - **Knowledge base mapping**: Map each repository to a specific OpenWebUI knowledge base
9 | - **Incremental sync**: Only fetches files that have changed since the last sync
10 | - **File filtering**: Automatically filters out binary files and common non-content files
11 | - **Authentication**: Uses GitHub personal access tokens for secure API access
12 |
13 | ## Configuration
14 |
15 | ### Environment Variables
16 |
17 | Set the following environment variable:
18 |
19 | ```bash
20 | export GITHUB_TOKEN="your-github-personal-access-token"
21 | ```
22 |
23 | ### Configuration File
24 |
25 | Add the following section to your `config.yaml`:
26 |
27 | ```yaml
28 | github:
29 | enabled: true
30 | token: "" # Set via GITHUB_TOKEN environment variable
31 | mappings:
32 | - repository: "owner/repo-name" # GitHub repository in format "owner/repo"
33 | knowledge_id: "repo-knowledge-base"
34 | - repository: "another-owner/another-repo"
35 | knowledge_id: "another-knowledge-base"
36 | ```
37 |
38 | ### Configuration Options
39 |
40 | | Option | Type | Required | Default | Description |
41 | |--------|------|----------|---------|-------------|
42 | | `enabled` | boolean | Yes | `false` | Enable/disable the GitHub adapter |
43 | | `token` | string | Yes | - | GitHub personal access token (set via `GITHUB_TOKEN` env var) |
44 | | `mappings` | array | Yes | `[]` | List of repository mappings |
45 |
46 | ### Repository Mapping
47 |
48 | Each mapping in the `mappings` array should contain:
49 |
50 | | Field | Type | Required | Description |
51 | |-------|------|----------|-------------|
52 | | `repository` | string | Yes | GitHub repository in format "owner/repo" |
53 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID |
54 |
55 | ## GitHub Token Setup
56 |
57 | 1. Go to GitHub Settings → Developer settings → Personal access tokens → Tokens (classic)
58 | 2. Click "Generate new token (classic)"
59 | 3. Give it a descriptive name (e.g., "OpenWebUI Content Sync")
60 | 4. Select the following scopes:
61 | - `repo` (Full control of private repositories)
62 | - `public_repo` (Access public repositories)
63 | 5. Click "Generate token"
64 | 6. Copy the token and set it as the `GITHUB_TOKEN` environment variable
65 |
66 | ## File Processing
67 |
68 | The GitHub adapter processes files as follows:
69 |
70 | ### Supported File Types
71 |
72 | - **Markdown files** (`.md`, `.markdown`)
73 | - **Text files** (`.txt`, `.text`)
74 | - **Documentation files** (`.rst`, `.adoc`)
75 | - **Code files** (`.py`, `.js`, `.ts`, `.go`, `.java`, `.cpp`, `.c`, `.h`, `.hpp`)
76 | - **Configuration files** (`.yaml`, `.yml`, `.json`, `.toml`, `.ini`, `.cfg`)
77 | - **Shell scripts** (`.sh`, `.bash`, `.zsh`)
78 |
79 | ### Excluded Files
80 |
81 | The adapter automatically excludes:
82 | - Binary files (images, videos, executables, etc.)
83 | - Common non-content files (`.gitignore`, `.gitattributes`, etc.)
84 | - Large files (> 1MB)
85 | - Files in common exclusion directories (`.git/`, `node_modules/`, `vendor/`, etc.)
86 |
87 | ### File Path Structure
88 |
89 | Files are stored with paths that include the repository name:
90 | ```
91 | github/owner-repo-name/path/to/file.md
92 | ```
93 |
94 | ## Sync Behavior
95 |
96 | - **Initial sync**: Fetches all files from configured repositories
97 | - **Incremental sync**: Only fetches files modified since the last successful sync
98 | - **Error handling**: If a repository fails to sync, other repositories continue processing
99 | - **Rate limiting**: Respects GitHub API rate limits with automatic backoff
100 |
101 | ## Troubleshooting
102 |
103 | ### Common Issues
104 |
105 | 1. **Authentication errors**
106 | - Verify your GitHub token is valid and has the correct permissions
107 | - Check that the token hasn't expired
108 |
109 | 2. **Repository not found**
110 | - Verify the repository name format is correct: "owner/repo"
111 | - Ensure the token has access to the repository (for private repos)
112 |
113 | 3. **Rate limit exceeded**
114 | - The adapter automatically handles rate limits with exponential backoff
115 | - Consider reducing the sync frequency if this occurs frequently
116 |
117 | 4. **Empty knowledge base**
118 | - Check that the repository contains supported file types
119 | - Verify the repository has content in the root directory or subdirectories
120 |
121 | ### Debug Logging
122 |
123 | Enable debug logging to see detailed sync information:
124 |
125 | ```yaml
126 | log_level: debug
127 | ```
128 |
129 | This will show:
130 | - Which repositories are being processed
131 | - File discovery and filtering details
132 | - API request/response information
133 | - Sync progress and timing
134 |
135 | ## Example Configuration
136 |
137 | ```yaml
138 | # Complete example configuration
139 | log_level: info
140 | schedule:
141 | interval: 1h
142 |
143 | storage:
144 | path: "/data"
145 |
146 | openwebui:
147 | base_url: "http://localhost:8080"
148 | api_key: "your-openwebui-api-key"
149 |
150 | github:
151 | enabled: true
152 | token: "" # Set via GITHUB_TOKEN environment variable
153 | mappings:
154 | - repository: "microsoft/vscode"
155 | knowledge_id: "vscode-docs"
156 | - repository: "kubernetes/kubernetes"
157 | knowledge_id: "k8s-docs"
158 | - repository: "your-org/private-repo"
159 | knowledge_id: "private-docs"
160 | ```
161 |
162 | ## Security Considerations
163 |
164 | - **Token security**: Store your GitHub token securely and never commit it to version control
165 | - **Repository access**: Only grant access to repositories that contain appropriate content
166 | - **Content filtering**: Review the content being synced to ensure it's appropriate for your knowledge base
167 | - **Rate limits**: Be mindful of GitHub API rate limits, especially with large repositories
168 |
169 | ## Performance Tips
170 |
171 | - **Repository size**: Large repositories with many files may take longer to sync
172 | - **Sync frequency**: Balance sync frequency with API rate limits
173 | - **File filtering**: The adapter automatically filters out unnecessary files to improve performance
174 | - **Incremental sync**: Only changed files are processed after the initial sync
175 |
--------------------------------------------------------------------------------
/internal/health/health_test.go:
--------------------------------------------------------------------------------
1 | package health
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "net/http"
8 | "net/http/httptest"
9 | "testing"
10 | "time"
11 | )
12 |
13 | func TestNewServer(t *testing.T) {
14 | server := NewServer(8080)
15 | if server == nil {
16 | t.Fatal("Expected server to be created")
17 | }
18 | if server.server == nil {
19 | t.Fatal("Expected HTTP server to be created")
20 | }
21 | if server.server.Addr != ":8080" {
22 | t.Errorf("Expected server address ':8080', got '%s'", server.server.Addr)
23 | }
24 | }
25 |
26 | func TestServer_healthHandler(t *testing.T) {
27 | server := NewServer(8080)
28 |
29 | req := httptest.NewRequest("GET", "/health", nil)
30 | w := httptest.NewRecorder()
31 |
32 | server.healthHandler(w, req)
33 |
34 | if w.Code != http.StatusOK {
35 | t.Errorf("Expected status code %d, got %d", http.StatusOK, w.Code)
36 | }
37 |
38 | var response HealthResponse
39 | err := json.NewDecoder(w.Body).Decode(&response)
40 | if err != nil {
41 | t.Fatalf("Failed to decode response: %v", err)
42 | }
43 |
44 | if response.Status != "healthy" {
45 | t.Errorf("Expected status 'healthy', got '%s'", response.Status)
46 | }
47 | if response.Version != "1.0.0" {
48 | t.Errorf("Expected version '1.0.0', got '%s'", response.Version)
49 | }
50 | if response.Timestamp.IsZero() {
51 | t.Error("Expected timestamp to be set")
52 | }
53 | }
54 |
55 | func TestServer_readyHandler(t *testing.T) {
56 | server := NewServer(8080)
57 |
58 | req := httptest.NewRequest("GET", "/ready", nil)
59 | w := httptest.NewRecorder()
60 |
61 | server.readyHandler(w, req)
62 |
63 | if w.Code != http.StatusOK {
64 | t.Errorf("Expected status code %d, got %d", http.StatusOK, w.Code)
65 | }
66 |
67 | var response HealthResponse
68 | err := json.NewDecoder(w.Body).Decode(&response)
69 | if err != nil {
70 | t.Fatalf("Failed to decode response: %v", err)
71 | }
72 |
73 | if response.Status != "ready" {
74 | t.Errorf("Expected status 'ready', got '%s'", response.Status)
75 | }
76 | if response.Version != "1.0.0" {
77 | t.Errorf("Expected version '1.0.0', got '%s'", response.Version)
78 | }
79 | if response.Timestamp.IsZero() {
80 | t.Error("Expected timestamp to be set")
81 | }
82 | }
83 |
84 | func TestServer_Start(t *testing.T) {
85 | server := NewServer(8080) // Use port 0 for random port
86 |
87 | // Start server in goroutine
88 | go func() {
89 | err := server.Start()
90 | if err != nil && err != http.ErrServerClosed {
91 | t.Errorf("Server start error: %v", err)
92 | }
93 | }()
94 |
95 | // Give server time to start
96 | time.Sleep(10 * time.Millisecond)
97 |
98 | // Test health endpoint
99 | resp, err := http.Get("http://" + server.server.Addr + "/health")
100 | if err != nil {
101 | t.Fatalf("Failed to make health request: %v", err)
102 | }
103 | defer resp.Body.Close()
104 |
105 | if resp.StatusCode != http.StatusOK {
106 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode)
107 | }
108 |
109 | // Test ready endpoint
110 | resp, err = http.Get("http://" + server.server.Addr + "/ready")
111 | if err != nil {
112 | t.Fatalf("Failed to make ready request: %v", err)
113 | }
114 | defer resp.Body.Close()
115 |
116 | if resp.StatusCode != http.StatusOK {
117 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode)
118 | }
119 |
120 | // Stop server
121 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
122 | defer cancel()
123 | server.Stop(ctx)
124 | }
125 |
126 | func TestServer_Stop(t *testing.T) {
127 | server := NewServer(0)
128 |
129 | // Start server in goroutine
130 | go func() {
131 | server.Start()
132 | }()
133 |
134 | // Give server time to start
135 | time.Sleep(10 * time.Millisecond)
136 |
137 | // Stop server
138 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
139 | defer cancel()
140 |
141 | err := server.Stop(ctx)
142 | if err != nil {
143 | t.Errorf("Failed to stop server: %v", err)
144 | }
145 | }
146 |
147 | func TestHealthResponse_JSON(t *testing.T) {
148 | response := HealthResponse{
149 | Status: "healthy",
150 | Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
151 | Version: "1.0.0",
152 | }
153 |
154 | // Test JSON marshaling
155 | data, err := json.Marshal(response)
156 | if err != nil {
157 | t.Fatalf("Failed to marshal response: %v", err)
158 | }
159 |
160 | // Test JSON unmarshaling
161 | var unmarshaled HealthResponse
162 | err = json.Unmarshal(data, &unmarshaled)
163 | if err != nil {
164 | t.Fatalf("Failed to unmarshal response: %v", err)
165 | }
166 |
167 | if unmarshaled.Status != response.Status {
168 | t.Errorf("Expected status %s, got %s", response.Status, unmarshaled.Status)
169 | }
170 | if unmarshaled.Version != response.Version {
171 | t.Errorf("Expected version %s, got %s", response.Version, unmarshaled.Version)
172 | }
173 | if !unmarshaled.Timestamp.Equal(response.Timestamp) {
174 | t.Errorf("Expected timestamp %v, got %v", response.Timestamp, unmarshaled.Timestamp)
175 | }
176 | }
177 |
178 | func TestServer_DifferentPorts(t *testing.T) {
179 | ports := []int{8080, 8081, 9000, 0}
180 |
181 | for _, port := range ports {
182 | server := NewServer(port)
183 | if server == nil {
184 | t.Fatalf("Failed to create server on port %d", port)
185 | }
186 |
187 | expectedAddr := ":" + fmt.Sprintf("%d", port)
188 | if port != 0 && server.server.Addr != expectedAddr {
189 | t.Errorf("Expected address %s, got %s", expectedAddr, server.server.Addr)
190 | }
191 | }
192 | }
193 |
194 | func TestServer_ConcurrentRequests(t *testing.T) {
195 | server := NewServer(8080)
196 |
197 | // Start server
198 | go func() {
199 | server.Start()
200 | }()
201 | defer func() {
202 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
203 | defer cancel()
204 | server.Stop(ctx)
205 | }()
206 |
207 | // Give server time to start
208 | time.Sleep(10 * time.Millisecond)
209 |
210 | // Make concurrent requests
211 | done := make(chan bool, 10)
212 | for i := 0; i < 10; i++ {
213 | go func() {
214 | defer func() { done <- true }()
215 |
216 | resp, err := http.Get("http://" + server.server.Addr + "/health")
217 | if err != nil {
218 | t.Errorf("Failed to make health request: %v", err)
219 | return
220 | }
221 | defer resp.Body.Close()
222 |
223 | if resp.StatusCode != http.StatusOK {
224 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode)
225 | }
226 | }()
227 | }
228 |
229 | // Wait for all requests to complete
230 | for i := 0; i < 10; i++ {
231 | <-done
232 | }
233 | }
234 |
--------------------------------------------------------------------------------
/adapter_readme/JIRA_ADAPTER.md:
--------------------------------------------------------------------------------
1 | # Jira Adapter
2 |
3 | The Jira adapter allows you to sync content from Atlassian Jira projects into OpenWebUI knowledge bases. This adapter uses the Jira REST API to fetch issues and comments from specified Jira projects and uploads them to OpenWebUI.
4 |
5 | ## API Compatibility
6 |
7 | This adapter uses Jira REST API v3, which provides:
8 | - Modern cursor-based pagination
9 | - Improved performance and reliability
10 | - Better support for large projects
11 | - Enhanced metadata and content structure
12 |
13 | ## Features
14 |
15 | - **Issue Content Sync**: Fetches all issues from specified Jira projects using Jira API v3
16 | - **HTML to Markdown Conversion**: Converts Jira's HTML content to markdown format
17 | - **Comment Support**: Downloads and syncs issue comments
18 | - **Multi-Project Support**: Can sync from multiple Jira projects
19 | - **Cursor-based Pagination**: Uses modern cursor-based pagination for efficient data retrieval
20 |
21 | ## Configuration
22 |
23 | ### YAML Configuration
24 |
25 | Add the following to your `config.yaml`:
26 |
27 | ```yaml
28 | jira:
29 | enabled: true
30 | base_url: "https://your-domain.atlassian.net"
31 | username: "your-email@example.com"
32 | api_key: "your-jira-api-key"
33 | project_mappings:
34 | - project_key: "PROJ"
35 | knowledge_id: "your-knowledge-base-id"
36 | - project_key: "ANOTHER"
37 | knowledge_id: "another-knowledge-base-id"
38 | page_limit: 100
39 | ```
40 |
41 | ### Environment Variables
42 |
43 | Only the API key can be configured via an environment variable (for security):
44 |
45 | ```bash
46 | JIRA_API_KEY="your-jira-api-key"
47 | ```
48 |
49 | All other configuration should be done in the `config.yaml` file.
50 |
51 | ### Kubernetes Configuration
52 |
53 | #### ConfigMap
54 |
55 | ```yaml
56 | apiVersion: v1
57 | kind: ConfigMap
58 | metadata:
59 | name: connector-config
60 | data:
61 | config.yaml: |
62 | jira:
63 | enabled: true
64 | base_url: "https://your-domain.atlassian.net"
65 | username: "your-email@example.com"
66 | project_mappings:
67 | - project_key: "PROJ"
68 | knowledge_id: "your-knowledge-base-id"
69 | - project_key: "ANOTHER"
70 | knowledge_id: "another-knowledge-base-id"
71 | page_limit: 100
72 | ```
73 |
74 | ## Authentication
75 |
76 | The Jira adapter uses Basic Authentication with your Jira username and API key. To get an API key:
77 |
78 | 1. Go to [Atlassian Account Settings](https://id.atlassian.com/manage-profile/security/api-tokens)
79 | 2. Click "Create API token"
80 | 3. Give it a label and copy the generated token
81 | 4. Use your email address as the username and the token as the API key
82 |
83 | ## Configuration Parameters
84 |
85 | | Parameter | Type | Required | Default | Description |
86 | |-----------|------|----------|---------|-------------|
87 | | `enabled` | boolean | No | `false` | Enable the Jira adapter |
88 | | `base_url` | string | Yes | - | Your Jira instance URL (e.g., `https://your-domain.atlassian.net`) |
89 | | `username` | string | Yes | - | Your Jira username (usually your email) |
90 | | `api_key` | string | Yes | - | Your Jira API key |
91 | | `project_mappings` | array | Yes | - | List of Jira project keys and their corresponding OpenWebUI knowledge base IDs |
92 | | `page_limit` | integer | No | `100` | Maximum number of issues to fetch per project |
93 |
94 | ## File Processing
95 |
96 | ### Issue Content
97 |
98 | - Jira issues are converted from HTML to markdown format
99 | - Issues are saved as `.md` files with sanitized filenames
100 | - File paths follow the pattern: `{issue-id}.md`
101 |
102 | ### Issue Metadata
103 |
104 | Each issue file includes:
105 | - Issue key
106 | - Reporter name
107 | - Issue type
108 | - Status
109 | - Resolution status
110 |
111 | ### Comments
112 |
113 | - Comments are fetched and included in the markdown file
114 | - Each comment includes the author's display name and timestamp
115 | - Comments are formatted in markdown
116 |
117 | ## Error Handling
118 |
119 | - **Authentication Errors**: Invalid credentials will cause the adapter to fail initialization
120 | - **API Errors**: HTTP errors from Jira API are logged and may cause individual issue processing to fail
121 | - **File Processing Errors**: Individual file processing errors are logged but don't stop the overall sync
122 | - **Network Errors**: Connection timeouts and network issues are handled gracefully
123 |
124 | ## Limitations
125 |
126 | 1. **API Rate Limits**: Jira has API rate limits that may affect sync performance
127 | 2. **Large Projects**: Very large projects with many issues may take significant time to sync
128 | 3. **HTML Conversion**: The HTML to markdown conversion is basic and may not preserve all formatting
129 | 4. **Comment Limitations**: Comments are limited to the Jira API's available fields
130 |
131 | ## Troubleshooting
132 |
133 | ### Common Issues
134 |
135 | 1. **Authentication Failed**
136 | - Verify your username and API key are correct
137 | - Ensure your API key has the necessary permissions
138 |
139 | 2. **Project Not Found**
140 | - Check that the project key is correct
141 | - Verify you have access to the project
142 |
143 | 3. **No Content Synced**
144 | - Check that the project contains issues
145 | - Verify the `page_limit` setting is appropriate
146 | - Check logs for API errors
147 |
148 | ### Debug Mode
149 |
150 | Enable debug logging to see detailed information about the sync process:
151 |
152 | ```yaml
153 | log_level: debug
154 | ```
155 |
156 | ## Example Usage
157 |
158 | ### Basic Configuration
159 |
160 | ```yaml
161 | jira:
162 | enabled: true
163 | base_url: "https://mycompany.atlassian.net"
164 | username: "john.doe@mycompany.com"
165 | api_key: "ATATT3xFfGF0..."
166 | project_mappings:
167 | - project_key: "DOCS"
168 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583"
169 | - project_key: "PROJ"
170 | knowledge_id: "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
171 | page_limit: 100
172 | ```
173 |
174 | ### Advanced Configuration
175 |
176 | ```yaml
177 | jira:
178 | enabled: true
179 | base_url: "https://mycompany.atlassian.net"
180 | username: "john.doe@mycompany.com"
181 | api_key: "ATATT3xFfGF0..."
182 | project_mappings:
183 | - project_key: "DOCS"
184 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583"
185 | - project_key: "PROJ"
186 | knowledge_id: "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
187 | - project_key: "OPS"
188 | knowledge_id: "98765432-10fe-dcba-0987-6543210fedcb"
189 | page_limit: 500
190 | ```
191 |
192 | This configuration will sync up to 500 issues from each of the three specified projects.
193 |
--------------------------------------------------------------------------------
/ARCHITECTURE.md:
--------------------------------------------------------------------------------
1 | # OpenWebUI GitHub Connector - Architecture
2 |
3 | ## Overview
4 |
5 | The OpenWebUI GitHub Connector is a Kubernetes-native application that synchronizes files from GitHub repositories to OpenWebUI knowledge bases using an adapter architecture pattern.
6 |
7 | ## Architecture Components
8 |
9 | ### 1. Adapter Layer
10 | - **Interface**: `adapter.Adapter` defines the contract for data source adapters
11 | - **GitHub Adapter**: Implements GitHub API integration for repository file fetching
12 | - **Extensible**: Easy to add new adapters (GitLab, Bitbucket, etc.)
13 |
14 | ### 2. Sync Manager
15 | - **File Diffing**: Uses SHA256 hashing to detect file changes
16 | - **Local Storage**: Maintains files on persistent volumes
17 | - **OpenWebUI Integration**: Handles file uploads and knowledge base association
18 |
19 | ### 3. Scheduler
20 | - **Cron-based**: Uses robfig/cron for scheduled synchronization
21 | - **Configurable**: Supports various interval patterns (1h, 2h, etc.)
22 | - **Graceful Shutdown**: Properly handles termination signals
23 |
24 | ### 4. Configuration Management
25 | - **YAML-based**: Primary configuration via YAML files
26 | - **Environment Override**: Environment variables override file settings
27 | - **Kubernetes Integration**: ConfigMaps and Secrets support
28 |
29 | ### 5. Health Monitoring
30 | - **HTTP Endpoints**: `/health` and `/ready` for Kubernetes probes
31 | - **Structured Logging**: JSON-formatted logs with configurable levels
32 | - **Error Handling**: Comprehensive error handling and recovery
33 |
34 | ## Data Flow
35 |
36 | ```
37 | GitHub Repository → GitHub Adapter → Sync Manager → OpenWebUI API
38 | ↓
39 | Local Storage (PVC)
40 | ```
41 |
42 | ### Detailed Flow:
43 |
44 | 1. **Scheduler Trigger**: Cron job triggers sync process
45 | 2. **Adapter Fetch**: GitHub adapter fetches repository files
46 | 3. **File Processing**: Files are filtered (text files only) and hashed
47 | 4. **Change Detection**: Compare hashes with previously synced files
48 | 5. **Local Storage**: Save files to persistent volume
49 | 6. **OpenWebUI Upload**: Upload new/changed files to OpenWebUI
50 | 7. **Knowledge Association**: Add files to specified knowledge base
51 | 8. **Index Update**: Update local file index for future comparisons
52 |
53 | ## API Integration
54 |
55 | ### OpenWebUI APIs Used:
56 | - `POST /api/v1/files/` - Upload files
57 | - `GET /api/v1/knowledge/` - List knowledge sources
58 | - `POST /api/v1/knowledge/{id}/file/add` - Add file to knowledge
59 | - `POST /api/v1/knowledge/{id}/file/remove` - Remove file from knowledge
60 |
61 | ### GitHub APIs Used:
62 | - `GET /repos/{owner}/{repo}/contents` - Fetch repository contents
63 | - File content retrieval via GitHub's content API
64 |
65 | ## File Processing
66 |
67 | ### Supported File Types:
68 | - Markdown (`.md`)
69 | - Text files (`.txt`)
70 | - Code files (`.go`, `.py`, `.js`, `.ts`, etc.)
71 | - Configuration files (`.yaml`, `.json`, `.env`)
72 | - Documentation files (`.rst`, `.adoc`)
73 | - And many more text-based formats
74 |
75 | ### File Filtering:
76 | - Binary files are automatically excluded
77 | - Large files are handled via GitHub's download URLs
78 | - File size limits can be configured
79 |
80 | ## Storage Strategy
81 |
82 | ### Local Storage:
83 | - **Persistent Volume**: Kubernetes PVC for data persistence
84 | - **File Organization**: Files organized by source and path
85 | - **Index Management**: JSON-based file index for change tracking
86 |
87 | ### File Index Structure:
88 | ```json
89 | {
90 | "source:path": {
91 | "path": "file.md",
92 | "hash": "sha256_hash",
93 | "file_id": "openwebui_file_id",
94 | "source": "github",
95 | "synced_at": "2024-01-01T00:00:00Z",
96 | "modified": "2024-01-01T00:00:00Z"
97 | }
98 | }
99 | ```
100 |
101 | ## Error Handling
102 |
103 | ### Retry Logic:
104 | - Network failures are retried with exponential backoff
105 | - GitHub API rate limits are respected
106 | - OpenWebUI API failures are logged and retried
107 |
108 | ### Recovery:
109 | - Application can recover from crashes
110 | - File index is persisted and restored
111 | - Partial syncs are resumed on restart
112 |
113 | ## Security Considerations
114 |
115 | ### Authentication:
116 | - GitHub Personal Access Tokens
117 | - OpenWebUI API Keys
118 | - Kubernetes Secrets for credential management
119 |
120 | ### Network Security:
121 | - HTTPS for all external API calls
122 | - Configurable timeouts and retry limits
123 | - No sensitive data in logs
124 |
125 | ## Monitoring and Observability
126 |
127 | ### Logging:
128 | - Structured JSON logging
129 | - Configurable log levels
130 | - Request/response logging for debugging
131 |
132 | ### Health Checks:
133 | - Liveness probe: `/health`
134 | - Readiness probe: `/ready`
135 | - Kubernetes-native health monitoring
136 |
137 | ### Metrics:
138 | - Sync operation counts
139 | - File processing statistics
140 | - Error rates and types
141 |
142 | ## Scalability
143 |
144 | ### Horizontal Scaling:
145 | - Stateless design allows multiple replicas
146 | - File index can be shared via external storage
147 | - Adapter instances can be distributed
148 |
149 | ### Vertical Scaling:
150 | - Configurable resource limits
151 | - Memory usage scales with repository size
152 | - CPU usage scales with sync frequency
153 |
154 | ## Deployment
155 |
156 | ### Kubernetes Manifests:
157 | - Deployment with health checks
158 | - PersistentVolumeClaim for storage
159 | - ConfigMap for configuration
160 | - Secrets for credentials
161 |
162 | ### Docker:
163 | - Multi-stage build for minimal image size
164 | - Alpine Linux base image
165 | - Non-root user for security
166 |
167 | ## Configuration
168 |
169 | ### Environment Variables:
170 | - `OPENWEBUI_BASE_URL`: OpenWebUI instance URL
171 | - `OPENWEBUI_API_KEY`: API authentication
172 | - `GITHUB_TOKEN`: GitHub authentication
173 | - `GITHUB_KNOWLEDGE_ID`: Target knowledge base
174 | - `STORAGE_PATH`: Local storage path
175 | - `LOG_LEVEL`: Logging verbosity
176 |
177 | ### Configuration File:
178 | ```yaml
179 | log_level: info
180 | schedule:
181 | interval: 1h
182 | storage:
183 | path: /data
184 | openwebui:
185 | base_url: "http://localhost:8080"
186 | api_key: ""
187 | github:
188 | enabled: true
189 | token: ""
190 | repositories:
191 | - "owner/repo1"
192 | - "owner/repo2"
193 | knowledge_id: ""
194 | ```
195 |
196 | ## Future Enhancements
197 |
198 | ### Planned Features:
199 | - Additional adapters (GitLab, Bitbucket)
200 | - Webhook-based real-time sync
201 | - File content transformation
202 | - Advanced filtering rules
203 | - Sync status dashboard
204 | - Metrics and alerting
205 |
206 | ### Extensibility:
207 | - Plugin architecture for custom adapters
208 | - Custom file processors
209 | - Integration with CI/CD pipelines
210 | - Multi-tenant support
211 |
--------------------------------------------------------------------------------
/config.example.yaml:
--------------------------------------------------------------------------------
1 | # Example configuration for OpenWebUI GitHub Connector
2 | # Copy this file to config.yaml and update the values
3 |
4 | log_level: info
5 |
6 | # Sync schedule configuration
7 | schedule:
8 | interval: 1h # Options: 30m, 1h, 2h, 6h, 12h, 24h
9 |
10 | # Local storage configuration
11 | storage:
12 | path: /data # Path where files will be stored locally
13 |
14 | # OpenWebUI API configuration
15 | openwebui:
16 | base_url: "http://localhost:8080" # OpenWebUI instance URL
17 | api_key: "" # Set via OPENWEBUI_API_KEY environment variable
18 |
19 | # GitHub adapter configuration
20 | github:
21 | enabled: true
22 | token: "" # Set via GITHUB_TOKEN environment variable
23 | mappings:
24 | - repository: "owner/repo1"
25 | knowledge_id: "knowledge-base-1"
26 | - repository: "owner/repo2"
27 | knowledge_id: "knowledge-base-2"
28 | - repository: "microsoft/vscode"
29 | knowledge_id: "vscode-knowledge-base"
30 |
31 | # Confluence adapter configuration
32 | confluence:
33 | enabled: false
34 | base_url: "https://your-domain.atlassian.net" # Your Confluence instance URL
35 | username: "your-email@example.com" # Your Confluence username (usually email)
36 | api_key: "" # Set via CONFLUENCE_API_KEY environment variable
37 |
38 | # Space mappings (per-space knowledge IDs)
39 | space_mappings:
40 | - space_key: "SPACEKEY1"
41 | knowledge_id: "space1-knowledge-base"
42 | - space_key: "SPACEKEY2"
43 | knowledge_id: "space2-knowledge-base"
44 | - space_key: "DOCS"
45 | knowledge_id: "docs-knowledge-base"
46 |
47 | # Parent page mappings (per-parent-page knowledge IDs)
48 | parent_page_mappings:
49 | - parent_page_id: "3098214470"
50 | knowledge_id: "parent-page-knowledge-base"
51 | - parent_page_id: "1234567890"
52 | knowledge_id: "another-parent-page-knowledge-base"
53 |
54 | page_limit: 100 # Maximum pages to fetch per space (0 = no limit)
55 | include_attachments: true # Whether to download and sync page attachments
56 |
57 | # Local Folders adapter configuration
58 | local_folders:
59 | enabled: false
60 | mappings:
61 | - folder_path: "/path/to/docs"
62 | knowledge_id: "docs-knowledge-base"
63 | - folder_path: "/path/to/guides"
64 | knowledge_id: "guides-knowledge-base"
65 | - folder_path: "/path/to/notes"
66 | knowledge_id: "notes-knowledge-base"
67 |
68 | # Slack adapter configuration
69 | slack:
70 | enabled: false
71 | token: "" # Set via SLACK_TOKEN environment variable
72 | channel_mappings:
73 | - channel_id: "C1234567890" # Slack channel ID
74 | channel_name: "general" # Channel name for display
75 | knowledge_id: "general-knowledge-base"
76 | - channel_id: "C0987654321"
77 | channel_name: "dev-team"
78 | knowledge_id: "dev-knowledge-base"
79 | - channel_id: "C1122334455"
80 | channel_name: "support"
81 | knowledge_id: "support-knowledge-base"
82 | regex_patterns:
83 | # Auto-discover and join channels matching regex patterns
84 | - pattern: "^sales-.*-internal$" # Matches channels like "sales-team-internal", "sales-west-internal"
85 | knowledge_id: "sales-knowledge-base"
86 | auto_join: true # Automatically join matching channels
87 | - pattern: "^dev-.*" # Matches channels like "dev-frontend", "dev-backend", "dev-ops"
88 | knowledge_id: "dev-knowledge-base"
89 | auto_join: true
90 | - pattern: "^support-.*" # Matches channels like "support-tier1", "support-escalation"
91 | knowledge_id: "support-knowledge-base"
92 | auto_join: false # Discover but don't auto-join (manual approval required)
93 | - pattern: "^alert-.*" # Matches channels like "alert-production", "alert-staging"
94 | knowledge_id: "monitoring-knowledge-base"
95 | auto_join: true
96 | days_to_fetch: 30 # Number of days to fetch messages (default: 30)
97 | maintain_history: false # Whether to maintain indefinite history or age off (default: false)
98 | message_limit: 1000 # Max messages per channel per run (default: 1000)
99 | include_threads: true # Whether to include thread messages (default: true)
100 | include_reactions: false # Whether to include reaction data (default: false)
101 | # Jira adapter configuration
102 | jira:
103 | enabled: false
104 | base_url: "https://your-domain.atlassian.net" # Your Jira instance URL
105 | username: "your-email@example.com" # Your Jira username (usually email)
106 | api_key: "" # Set via JIRA_API_KEY environment variable
107 | page_limit: 100 # Maximum issues to fetch per project (0 = no limit)
108 |
109 | project_mappings:
110 | - project_key: "PROJ"
111 | knowledge_id: "your-knowledge-base-id"
112 | - project_key: "ANOTHER"
113 | knowledge_id: "another-knowledge-base-id"
114 |
115 | # Example configurations for different environments:
116 |
117 | # Development
118 | # log_level: debug
119 | # schedule:
120 | # interval: 5m
121 | # openwebui:
122 | # base_url: "http://localhost:8080"
123 | # api_key: "dev-api-key"
124 | # confluence:
125 | # enabled: true
126 | # base_url: "https://dev-company.atlassian.net"
127 | # username: "dev-user@company.com"
128 | # api_key: "dev-confluence-api-key"
129 | # spaces:
130 | # - "DEV"
131 | # knowledge_id: "dev-knowledge-base-id"
132 | # page_limit: 50
133 | # include_attachments: false
134 |
135 | # Production
136 | # log_level: info
137 | # schedule:
138 | # interval: 1h
139 | # openwebui:
140 | # base_url: "https://openwebui.yourdomain.com"
141 | # api_key: "prod-api-key"
142 | # confluence:
143 | # enabled: true
144 | # base_url: "https://company.atlassian.net"
145 | # username: "sync-bot@company.com"
146 | # api_key: "prod-confluence-api-key"
147 | # spaces:
148 | # - "DOCS"
149 | # - "WIKI"
150 | # - "PROJECTS"
151 | # knowledge_id: "prod-knowledge-base-id"
152 | # page_limit: 500
153 | # include_attachments: true
154 |
155 | # Kubernetes (using environment variables)
156 | # All values should be set via environment variables or a ConfigMap
157 | # log_level: info
158 | # schedule:
159 | # interval: 1h
160 | # storage:
161 | # path: /data
162 | # openwebui:
163 | # base_url: "" # Set via OPENWEBUI_BASE_URL
164 | # api_key: "" # Set via OPENWEBUI_API_KEY
165 | # github:
166 | # enabled: true
167 | # token: "" # Set via GITHUB_TOKEN
168 | # repositories: [] # Set via ConfigMap
169 | # knowledge_id: "" # Set via GITHUB_KNOWLEDGE_ID
170 | # confluence:
171 | # enabled: false
172 | # base_url: "" # Set in ConfigMap
173 | # username: "" # Set in ConfigMap
174 | # api_key: "" # Set via CONFLUENCE_API_KEY
175 | # spaces: [] # Set in ConfigMap
176 | # knowledge_id: "" # Set in ConfigMap
177 | # page_limit: 100
178 | # include_attachments: true
179 |
--------------------------------------------------------------------------------
/internal/adapter/local_test.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "context"
5 | "os"
6 | "path/filepath"
7 | "testing"
8 | "time"
9 |
10 | "github.com/openwebui-content-sync/internal/config"
11 | )
12 |
13 | func TestNewLocalFolderAdapter(t *testing.T) {
14 | // Create a temporary directory for testing
15 | tempDir := t.TempDir()
16 | testFile := filepath.Join(tempDir, "test.txt")
17 | err := os.WriteFile(testFile, []byte("test content"), 0644)
18 | if err != nil {
19 | t.Fatalf("Failed to create test file: %v", err)
20 | }
21 |
22 | tests := []struct {
23 | name string
24 | config config.LocalFolderConfig
25 | wantErr bool
26 | }{
27 | {
28 | name: "valid mapping configuration",
29 | config: config.LocalFolderConfig{
30 | Enabled: true,
31 | Mappings: []config.LocalFolderMapping{
32 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"},
33 | },
34 | },
35 | wantErr: false,
36 | },
37 | {
38 | name: "valid configuration with multiple mappings",
39 | config: func() config.LocalFolderConfig {
40 | // Create subdirectory for the test
41 | subDir := filepath.Join(tempDir, "subdir")
42 | os.MkdirAll(subDir, 0755)
43 | return config.LocalFolderConfig{
44 | Enabled: true,
45 | Mappings: []config.LocalFolderMapping{
46 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"},
47 | {FolderPath: subDir, KnowledgeID: "another-knowledge"},
48 | },
49 | }
50 | }(),
51 | wantErr: false,
52 | },
53 | {
54 | name: "disabled adapter",
55 | config: config.LocalFolderConfig{
56 | Enabled: false,
57 | },
58 | wantErr: true,
59 | },
60 | {
61 | name: "non-existent folder",
62 | config: config.LocalFolderConfig{
63 | Enabled: true,
64 | Mappings: []config.LocalFolderMapping{
65 | {FolderPath: "/non/existent/path", KnowledgeID: "test-knowledge"},
66 | },
67 | },
68 | wantErr: true,
69 | },
70 | {
71 | name: "no folders configured",
72 | config: config.LocalFolderConfig{
73 | Enabled: true,
74 | },
75 | wantErr: true,
76 | },
77 | }
78 |
79 | for _, tt := range tests {
80 | t.Run(tt.name, func(t *testing.T) {
81 | adapter, err := NewLocalFolderAdapter(tt.config)
82 | if (err != nil) != tt.wantErr {
83 | t.Errorf("NewLocalFolderAdapter() error = %v, wantErr %v", err, tt.wantErr)
84 | return
85 | }
86 | if !tt.wantErr && adapter == nil {
87 | t.Error("NewLocalFolderAdapter() returned nil adapter when no error expected")
88 | }
89 | })
90 | }
91 | }
92 |
93 | func TestLocalFolderAdapter_Name(t *testing.T) {
94 | adapter := &LocalFolderAdapter{}
95 | if got := adapter.Name(); got != "local" {
96 | t.Errorf("Name() = %v, want %v", got, "local")
97 | }
98 | }
99 |
100 | func TestLocalFolderAdapter_GetSetLastSync(t *testing.T) {
101 | adapter := &LocalFolderAdapter{}
102 |
103 | // Test initial last sync
104 | initialSync := adapter.GetLastSync()
105 | if !initialSync.IsZero() {
106 | t.Error("GetLastSync() should return zero time initially")
107 | }
108 |
109 | // Test setting last sync
110 | newTime := time.Now()
111 | adapter.SetLastSync(newTime)
112 | if !adapter.GetLastSync().Equal(newTime) {
113 | t.Errorf("SetLastSync() did not update last sync time")
114 | }
115 | }
116 |
117 | func TestLocalFolderAdapter_FetchFiles(t *testing.T) {
118 | // Create a temporary directory with test files
119 | tempDir := t.TempDir()
120 |
121 | // Create test files
122 | testFiles := map[string]string{
123 | "test1.txt": "content 1",
124 | "test2.md": "content 2",
125 | "subdir/test3.txt": "content 3",
126 | ".hidden.txt": "hidden content", // Should be ignored
127 | "binary.bin": string([]byte{0, 1, 2, 3, 4}), // Should be ignored
128 | }
129 |
130 | for filename, content := range testFiles {
131 | fullPath := filepath.Join(tempDir, filename)
132 | dir := filepath.Dir(fullPath)
133 | if dir != tempDir {
134 | err := os.MkdirAll(dir, 0755)
135 | if err != nil {
136 | t.Fatalf("Failed to create subdirectory: %v", err)
137 | }
138 | }
139 | err := os.WriteFile(fullPath, []byte(content), 0644)
140 | if err != nil {
141 | t.Fatalf("Failed to create test file %s: %v", filename, err)
142 | }
143 | }
144 |
145 | config := config.LocalFolderConfig{
146 | Enabled: true,
147 | Mappings: []config.LocalFolderMapping{
148 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"},
149 | },
150 | }
151 |
152 | adapter, err := NewLocalFolderAdapter(config)
153 | if err != nil {
154 | t.Fatalf("NewLocalFolderAdapter() error = %v", err)
155 | }
156 |
157 | ctx := context.Background()
158 | files, err := adapter.FetchFiles(ctx)
159 | if err != nil {
160 | t.Fatalf("FetchFiles() error = %v", err)
161 | }
162 |
163 | // Should find 3 files (test1.txt, test2.md, subdir/test3.txt)
164 | // Hidden and binary files should be ignored
165 | expectedCount := 3
166 | if len(files) != expectedCount {
167 | t.Errorf("FetchFiles() returned %d files, want %d", len(files), expectedCount)
168 | }
169 |
170 | // Check that all files have the correct knowledge ID
171 | for _, file := range files {
172 | if file.KnowledgeID != "test-knowledge" {
173 | t.Errorf("File %s has knowledge ID %s, want %s", file.Path, file.KnowledgeID, "test-knowledge")
174 | }
175 | if file.Source != "local:"+tempDir {
176 | t.Errorf("File %s has source %s, want %s", file.Path, file.Source, "local:"+tempDir)
177 | }
178 | }
179 | }
180 |
181 | func TestLocalFolderAdapter_shouldIgnoreFile(t *testing.T) {
182 | adapter := &LocalFolderAdapter{}
183 |
184 | tests := []struct {
185 | filename string
186 | want bool
187 | }{
188 | {"test.txt", false},
189 | {".hidden", true},
190 | {"node_modules", true},
191 | {"test.log", true},
192 | {"Thumbs.db", true},
193 | {"normal_file.py", false},
194 | {"__pycache__", true},
195 | }
196 |
197 | for _, tt := range tests {
198 | t.Run(tt.filename, func(t *testing.T) {
199 | if got := adapter.shouldIgnoreFile(tt.filename); got != tt.want {
200 | t.Errorf("shouldIgnoreFile(%q) = %v, want %v", tt.filename, got, tt.want)
201 | }
202 | })
203 | }
204 | }
205 |
206 | func TestLocalFolderAdapter_isBinaryFile(t *testing.T) {
207 | adapter := &LocalFolderAdapter{}
208 |
209 | tests := []struct {
210 | name string
211 | content []byte
212 | want bool
213 | }{
214 | {"empty", []byte{}, false},
215 | {"text", []byte("hello world"), false},
216 | {"text with newlines", []byte("hello\nworld\r\n"), false},
217 | {"binary with null", []byte{0, 1, 2, 3}, true},
218 | {"high non-printable ratio", make([]byte, 1000), true},
219 | {"normal text", []byte("This is normal text content"), false},
220 | }
221 |
222 | for _, tt := range tests {
223 | t.Run(tt.name, func(t *testing.T) {
224 | if got := adapter.isBinaryFile(tt.content); got != tt.want {
225 | t.Errorf("isBinaryFile(%q) = %v, want %v", tt.name, got, tt.want)
226 | }
227 | })
228 | }
229 | }
230 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/JohannesKaufmann/dom v0.2.0 h1:1bragmEb19K8lHAqgFgqCpiPCFEZMTXzOIEjuxkUfLQ=
2 | github.com/JohannesKaufmann/dom v0.2.0/go.mod h1:57iSUl5RKric4bUkgos4zu6Xt5LMHUnw3TF1l5CbGZo=
3 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0 h1:C0/TerKdQX9Y9pbYi1EsLr5LDNANsqunyI/btpyfCg8=
4 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0/go.mod h1:OLaKh+giepO8j7teevrNwiy/fwf8LXgoc9g7rwaE1jk=
5 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
9 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
10 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
11 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
12 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
13 | github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
14 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
15 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
16 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
17 | github.com/google/go-github/v56 v56.0.0 h1:TysL7dMa/r7wsQi44BjqlwaHvwlFlqkK8CtBWCX3gb4=
18 | github.com/google/go-github/v56 v56.0.0/go.mod h1:D8cdcX98YWJvi7TLo7zM4/h8ZTx6u6fwGEkCdisopo0=
19 | github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
20 | github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
21 | github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
22 | github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
23 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
24 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
25 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
26 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
27 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
28 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
29 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
30 | github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
31 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
33 | github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
34 | github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
35 | github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
36 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
37 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
38 | github.com/sebdah/goldie/v2 v2.7.1 h1:PkBHymaYdtvEkZV7TmyqKxdmn5/Vcj+8TpATWZjnG5E=
39 | github.com/sebdah/goldie/v2 v2.7.1/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
40 | github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw=
41 | github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
42 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
43 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
44 | github.com/slack-go/slack v0.17.3 h1:zV5qO3Q+WJAQ/XwbGfNFrRMaJ5T/naqaonyPV/1TP4g=
45 | github.com/slack-go/slack v0.17.3/go.mod h1:X+UqOufi3LYQHDnMG1vxf0J8asC6+WllXrVrhl8/Prk=
46 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
47 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
48 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
49 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
50 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
51 | github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA=
52 | github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
53 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
54 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
55 | golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
56 | golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
57 | golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ=
58 | golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM=
59 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
60 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
61 | golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
62 | golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
63 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
64 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
65 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
66 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
67 | google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
68 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
69 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
70 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
71 | google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
72 | google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
73 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
74 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
75 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
76 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
77 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
78 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
79 |
--------------------------------------------------------------------------------
/internal/sync/manager_simple_test.go:
--------------------------------------------------------------------------------
1 | package sync
2 |
3 | import (
4 | "context"
5 | "os"
6 | "path/filepath"
7 | "testing"
8 | "time"
9 |
10 | "github.com/openwebui-content-sync/internal/adapter"
11 | "github.com/openwebui-content-sync/internal/config"
12 | "github.com/openwebui-content-sync/internal/mocks"
13 | "github.com/openwebui-content-sync/internal/openwebui"
14 | )
15 |
16 | func TestNewManager(t *testing.T) {
17 | tempDir := t.TempDir()
18 | defer os.RemoveAll(tempDir)
19 |
20 | openwebuiConfig := config.OpenWebUIConfig{
21 | BaseURL: "http://localhost:8080",
22 | APIKey: "test-key",
23 | }
24 | storageConfig := config.StorageConfig{
25 | Path: tempDir,
26 | }
27 |
28 | manager, err := NewManager(openwebuiConfig, storageConfig)
29 | if err != nil {
30 | t.Fatalf("Failed to create manager: %v", err)
31 | }
32 |
33 | if manager == nil {
34 | t.Fatal("Expected manager to be created")
35 | }
36 | if manager.storagePath != tempDir {
37 | t.Errorf("Expected storage path %s, got %s", tempDir, manager.storagePath)
38 | }
39 | }
40 |
41 | func TestManager_SetKnowledgeID(t *testing.T) {
42 | tempDir := t.TempDir()
43 | defer os.RemoveAll(tempDir)
44 |
45 | manager := &Manager{
46 | storagePath: tempDir,
47 | fileIndex: make(map[string]*FileMetadata),
48 | }
49 |
50 | knowledgeID := "test-knowledge-id"
51 | manager.SetKnowledgeID(knowledgeID)
52 |
53 | if manager.knowledgeID != knowledgeID {
54 | t.Errorf("Expected knowledge ID %s, got %s", knowledgeID, manager.knowledgeID)
55 | }
56 | }
57 |
58 | func TestManager_syncFile_NewFile(t *testing.T) {
59 | tempDir := t.TempDir()
60 | defer os.RemoveAll(tempDir)
61 |
62 | mockClient := &mocks.MockOpenWebUIClient{
63 | UploadFileFunc: func(ctx context.Context, filename string, content []byte) (*openwebui.File, error) {
64 | return &openwebui.File{
65 | ID: "mock-file-id",
66 | Filename: filename,
67 | }, nil
68 | },
69 | }
70 |
71 | manager := &Manager{
72 | openwebuiClient: mockClient,
73 | storagePath: tempDir,
74 | fileIndex: make(map[string]*FileMetadata),
75 | }
76 |
77 | file := &adapter.File{
78 | Path: "new-file.md",
79 | Content: []byte("# New File"),
80 | Hash: "test-hash",
81 | Modified: time.Now(),
82 | Size: 10,
83 | Source: "test",
84 | }
85 |
86 | ctx := context.Background()
87 | err := manager.syncFile(ctx, file, "test-source")
88 | if err != nil {
89 | t.Fatalf("Failed to sync file: %v", err)
90 | }
91 |
92 | // Check that file was added to index
93 | fileKey := "new-file.md" // Now using filename as key
94 | if _, exists := manager.fileIndex[fileKey]; !exists {
95 | t.Errorf("Expected file to be added to index")
96 | }
97 |
98 | // Check that file was saved locally
99 | expectedPath := filepath.Join(tempDir, "files", "test-source", "new-file.md")
100 | if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
101 | t.Errorf("Expected file to be saved locally at %s", expectedPath)
102 | }
103 | }
104 |
105 | func TestManager_syncFile_UnchangedFile(t *testing.T) {
106 | tempDir := t.TempDir()
107 | defer os.RemoveAll(tempDir)
108 |
109 | mockClient := &mocks.MockOpenWebUIClient{}
110 | manager := &Manager{
111 | openwebuiClient: mockClient,
112 | storagePath: tempDir,
113 | fileIndex: make(map[string]*FileMetadata),
114 | }
115 |
116 | // Add file to index first
117 | fileKey := "unchanged-file.md" // Now using filename as key
118 | manager.fileIndex[fileKey] = &FileMetadata{
119 | Path: "unchanged-file.md",
120 | Hash: "same-hash",
121 | FileID: "existing-file-id",
122 | Source: "test-source",
123 | SyncedAt: time.Now(),
124 | Modified: time.Now(),
125 | }
126 |
127 | file := &adapter.File{
128 | Path: "unchanged-file.md",
129 | Content: []byte("# Unchanged File"),
130 | Hash: "same-hash", // Same hash as in index
131 | Modified: time.Now(),
132 | Size: 17,
133 | Source: "test",
134 | }
135 |
136 | ctx := context.Background()
137 | err := manager.syncFile(ctx, file, "test-source")
138 | if err != nil {
139 | t.Fatalf("Failed to sync file: %v", err)
140 | }
141 |
142 | // File should not be uploaded again (we can't easily test this without more complex mocking)
143 | // But we can verify the file index wasn't updated with a new file ID
144 | if manager.fileIndex[fileKey].FileID != "existing-file-id" {
145 | t.Errorf("Expected file ID to remain unchanged")
146 | }
147 | }
148 |
149 | func TestManager_saveFileLocally(t *testing.T) {
150 | tempDir := t.TempDir()
151 | defer os.RemoveAll(tempDir)
152 |
153 | manager := &Manager{
154 | storagePath: tempDir,
155 | }
156 |
157 | filePath := filepath.Join(tempDir, "test", "nested", "file.md")
158 | content := []byte("# Test Content")
159 |
160 | err := manager.saveFileLocally(filePath, content)
161 | if err != nil {
162 | t.Fatalf("Failed to save file locally: %v", err)
163 | }
164 |
165 | if _, err := os.Stat(filePath); os.IsNotExist(err) {
166 | t.Errorf("Expected file to exist at %s", filePath)
167 | }
168 |
169 | // Check content
170 | readContent, err := os.ReadFile(filePath)
171 | if err != nil {
172 | t.Fatalf("Failed to read file: %v", err)
173 | }
174 | if string(readContent) != string(content) {
175 | t.Errorf("Expected content %s, got %s", string(content), string(readContent))
176 | }
177 | }
178 |
179 | func TestGetFileHash(t *testing.T) {
180 | content := []byte("test content")
181 | // Calculate the actual expected hash
182 | expectedHash := "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72"
183 |
184 | hash := GetFileHash(content)
185 | if hash != expectedHash {
186 | t.Errorf("Expected hash %s, got %s", expectedHash, hash)
187 | }
188 | }
189 |
190 | func TestManager_loadFileIndex(t *testing.T) {
191 | tempDir := t.TempDir()
192 | defer os.RemoveAll(tempDir)
193 |
194 | manager := &Manager{
195 | storagePath: tempDir,
196 | fileIndex: make(map[string]*FileMetadata),
197 | indexPath: filepath.Join(tempDir, "file_index.json"),
198 | }
199 |
200 | // Test loading non-existent index (should not error)
201 | err := manager.loadFileIndex()
202 | if err != nil {
203 | t.Fatalf("Failed to load non-existent index: %v", err)
204 | }
205 |
206 | // Create a test index file
207 | testIndex := map[string]*FileMetadata{
208 | "file.md": { // Now using filename as key
209 | Path: "file.md",
210 | Hash: "test-hash",
211 | FileID: "test-file-id",
212 | Source: "test",
213 | SyncedAt: time.Now(),
214 | Modified: time.Now(),
215 | },
216 | }
217 |
218 | // Save test index
219 | manager.fileIndex = testIndex
220 | err = manager.saveFileIndex()
221 | if err != nil {
222 | t.Fatalf("Failed to save test index: %v", err)
223 | }
224 |
225 | // Create new manager and load index
226 | newManager := &Manager{
227 | storagePath: tempDir,
228 | fileIndex: make(map[string]*FileMetadata),
229 | indexPath: filepath.Join(tempDir, "file_index.json"),
230 | }
231 |
232 | err = newManager.loadFileIndex()
233 | if err != nil {
234 | t.Fatalf("Failed to load index: %v", err)
235 | }
236 |
237 | if len(newManager.fileIndex) != 1 {
238 | t.Errorf("Expected 1 file in index, got %d", len(newManager.fileIndex))
239 | }
240 |
241 | fileKey := "file.md" // Now using filename as key
242 | if _, exists := newManager.fileIndex[fileKey]; !exists {
243 | t.Errorf("Expected file %s to be in index", fileKey)
244 | }
245 | }
246 |
--------------------------------------------------------------------------------
/internal/mocks/mocks.go:
--------------------------------------------------------------------------------
1 | package mocks
2 |
3 | import (
4 | "context"
5 | "time"
6 |
7 | "github.com/openwebui-content-sync/internal/adapter"
8 | "github.com/openwebui-content-sync/internal/openwebui"
9 | )
10 |
11 | // MockOpenWebUIClient is a mock implementation of OpenWebUI client
12 | type MockOpenWebUIClient struct {
13 | UploadFileFunc func(ctx context.Context, filename string, content []byte) (*openwebui.File, error)
14 | GetFileFunc func(ctx context.Context, fileID string) (*openwebui.File, error)
15 | ListKnowledgeFunc func(ctx context.Context) ([]*openwebui.Knowledge, error)
16 | AddFileToKnowledgeFunc func(ctx context.Context, knowledgeID, fileID string) error
17 | RemoveFileFromKnowledgeFunc func(ctx context.Context, knowledgeID, fileID string) error
18 | GetKnowledgeFilesFunc func(ctx context.Context, knowledgeID string) ([]*openwebui.File, error)
19 | DeleteFileFunc func(ctx context.Context, fileID string) error
20 | }
21 |
22 | // UploadFile mocks the UploadFile method
23 | func (m *MockOpenWebUIClient) UploadFile(ctx context.Context, filename string, content []byte) (*openwebui.File, error) {
24 | if m.UploadFileFunc != nil {
25 | return m.UploadFileFunc(ctx, filename, content)
26 | }
27 | return &openwebui.File{
28 | ID: "mock-file-id",
29 | Filename: filename,
30 | UserID: "test-user",
31 | Hash: "mock-hash",
32 | Data: struct {
33 | Status string `json:"status"`
34 | }{
35 | Status: "pending",
36 | },
37 | Meta: struct {
38 | Name string `json:"name"`
39 | ContentType string `json:"content_type"`
40 | Size int64 `json:"size"`
41 | Data map[string]interface{} `json:"data"`
42 | }{
43 | Name: filename,
44 | ContentType: "text/markdown",
45 | Size: 0,
46 | Data: map[string]interface{}{},
47 | },
48 | CreatedAt: time.Now().Unix(),
49 | UpdatedAt: time.Now().Unix(),
50 | Status: true,
51 | Path: "/app/backend/data/uploads/mock-file-id_" + filename,
52 | AccessControl: nil,
53 | }, nil
54 | }
55 |
56 | // GetFile mocks the GetFile method
57 | func (m *MockOpenWebUIClient) GetFile(ctx context.Context, fileID string) (*openwebui.File, error) {
58 | if m.GetFileFunc != nil {
59 | return m.GetFileFunc(ctx, fileID)
60 | }
61 | return &openwebui.File{
62 | ID: fileID,
63 | Filename: "mock-file.md",
64 | UserID: "test-user",
65 | Hash: "mock-hash",
66 | Data: struct {
67 | Status string `json:"status"`
68 | }{
69 | Status: "processed", // Default to processed status
70 | },
71 | Meta: struct {
72 | Name string `json:"name"`
73 | ContentType string `json:"content_type"`
74 | Size int64 `json:"size"`
75 | Data map[string]interface{} `json:"data"`
76 | }{
77 | Name: "mock-file.md",
78 | ContentType: "text/markdown",
79 | Size: 100,
80 | Data: map[string]interface{}{},
81 | },
82 | Status: true,
83 | }, nil
84 | }
85 |
86 | // ListKnowledge mocks the ListKnowledge method
87 | func (m *MockOpenWebUIClient) ListKnowledge(ctx context.Context) ([]*openwebui.Knowledge, error) {
88 | if m.ListKnowledgeFunc != nil {
89 | return m.ListKnowledgeFunc(ctx)
90 | }
91 | return []*openwebui.Knowledge{
92 | {
93 | ID: "mock-knowledge-id",
94 | UserID: "test-user",
95 | Name: "Test Knowledge",
96 | Description: "Mock knowledge base",
97 | Data: nil,
98 | Meta: nil,
99 | AccessControl: map[string]interface{}{},
100 | CreatedAt: time.Now().Unix(),
101 | UpdatedAt: time.Now().Unix(),
102 | },
103 | }, nil
104 | }
105 |
106 | // AddFileToKnowledge mocks the AddFileToKnowledge method
107 | func (m *MockOpenWebUIClient) AddFileToKnowledge(ctx context.Context, knowledgeID, fileID string) error {
108 | if m.AddFileToKnowledgeFunc != nil {
109 | return m.AddFileToKnowledgeFunc(ctx, knowledgeID, fileID)
110 | }
111 | return nil
112 | }
113 |
114 | // RemoveFileFromKnowledge mocks the RemoveFileFromKnowledge method
115 | func (m *MockOpenWebUIClient) RemoveFileFromKnowledge(ctx context.Context, knowledgeID, fileID string) error {
116 | if m.RemoveFileFromKnowledgeFunc != nil {
117 | return m.RemoveFileFromKnowledgeFunc(ctx, knowledgeID, fileID)
118 | }
119 | return nil
120 | }
121 |
122 | // GetKnowledgeFiles mocks the GetKnowledgeFiles method
123 | func (m *MockOpenWebUIClient) GetKnowledgeFiles(ctx context.Context, knowledgeID string) ([]*openwebui.File, error) {
124 | if m.GetKnowledgeFilesFunc != nil {
125 | return m.GetKnowledgeFilesFunc(ctx, knowledgeID)
126 | }
127 | return []*openwebui.File{
128 | {
129 | ID: "existing-file-1",
130 | Filename: "existing-file-1.md",
131 | UserID: "test-user",
132 | Hash: "existing-hash-1",
133 | Data: struct {
134 | Status string `json:"status"`
135 | }{
136 | Status: "processed",
137 | },
138 | Meta: struct {
139 | Name string `json:"name"`
140 | ContentType string `json:"content_type"`
141 | Size int64 `json:"size"`
142 | Data map[string]interface{} `json:"data"`
143 | }{
144 | Name: "existing-file-1.md",
145 | ContentType: "text/markdown",
146 | Size: 1000,
147 | Data: map[string]interface{}{"source": "github"},
148 | },
149 | CreatedAt: time.Now().Unix() - 3600, // 1 hour ago
150 | UpdatedAt: time.Now().Unix() - 1800, // 30 minutes ago
151 | Status: true,
152 | Path: "existing-file-1.md",
153 | AccessControl: nil,
154 | },
155 | }, nil
156 | }
157 |
158 | // DeleteFile mocks the DeleteFile method
159 | func (m *MockOpenWebUIClient) DeleteFile(ctx context.Context, fileID string) error {
160 | if m.DeleteFileFunc != nil {
161 | return m.DeleteFileFunc(ctx, fileID)
162 | }
163 | return nil
164 | }
165 |
166 | // MockAdapter is a mock implementation of the Adapter interface
167 | type MockAdapter struct {
168 | NameFunc func() string
169 | FetchFilesFunc func(ctx context.Context) ([]*adapter.File, error)
170 | GetLastSyncFunc func() time.Time
171 | SetLastSyncFunc func(t time.Time)
172 | lastSync time.Time
173 | }
174 |
175 | // Name mocks the Name method
176 | func (m *MockAdapter) Name() string {
177 | if m.NameFunc != nil {
178 | return m.NameFunc()
179 | }
180 | return "mock-adapter"
181 | }
182 |
183 | // FetchFiles mocks the FetchFiles method
184 | func (m *MockAdapter) FetchFiles(ctx context.Context) ([]*adapter.File, error) {
185 | if m.FetchFilesFunc != nil {
186 | return m.FetchFilesFunc(ctx)
187 | }
188 | return []*adapter.File{
189 | {
190 | Path: "test.md",
191 | Content: []byte("# Test File"),
192 | Hash: "test-hash",
193 | Modified: time.Now(),
194 | Size: 10,
195 | Source: "mock",
196 | },
197 | }, nil
198 | }
199 |
200 | // GetLastSync mocks the GetLastSync method
201 | func (m *MockAdapter) GetLastSync() time.Time {
202 | if m.GetLastSyncFunc != nil {
203 | return m.GetLastSyncFunc()
204 | }
205 | return m.lastSync
206 | }
207 |
208 | // SetLastSync mocks the SetLastSync method
209 | func (m *MockAdapter) SetLastSync(t time.Time) {
210 | if m.SetLastSyncFunc != nil {
211 | m.SetLastSyncFunc(t)
212 | } else {
213 | m.lastSync = t
214 | }
215 | }
216 |
--------------------------------------------------------------------------------
/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "flag"
6 | "os"
7 | "path/filepath"
8 | "testing"
9 | "time"
10 |
11 | "github.com/openwebui-content-sync/internal/config"
12 | )
13 |
14 | func TestMain_WithConfigFile(t *testing.T) {
15 | // Create temporary config file
16 | tempDir := t.TempDir()
17 | configPath := filepath.Join(tempDir, "test-config.yaml")
18 |
19 | configContent := `
20 | log_level: debug
21 | schedule:
22 | interval: 1h
23 | storage:
24 | path: /tmp/test-storage
25 | openwebui:
26 | base_url: "http://localhost:8080"
27 | api_key: "test-api-key"
28 | github:
29 | enabled: false
30 | `
31 |
32 | err := os.WriteFile(configPath, []byte(configContent), 0644)
33 | if err != nil {
34 | t.Fatalf("Failed to write config file: %v", err)
35 | }
36 |
37 | // Test loading config
38 | cfg, err := config.Load(configPath)
39 | if err != nil {
40 | t.Fatalf("Failed to load config: %v", err)
41 | }
42 |
43 | if cfg.LogLevel != "debug" {
44 | t.Errorf("Expected log level 'debug', got '%s'", cfg.LogLevel)
45 | }
46 | if cfg.GitHub.Enabled != false {
47 | t.Errorf("Expected GitHub enabled false, got %v", cfg.GitHub.Enabled)
48 | }
49 | }
50 |
51 | func TestMain_WithInvalidConfigFile(t *testing.T) {
52 | // Create temporary config file with invalid YAML
53 | tempDir := t.TempDir()
54 | configPath := filepath.Join(tempDir, "invalid-config.yaml")
55 |
56 | invalidYAML := `
57 | log_level: debug
58 | schedule:
59 | interval: 1h
60 | invalid: [unclosed list
61 | `
62 |
63 | err := os.WriteFile(configPath, []byte(invalidYAML), 0644)
64 | if err != nil {
65 | t.Fatalf("Failed to write invalid config file: %v", err)
66 | }
67 |
68 | // Test loading invalid config
69 | _, err = config.Load(configPath)
70 | if err == nil {
71 | t.Errorf("Expected error for invalid config, got none")
72 | }
73 | }
74 |
75 | func TestMain_WithNonExistentConfigFile(t *testing.T) {
76 | // Test loading non-existent config file (should use defaults)
77 | cfg, err := config.Load("non-existent-config.yaml")
78 | if err != nil {
79 | t.Fatalf("Failed to load default config: %v", err)
80 | }
81 |
82 | // Check default values
83 | if cfg.LogLevel != "info" {
84 | t.Errorf("Expected log level 'info', got '%s'", cfg.LogLevel)
85 | }
86 | if cfg.Schedule.Interval != 1*time.Hour {
87 | t.Errorf("Expected schedule interval 1h, got %v", cfg.Schedule.Interval)
88 | }
89 | }
90 |
// TestMain_FlagParsing checks that an explicit -config argument overrides the
// default configuration path.
func TestMain_FlagParsing(t *testing.T) {
	// Parse with a private FlagSet instead of swapping flag.CommandLine and
	// os.Args: replacing the global set would discard the flags the testing
	// package registered on it and leak state into other tests.
	fs := flag.NewFlagSet("cmd", flag.ContinueOnError)
	configPath := fs.String("config", "config.yaml", "Path to configuration file")

	if err := fs.Parse([]string{"-config", "custom-config.yaml"}); err != nil {
		t.Fatalf("Failed to parse flags: %v", err)
	}

	if *configPath != "custom-config.yaml" {
		t.Errorf("Expected config path 'custom-config.yaml', got '%s'", *configPath)
	}
}
110 |
// TestMain_DefaultFlagValue checks that the configuration path falls back to
// "config.yaml" when no -config argument is given.
func TestMain_DefaultFlagValue(t *testing.T) {
	// Use a private FlagSet so the process-wide flag.CommandLine (which
	// carries the testing package's own flags) and os.Args stay untouched.
	fs := flag.NewFlagSet("cmd", flag.ContinueOnError)
	configPath := fs.String("config", "config.yaml", "Path to configuration file")

	// No arguments: every flag keeps its default.
	if err := fs.Parse(nil); err != nil {
		t.Fatalf("Failed to parse flags: %v", err)
	}

	if *configPath != "config.yaml" {
		t.Errorf("Expected default config path 'config.yaml', got '%s'", *configPath)
	}
}
130 |
131 | // Helper function to create test config
132 | func createTestConfig() *config.Config {
133 | return &config.Config{
134 | LogLevel: "debug",
135 | Schedule: config.ScheduleConfig{
136 | Interval: 1 * time.Hour,
137 | },
138 | Storage: config.StorageConfig{
139 | Path: "/tmp/test-storage",
140 | },
141 | OpenWebUI: config.OpenWebUIConfig{
142 | BaseURL: "http://localhost:8080",
143 | APIKey: "test-api-key",
144 | },
145 | GitHub: config.GitHubConfig{
146 | Enabled: false,
147 | Token: "test-token",
148 | Mappings: []config.RepositoryMapping{
149 | {Repository: "owner/repo", KnowledgeID: "test-knowledge-id"},
150 | },
151 | },
152 | }
153 | }
154 |
// TestMain_ContextHandling checks that a cancellable context is live until
// cancel is called and done afterwards.
func TestMain_ContextHandling(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())

	// isDone reports, without blocking, whether ctx's Done channel is closed.
	isDone := func() bool {
		select {
		case <-ctx.Done():
			return true
		default:
			return false
		}
	}

	if isDone() {
		t.Error("Context should not be cancelled initially")
	}

	cancel()

	if !isDone() {
		t.Error("Context should be cancelled after cancel()")
	}
}
178 |
// TestMain_SignalHandling checks that a buffered signal channel starts empty
// and hands back exactly the signal that was sent on it.
func TestMain_SignalHandling(t *testing.T) {
	sigChan := make(chan os.Signal, 1)

	// poll performs a non-blocking receive; ok is false when nothing is
	// queued.
	poll := func() (sig os.Signal, ok bool) {
		select {
		case sig = <-sigChan:
			return sig, true
		default:
			return nil, false
		}
	}

	if _, ok := poll(); ok {
		t.Error("Signal channel should be empty initially")
	}

	sigChan <- os.Interrupt

	sig, ok := poll()
	switch {
	case !ok:
		t.Error("Expected to receive signal")
	case sig != os.Interrupt:
		t.Errorf("Expected os.Interrupt signal, got %v", sig)
	}
}
204 |
// TestMain_TimeHandling exercises the time arithmetic used by main: adding a
// duration, ordering two instants, and measuring the span between them.
func TestMain_TimeHandling(t *testing.T) {
	const (
		offset    = 5 * time.Second
		tolerance = 100 * time.Millisecond
	)

	start := time.Now()
	later := start.Add(offset)

	// Ordering, checked from both sides.
	if later.Before(start) {
		t.Error("Future time should be after now")
	}
	if !start.Before(later) {
		t.Error("Now should be before future time")
	}

	// The measured span must sit within the tolerance of the offset.
	elapsed := later.Sub(start)
	if elapsed < offset-tolerance || elapsed > offset+tolerance {
		t.Errorf("Expected duration around %v, got %v", offset, elapsed)
	}
}
227 |
228 | func TestMain_ErrorHandling(t *testing.T) {
229 | // Test various error scenarios that might occur in main
230 |
231 | // Test with invalid log level
232 | cfg := createTestConfig()
233 | cfg.LogLevel = "invalid-level"
234 |
235 | // This would normally cause an error in the main function
236 | // For testing, we'll just verify the config was set
237 | if cfg.LogLevel != "invalid-level" {
238 | t.Errorf("Expected log level 'invalid-level', got '%s'", cfg.LogLevel)
239 | }
240 | }
241 |
// TestMain_ResourceCleanup checks that removing a storage directory also
// removes the files inside it.
func TestMain_ResourceCleanup(t *testing.T) {
	// t.TempDir registers its own cleanup, so no deferred RemoveAll is
	// needed; the explicit removal below is the behaviour under test.
	tempDir := t.TempDir()

	testFile := filepath.Join(tempDir, "test.txt")
	if err := os.WriteFile(testFile, []byte("test content"), 0644); err != nil {
		t.Fatalf("Failed to create test file: %v", err)
	}

	// Any Stat error means the file is not usable, not just "not exist".
	if _, err := os.Stat(testFile); err != nil {
		t.Errorf("Test file should exist: %v", err)
	}

	// A failed removal must fail the test rather than be ignored.
	if err := os.RemoveAll(tempDir); err != nil {
		t.Fatalf("Failed to remove temp dir: %v", err)
	}

	if _, err := os.Stat(testFile); !os.IsNotExist(err) {
		t.Error("Test file should be cleaned up")
	}
}
267 |
--------------------------------------------------------------------------------
/internal/adapter/local.go:
--------------------------------------------------------------------------------
1 | // OpenWebUI Content Sync
2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors
3 | //
4 | // This program is free software: you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation, either version 3 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | package adapter
18 |
19 | import (
20 | "context"
21 | "crypto/sha256"
22 | "fmt"
23 | "io/fs"
24 | "os"
25 | "path/filepath"
26 | "strings"
27 | "time"
28 |
29 | "github.com/openwebui-content-sync/internal/config"
30 | "github.com/sirupsen/logrus"
31 | )
32 |
33 | // LocalFolderAdapter implements the Adapter interface for local folders
34 | type LocalFolderAdapter struct {
35 | config config.LocalFolderConfig
36 | lastSync time.Time
37 | folders []string
38 | mappings map[string]string // folder_path -> knowledge_id mapping
39 | }
40 |
41 | // NewLocalFolderAdapter creates a new local folder adapter
42 | func NewLocalFolderAdapter(cfg config.LocalFolderConfig) (*LocalFolderAdapter, error) {
43 | if !cfg.Enabled {
44 | return nil, fmt.Errorf("local folder adapter is disabled")
45 | }
46 |
47 | // Build folder mappings
48 | mappings := make(map[string]string)
49 | folders := []string{}
50 |
51 | // Process mappings
52 | for _, mapping := range cfg.Mappings {
53 | if mapping.FolderPath != "" && mapping.KnowledgeID != "" {
54 | // Validate folder exists
55 | if _, err := os.Stat(mapping.FolderPath); os.IsNotExist(err) {
56 | return nil, fmt.Errorf("folder does not exist: %s", mapping.FolderPath)
57 | }
58 | mappings[mapping.FolderPath] = mapping.KnowledgeID
59 | folders = append(folders, mapping.FolderPath)
60 | }
61 | }
62 |
63 | if len(folders) == 0 {
64 | return nil, fmt.Errorf("at least one local folder mapping must be configured")
65 | }
66 |
67 | return &LocalFolderAdapter{
68 | config: cfg,
69 | folders: folders,
70 | mappings: mappings,
71 | lastSync: time.Now().Add(-24 * time.Hour), // Default to 24 hours ago
72 | }, nil
73 | }
74 |
75 | // Name returns the adapter name
76 | func (l *LocalFolderAdapter) Name() string {
77 | return "local"
78 | }
79 |
80 | // FetchFiles retrieves files from local folders
81 | func (l *LocalFolderAdapter) FetchFiles(ctx context.Context) ([]*File, error) {
82 | var files []*File
83 |
84 | for _, folder := range l.folders {
85 | logrus.Debugf("Fetching files from local folder: %s", folder)
86 | knowledgeID := l.mappings[folder]
87 | folderFiles, err := l.fetchFolderFiles(ctx, folder, knowledgeID)
88 | if err != nil {
89 | return nil, fmt.Errorf("failed to fetch files from folder %s: %w", folder, err)
90 | }
91 | logrus.Debugf("Found %d files in folder %s (knowledge_id: %s)", len(folderFiles), folder, knowledgeID)
92 | files = append(files, folderFiles...)
93 | }
94 |
95 | logrus.Debugf("Total files fetched: %d", len(files))
96 | return files, nil
97 | }
98 |
99 | // fetchFolderFiles fetches files from a specific folder recursively
100 | func (l *LocalFolderAdapter) fetchFolderFiles(ctx context.Context, folderPath string, knowledgeID string) ([]*File, error) {
101 | var files []*File
102 |
103 | err := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error {
104 | if err != nil {
105 | logrus.Warnf("Error accessing path %s: %v", path, err)
106 | return nil // Continue walking
107 | }
108 |
109 | // Skip directories
110 | if d.IsDir() {
111 | return nil
112 | }
113 |
114 | // Skip hidden files and common ignore patterns
115 | baseName := filepath.Base(path)
116 | if strings.HasPrefix(baseName, ".") || l.shouldIgnoreFile(baseName) {
117 | return nil
118 | }
119 |
120 | // Read file content
121 | content, err := os.ReadFile(path)
122 | if err != nil {
123 | logrus.Warnf("Failed to read file %s: %v", path, err)
124 | return nil
125 | }
126 |
127 | // Skip binary files (basic check)
128 | if l.isBinaryFile(content) {
129 | logrus.Debugf("Skipping binary file: %s", path)
130 | return nil
131 | }
132 |
133 | // Get file info
134 | info, err := d.Info()
135 | if err != nil {
136 | logrus.Warnf("Failed to get file info for %s: %v", path, err)
137 | return nil
138 | }
139 |
140 | // Calculate relative path from the folder root
141 | relPath, err := filepath.Rel(folderPath, path)
142 | if err != nil {
143 | logrus.Warnf("Failed to calculate relative path for %s: %v", path, err)
144 | return nil
145 | }
146 |
147 | // Calculate hash
148 | hash := fmt.Sprintf("%x", sha256.Sum256(content))
149 |
150 | file := &File{
151 | Path: relPath,
152 | Content: content,
153 | Hash: hash,
154 | Modified: info.ModTime(),
155 | Size: info.Size(),
156 | Source: fmt.Sprintf("local:%s", folderPath),
157 | KnowledgeID: knowledgeID,
158 | }
159 |
160 | files = append(files, file)
161 | return nil
162 | })
163 |
164 | if err != nil {
165 | return nil, fmt.Errorf("failed to walk directory %s: %w", folderPath, err)
166 | }
167 |
168 | return files, nil
169 | }
170 |
171 | // shouldIgnoreFile checks if a file should be ignored based on common patterns
172 | func (l *LocalFolderAdapter) shouldIgnoreFile(filename string) bool {
173 | // Check for hidden files (starting with .)
174 | if strings.HasPrefix(filename, ".") {
175 | return true
176 | }
177 |
178 | ignorePatterns := []string{
179 | "node_modules", "vendor", ".git", ".svn", ".hg",
180 | "__pycache__", ".pytest_cache", ".coverage",
181 | }
182 |
183 | // Check for specific patterns
184 | lowerName := strings.ToLower(filename)
185 | for _, pattern := range ignorePatterns {
186 | if strings.Contains(lowerName, pattern) {
187 | return true
188 | }
189 | }
190 |
191 | // Check for specific filenames
192 | specificFiles := []string{"thumbs.db", ".ds_store", "desktop.ini"}
193 | for _, file := range specificFiles {
194 | if lowerName == file {
195 | return true
196 | }
197 | }
198 |
199 | // Check for file extensions
200 | extensions := []string{".log", ".tmp", ".temp", ".swp", ".swo"}
201 | for _, ext := range extensions {
202 | if strings.HasSuffix(lowerName, ext) {
203 | return true
204 | }
205 | }
206 |
207 | return false
208 | }
209 |
210 | // isBinaryFile checks if content appears to be binary
211 | func (l *LocalFolderAdapter) isBinaryFile(content []byte) bool {
212 | if len(content) == 0 {
213 | return false
214 | }
215 |
216 | // Check for null bytes (common in binary files)
217 | for i := 0; i < len(content) && i < 1024; i++ {
218 | if content[i] == 0 {
219 | return true
220 | }
221 | }
222 |
223 | // Check for high ratio of non-printable characters
224 | nonPrintable := 0
225 | checkLen := len(content)
226 | if checkLen > 1024 {
227 | checkLen = 1024
228 | }
229 |
230 | for i := 0; i < checkLen; i++ {
231 | if content[i] < 32 && content[i] != 9 && content[i] != 10 && content[i] != 13 {
232 | nonPrintable++
233 | }
234 | }
235 |
236 | // If more than 30% of characters are non-printable, consider it binary
237 | return float64(nonPrintable)/float64(checkLen) > 0.3
238 | }
239 |
240 | // GetLastSync returns the last sync time
241 | func (l *LocalFolderAdapter) GetLastSync() time.Time {
242 | return l.lastSync
243 | }
244 |
245 | // SetLastSync sets the last sync time
246 | func (l *LocalFolderAdapter) SetLastSync(t time.Time) {
247 | l.lastSync = t
248 | }
249 |
--------------------------------------------------------------------------------
/adapter_readme/CONFLUENCE_ADAPTER.md:
--------------------------------------------------------------------------------
1 | # Confluence Adapter
2 |
3 | The Confluence adapter allows you to sync content from Atlassian Confluence spaces into OpenWebUI knowledge bases. This adapter uses the Confluence REST API v2 to fetch pages and optionally attachments from specified Confluence spaces and uploads them to OpenWebUI.
4 |
5 | ## API Compatibility
6 |
7 | This adapter uses Confluence REST API v2, which provides:
8 | - Modern cursor-based pagination
9 | - Improved performance and reliability
10 | - Better support for large spaces
11 | - Enhanced metadata and content structure
12 |
13 | ## Features
14 |
15 | - **Page Content Sync**: Fetches all pages from specified Confluence spaces using Confluence API v2
16 | - **Attachment Support**: Optionally downloads and syncs page attachments
17 | - **HTML to Text Conversion**: Converts Confluence's HTML content to plain text
18 | - **Incremental Sync**: Tracks last sync time to avoid re-processing content
19 | - **Multi-Space Support**: Can sync from multiple Confluence spaces
20 | - **Configurable Limits**: Set page limits and control attachment inclusion
21 | - **Cursor-based Pagination**: Uses modern cursor-based pagination for efficient data retrieval
22 |
23 | ## Configuration
24 |
25 | ### YAML Configuration
26 |
27 | Add the following to your `config.yaml`:
28 |
29 | ```yaml
30 | confluence:
31 | enabled: true
32 | base_url: "https://your-domain.atlassian.net"
33 | username: "your-email@example.com"
34 | api_key: "your-confluence-api-key"
35 | spaces:
36 | - "SPACEKEY1"
37 | - "SPACEKEY2"
38 | knowledge_id: "your-knowledge-base-id"
39 | page_limit: 100
40 | include_attachments: true
41 | include_blog_posts: false
42 | ```
43 |
44 | ### Environment Variables
45 |
46 | Only the API key can be configured via environment variable (for security):
47 |
48 | ```bash
49 | CONFLUENCE_API_KEY="your-confluence-api-key"
50 | ```
51 |
52 | All other configuration should be done in the `config.yaml` file.
53 |
54 | ### Kubernetes Configuration
55 |
56 | #### ConfigMap
57 |
58 | ```yaml
59 | apiVersion: v1
60 | kind: ConfigMap
61 | metadata:
62 | name: connector-config
63 | data:
64 | config.yaml: |
65 | confluence:
66 | enabled: true
67 | base_url: "https://your-domain.atlassian.net"
68 | username: "your-email@example.com"
69 | spaces:
70 | - "SPACEKEY1"
71 | - "SPACEKEY2"
72 | knowledge_id: "your-knowledge-base-id"
73 | page_limit: 100
74 | include_attachments: true
75 | use_markdown_parser: true
76 | ```
77 |
78 | #### Secrets
79 |
80 | ```yaml
81 | apiVersion: v1
82 | kind: Secret
83 | metadata:
84 | name: confluence-secrets
85 | type: Opaque
86 | data:
87 | api-key: <base64-encoded-api-key>
88 | ```
89 |
90 | ## Authentication
91 |
92 | The Confluence adapter uses Basic Authentication with your Confluence username and API key. To get an API key:
93 |
94 | 1. Go to [Atlassian Account Settings](https://id.atlassian.com/manage-profile/security/api-tokens)
95 | 2. Click "Create API token"
96 | 3. Give it a label and copy the generated token
97 | 4. Use your email address as the username and the token as the API key
98 |
99 | ## Configuration Parameters
100 |
101 | | Parameter | Type | Required | Default | Description |
102 | |-----------|------|----------|---------|-------------|
103 | | `enabled` | boolean | No | `false` | Enable the Confluence adapter |
104 | | `base_url` | string | Yes | - | Your Confluence instance URL (e.g., `https://your-domain.atlassian.net`) |
105 | | `username` | string | Yes | - | Your Confluence username (usually your email) |
106 | | `api_key` | string | Yes | - | Your Confluence API key |
107 | | `spaces` | array | Yes | - | List of Confluence space keys to sync |
108 | | `knowledge_id` | string | No | - | OpenWebUI knowledge base ID to sync content to |
109 | | `page_limit` | integer | No | `100` | Maximum number of pages to fetch per space |
110 | | `include_attachments` | boolean | No | `true` | Whether to download and sync page attachments |
111 | | `include_blog_posts` | boolean | No | `false` | Whether to download and sync blog posts |
112 | | `use_markdown_parser` | boolean | No | `false` | Whether to use markdown parser for HTML content conversion (true = markdown, false = plain text) |
113 | | `add_additional_data` | boolean | No | `false` | Whether to fetch additional user data (display names) for pages and blog posts |
114 |
115 | ## File Processing
116 |
117 | ### Page Content
118 |
119 | - Confluence pages are converted from HTML to plain text, or to Markdown when `use_markdown_parser` is enabled
120 | - Pages are saved as `.md` files with sanitized filenames
121 | - File paths follow the pattern: `{space}/{page-title}.md`
122 |
123 | ### Attachments
124 |
125 | - Only text-based attachments are processed (based on file extension)
126 | - Binary files are skipped
127 | - Attachments are saved in: `{space}/attachments/{filename}`
128 |
129 | ### Supported File Types
130 |
131 | The adapter processes the following file types:
132 | - Markdown (`.md`)
133 | - Text (`.txt`)
134 | - JSON (`.json`)
135 | - YAML (`.yaml`, `.yml`)
136 | - Code files (`.go`, `.py`, `.js`, `.ts`, `.java`, etc.)
137 | - Configuration files (`.env`, `.gitignore`, etc.)
138 | - And many more text-based formats
139 |
140 | ## Error Handling
141 |
142 | - **Authentication Errors**: Invalid credentials will cause the adapter to fail initialization
143 | - **API Errors**: HTTP errors from Confluence API are logged and may cause individual page/attachment processing to fail
144 | - **File Processing Errors**: Individual file processing errors are logged but don't stop the overall sync
145 | - **Network Errors**: Connection timeouts and network issues are handled gracefully
146 |
147 | ## Logging
148 |
149 | The adapter provides detailed logging at the debug level:
150 |
151 | ```
152 | DEBUG: Fetching files from Confluence space: SPACEKEY1
153 | DEBUG: Found 25 files in space SPACEKEY1
154 | DEBUG: Processing page: Page Title
155 | DEBUG: Downloading attachment: document.pdf
156 | ```
157 |
158 | ## Limitations
159 |
160 | 1. **API Rate Limits**: Confluence has API rate limits that may affect sync performance
161 | 2. **Large Spaces**: Very large spaces with many pages may take significant time to sync
162 | 3. **HTML Conversion**: The HTML to text conversion is basic and may not preserve all formatting
163 | 4. **Attachment Size**: Large attachments may cause memory issues or timeouts
164 |
165 | ## Troubleshooting
166 |
167 | ### Common Issues
168 |
169 | 1. **Authentication Failed**
170 | - Verify your username and API key are correct
171 | - Ensure your API key has the necessary permissions
172 |
173 | 2. **Space Not Found**
174 | - Check that the space key is correct
175 | - Verify you have access to the space
176 |
177 | 3. **No Content Synced**
178 | - Check that the space contains pages
179 | - Verify the `page_limit` setting is appropriate
180 | - Check logs for API errors
181 |
182 | 4. **Attachments Not Synced**
183 | - Ensure `include_attachments` is set to `true`
184 | - Check that attachments are text-based files
185 | - Verify you have download permissions for attachments
186 |
187 | ### Debug Mode
188 |
189 | Enable debug logging to see detailed information about the sync process:
190 |
191 | ```yaml
192 | log_level: debug
193 | ```
194 |
195 | ## Example Usage
196 |
197 | ### Basic Configuration
198 |
199 | ```yaml
200 | confluence:
201 | enabled: true
202 | base_url: "https://mycompany.atlassian.net"
203 | username: "john.doe@mycompany.com"
204 | api_key: "ATATT3xFfGF0..."
205 | spaces:
206 | - "DOCS"
207 | - "WIKI"
208 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583"
209 | ```
210 |
211 | ### Advanced Configuration
212 |
213 | ```yaml
214 | confluence:
215 | enabled: true
216 | base_url: "https://mycompany.atlassian.net"
217 | username: "john.doe@mycompany.com"
218 | api_key: "ATATT3xFfGF0..."
219 | spaces:
220 | - "DOCS"
221 | - "WIKI"
222 | - "PROJECTS"
223 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583"
224 | page_limit: 500
225 | include_attachments: true
226 | ```
227 |
228 | This configuration will sync up to 500 pages from each of the three specified spaces, including all text-based attachments.
229 |
--------------------------------------------------------------------------------
/adapter_readme/LOCAL_ADAPTER.md:
--------------------------------------------------------------------------------
1 | # Local Folder Adapter
2 |
3 | The Local Folder adapter allows you to sync content from local directories on your filesystem into OpenWebUI knowledge bases. This is useful for syncing documentation, notes, or other content stored locally.
4 |
5 | ## Features
6 |
7 | - **Multi-directory support**: Sync from multiple local directories
8 | - **Knowledge base mapping**: Map each directory to a specific OpenWebUI knowledge base
9 | - **Recursive scanning**: Automatically scans subdirectories for content
10 | - **File filtering**: Automatically filters out binary files and common non-content files
11 | - **Incremental sync**: Only processes files that have changed since the last sync
12 | - **Path preservation**: Maintains directory structure in the knowledge base
13 |
14 | ## Configuration
15 |
16 | ### Configuration File
17 |
18 | Add the following section to your `config.yaml`:
19 |
20 | ```yaml
21 | local_folders:
22 | enabled: true
23 | mappings:
24 | - folder_path: "/path/to/documentation"
25 | knowledge_id: "docs-knowledge-base"
26 | - folder_path: "/path/to/notes"
27 | knowledge_id: "notes-knowledge-base"
28 | - folder_path: "/home/user/projects/docs"
29 | knowledge_id: "project-docs"
30 | ```
31 |
32 | ### Configuration Options
33 |
34 | | Option | Type | Required | Default | Description |
35 | |--------|------|----------|---------|-------------|
36 | | `enabled` | boolean | Yes | `false` | Enable/disable the local folder adapter |
37 | | `mappings` | array | Yes | `[]` | List of folder mappings |
38 |
39 | ### Folder Mapping
40 |
41 | Each mapping in the `mappings` array should contain:
42 |
43 | | Field | Type | Required | Description |
44 | |-------|------|----------|-------------|
45 | | `folder_path` | string | Yes | Absolute path to the local directory |
46 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID |
47 |
48 | ## Directory Requirements
49 |
50 | ### Path Format
51 |
52 | - Use **absolute paths** for all directory mappings
53 | - Paths must exist and be readable by the application
54 | - Avoid paths with spaces or special characters (use quotes if necessary)
55 |
56 | ### Permissions
57 |
58 | The application must have:
59 | - **Read access** to all configured directories
60 | - **Execute access** to traverse subdirectories
61 | - **Read access** to all files within the directories
62 |
63 | ## File Processing
64 |
65 | The Local Folder adapter processes files as follows:
66 |
67 | ### Supported File Types
68 |
69 | - **Markdown files** (`.md`, `.markdown`)
70 | - **Text files** (`.txt`, `.text`)
71 | - **Documentation files** (`.rst`, `.adoc`)
72 | - **Code files** (`.py`, `.js`, `.ts`, `.go`, `.java`, `.cpp`, `.c`, `.h`, `.hpp`)
73 | - **Configuration files** (`.yaml`, `.yml`, `.json`, `.toml`, `.ini`, `.cfg`)
74 | - **Shell scripts** (`.sh`, `.bash`, `.zsh`)
75 | - **HTML files** (`.html`, `.htm`)
76 |
77 | ### Excluded Files
78 |
79 | The adapter automatically excludes:
80 | - Binary files (images, videos, executables, etc.)
81 | - Common non-content files (`.gitignore`, `.gitattributes`, etc.)
82 | - Large files (> 1MB)
83 | - Hidden files and directories (starting with `.`)
84 | - Common exclusion directories (`node_modules/`, `vendor/`, `.git/`, etc.)
85 |
86 | ### File Path Structure
87 |
88 | Files are stored with paths that preserve the directory structure:
89 | ```
90 | local/folder-name/subdirectory/file.md
91 | ```
92 |
93 | ## Sync Behavior
94 |
95 | - **Initial sync**: Scans all configured directories and processes all supported files
96 | - **Incremental sync**: Only processes files modified since the last successful sync
97 | - **Error handling**: If a directory fails to sync, other directories continue processing
98 | - **File monitoring**: Uses file modification timestamps to detect changes
99 |
100 | ## Use Cases
101 |
102 | ### Documentation Sync
103 |
104 | Sync local documentation directories:
105 |
106 | ```yaml
107 | local_folders:
108 | enabled: true
109 | mappings:
110 | - folder_path: "/home/user/docs"
111 | knowledge_id: "user-docs"
112 | - folder_path: "/opt/company/docs"
113 | knowledge_id: "company-docs"
114 | ```
115 |
116 | ### Project Documentation
117 |
118 | Sync project-specific documentation:
119 |
120 | ```yaml
121 | local_folders:
122 | enabled: true
123 | mappings:
124 | - folder_path: "/home/user/projects/my-app/docs"
125 | knowledge_id: "my-app-docs"
126 | - folder_path: "/home/user/projects/api-docs"
127 | knowledge_id: "api-docs"
128 | ```
129 |
130 | ### Notes and Knowledge Base
131 |
132 | Sync personal or team notes:
133 |
134 | ```yaml
135 | local_folders:
136 | enabled: true
137 | mappings:
138 | - folder_path: "/home/user/notes"
139 | knowledge_id: "personal-notes"
140 | - folder_path: "/shared/team-notes"
141 | knowledge_id: "team-notes"
142 | ```
143 |
144 | ## Troubleshooting
145 |
146 | ### Common Issues
147 |
148 | 1. **Directory not found**
149 | - Verify the directory path exists and is accessible
150 | - Check that the path is absolute (starts with `/`)
151 | - Ensure the application has read permissions
152 |
153 | 2. **Permission denied**
154 | - Check file and directory permissions
155 | - Ensure the application user can read the directories
156 | - Verify execute permissions on parent directories
157 |
158 | 3. **Empty knowledge base**
159 | - Check that directories contain supported file types
160 | - Verify files are not hidden or in excluded directories
161 | - Check file size limits (files > 1MB are excluded)
162 |
163 | 4. **Sync not updating**
164 | - Verify file modification timestamps are updating
165 | - Check that files are being modified (not just accessed)
166 | - Ensure the application has write access to the storage directory
167 |
168 | ### Debug Logging
169 |
170 | Enable debug logging to see detailed sync information:
171 |
172 | ```yaml
173 | log_level: debug
174 | ```
175 |
176 | This will show:
177 | - Which directories are being scanned
178 | - File discovery and filtering details
179 | - File processing progress
180 | - Sync timing and statistics
181 |
182 | ## Security Considerations
183 |
184 | - **File access**: Only sync directories that contain appropriate content
185 | - **Path traversal**: The adapter validates paths to prevent directory traversal attacks
186 | - **Content filtering**: Review the content being synced to ensure it's appropriate
187 | - **Permissions**: Run the application with minimal required permissions
188 |
189 | ## Performance Tips
190 |
191 | - **Directory size**: Large directories with many files may take longer to sync
192 | - **File filtering**: The adapter automatically filters out unnecessary files
193 | - **Incremental sync**: Only changed files are processed after the initial sync
194 | - **Storage location**: Use fast storage for the application's data directory
195 |
196 | ## Example Configuration
197 |
198 | ```yaml
199 | # Complete example configuration
200 | log_level: info
201 | schedule:
202 | interval: 30m
203 |
204 | storage:
205 | path: "/data"
206 |
207 | openwebui:
208 | base_url: "http://localhost:8080"
209 | api_key: "your-openwebui-api-key"
210 |
211 | local_folders:
212 | enabled: true
213 | mappings:
214 | - folder_path: "/home/user/docs"
215 | knowledge_id: "user-docs"
216 | - folder_path: "/opt/company/knowledge-base"
217 | knowledge_id: "company-kb"
218 | - folder_path: "/shared/project-docs"
219 | knowledge_id: "project-docs"
220 | ```
221 |
222 | ## Docker Considerations
223 |
224 | When running in Docker, ensure that:
225 |
226 | 1. **Volume mounts** are properly configured for local directories
227 | 2. **Permissions** are set correctly for the container user
228 | 3. **Paths** are accessible from within the container
229 |
230 | Example Docker volume mount:
231 |
232 | ```yaml
233 | volumes:
234 | - /host/path/to/docs:/container/path/to/docs:ro
235 | ```
236 |
237 | ## File System Monitoring
238 |
239 | The adapter uses file modification timestamps to detect changes. For optimal performance:
240 |
241 | - Avoid frequently modifying files unnecessarily
242 | - Use proper file locking when editing files
243 | - Consider using a file system that supports efficient timestamp updates
244 | - Monitor disk space to ensure sufficient storage for the application
245 |
--------------------------------------------------------------------------------
/internal/adapter/github.go:
--------------------------------------------------------------------------------
1 | package adapter
2 |
3 | import (
4 | "context"
5 | "crypto/sha256"
6 | "fmt"
7 | "io"
8 | "path/filepath"
9 | "strings"
10 | "time"
11 |
12 | "github.com/google/go-github/v56/github"
13 | "github.com/openwebui-content-sync/internal/config"
14 | "github.com/sirupsen/logrus"
15 | "golang.org/x/oauth2"
16 | )
17 |
18 | // GitHubAdapter implements the Adapter interface for GitHub repositories
19 | type GitHubAdapter struct {
20 | client *github.Client
21 | config config.GitHubConfig
22 | lastSync time.Time
23 | repositories []string
24 | mappings map[string]string // repository -> knowledge_id mapping
25 | }
26 |
27 | // NewGitHubAdapter creates a new GitHub adapter
28 | func NewGitHubAdapter(cfg config.GitHubConfig) (*GitHubAdapter, error) {
29 | if cfg.Token == "" {
30 | return nil, fmt.Errorf("GitHub token is required")
31 | }
32 |
33 | ctx := context.Background()
34 | ts := oauth2.StaticTokenSource(
35 | &oauth2.Token{AccessToken: cfg.Token},
36 | )
37 | tc := oauth2.NewClient(ctx, ts)
38 |
39 | client := github.NewClient(tc)
40 |
41 | // Build repository mappings
42 | mappings := make(map[string]string)
43 | repos := []string{}
44 |
45 | // Process mappings
46 | for _, mapping := range cfg.Mappings {
47 | if mapping.Repository != "" && mapping.KnowledgeID != "" {
48 | mappings[mapping.Repository] = mapping.KnowledgeID
49 | repos = append(repos, mapping.Repository)
50 | }
51 | }
52 |
53 | if len(repos) == 0 {
54 | return nil, fmt.Errorf("at least one repository mapping must be configured")
55 | }
56 |
57 | return &GitHubAdapter{
58 | client: client,
59 | config: cfg,
60 | repositories: repos,
61 | mappings: mappings,
62 | lastSync: time.Now().Add(-24 * time.Hour), // Default to 24 hours ago
63 | }, nil
64 | }
65 |
66 | // Name returns the adapter name
67 | func (g *GitHubAdapter) Name() string {
68 | return "github"
69 | }
70 |
71 | // FetchFiles retrieves files from GitHub repositories
72 | func (g *GitHubAdapter) FetchFiles(ctx context.Context) ([]*File, error) {
73 | var files []*File
74 |
75 | for _, repo := range g.repositories {
76 | logrus.Debugf("Fetching files from repository: %s", repo)
77 | knowledgeID := g.mappings[repo]
78 | repoFiles, err := g.fetchRepositoryFiles(ctx, repo, knowledgeID)
79 | if err != nil {
80 | return nil, fmt.Errorf("failed to fetch files from repository %s: %w", repo, err)
81 | }
82 | logrus.Debugf("Found %d files in repository %s (knowledge_id: %s)", len(repoFiles), repo, knowledgeID)
83 | files = append(files, repoFiles...)
84 | }
85 |
86 | logrus.Debugf("Total files fetched: %d", len(files))
87 | return files, nil
88 | }
89 |
90 | // fetchRepositoryFiles fetches files from a specific repository
91 | func (g *GitHubAdapter) fetchRepositoryFiles(ctx context.Context, repo string, knowledgeID string) ([]*File, error) {
92 | parts := strings.Split(repo, "/")
93 | if len(parts) != 2 {
94 | return nil, fmt.Errorf("invalid repository format, expected 'owner/repo'")
95 | }
96 |
97 | owner, repoName := parts[0], parts[1]
98 |
99 | // Get repository contents
100 | _, contents, _, err := g.client.Repositories.GetContents(ctx, owner, repoName, "", nil)
101 | if err != nil {
102 | return nil, fmt.Errorf("failed to get repository contents: %w", err)
103 | }
104 |
105 | var files []*File
106 | for _, content := range contents {
107 | fileList, err := g.processContent(ctx, owner, repoName, content, "", knowledgeID)
108 | if err != nil {
109 | continue // Skip files that can't be processed
110 | }
111 | if fileList != nil {
112 | files = append(files, fileList...)
113 | }
114 | }
115 |
116 | return files, nil
117 | }
118 |
119 | // processContent processes a GitHub content item recursively
120 | func (g *GitHubAdapter) processContent(ctx context.Context, owner, repo string, content *github.RepositoryContent, path string, knowledgeID string) ([]*File, error) {
121 | if content == nil {
122 | return nil, nil
123 | }
124 |
125 | currentPath := filepath.Join(path, content.GetName())
126 |
127 | // Skip binary files and non-text files
128 | if content.GetType() == "file" {
129 | // Check if it's a text file
130 | if !isTextFile(content.GetName()) {
131 | return nil, nil
132 | }
133 |
134 | // Get file content
135 | fileContent, err := g.getFileContent(ctx, owner, repo, content)
136 | if err != nil {
137 | return nil, fmt.Errorf("failed to get file content: %w", err)
138 | }
139 |
140 | // Calculate hash
141 | hash := fmt.Sprintf("%x", sha256.Sum256(fileContent))
142 |
143 | return []*File{{
144 | Path: currentPath,
145 | Content: fileContent,
146 | Hash: hash,
147 | Modified: time.Now(), // GitHub API doesn't provide modification time for content
148 | Size: int64(len(fileContent)),
149 | Source: fmt.Sprintf("%s/%s", owner, repo),
150 | KnowledgeID: knowledgeID,
151 | }}, nil
152 | }
153 |
154 | // If it's a directory, recurse
155 | if content.GetType() == "dir" {
156 | _, contents, _, err := g.client.Repositories.GetContents(ctx, owner, repo, content.GetPath(), nil)
157 | if err != nil {
158 | return nil, fmt.Errorf("failed to get directory contents: %w", err)
159 | }
160 |
161 | var allFiles []*File
162 | for _, subContent := range contents {
163 | files, err := g.processContent(ctx, owner, repo, subContent, currentPath, knowledgeID)
164 | if err != nil {
165 | continue
166 | }
167 | if files != nil {
168 | allFiles = append(allFiles, files...)
169 | }
170 | }
171 |
172 | return allFiles, nil
173 | }
174 |
175 | return nil, nil
176 | }
177 |
178 | // getFileContent retrieves the actual content of a file
179 | func (g *GitHubAdapter) getFileContent(ctx context.Context, owner, repo string, content *github.RepositoryContent) ([]byte, error) {
180 | fileContent, err := content.GetContent()
181 | if err != nil {
182 | return nil, fmt.Errorf("failed to get content: %w", err)
183 | }
184 |
185 | if fileContent != "" {
186 | // Content is already available (for small files)
187 | return []byte(fileContent), nil
188 | }
189 |
190 | // For larger files, we need to download them
191 | url := content.GetDownloadURL()
192 | if url == "" {
193 | return nil, fmt.Errorf("no download URL available for file")
194 | }
195 |
196 | resp, err := g.client.Client().Get(url)
197 | if err != nil {
198 | return nil, fmt.Errorf("failed to download file: %w", err)
199 | }
200 | defer resp.Body.Close()
201 |
202 | return io.ReadAll(resp.Body)
203 | }
204 |
// isTextFile reports whether filename is likely to be a text file, judging
// only by its name. Matching is case-insensitive.
//
// A name is accepted when it
//   - ends in one of the compound dotenv suffixes (".env.example",
//     ".env.local", ".env.production", ".env.development", ".env.test"),
//   - has no extension at all (for example "Makefile"), or
//   - has one of the known text extensions listed below.
//
// The compound suffixes are checked separately because filepath.Ext returns
// only the final dot segment (".example" for ".env.example"), so they can
// never be matched as an extension.
func isTextFile(filename string) bool {
	lowerName := strings.ToLower(filename)
	for _, suffix := range [...]string{
		".env.example", ".env.local", ".env.production", ".env.development", ".env.test",
	} {
		if strings.HasSuffix(lowerName, suffix) {
			return true
		}
	}

	switch strings.ToLower(filepath.Ext(filename)) {
	case "", // no extension
		".md", ".txt", ".json", ".yaml", ".yml",
		".go", ".py", ".js", ".ts", ".java",
		".cpp", ".c", ".h", ".hpp", ".cs",
		".php", ".rb", ".rs", ".swift", ".kt", ".scala",
		".sh", ".bash", ".zsh", ".fish", ".ps1",
		".sql", ".xml", ".html",
		".css", ".scss", ".sass", ".less",
		".dockerfile", ".gitignore", ".gitattributes", ".editorconfig",
		".env":
		return true
	}

	return false
}
258 |
259 | // GetLastSync returns the last sync timestamp
260 | func (g *GitHubAdapter) GetLastSync() time.Time {
261 | return g.lastSync
262 | }
263 |
264 | // SetLastSync updates the last sync timestamp
265 | func (g *GitHubAdapter) SetLastSync(t time.Time) {
266 | g.lastSync = t
267 | }
268 |
--------------------------------------------------------------------------------
/internal/adapter/slack_test.go:
--------------------------------------------------------------------------------
1 | // OpenWebUI Content Sync
2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors
3 | //
4 | // This program is free software: you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation, either version 3 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | package adapter
18 |
19 | import (
20 | "context"
21 | "os"
22 | "path/filepath"
23 | "testing"
24 | "time"
25 |
26 | "github.com/openwebui-content-sync/internal/config"
27 | )
28 |
29 | func TestNewSlackAdapter(t *testing.T) {
30 | // Create a temporary directory for testing
31 | tempDir := t.TempDir()
32 |
33 | tests := []struct {
34 | name string
35 | config config.SlackConfig
36 | expectError bool
37 | }{
38 | {
39 | name: "valid config",
40 | config: config.SlackConfig{
41 | Enabled: true,
42 | Token: "xoxb-test-token",
43 | ChannelMappings: []config.ChannelMapping{
44 | {
45 | ChannelID: "C1234567890",
46 | ChannelName: "test-channel",
47 | KnowledgeID: "test-knowledge",
48 | },
49 | },
50 | DaysToFetch: 30,
51 | MaintainHistory: false,
52 | MessageLimit: 1000,
53 | IncludeThreads: true,
54 | IncludeReactions: false,
55 | },
56 | expectError: false,
57 | },
58 | {
59 | name: "missing token",
60 | config: config.SlackConfig{
61 | Enabled: true,
62 | Token: "",
63 | },
64 | expectError: true,
65 | },
66 | {
67 | name: "disabled adapter",
68 | config: config.SlackConfig{
69 | Enabled: false,
70 | Token: "",
71 | },
72 | expectError: false, // Should not error even without token when disabled
73 | },
74 | }
75 |
76 | for _, tt := range tests {
77 | t.Run(tt.name, func(t *testing.T) {
78 | adapter, err := NewSlackAdapter(tt.config, tempDir)
79 |
80 | if tt.expectError {
81 | if err == nil {
82 | t.Errorf("Expected error but got none")
83 | }
84 | return
85 | }
86 |
87 | if err != nil {
88 | t.Errorf("Unexpected error: %v", err)
89 | return
90 | }
91 |
92 | if adapter == nil {
93 | t.Errorf("Expected adapter but got nil")
94 | return
95 | }
96 |
97 | // Test basic methods
98 | if adapter.Name() != "slack" {
99 | t.Errorf("Expected adapter name 'slack', got '%s'", adapter.Name())
100 | }
101 |
102 | // Test GetLastSync returns zero time initially
103 | if !adapter.GetLastSync().IsZero() {
104 | t.Errorf("Expected zero time for GetLastSync, got %v", adapter.GetLastSync())
105 | }
106 |
107 | // Test SetLastSync
108 | testTime := time.Now()
109 | adapter.SetLastSync(testTime)
110 | if !adapter.GetLastSync().Equal(testTime) {
111 | t.Errorf("Expected GetLastSync to return %v, got %v", testTime, adapter.GetLastSync())
112 | }
113 | })
114 | }
115 | }
116 |
117 | func TestSlackAdapter_FetchFiles_NoToken(t *testing.T) {
118 | tempDir := t.TempDir()
119 |
120 | config := config.SlackConfig{
121 | Enabled: true,
122 | Token: "", // No token
123 | }
124 |
125 | _, err := NewSlackAdapter(config, tempDir)
126 | if err == nil {
127 | t.Errorf("Expected error for missing token, got none")
128 | return
129 | }
130 | }
131 |
132 | func TestSlackAdapter_FetchFiles_Disabled(t *testing.T) {
133 | tempDir := t.TempDir()
134 |
135 | config := config.SlackConfig{
136 | Enabled: false,
137 | Token: "xoxb-test-token",
138 | }
139 |
140 | adapter, err := NewSlackAdapter(config, tempDir)
141 | if err != nil {
142 | t.Errorf("Unexpected error: %v", err)
143 | return
144 | }
145 |
146 | // FetchFiles should return empty slice when disabled
147 | files, err := adapter.FetchFiles(context.Background())
148 | if err != nil {
149 | t.Errorf("Unexpected error: %v", err)
150 | return
151 | }
152 |
153 | if len(files) != 0 {
154 | t.Errorf("Expected empty files slice when disabled, got %d files", len(files))
155 | }
156 | }
157 |
158 | func TestSlackAdapter_StorageDirectory(t *testing.T) {
159 | tempDir := t.TempDir()
160 |
161 | config := config.SlackConfig{
162 | Enabled: true,
163 | Token: "xoxb-test-token",
164 | ChannelMappings: []config.ChannelMapping{
165 | {
166 | ChannelID: "C1234567890",
167 | ChannelName: "test-channel",
168 | KnowledgeID: "test-knowledge",
169 | },
170 | },
171 | }
172 |
173 | adapter, err := NewSlackAdapter(config, tempDir)
174 | if err != nil {
175 | t.Errorf("Unexpected error: %v", err)
176 | return
177 | }
178 |
179 | // Check that adapter was created successfully
180 | if adapter == nil {
181 | t.Errorf("Expected adapter but got nil")
182 | }
183 |
184 | // Check that storage directory was created
185 | expectedStorageDir := filepath.Join(tempDir, "slack", "channels")
186 | if _, err := os.Stat(expectedStorageDir); os.IsNotExist(err) {
187 | t.Errorf("Expected storage directory %s to be created", expectedStorageDir)
188 | }
189 | }
190 |
191 | func TestSanitizeChannelName(t *testing.T) {
192 | tests := []struct {
193 | input string
194 | expected string
195 | }{
196 | {"#general", "general"},
197 | {"dev-team", "dev-team"},
198 | {"test channel", "test_channel"},
199 | {"test@channel", "test_channel"},
200 | {"test#channel", "test_channel"},
201 | {"test/channel", "test_channel"},
202 | {"test\\channel", "test_channel"},
203 | {"test:channel", "test_channel"},
204 | {"test*channel", "test_channel"},
205 | {"test?channel", "test_channel"},
206 | {"test", "test_channel"},
207 | {"test|channel", "test_channel"},
208 | {"test\"channel\"", "test_channel"},
209 | {"", ""},
210 | }
211 |
212 | for _, tt := range tests {
213 | t.Run(tt.input, func(t *testing.T) {
214 | result := sanitizeChannelName(tt.input)
215 | if result != tt.expected {
216 | t.Errorf("sanitizeChannelName(%q) = %q, expected %q", tt.input, result, tt.expected)
217 | }
218 | })
219 | }
220 | }
221 |
222 | func TestSlackAdapter_InterfaceCompliance(t *testing.T) {
223 | tempDir := t.TempDir()
224 |
225 | config := config.SlackConfig{
226 | Enabled: true,
227 | Token: "xoxb-test-token",
228 | ChannelMappings: []config.ChannelMapping{
229 | {
230 | ChannelID: "C1234567890",
231 | ChannelName: "test-channel",
232 | KnowledgeID: "test-knowledge",
233 | },
234 | },
235 | }
236 |
237 | adapter, err := NewSlackAdapter(config, tempDir)
238 | if err != nil {
239 | t.Errorf("Unexpected error: %v", err)
240 | return
241 | }
242 |
243 | // Test that SlackAdapter implements the Adapter interface
244 | var _ Adapter = adapter
245 |
246 | // Test all interface methods exist and work
247 | ctx := context.Background()
248 |
249 | // Name method
250 | name := adapter.Name()
251 | if name != "slack" {
252 | t.Errorf("Expected name 'slack', got '%s'", name)
253 | }
254 |
255 | // GetLastSync method
256 | syncTime := adapter.GetLastSync()
257 | if !syncTime.IsZero() {
258 | t.Errorf("Expected zero time for GetLastSync, got %v", syncTime)
259 | }
260 |
261 | // SetLastSync method
262 | testTime := time.Now()
263 | adapter.SetLastSync(testTime)
264 | if !adapter.GetLastSync().Equal(testTime) {
265 | t.Errorf("Expected GetLastSync to return %v, got %v", testTime, adapter.GetLastSync())
266 | }
267 |
268 | // FetchFiles method (this will fail with actual API call, but we can test the method exists)
269 | // We'll use a context with timeout to avoid hanging
270 | ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
271 | defer cancel()
272 |
273 | _, err = adapter.FetchFiles(ctx)
274 | // We expect an error due to timeout or invalid token, but the method should exist
275 | if err == nil {
276 | t.Log("FetchFiles completed without error (unexpected)")
277 | }
278 | }
279 |
280 | // Benchmark tests
281 | func BenchmarkSanitizeChannelName(b *testing.B) {
282 | testName := "#test-channel-with-special-chars!@#$%^&*()"
283 | for i := 0; i < b.N; i++ {
284 | sanitizeChannelName(testName)
285 | }
286 | }
287 |
288 | func BenchmarkSlackAdapter_Creation(b *testing.B) {
289 | tempDir := b.TempDir()
290 |
291 | config := config.SlackConfig{
292 | Enabled: true,
293 | Token: "xoxb-test-token",
294 | ChannelMappings: []config.ChannelMapping{
295 | {
296 | ChannelID: "C1234567890",
297 | ChannelName: "test-channel",
298 | KnowledgeID: "test-knowledge",
299 | },
300 | },
301 | }
302 |
303 | b.ResetTimer()
304 | for i := 0; i < b.N; i++ {
305 | adapter, err := NewSlackAdapter(config, tempDir)
306 | if err != nil {
307 | b.Errorf("Unexpected error: %v", err)
308 | }
309 | if adapter == nil {
310 | b.Errorf("Expected adapter but got nil")
311 | }
312 | }
313 | }
314 |
--------------------------------------------------------------------------------
/internal/config/config_test.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | "testing"
7 | "time"
8 | )
9 |
10 | func TestLoad_DefaultConfig(t *testing.T) {
11 | // Test loading with non-existent file (should use defaults)
12 | cfg, err := Load("non-existent-config.yaml")
13 | if err != nil {
14 | t.Fatalf("Failed to load default config: %v", err)
15 | }
16 |
17 | // Check default values
18 | if cfg.LogLevel != "info" {
19 | t.Errorf("Expected log level 'info', got '%s'", cfg.LogLevel)
20 | }
21 | if cfg.Schedule.Interval != 1*time.Hour {
22 | t.Errorf("Expected schedule interval 1h, got %v", cfg.Schedule.Interval)
23 | }
24 | if cfg.Storage.Path != "/data" {
25 | t.Errorf("Expected storage path '/data', got '%s'", cfg.Storage.Path)
26 | }
27 | if cfg.OpenWebUI.BaseURL != "http://localhost:8080" {
28 | t.Errorf("Expected OpenWebUI base URL 'http://localhost:8080', got '%s'", cfg.OpenWebUI.BaseURL)
29 | }
30 | if cfg.GitHub.Enabled != false {
31 | t.Errorf("Expected GitHub enabled false, got %v", cfg.GitHub.Enabled)
32 | }
33 | }
34 |
35 | func TestLoad_FromFile(t *testing.T) {
36 | // Create temporary config file
37 | tempDir := t.TempDir()
38 | configPath := filepath.Join(tempDir, "config.yaml")
39 |
40 | configContent := `
41 | log_level: debug
42 | schedule:
43 | interval: 2h
44 | storage:
45 | path: /custom/data
46 | openwebui:
47 | base_url: "https://custom.openwebui.com"
48 | api_key: "custom-api-key"
49 | github:
50 | enabled: true
51 | token: "custom-token"
52 | mappings:
53 | - repository: "owner/repo1"
54 | knowledge_id: "custom-knowledge-id"
55 | - repository: "owner/repo2"
56 | knowledge_id: "custom-knowledge-id"
57 | `
58 |
59 | err := os.WriteFile(configPath, []byte(configContent), 0644)
60 | if err != nil {
61 | t.Fatalf("Failed to write config file: %v", err)
62 | }
63 |
64 | cfg, err := Load(configPath)
65 | if err != nil {
66 | t.Fatalf("Failed to load config from file: %v", err)
67 | }
68 |
69 | // Check loaded values
70 | if cfg.LogLevel != "debug" {
71 | t.Errorf("Expected log level 'debug', got '%s'", cfg.LogLevel)
72 | }
73 | if cfg.Schedule.Interval != 2*time.Hour {
74 | t.Errorf("Expected schedule interval 2h, got %v", cfg.Schedule.Interval)
75 | }
76 | if cfg.Storage.Path != "/custom/data" {
77 | t.Errorf("Expected storage path '/custom/data', got '%s'", cfg.Storage.Path)
78 | }
79 | if cfg.OpenWebUI.BaseURL != "https://custom.openwebui.com" {
80 | t.Errorf("Expected OpenWebUI base URL 'https://custom.openwebui.com', got '%s'", cfg.OpenWebUI.BaseURL)
81 | }
82 | if cfg.OpenWebUI.APIKey != "custom-api-key" {
83 | t.Errorf("Expected OpenWebUI API key 'custom-api-key', got '%s'", cfg.OpenWebUI.APIKey)
84 | }
85 | if cfg.GitHub.Enabled != true {
86 | t.Errorf("Expected GitHub enabled true, got %v", cfg.GitHub.Enabled)
87 | }
88 | if cfg.GitHub.Token != "custom-token" {
89 | t.Errorf("Expected GitHub token 'custom-token', got '%s'", cfg.GitHub.Token)
90 | }
91 | if len(cfg.GitHub.Mappings) != 2 {
92 | t.Errorf("Expected 2 repository mappings, got %d", len(cfg.GitHub.Mappings))
93 | }
94 | if cfg.GitHub.Mappings[0].Repository != "owner/repo1" {
95 | t.Errorf("Expected first repository 'owner/repo1', got '%s'", cfg.GitHub.Mappings[0].Repository)
96 | }
97 | if cfg.GitHub.Mappings[0].KnowledgeID != "custom-knowledge-id" {
98 | t.Errorf("Expected first knowledge ID 'custom-knowledge-id', got '%s'", cfg.GitHub.Mappings[0].KnowledgeID)
99 | }
100 | }
101 |
102 | func TestLoad_EnvironmentOverride(t *testing.T) {
103 | // Set environment variables
104 | os.Setenv("OPENWEBUI_BASE_URL", "https://env.openwebui.com")
105 | os.Setenv("OPENWEBUI_API_KEY", "env-api-key")
106 | os.Setenv("GITHUB_TOKEN", "env-github-token")
107 | os.Setenv("GITHUB_KNOWLEDGE_ID", "env-knowledge-id")
108 | os.Setenv("STORAGE_PATH", "/env/storage")
109 | defer func() {
110 | os.Unsetenv("OPENWEBUI_BASE_URL")
111 | os.Unsetenv("OPENWEBUI_API_KEY")
112 | os.Unsetenv("GITHUB_TOKEN")
113 | os.Unsetenv("GITHUB_KNOWLEDGE_ID")
114 | os.Unsetenv("STORAGE_PATH")
115 | }()
116 |
117 | cfg, err := Load("non-existent-config.yaml")
118 | if err != nil {
119 | t.Fatalf("Failed to load config: %v", err)
120 | }
121 |
122 | // Check environment overrides
123 | if cfg.OpenWebUI.BaseURL != "https://env.openwebui.com" {
124 | t.Errorf("Expected OpenWebUI base URL 'https://env.openwebui.com', got '%s'", cfg.OpenWebUI.BaseURL)
125 | }
126 | if cfg.OpenWebUI.APIKey != "env-api-key" {
127 | t.Errorf("Expected OpenWebUI API key 'env-api-key', got '%s'", cfg.OpenWebUI.APIKey)
128 | }
129 | if cfg.GitHub.Token != "env-github-token" {
130 | t.Errorf("Expected GitHub token 'env-github-token', got '%s'", cfg.GitHub.Token)
131 | }
132 | // Note: GitHub knowledge ID is now handled via mappings, not environment variables
133 | if cfg.Storage.Path != "/env/storage" {
134 | t.Errorf("Expected storage path '/env/storage', got '%s'", cfg.Storage.Path)
135 | }
136 | }
137 |
138 | func TestLoad_InvalidYAML(t *testing.T) {
139 | // Create temporary config file with invalid YAML
140 | tempDir := t.TempDir()
141 | configPath := filepath.Join(tempDir, "invalid-config.yaml")
142 |
143 | invalidYAML := `
144 | log_level: debug
145 | schedule:
146 | interval: 2h
147 | invalid: [unclosed list
148 | `
149 |
150 | err := os.WriteFile(configPath, []byte(invalidYAML), 0644)
151 | if err != nil {
152 | t.Fatalf("Failed to write invalid config file: %v", err)
153 | }
154 |
155 | _, err = Load(configPath)
156 | if err == nil {
157 | t.Errorf("Expected error for invalid YAML, got none")
158 | }
159 | }
160 |
161 | func TestLoad_FileAndEnvironment(t *testing.T) {
162 | // Create temporary config file
163 | tempDir := t.TempDir()
164 | configPath := filepath.Join(tempDir, "config.yaml")
165 |
166 | configContent := `
167 | log_level: debug
168 | openwebui:
169 | base_url: "https://file.openwebui.com"
170 | api_key: "file-api-key"
171 | github:
172 | token: "file-token"
173 | `
174 |
175 | err := os.WriteFile(configPath, []byte(configContent), 0644)
176 | if err != nil {
177 | t.Fatalf("Failed to write config file: %v", err)
178 | }
179 |
180 | // Set environment variables (should override file values)
181 | os.Setenv("OPENWEBUI_BASE_URL", "https://env.openwebui.com")
182 | os.Setenv("GITHUB_TOKEN", "env-token")
183 | defer func() {
184 | os.Unsetenv("OPENWEBUI_BASE_URL")
185 | os.Unsetenv("GITHUB_TOKEN")
186 | }()
187 |
188 | cfg, err := Load(configPath)
189 | if err != nil {
190 | t.Fatalf("Failed to load config: %v", err)
191 | }
192 |
193 | // Environment should override file values
194 | if cfg.OpenWebUI.BaseURL != "https://env.openwebui.com" {
195 | t.Errorf("Expected environment to override file value, got '%s'", cfg.OpenWebUI.BaseURL)
196 | }
197 | if cfg.GitHub.Token != "env-token" {
198 | t.Errorf("Expected environment to override file value, got '%s'", cfg.GitHub.Token)
199 | }
200 |
201 | // File values should be used where environment is not set
202 | if cfg.OpenWebUI.APIKey != "file-api-key" {
203 | t.Errorf("Expected file value to be used, got '%s'", cfg.OpenWebUI.APIKey)
204 | }
205 | }
206 |
207 | func TestGetEnv(t *testing.T) {
208 | // Test with existing environment variable
209 | os.Setenv("TEST_VAR", "test-value")
210 | defer os.Unsetenv("TEST_VAR")
211 |
212 | result := getEnv("TEST_VAR", "default")
213 | if result != "test-value" {
214 | t.Errorf("Expected 'test-value', got '%s'", result)
215 | }
216 |
217 | // Test with non-existing environment variable
218 | result = getEnv("NON_EXISTING_VAR", "default")
219 | if result != "default" {
220 | t.Errorf("Expected 'default', got '%s'", result)
221 | }
222 |
223 | // Test with empty environment variable
224 | os.Setenv("EMPTY_VAR", "")
225 | defer os.Unsetenv("EMPTY_VAR")
226 |
227 | result = getEnv("EMPTY_VAR", "default")
228 | if result != "default" {
229 | t.Errorf("Expected 'default' for empty env var, got '%s'", result)
230 | }
231 | }
232 |
233 | func TestConfig_StructFields(t *testing.T) {
234 | cfg := &Config{
235 | LogLevel: "debug",
236 | Schedule: ScheduleConfig{
237 | Interval: 2 * time.Hour,
238 | },
239 | Storage: StorageConfig{
240 | Path: "/test/path",
241 | },
242 | OpenWebUI: OpenWebUIConfig{
243 | BaseURL: "https://test.com",
244 | APIKey: "test-key",
245 | },
246 | GitHub: GitHubConfig{
247 | Enabled: true,
248 | Token: "github-token",
249 | Mappings: []RepositoryMapping{
250 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"},
251 | },
252 | },
253 | }
254 |
255 | // Test that all fields can be set and accessed
256 | if cfg.LogLevel != "debug" {
257 | t.Errorf("LogLevel not set correctly")
258 | }
259 | if cfg.Schedule.Interval != 2*time.Hour {
260 | t.Errorf("Schedule.Interval not set correctly")
261 | }
262 | if cfg.Storage.Path != "/test/path" {
263 | t.Errorf("Storage.Path not set correctly")
264 | }
265 | if cfg.OpenWebUI.BaseURL != "https://test.com" {
266 | t.Errorf("OpenWebUI.BaseURL not set correctly")
267 | }
268 | if cfg.OpenWebUI.APIKey != "test-key" {
269 | t.Errorf("OpenWebUI.APIKey not set correctly")
270 | }
271 | if cfg.GitHub.Enabled != true {
272 | t.Errorf("GitHub.Enabled not set correctly")
273 | }
274 | if cfg.GitHub.Token != "github-token" {
275 | t.Errorf("GitHub.Token not set correctly")
276 | }
277 | if len(cfg.GitHub.Mappings) != 1 {
278 | t.Errorf("GitHub.Mappings not set correctly")
279 | }
280 | if cfg.GitHub.Mappings[0].Repository != "owner/repo" {
281 | t.Errorf("GitHub.Mappings[0].Repository not set correctly")
282 | }
283 | if cfg.GitHub.Mappings[0].KnowledgeID != "knowledge-id" {
284 | t.Errorf("GitHub.Mappings[0].KnowledgeID not set correctly")
285 | }
286 | }
287 |
--------------------------------------------------------------------------------
/internal/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "time"
7 |
8 | "gopkg.in/yaml.v3"
9 | )
10 |
11 | // Config represents the application configuration
12 | type Config struct {
13 | LogLevel string `yaml:"log_level"`
14 | Schedule ScheduleConfig `yaml:"schedule"`
15 | Storage StorageConfig `yaml:"storage"`
16 | OpenWebUI OpenWebUIConfig `yaml:"openwebui"`
17 | GitHub GitHubConfig `yaml:"github"`
18 | Confluence ConfluenceConfig `yaml:"confluence"`
19 | Jira JiraConfig `yaml:"jira"`
20 | LocalFolders LocalFolderConfig `yaml:"local_folders"`
21 | Slack SlackConfig `yaml:"slack"`
22 | }
23 |
// ScheduleConfig describes the sync schedule.
type ScheduleConfig struct {
	// Interval is read from the "interval" key.
	Interval time.Duration `yaml:"interval"`
}
28 |
// StorageConfig describes the local storage settings.
type StorageConfig struct {
	// Path is read from the "path" key.
	Path string `yaml:"path"`
}
33 |
// OpenWebUIConfig describes the OpenWebUI API settings.
type OpenWebUIConfig struct {
	// BaseURL is read from the "base_url" key.
	BaseURL string `yaml:"base_url"`

	// APIKey is read from the "api_key" key.
	APIKey string `yaml:"api_key"`
}
39 |
// RepositoryMapping pairs one GitHub repository with the knowledge base it
// is mapped to.
type RepositoryMapping struct {
	// Repository uses the "owner/repo" format.
	Repository string `yaml:"repository"`

	// KnowledgeID is read from the "knowledge_id" key.
	KnowledgeID string `yaml:"knowledge_id"`
}
45 |
// SpaceMapping pairs one Confluence space with the knowledge base it is
// mapped to.
type SpaceMapping struct {
	// SpaceKey is read from the "space_key" key.
	SpaceKey string `yaml:"space_key"`

	// KnowledgeID is read from the "knowledge_id" key.
	KnowledgeID string `yaml:"knowledge_id"`
}
51 |
// ParentPageMapping pairs one Confluence parent page with the knowledge base
// it is mapped to.
type ParentPageMapping struct {
	// ParentPageID is read from the "parent_page_id" key.
	ParentPageID string `yaml:"parent_page_id"`

	// KnowledgeID is read from the "knowledge_id" key.
	KnowledgeID string `yaml:"knowledge_id"`
}
57 |
// LocalFolderMapping pairs one local folder with the knowledge base it is
// mapped to.
type LocalFolderMapping struct {
	// FolderPath is read from the "folder_path" key.
	FolderPath string `yaml:"folder_path"`

	// KnowledgeID is read from the "knowledge_id" key.
	KnowledgeID string `yaml:"knowledge_id"`
}
63 |
64 | // GitHubConfig defines GitHub adapter settings
65 | type GitHubConfig struct {
66 | Enabled bool `yaml:"enabled"`
67 | Token string `yaml:"token"`
68 | Mappings []RepositoryMapping `yaml:"mappings"` // Per-repository knowledge mappings
69 | }
70 |
71 | // ConfluenceConfig defines Confluence adapter settings
72 | type ConfluenceConfig struct {
73 | Enabled bool `yaml:"enabled"`
74 | BaseURL string `yaml:"base_url"`
75 | Username string `yaml:"username"`
76 | APIKey string `yaml:"api_key"`
77 | SpaceMappings []SpaceMapping `yaml:"space_mappings"` // Per-space knowledge mappings
78 | ParentPageMappings []ParentPageMapping `yaml:"parent_page_mappings"` // Per-parent-page knowledge mappings
79 | PageLimit int `yaml:"page_limit"`
80 | IncludeAttachments bool `yaml:"include_attachments"`
81 | UseMarkdownParser bool `yaml:"use_markdown_parser"`
82 | IncludeBlogPosts bool `yaml:"include_blog_posts"`
83 | AddAdditionalData bool `yaml:"add_additional_data"`
84 | }
85 |
86 | // LocalFolderConfig defines local folder adapter settings
87 | type LocalFolderConfig struct {
88 | Enabled bool `yaml:"enabled"`
89 | Mappings []LocalFolderMapping `yaml:"mappings"` // Per-folder knowledge mappings
90 | }
91 |
92 | // SlackConfig defines Slack adapter settings
93 | type SlackConfig struct {
94 | Enabled bool `yaml:"enabled"`
95 | Token string `yaml:"token"`
96 | ChannelMappings []ChannelMapping `yaml:"channel_mappings"` // Per-channel knowledge mappings
97 | RegexPatterns []RegexPattern `yaml:"regex_patterns"` // Regex patterns for auto-discovering channels
98 | DaysToFetch int `yaml:"days_to_fetch"` // Number of days to fetch messages
99 | MaintainHistory bool `yaml:"maintain_history"` // Whether to maintain indefinite history or age off
100 | MessageLimit int `yaml:"message_limit"` // Max messages per channel per run
101 | IncludeThreads bool `yaml:"include_threads"` // Whether to include thread messages
102 | IncludeReactions bool `yaml:"include_reactions"` // Whether to include reaction data
103 | }
104 |
// ChannelMapping pairs one Slack channel with the knowledge base it is
// mapped to.
type ChannelMapping struct {
	// ChannelID is the Slack channel ID.
	ChannelID string `yaml:"channel_id"`

	// ChannelName is the Slack channel name (for display).
	ChannelName string `yaml:"channel_name"`

	// KnowledgeID is the target knowledge base ID.
	KnowledgeID string `yaml:"knowledge_id"`
}
111 |
// RegexPattern describes one rule for auto-discovering Slack channels.
type RegexPattern struct {
	// Pattern is the regex pattern to match channel names.
	Pattern string `yaml:"pattern"`

	// KnowledgeID is the target knowledge base ID for matching channels.
	KnowledgeID string `yaml:"knowledge_id"`

	// AutoJoin selects whether to automatically join matching channels.
	AutoJoin bool `yaml:"auto_join"`
}
118 |
// JiraProjectMapping pairs one Jira project with the knowledge base it is
// mapped to.
type JiraProjectMapping struct {
	// ProjectKey is read from the "project_key" key.
	ProjectKey string `yaml:"project_key"`

	// KnowledgeID is read from the "knowledge_id" key.
	KnowledgeID string `yaml:"knowledge_id"`
}
124 |
125 | // JiraConfig defines Jira adapter settings
126 | type JiraConfig struct {
127 | Enabled bool `yaml:"enabled"`
128 | BaseURL string `yaml:"base_url"`
129 | Username string `yaml:"username"`
130 | APIKey string `yaml:"api_key"`
131 | ProjectMappings []JiraProjectMapping `yaml:"project_mappings"` // Per-project knowledge mappings
132 | PageLimit int `yaml:"page_limit"`
133 | }
134 |
135 | // Load loads configuration from file and environment variables
136 | func Load(path string) (*Config, error) {
137 | fmt.Printf("Loading configuration from: %s\n", path)
138 |
139 | cfg := &Config{
140 | LogLevel: "info",
141 | Schedule: ScheduleConfig{
142 | Interval: 1 * time.Hour,
143 | },
144 | Storage: StorageConfig{
145 | Path: "/data",
146 | },
147 | OpenWebUI: OpenWebUIConfig{
148 | BaseURL: getEnv("OPENWEBUI_BASE_URL", "http://localhost:8080"),
149 | APIKey: getEnv("OPENWEBUI_API_KEY", ""),
150 | },
151 | GitHub: GitHubConfig{
152 | Enabled: false,
153 | Token: getEnv("GITHUB_TOKEN", ""),
154 | Mappings: []RepositoryMapping{},
155 | },
156 | Confluence: ConfluenceConfig{
157 | Enabled: false,
158 | BaseURL: "",
159 | Username: "",
160 | APIKey: getEnv("CONFLUENCE_API_KEY", ""),
161 | SpaceMappings: []SpaceMapping{},
162 | ParentPageMappings: []ParentPageMapping{},
163 | PageLimit: 100,
164 | IncludeAttachments: true,
165 | UseMarkdownParser: false,
166 | IncludeBlogPosts: false,
167 | },
168 | Jira: JiraConfig{
169 | Enabled: false,
170 | BaseURL: "",
171 | Username: "",
172 | APIKey: getEnv("JIRA_API_KEY", ""),
173 | ProjectMappings: []JiraProjectMapping{},
174 | },
175 | LocalFolders: LocalFolderConfig{
176 | Enabled: false,
177 | Mappings: []LocalFolderMapping{},
178 | },
179 | Slack: SlackConfig{
180 | Enabled: false,
181 | Token: getEnv("SLACK_TOKEN", ""),
182 | ChannelMappings: []ChannelMapping{},
183 | DaysToFetch: 30,
184 | MaintainHistory: false,
185 | MessageLimit: 1000,
186 | IncludeThreads: true,
187 | IncludeReactions: false,
188 | },
189 | }
190 |
191 | fmt.Printf("Default OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL)
192 | fmt.Printf("Confluence API Key loaded: %s\n", func() string {
193 | if cfg.Confluence.APIKey != "" {
194 | return "***" + cfg.Confluence.APIKey[len(cfg.Confluence.APIKey)-4:] // Show last 4 chars
195 | }
196 | return "NOT SET"
197 | }())
198 |
199 | // Load from file if it exists
200 | if _, err := os.Stat(path); err == nil {
201 | fmt.Printf("Config file exists, loading from: %s\n", path)
202 | data, err := os.ReadFile(path)
203 | if err != nil {
204 | return nil, fmt.Errorf("failed to read config file: %w", err)
205 | }
206 |
207 | // fmt.Printf("Config file content:\n%s\n", string(data))
208 |
209 | if err := yaml.Unmarshal(data, cfg); err != nil {
210 | return nil, fmt.Errorf("failed to parse config file: %w", err)
211 | }
212 |
213 | fmt.Printf("After loading config file - OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL)
214 | } else {
215 | fmt.Printf("Config file does not exist at: %s (error: %v)\n", path, err)
216 | }
217 |
218 | // Override with environment variables
219 | cfg.OpenWebUI.BaseURL = getEnv("OPENWEBUI_BASE_URL", cfg.OpenWebUI.BaseURL)
220 | cfg.OpenWebUI.APIKey = getEnv("OPENWEBUI_API_KEY", cfg.OpenWebUI.APIKey)
221 | cfg.GitHub.Token = getEnv("GITHUB_TOKEN", cfg.GitHub.Token)
222 | cfg.Confluence.APIKey = getEnv("CONFLUENCE_API_KEY", cfg.Confluence.APIKey)
223 | cfg.Jira.APIKey = getEnv("CONFLUENCE_API_KEY", cfg.Jira.APIKey)
224 | cfg.Storage.Path = getEnv("STORAGE_PATH", cfg.Storage.Path)
225 |
226 | fmt.Printf("Final OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL)
227 | fmt.Printf("Environment OPENWEBUI_BASE_URL: %s\n", os.Getenv("OPENWEBUI_BASE_URL"))
228 | fmt.Printf("Final Confluence API Key: %s\n", func() string {
229 | if cfg.Confluence.APIKey != "" {
230 | return "***" + cfg.Confluence.APIKey[len(cfg.Confluence.APIKey)-4:] // Show last 4 chars
231 | }
232 | return "NOT SET"
233 | }())
234 | fmt.Printf("Environment CONFLUENCE_API_KEY: %s\n", func() string {
235 | env := os.Getenv("CONFLUENCE_API_KEY")
236 | if env != "" {
237 | return "***" + env[len(env)-4:] // Show last 4 chars
238 | }
239 | return "NOT SET"
240 | }())
241 |
242 | return cfg, nil
243 | }
244 |
// getEnv returns the value of the environment variable key, or defaultValue
// when the variable is unset or set to the empty string.
func getEnv(key, defaultValue string) string {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	return value
}
251 |
--------------------------------------------------------------------------------
/internal/openwebui/client_test.go:
--------------------------------------------------------------------------------
1 | package openwebui
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "net/http"
7 | "net/http/httptest"
8 | "strings"
9 | "testing"
10 | "time"
11 | )
12 |
13 | func TestNewClient(t *testing.T) {
14 | client := NewClient("http://localhost:8080", "test-api-key")
15 | if client == nil {
16 | t.Fatal("Expected client to be created")
17 | }
18 | if client.baseURL != "http://localhost:8080" {
19 | t.Errorf("Expected baseURL 'http://localhost:8080', got '%s'", client.baseURL)
20 | }
21 | if client.apiKey != "test-api-key" {
22 | t.Errorf("Expected apiKey 'test-api-key', got '%s'", client.apiKey)
23 | }
24 | }
25 |
26 | func TestClient_UploadFile(t *testing.T) {
27 | tests := []struct {
28 | name string
29 | filename string
30 | content []byte
31 | serverResponse File
32 | serverStatus int
33 | expectError bool
34 | }{
35 | {
36 | name: "successful upload",
37 | filename: "test.md",
38 | content: []byte("# Test"),
39 | serverResponse: File{
40 | ID: "file-123",
41 | Filename: "test.md",
42 | UserID: "user-123",
43 | Hash: "hash-123",
44 | Data: struct {
45 | Status string `json:"status"`
46 | }{
47 | Status: "pending",
48 | },
49 | Meta: struct {
50 | Name string `json:"name"`
51 | ContentType string `json:"content_type"`
52 | Size int64 `json:"size"`
53 | Data map[string]interface{} `json:"data"`
54 | }{
55 | Name: "test.md",
56 | ContentType: "text/markdown",
57 | Size: 6,
58 | Data: map[string]interface{}{},
59 | },
60 | CreatedAt: time.Now().Unix(),
61 | UpdatedAt: time.Now().Unix(),
62 | Status: true,
63 | Path: "/app/backend/data/uploads/file-123_test.md",
64 | AccessControl: nil,
65 | },
66 | serverStatus: http.StatusOK,
67 | expectError: false,
68 | },
69 | {
70 | name: "server error",
71 | filename: "test.md",
72 | content: []byte("# Test"),
73 | serverStatus: http.StatusInternalServerError,
74 | expectError: true,
75 | },
76 | }
77 |
78 | for _, tt := range tests {
79 | t.Run(tt.name, func(t *testing.T) {
80 | requestCount := 0
81 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
82 | requestCount++
83 | t.Logf("Request %d: %s %s", requestCount, r.Method, r.URL.Path)
84 |
85 | // Handle POST requests for file uploads
86 | if r.Method == "POST" && strings.Contains(r.URL.Path, "/api/v1/files/") {
87 | if r.Header.Get("Authorization") != "Bearer test-api-key" {
88 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
89 | }
90 | w.WriteHeader(tt.serverStatus)
91 | if tt.serverStatus == http.StatusOK {
92 | json.NewEncoder(w).Encode(tt.serverResponse)
93 | } else {
94 | w.Write([]byte("Server Error"))
95 | }
96 | } else if r.Method == "GET" && strings.Contains(r.URL.Path, "/api/v1/files/") {
97 | // Handle GET requests for file polling (file processing status)
98 | if r.Header.Get("Authorization") != "Bearer test-api-key" {
99 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
100 | }
101 |
102 | // Extract file ID from path
103 | pathParts := strings.Split(r.URL.Path, "/")
104 | fileID := pathParts[len(pathParts)-1]
105 |
106 | // Return file with "processed" status to complete polling quickly
107 | fileResponse := map[string]interface{}{
108 | "id": fileID,
109 | "filename": "test-file.md",
110 | "user_id": "test-user",
111 | "hash": "test-hash",
112 | "data": map[string]interface{}{
113 | "status": "processed",
114 | },
115 | "meta": map[string]interface{}{
116 | "name": "test-file.md",
117 | "content_type": "text/markdown",
118 | "size": 100,
119 | "data": map[string]interface{}{},
120 | },
121 | "status": true,
122 | }
123 |
124 | w.WriteHeader(http.StatusOK)
125 | json.NewEncoder(w).Encode(fileResponse)
126 | } else {
127 | // Handle other requests gracefully
128 | w.WriteHeader(http.StatusOK)
129 | w.Write([]byte("OK"))
130 | }
131 | }))
132 | defer server.Close()
133 |
134 | client := NewClient(server.URL, "test-api-key")
135 | ctx := context.Background()
136 |
137 | result, err := client.UploadFile(ctx, tt.filename, tt.content)
138 |
139 | if tt.expectError {
140 | if err == nil {
141 | t.Errorf("Expected error but got none")
142 | }
143 | return
144 | }
145 |
146 | if err != nil {
147 | t.Errorf("Unexpected error: %v", err)
148 | return
149 | }
150 |
151 | if result.ID != tt.serverResponse.ID {
152 | t.Errorf("Expected ID %s, got %s", tt.serverResponse.ID, result.ID)
153 | }
154 | if result.Filename != tt.serverResponse.Filename {
155 | t.Errorf("Expected Filename %s, got %s", tt.serverResponse.Filename, result.Filename)
156 | }
157 | })
158 | }
159 | }
160 |
161 | func TestClient_ListKnowledge(t *testing.T) {
162 | expectedKnowledge := []*Knowledge{
163 | {
164 | ID: "knowledge-123",
165 | UserID: "user-123",
166 | Name: "Test Knowledge",
167 | Description: "Test Description",
168 | Data: nil,
169 | Meta: nil,
170 | AccessControl: map[string]interface{}{},
171 | CreatedAt: time.Now().Unix(),
172 | UpdatedAt: time.Now().Unix(),
173 | },
174 | }
175 |
176 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
177 | if r.Method != "GET" {
178 | t.Errorf("Expected GET method, got %s", r.Method)
179 | }
180 | if !strings.Contains(r.URL.Path, "/api/v1/knowledge/") {
181 | t.Errorf("Expected path to contain /api/v1/knowledge/, got %s", r.URL.Path)
182 | }
183 | if r.Header.Get("Authorization") != "Bearer test-api-key" {
184 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
185 | }
186 |
187 | w.WriteHeader(http.StatusOK)
188 | json.NewEncoder(w).Encode(expectedKnowledge)
189 | }))
190 | defer server.Close()
191 |
192 | client := NewClient(server.URL, "test-api-key")
193 | ctx := context.Background()
194 |
195 | result, err := client.ListKnowledge(ctx)
196 | if err != nil {
197 | t.Fatalf("Unexpected error: %v", err)
198 | }
199 |
200 | if len(result) != len(expectedKnowledge) {
201 | t.Fatalf("Expected %d knowledge items, got %d", len(expectedKnowledge), len(result))
202 | }
203 |
204 | if result[0].ID != expectedKnowledge[0].ID {
205 | t.Errorf("Expected ID %s, got %s", expectedKnowledge[0].ID, result[0].ID)
206 | }
207 | }
208 |
209 | func TestClient_AddFileToKnowledge(t *testing.T) {
210 | tests := []struct {
211 | name string
212 | knowledgeID string
213 | fileID string
214 | serverStatus int
215 | expectError bool
216 | }{
217 | {
218 | name: "successful add",
219 | knowledgeID: "knowledge-123",
220 | fileID: "file-123",
221 | serverStatus: http.StatusOK,
222 | expectError: false,
223 | },
224 | {
225 | name: "server error",
226 | knowledgeID: "knowledge-123",
227 | fileID: "file-123",
228 | serverStatus: http.StatusInternalServerError,
229 | expectError: true,
230 | },
231 | }
232 |
233 | for _, tt := range tests {
234 | t.Run(tt.name, func(t *testing.T) {
235 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
236 | if r.Method != "POST" {
237 | t.Errorf("Expected POST method, got %s", r.Method)
238 | }
239 | expectedPath := "/api/v1/knowledge/" + tt.knowledgeID + "/file/add"
240 | if !strings.Contains(r.URL.Path, expectedPath) {
241 | t.Errorf("Expected path to contain %s, got %s", expectedPath, r.URL.Path)
242 | }
243 | if r.Header.Get("Authorization") != "Bearer test-api-key" {
244 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
245 | }
246 |
247 | // Check request body
248 | var requestBody map[string]string
249 | if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil {
250 | t.Errorf("Failed to decode request body: %v", err)
251 | }
252 | if requestBody["file_id"] != tt.fileID {
253 | t.Errorf("Expected file_id %s, got %s", tt.fileID, requestBody["file_id"])
254 | }
255 |
256 | w.WriteHeader(tt.serverStatus)
257 | }))
258 | defer server.Close()
259 |
260 | client := NewClient(server.URL, "test-api-key")
261 | ctx := context.Background()
262 |
263 | err := client.AddFileToKnowledge(ctx, tt.knowledgeID, tt.fileID)
264 |
265 | if tt.expectError {
266 | if err == nil {
267 | t.Errorf("Expected error but got none")
268 | }
269 | return
270 | }
271 |
272 | if err != nil {
273 | t.Errorf("Unexpected error: %v", err)
274 | }
275 | })
276 | }
277 | }
278 |
279 | func TestClient_RemoveFileFromKnowledge(t *testing.T) {
280 | tests := []struct {
281 | name string
282 | knowledgeID string
283 | fileID string
284 | serverStatus int
285 | expectError bool
286 | }{
287 | {
288 | name: "successful remove",
289 | knowledgeID: "knowledge-123",
290 | fileID: "file-123",
291 | serverStatus: http.StatusOK,
292 | expectError: false,
293 | },
294 | {
295 | name: "server error",
296 | knowledgeID: "knowledge-123",
297 | fileID: "file-123",
298 | serverStatus: http.StatusInternalServerError,
299 | expectError: true,
300 | },
301 | }
302 |
303 | for _, tt := range tests {
304 | t.Run(tt.name, func(t *testing.T) {
305 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
306 | if r.Method != "POST" {
307 | t.Errorf("Expected POST method, got %s", r.Method)
308 | }
309 | expectedPath := "/api/v1/knowledge/" + tt.knowledgeID + "/file/remove"
310 | if !strings.Contains(r.URL.Path, expectedPath) {
311 | t.Errorf("Expected path to contain %s, got %s", expectedPath, r.URL.Path)
312 | }
313 | if r.Header.Get("Authorization") != "Bearer test-api-key" {
314 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
315 | }
316 |
317 | // Check request body
318 | var requestBody map[string]string
319 | if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil {
320 | t.Errorf("Failed to decode request body: %v", err)
321 | }
322 | if requestBody["file_id"] != tt.fileID {
323 | t.Errorf("Expected file_id %s, got %s", tt.fileID, requestBody["file_id"])
324 | }
325 |
326 | w.WriteHeader(tt.serverStatus)
327 | }))
328 | defer server.Close()
329 |
330 | client := NewClient(server.URL, "test-api-key")
331 | ctx := context.Background()
332 |
333 | err := client.RemoveFileFromKnowledge(ctx, tt.knowledgeID, tt.fileID)
334 |
335 | if tt.expectError {
336 | if err == nil {
337 | t.Errorf("Expected error but got none")
338 | }
339 | return
340 | }
341 |
342 | if err != nil {
343 | t.Errorf("Unexpected error: %v", err)
344 | }
345 | })
346 | }
347 | }
348 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/adapter_readme/SLACK_ADAPTER.md:
--------------------------------------------------------------------------------
1 | # Slack Adapter
2 |
3 | The Slack adapter allows you to sync messages from Slack channels into OpenWebUI knowledge bases. This enables you to search and reference Slack conversations, decisions, and discussions within your OpenWebUI interface.
4 |
5 | ## Features
6 |
7 | - **Multi-channel support**: Sync from multiple Slack channels
8 | - **Knowledge base mapping**: Map each channel to a specific OpenWebUI knowledge base
9 | - **Regex pattern discovery**: Automatically discover and sync channels matching regex patterns
10 | - **Auto-join functionality**: Automatically join channels that match configured patterns
11 | - **Thread support**: Include or exclude thread messages
12 | - **Reaction data**: Optionally include emoji reactions
13 | - **Message filtering**: Control the number of messages and time range
14 | - **History management**: Choose between maintaining indefinite history or aging off old messages
15 | - **Incremental sync**: Only fetches new messages since the last sync
16 | - **Channel caching**: Improved performance with intelligent channel caching
17 | - **Retry logic**: Robust error handling with exponential backoff
18 | - **Join error logging**: Detailed logging of channel join failures
19 |
20 | ## Configuration
21 |
22 | ### Environment Variables
23 |
24 | Set the following environment variable:
25 |
26 | ```bash
27 | export SLACK_TOKEN="xoxb-your-slack-bot-token"
28 | ```
29 |
30 | ### Configuration File
31 |
32 | Add the following section to your `config.yaml`:
33 |
34 | ```yaml
35 | slack:
36 | enabled: true
37 | token: "" # Set via SLACK_TOKEN environment variable
38 | channel_mappings:
39 | - channel_id: "C1234567890" # Slack channel ID
40 | channel_name: "general" # Channel name for display
41 | knowledge_id: "general-knowledge-base"
42 | - channel_id: "C0987654321"
43 | channel_name: "dev-team"
44 | knowledge_id: "dev-knowledge-base"
45 | - channel_id: "C1122334455"
46 | channel_name: "support"
47 | knowledge_id: "support-knowledge-base"
48 | regex_patterns:
49 | - pattern: "sales-.*-internal.*" # Matches channels like sales-team-internal
50 | knowledge_id: "sales-knowledge-base"
51 | auto_join: true # Automatically join matching channels
52 | - pattern: "dev-.*" # Matches channels like dev-frontend, dev-backend
53 | knowledge_id: "dev-knowledge-base"
54 | auto_join: false # Don't auto-join, just sync if already a member
55 | - pattern: "support-.*" # Matches channels like support-tier1, support-tier2
56 | knowledge_id: "support-knowledge-base"
57 | auto_join: true
58 | days_to_fetch: 30 # Number of days to fetch messages (default: 30)
59 | maintain_history: false # Whether to maintain indefinite history or age off (default: false)
60 | message_limit: 1000 # Max messages per channel per run (default: 1000)
61 | include_threads: true # Whether to include thread messages (default: true)
62 | include_reactions: false # Whether to include reaction data (default: false)
63 | ```
64 |
65 | ### Configuration Options
66 |
67 | | Option | Type | Required | Default | Description |
68 | |--------|------|----------|---------|-------------|
69 | | `enabled` | boolean | Yes | `false` | Enable/disable the Slack adapter |
70 | | `token` | string | Yes | - | Slack bot token (set via `SLACK_TOKEN` env var) |
71 | | `channel_mappings` | array | No | `[]` | List of explicit channel mappings |
72 | | `regex_patterns` | array | No | `[]` | List of regex patterns for auto-discovering channels |
73 | | `days_to_fetch` | integer | No | `30` | Number of days to fetch messages |
74 | | `maintain_history` | boolean | No | `false` | `true` keeps all synced messages indefinitely; `false` ages off messages older than `days_to_fetch` |
75 | | `message_limit` | integer | No | `1000` | Max messages per channel per run |
76 | | `include_threads` | boolean | No | `true` | Whether to include thread messages |
77 | | `include_reactions` | boolean | No | `false` | Whether to include reaction data |
78 |
79 | ### Channel Mapping
80 |
81 | Each mapping in the `channel_mappings` array should contain:
82 |
83 | | Field | Type | Required | Description |
84 | |-------|------|----------|-------------|
85 | | `channel_id` | string | Yes | Slack channel ID (starts with 'C') |
86 | | `channel_name` | string | Yes | Channel name for display purposes |
87 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID |
88 |
89 | ### Regex Pattern Discovery
90 |
91 | The `regex_patterns` feature allows you to automatically discover and sync channels that match specific patterns. This is useful for:
92 |
93 | - **Dynamic channel discovery**: Automatically find new channels that match your naming conventions
94 | - **Bulk channel management**: Sync multiple similar channels without manual configuration
95 | - **Auto-joining**: Automatically join channels that match patterns
96 |
97 | Each pattern in the `regex_patterns` array should contain:
98 |
99 | | Field | Type | Required | Description |
100 | |-------|------|----------|-------------|
101 | | `pattern` | string | Yes | Regex pattern to match channel names |
102 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID for matching channels |
103 | | `auto_join` | boolean | No | Whether to automatically join matching channels (default: `false`) |
104 |
105 | #### Regex Pattern Examples
106 |
107 | ```yaml
108 | regex_patterns:
109 | # Match all sales internal channels
110 | - pattern: "sales-.*-internal.*"
111 | knowledge_id: "sales-knowledge-base"
112 | auto_join: true
113 |
114 | # Match all development channels
115 | - pattern: "dev-.*"
116 | knowledge_id: "dev-knowledge-base"
117 | auto_join: false
118 |
119 | # Match support channels
120 | - pattern: "support-.*"
121 | knowledge_id: "support-knowledge-base"
122 | auto_join: true
123 |
124 | # Match project-specific channels
125 | - pattern: "project-[a-zA-Z0-9]+-.*"
126 | knowledge_id: "project-knowledge-base"
127 | auto_join: true
128 | ```
129 |
130 | #### How Regex Discovery Works
131 |
132 | 1. **Channel Discovery**: The adapter fetches all channels the bot can access
133 | 2. **Pattern Matching**: Each channel name is tested against configured regex patterns
134 | 3. **Auto-joining**: If `auto_join: true`, the bot attempts to join matching channels
135 | 4. **Sync Setup**: Matching channels are added to the sync list with the specified knowledge ID
136 | 5. **Caching**: Channel lists are cached to improve performance and reduce API calls
137 |
138 | #### Important Notes
139 |
140 | - **Channel Access**: The bot can only discover channels it has access to
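141 | - **Auto-join Limitations**: Some channels may not allow bots to join (e.g., private channels requiring invitation), and joining public channels requires the `channels:join` bot scope in addition to the read/history scopes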
142 | - **Performance**: Regex discovery happens once per sync session and results are cached
143 | - **Error Handling**: Failed joins are logged to `data/slack/join_errors.log` for troubleshooting
144 |
145 | ## Slack Bot Setup
146 |
147 | ### 1. Create a Slack App
148 |
149 | 1. Go to [api.slack.com/apps](https://api.slack.com/apps)
150 | 2. Click "Create New App"
151 | 3. Choose "From scratch"
152 | 4. Enter app name and select your workspace
153 |
154 | ### 2. Configure Bot Permissions
155 |
156 | In your app settings, go to "OAuth & Permissions" and add these scopes:
157 |
158 | **Bot Token Scopes:**
159 | - `channels:history` - View messages in public channels
160 | - `channels:read` - View basic information about public channels
161 | - `groups:history` - View messages in private channels
162 | - `groups:read` - View basic information about private channels
163 | - `im:history` - View messages in direct messages
164 | - `im:read` - View basic information about direct messages
165 | - `mpim:history` - View messages in group direct messages
166 | - `mpim:read` - View basic information about group direct messages
167 | - `reactions:read` - View emoji reactions (if including reactions)
168 |
169 | ### 3. Install the App
170 |
171 | 1. Go to "Install App" in your app settings
172 | 2. Click "Install to Workspace"
173 | 3. Review permissions and click "Allow"
174 | 4. Copy the "Bot User OAuth Token" (starts with `xoxb-`)
175 |
176 | ### 4. Get Channel IDs
177 |
178 | To find channel IDs:
179 |
180 | 1. Open Slack in your browser
181 | 2. Navigate to the channel
182 | 3. Look at the URL: `https://yourworkspace.slack.com/messages/C1234567890`
183 | 4. The channel ID is the part after `/messages/`
184 |
185 | Or use the Slack API:
186 | ```bash
187 | curl -H "Authorization: Bearer xoxb-your-token" \
188 | "https://slack.com/api/conversations.list"
189 | ```
190 |
191 | ## Message Processing
192 |
193 | ### Message Format
194 |
195 | Messages are processed and stored in markdown format:
196 |
197 | ```markdown
198 | # Channel: #general
199 | **User:** @john.doe
200 | **Timestamp:** 2024-01-15 10:30:00
201 | **Message:**
202 | This is the message content.
203 |
204 | **Thread Reply:**
205 | - @jane.smith: This is a thread reply
206 | - @bob.wilson: Another thread reply
207 |
208 | **Reactions:** :thumbsup: :heart: :laughing:
209 | ```
210 |
211 | ### Message Types
212 |
213 | The adapter processes:
214 | - **Regular messages**: Text messages from users
215 | - **Thread messages**: Replies to messages (if enabled)
216 | - **File attachments**: File names and descriptions
217 | - **Reactions**: Emoji reactions (if enabled)
218 | - **System messages**: Channel join/leave notifications
219 |
220 | ### Excluded Content
221 |
222 | The adapter automatically excludes:
223 | - Messages from bots (unless specifically configured)
224 | - Deleted messages
225 | - Messages older than the configured `days_to_fetch`
226 | - Messages exceeding the `message_limit`
227 |
228 | ## Sync Behavior
229 |
230 | ### Initial Sync
231 |
232 | - Fetches messages from the last `days_to_fetch` days
233 | - Respects the `message_limit` per channel
234 | - Processes all configured channels
235 |
236 | ### Incremental Sync
237 |
238 | - Only fetches new messages since the last sync
239 | - Maintains sync state per channel
240 | - Handles rate limiting automatically
241 |
242 | ### History Management
243 |
244 | Two modes are available:
245 |
246 | 1. **Age-off mode** (`maintain_history: false`):
247 | - Only keeps messages from the last `days_to_fetch` days
248 | - Older messages are automatically removed
249 | - Reduces storage usage
250 |
251 | 2. **Indefinite history** (`maintain_history: true`):
252 | - Keeps all messages indefinitely
253 | - Only fetches new messages on subsequent syncs
254 | - Requires more storage but preserves all history
255 |
256 | ## Use Cases
257 |
258 | ### Team Knowledge Base
259 |
260 | Sync important team discussions:
261 |
262 | ```yaml
263 | slack:
264 | enabled: true
265 | channel_mappings:
266 | - channel_id: "C1234567890"
267 | channel_name: "general"
268 | knowledge_id: "team-general"
269 | - channel_id: "C0987654321"
270 | channel_name: "dev-team"
271 | knowledge_id: "dev-discussions"
272 | days_to_fetch: 90
273 | maintain_history: true
274 | include_threads: true
275 | ```
276 |
277 | ### Support Documentation
278 |
279 | Sync support channel discussions:
280 |
281 | ```yaml
282 | slack:
283 | enabled: true
284 | channel_mappings:
285 | - channel_id: "C1122334455"
286 | channel_name: "support"
287 | knowledge_id: "support-knowledge"
288 | days_to_fetch: 30
289 | maintain_history: false
290 | include_threads: true
291 | include_reactions: true
292 | ```
293 |
294 | ### Project Discussions
295 |
296 | Sync project-specific channels:
297 |
298 | ```yaml
299 | slack:
300 | enabled: true
301 | channel_mappings:
302 | - channel_id: "C5555666677"
303 | channel_name: "project-alpha"
304 | knowledge_id: "project-alpha-docs"
305 | - channel_id: "C8888999900"
306 | channel_name: "project-beta"
307 | knowledge_id: "project-beta-docs"
308 | days_to_fetch: 60
309 | maintain_history: true
310 | ```
311 |
312 | ## Troubleshooting
313 |
314 | ### Common Issues
315 |
316 | 1. **Authentication errors**
317 | - Verify your Slack token is valid and starts with `xoxb-`
318 | - Check that the bot has been installed to your workspace
319 | - Ensure the token hasn't expired
320 |
321 | 2. **Channel access denied**
322 | - Verify the bot has been added to the channels you want to sync
323 | - Check that the bot has the required permissions
324 | - Ensure channel IDs are correct
325 |
326 | 3. **No messages synced**
327 | - Check that channels have messages within the `days_to_fetch` range
328 | - Verify the `message_limit` isn't too restrictive
329 | - Ensure channels aren't empty or archived
330 |
331 | 4. **Rate limit exceeded**
332 | - The adapter automatically handles rate limits with exponential backoff
333 | - Consider reducing sync frequency if this occurs frequently
334 | - Check if you're hitting Slack's API rate limits
335 |
336 | 5. **Channel join failures**
337 | - Check `data/slack/join_errors.log` for detailed join failure information
338 | - Common issues: archived channels, permission restrictions, private channel access
339 | - Verify bot permissions and channel settings
340 |
341 | ### Debug Logging
342 |
343 | Enable debug logging to see detailed sync information:
344 |
345 | ```yaml
346 | log_level: debug
347 | ```
348 |
349 | This will show:
350 | - Which channels are being processed
351 | - Message fetching progress
352 | - API request/response details
353 | - Sync timing and statistics
354 | - Channel discovery and regex matching
355 | - Join attempts and results
356 |
357 | ### Error Logging
358 |
359 | The adapter provides detailed error logging for troubleshooting:
360 |
361 | - **Join Errors**: `data/slack/join_errors.log` - Detailed log of channel join failures
362 | - **Channel Tracking**: `data/slack/channels/channel_tracking.txt` - Overview of all discovered channels and their status
363 | - **Debug Logs**: Console output with detailed processing information
364 |
365 | ## Security Considerations
366 |
367 | - **Token security**: Store your Slack token securely and never commit it to version control
368 | - **Channel access**: Only sync channels that contain appropriate content
369 | - **Content filtering**: Review the content being synced to ensure it's appropriate
370 | - **Privacy**: Be mindful of private channels and sensitive information
371 |
372 | ## Performance Tips
373 |
374 | - **Message limits**: Set appropriate `message_limit` values to balance completeness with performance
375 | - **Days to fetch**: Adjust `days_to_fetch` based on your needs
376 | - **Thread inclusion**: Disable `include_threads` if you don't need thread context
377 | - **Reaction inclusion**: Disable `include_reactions` to reduce data volume
378 |
379 | ## Example Configuration
380 |
381 | ```yaml
382 | # Complete example configuration
383 | log_level: info
384 | schedule:
385 | interval: 1h
386 |
387 | storage:
388 | path: "/data"
389 |
390 | openwebui:
391 | base_url: "http://localhost:8080"
392 | api_key: "your-openwebui-api-key"
393 |
394 | slack:
395 | enabled: true
396 | token: "" # Set via SLACK_TOKEN environment variable
397 | channel_mappings:
398 | - channel_id: "C1234567890"
399 | channel_name: "general"
400 | knowledge_id: "general-discussions"
401 | - channel_id: "C0987654321"
402 | channel_name: "dev-team"
403 | knowledge_id: "dev-discussions"
404 | - channel_id: "C1122334455"
405 | channel_name: "support"
406 | knowledge_id: "support-knowledge"
407 | regex_patterns:
408 | - pattern: "sales-.*-internal.*"
409 | knowledge_id: "sales-knowledge-base"
410 | auto_join: true
411 | - pattern: "dev-.*"
412 | knowledge_id: "dev-knowledge-base"
413 | auto_join: false
414 | - pattern: "support-.*"
415 | knowledge_id: "support-knowledge-base"
416 | auto_join: true
417 | days_to_fetch: 30
418 | maintain_history: false
419 | message_limit: 1000
420 | include_threads: true
421 | include_reactions: false
422 | ```
423 |
424 | ## Rate Limits
425 |
426 | Slack assigns each Web API method to a rate-limit tier; the approximate per-method request limits are:
427 |
428 | - **Tier 1**: 1+ per minute
429 | - **Tier 2**: 20+ per minute
430 | - **Tier 3**: 50+ per minute
431 | - **Tier 4**: 100+ per minute
432 |
433 | The adapter automatically handles rate limiting with exponential backoff. For optimal performance:
434 |
435 | - Avoid syncing too many channels simultaneously
436 | - Use appropriate sync intervals
437 | - Monitor your API usage in the Slack app dashboard
438 |
--------------------------------------------------------------------------------