├── k8s ├── pvc.yaml ├── secrets.yaml ├── configmap.yaml └── deployment.yaml ├── internal ├── sync │ ├── interface.go │ └── manager_simple_test.go ├── openwebui │ ├── interface.go │ └── client_test.go ├── adapter │ ├── adapter.go │ ├── jira_comment.go │ ├── github_test.go │ ├── confluence_test.go │ ├── local_test.go │ ├── local.go │ ├── github.go │ └── slack_test.go ├── scheduler │ ├── scheduler.go │ └── scheduler_simple_test.go ├── health │ ├── health.go │ └── health_test.go ├── utils │ └── retry.go ├── mocks │ └── mocks.go └── config │ ├── config_test.go │ └── config.go ├── .gitignore ├── Dockerfile ├── go.mod ├── .github └── workflows │ └── docker-build.yml ├── Makefile ├── main.go ├── adapter_readme ├── GITHUB_ADAPTER.md ├── JIRA_ADAPTER.md ├── CONFLUENCE_ADAPTER.md ├── LOCAL_ADAPTER.md └── SLACK_ADAPTER.md ├── ARCHITECTURE.md ├── config.example.yaml ├── go.sum ├── main_test.go └── LICENSE /k8s/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: connector-data-pvc 5 | labels: 6 | app: openwebui-content-sync 7 | spec: 8 | accessModes: 9 | - ReadWriteOnce 10 | resources: 11 | requests: 12 | storage: 10Gi 13 | storageClassName: ebs-sc 14 | -------------------------------------------------------------------------------- /internal/sync/interface.go: -------------------------------------------------------------------------------- 1 | package sync 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/openwebui-content-sync/internal/adapter" 7 | ) 8 | 9 | // ManagerInterface defines the interface for sync manager operations 10 | type ManagerInterface interface { 11 | SyncFiles(ctx context.Context, adapters []adapter.Adapter) error 12 | SetKnowledgeID(knowledgeID string) 13 | InitializeFileIndex(ctx context.Context, adapters []adapter.Adapter) error 14 | } 15 | -------------------------------------------------------------------------------- 
/internal/openwebui/interface.go: -------------------------------------------------------------------------------- 1 | package openwebui 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | // ClientInterface defines the interface for OpenWebUI client operations 8 | type ClientInterface interface { 9 | UploadFile(ctx context.Context, filename string, content []byte) (*File, error) 10 | GetFile(ctx context.Context, fileID string) (*File, error) 11 | ListKnowledge(ctx context.Context) ([]*Knowledge, error) 12 | AddFileToKnowledge(ctx context.Context, knowledgeID, fileID string) error 13 | RemoveFileFromKnowledge(ctx context.Context, knowledgeID, fileID string) error 14 | GetKnowledgeFiles(ctx context.Context, knowledgeID string) ([]*File, error) 15 | DeleteFile(ctx context.Context, fileID string) error 16 | } 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | connector 8 | 9 | # Test binary, built with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | coverage.html 15 | 16 | # Dependency directories (remove the comment below to include it) 17 | # vendor/ 18 | 19 | # Go workspace file 20 | go.work 21 | 22 | # IDE files 23 | .vscode/ 24 | .idea/ 25 | *.swp 26 | *.swo 27 | 28 | # OS generated files 29 | .DS_Store 30 | .DS_Store? 
31 | ._* 32 | .Spotlight-V100 33 | .Trashes 34 | ehthumbs.db 35 | Thumbs.db 36 | 37 | # Local data directory 38 | /data/ 39 | 40 | # Log files 41 | *.log 42 | 43 | # Environment files 44 | .env 45 | .env.local 46 | .env.production 47 | .env.development 48 | .env.test 49 | config.yaml 50 | k8s-local/ 51 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM --platform=$BUILDPLATFORM golang:1.23.0-alpine AS builder 3 | 4 | WORKDIR /app 5 | 6 | # Install git for go modules 7 | RUN apk add --no-cache git 8 | 9 | # Copy go mod files 10 | COPY go.mod go.sum ./ 11 | 12 | # Download dependencies 13 | RUN go mod download 14 | 15 | # Copy source code 16 | COPY . . 17 | 18 | # Build the application for the target platform 19 | ARG TARGETOS 20 | ARG TARGETARCH 21 | RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -a -installsuffix cgo -o main . 22 | 23 | # Final stage 24 | FROM --platform=$TARGETPLATFORM alpine:latest 25 | 26 | # Install ca-certificates for HTTPS requests 27 | RUN apk --no-cache add ca-certificates 28 | 29 | WORKDIR /root/ 30 | 31 | # Copy the binary from builder stage 32 | COPY --from=builder /app/main . 
33 | 34 | # Create data directory 35 | RUN mkdir -p /data 36 | 37 | # Expose port (if needed for health checks) 38 | EXPOSE 8080 39 | 40 | # Run the application 41 | CMD ["./main"] 42 | -------------------------------------------------------------------------------- /internal/adapter/adapter.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "context" 5 | "time" 6 | ) 7 | 8 | // File represents a file from an external source 9 | type File struct { 10 | Path string `json:"path"` 11 | Content []byte `json:"content"` 12 | Hash string `json:"hash"` 13 | Modified time.Time `json:"modified"` 14 | Size int64 `json:"size"` 15 | Source string `json:"source"` 16 | KnowledgeID string `json:"knowledge_id,omitempty"` // Optional: specific knowledge base ID for this file 17 | } 18 | 19 | // Adapter defines the interface for data source adapters 20 | type Adapter interface { 21 | // Name returns the adapter name 22 | Name() string 23 | 24 | // FetchFiles retrieves files from the data source 25 | FetchFiles(ctx context.Context) ([]*File, error) 26 | 27 | // GetLastSync returns the last sync timestamp 28 | GetLastSync() time.Time 29 | 30 | // SetLastSync updates the last sync timestamp 31 | SetLastSync(t time.Time) 32 | } 33 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/openwebui-content-sync 2 | 3 | go 1.23.0 4 | 5 | require ( 6 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0 7 | github.com/google/go-github/v56 v56.0.0 8 | github.com/robfig/cron/v3 v3.0.1 9 | github.com/sirupsen/logrus v1.9.3 10 | golang.org/x/net v0.43.0 11 | golang.org/x/oauth2 v0.15.0 12 | gopkg.in/yaml.v3 v3.0.1 13 | ) 14 | 15 | require ( 16 | github.com/JohannesKaufmann/dom v0.2.0 // indirect 17 | github.com/golang/protobuf v1.5.3 // indirect 18 | github.com/google/go-cmp v0.6.0 
// indirect 19 | github.com/google/go-querystring v1.1.0 // indirect 20 | github.com/gorilla/websocket v1.5.3 // indirect 21 | github.com/kr/pretty v0.3.1 // indirect 22 | github.com/rogpeppe/go-internal v1.10.0 // indirect 23 | github.com/slack-go/slack v0.17.3 // indirect 24 | github.com/stretchr/testify v1.10.0 // indirect 25 | golang.org/x/sys v0.35.0 // indirect 26 | google.golang.org/appengine v1.6.7 // indirect 27 | google.golang.org/protobuf v1.31.0 // indirect 28 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 29 | ) 30 | -------------------------------------------------------------------------------- /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker Image 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - 12 | name: Checkout 13 | uses: actions/checkout@v4 14 | - name: Lowercase repository name 15 | run: echo "LOWER_REPO=${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV 16 | - 17 | name: Set up Docker Buildx 18 | uses: docker/setup-buildx-action@v3 19 | - 20 | name: Login to GitHub Container Registry 21 | if: github.event_name != 'pull_request' 22 | uses: docker/login-action@v3 23 | with: 24 | registry: ghcr.io 25 | username: ${{ github.actor }} 26 | password: ${{ secrets.GITHUB_TOKEN }} 27 | - 28 | name: Build and push 29 | uses: docker/build-push-action@v5 30 | with: 31 | context: . 
32 | platforms: linux/amd64 33 | registry: ghcr.io 34 | push: ${{ github.event_name != 'pull_request' }} 35 | tags: ghcr.io/${{ env.LOWER_REPO }}:latest 36 | cache-from: type=gha 37 | -------------------------------------------------------------------------------- /k8s/secrets.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: openwebui-secrets 5 | labels: 6 | app: openwebui-content-sync 7 | type: Opaque 8 | data: 9 | # Base64 encoded API key 10 | # echo -n "your-openwebui-api-key" | base64 11 | api-key: eW91ci1vcGVud2VidWktYXBpLWtleQ== 12 | 13 | --- 14 | apiVersion: v1 15 | kind: Secret 16 | metadata: 17 | name: github-secrets 18 | labels: 19 | app: openwebui-content-sync 20 | type: Opaque 21 | data: 22 | # Base64 encoded GitHub token 23 | # echo -n "your-github-token" | base64 24 | token: eW91ci1naXRodWItdG9rZW4= 25 | 26 | --- 27 | apiVersion: v1 28 | kind: Secret 29 | metadata: 30 | name: confluence-secrets 31 | labels: 32 | app: openwebui-content-sync 33 | type: Opaque 34 | data: 35 | # Base64 encoded Confluence API key 36 | # echo -n "your-confluence-api-key" | base64 37 | api-key: eW91ci1jb25mbHVlbmNlLWFwaS1rZXk= 38 | 39 | --- 40 | apiVersion: v1 41 | kind: Secret 42 | metadata: 43 | name: slack-secrets 44 | labels: 45 | app: openwebui-content-sync 46 | type: Opaque 47 | data: 48 | # Base64 encoded Slack token 49 | # echo -n "your-slack-token" | base64 50 | token: eW91ci1zbGFjay10b2tlbg== 51 | -------------------------------------------------------------------------------- /internal/scheduler/scheduler.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/openwebui-content-sync/internal/adapter" 9 | "github.com/openwebui-content-sync/internal/sync" 10 | "github.com/robfig/cron/v3" 11 | "github.com/sirupsen/logrus" 12 | ) 13 | 14 | // 
Scheduler manages periodic synchronization 15 | type Scheduler struct { 16 | cron *cron.Cron 17 | interval time.Duration 18 | adapters []adapter.Adapter 19 | syncManager sync.ManagerInterface 20 | } 21 | 22 | // New creates a new scheduler 23 | func New(interval time.Duration, adapters []adapter.Adapter, syncManager sync.ManagerInterface) *Scheduler { 24 | return &Scheduler{ 25 | cron: cron.New(cron.WithSeconds()), 26 | interval: interval, 27 | adapters: adapters, 28 | syncManager: syncManager, 29 | } 30 | } 31 | 32 | // Start starts the scheduler 33 | func (s *Scheduler) Start(ctx context.Context) { 34 | logrus.Infof("Starting scheduler with interval: %v", s.interval) 35 | 36 | // Schedule the sync job 37 | cronSpec := fmt.Sprintf("@every %v", s.interval) 38 | _, err := s.cron.AddFunc(cronSpec, func() { 39 | logrus.Info("Running scheduled sync") 40 | if err := s.RunSyncWithContext(ctx); err != nil { 41 | logrus.Errorf("Scheduled sync failed: %v", err) 42 | } 43 | }) 44 | if err != nil { 45 | logrus.Errorf("Failed to schedule sync job: %v", err) 46 | return 47 | } 48 | 49 | s.cron.Start() 50 | 51 | // Wait for context cancellation 52 | <-ctx.Done() 53 | logrus.Info("Stopping scheduler...") 54 | s.cron.Stop() 55 | } 56 | 57 | // RunSyncWithContext runs a synchronization cycle with the provided context 58 | func (s *Scheduler) RunSyncWithContext(ctx context.Context) error { 59 | // Create a timeout context, but make it respect the parent context cancellation 60 | syncCtx, cancel := context.WithTimeout(ctx, 30*time.Minute) 61 | defer cancel() 62 | 63 | return s.syncManager.SyncFiles(syncCtx, s.adapters) 64 | } 65 | -------------------------------------------------------------------------------- /internal/health/health.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "time" 9 | ) 10 | 11 | // Server provides health check endpoints 12 | 
// Server provides the HTTP liveness (/health) and readiness (/ready) endpoints.
type Server struct {
	server *http.Server
}

// HealthResponse is the JSON payload returned by both endpoints.
type HealthResponse struct {
	Status    string    `json:"status"`
	Timestamp time.Time `json:"timestamp"`
	Version   string    `json:"version"`
}

// NewServer creates a health check server listening on the given port.
// The server is configured but not started; call Start to begin serving.
func NewServer(port int) *Server {
	mux := http.NewServeMux()

	healthServer := &Server{
		server: &http.Server{
			Addr:    fmt.Sprintf(":%d", port),
			Handler: mux,
			// Bound every phase of a request so a stalled or malicious
			// client cannot pin a connection open indefinitely.
			ReadHeaderTimeout: 5 * time.Second,
			ReadTimeout:       10 * time.Second,
			WriteTimeout:      10 * time.Second,
			IdleTimeout:       60 * time.Second,
		},
	}

	// Register health check endpoints.
	mux.HandleFunc("/health", healthServer.healthHandler)
	mux.HandleFunc("/ready", healthServer.readyHandler)

	return healthServer
}

// Start serves requests until the server is stopped. It always returns a
// non-nil error; after Stop the error is http.ErrServerClosed.
func (s *Server) Start() error {
	return s.server.ListenAndServe()
}

// Stop gracefully shuts the server down, waiting for in-flight requests
// until ctx is done.
func (s *Server) Stop(ctx context.Context) error {
	return s.server.Shutdown(ctx)
}

// healthHandler handles liveness check requests.
func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
	s.writeStatus(w, "healthy")
}

// readyHandler handles readiness check requests.
func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
	s.writeStatus(w, "ready")
}

// writeStatus writes a 200 JSON HealthResponse carrying the given status.
// The payload is encoded before any header is sent so that an encoding
// failure can still be reported as a 500.
func (s *Server) writeStatus(w http.ResponseWriter, status string) {
	payload, err := json.Marshal(HealthResponse{
		Status:    status,
		Timestamp: time.Now(),
		Version:   "1.0.0",
	})
	if err != nil {
		http.Error(w, "failed to encode health response", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// The trailing newline keeps the body identical to json.Encoder output.
	// A write error means the client went away; nothing useful can be done.
	_, _ = w.Write(append(payload, '\n'))
}
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: connector-config 5 | labels: 6 | app: openwebui-content-sync 7 | data: 8 | config.yaml: | 9 | log_level: info 10 | schedule: 11 | interval: 1h 12 | storage: 13 | path: /data 14 | openwebui: 15 | base_url: "http://open-webui:8080" 16 | github: 17 | enabled: true 18 | token: "" 19 | mappings: 20 | - repository: "owner/repo1" 21 | knowledge_id: "repo1-knowledge-base" 22 | - repository: "owner/repo2" 23 | knowledge_id: "repo2-knowledge-base" 24 | confluence: 25 | enabled: false 26 | base_url: "https://your-domain.atlassian.net" 27 | username: "your-email@example.com" 28 | api_key: "" 29 | space_mappings: 30 | - space_key: "SPACEKEY1" 31 | knowledge_id: "space1-knowledge-base" 32 | - space_key: "SPACEKEY2" 33 | knowledge_id: "space2-knowledge-base" 34 | parent_page_mappings: 35 | - parent_page_id: "1234567890" 36 | knowledge_id: "parent-page-knowledge-base" 37 | page_limit: 100 38 | include_attachments: true 39 | local_folders: 40 | enabled: false 41 | mappings: 42 | - folder_path: "/data/docs" 43 | knowledge_id: "docs-knowledge-base" 44 | - folder_path: "/data/guides" 45 | knowledge_id: "guides-knowledge-base" 46 | slack: 47 | enabled: false 48 | token: "" 49 | channel_mappings: 50 | - channel_id: "C1234567890" 51 | channel_name: "general" 52 | knowledge_id: "general-knowledge-base" 53 | - channel_id: "C0987654321" 54 | channel_name: "dev-team" 55 | knowledge_id: "dev-knowledge-base" 56 | regex_patterns: 57 | - pattern: "^sales-.*-internal$" 58 | knowledge_id: "sales-knowledge-base" 59 | auto_join: true 60 | - pattern: "^dev-.*" 61 | knowledge_id: "dev-knowledge-base" 62 | auto_join: true 63 | - pattern: "^support-.*" 64 | knowledge_id: "support-knowledge-base" 65 | auto_join: false 66 | days_to_fetch: 30 67 | maintain_history: false 68 | message_limit: 1000 69 | include_threads: true 70 | include_reactions: false 71 | 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build test clean docker-build docker-build-multi docker-build-multi-local docker-build-amd64 docker-build-arm64 docker-build-local docker-push deploy undeploy setup-buildx show-platforms 2 | 3 | # Build the application 4 | build: 5 | go build -o connector . 6 | 7 | # Run tests 8 | test: 9 | go test ./... 10 | 11 | # Clean build artifacts 12 | clean: 13 | rm -f connector 14 | 15 | # Build Docker image (single platform) 16 | docker-build: 17 | docker build -t openwebui-content-sync:latest . 18 | 19 | # Build multi-architecture Docker image (builds manifest, no local load) 20 | docker-build-multi-local: 21 | docker buildx build --platform linux/amd64,linux/arm64 -t openwebui-content-sync:latest . 22 | 23 | # Build multi-architecture Docker image and push to registry 24 | docker-build-multi: 25 | docker buildx build --platform linux/amd64,linux/arm64 -t castaiphil/openwebui-content-sync:latest --push . 26 | 27 | # Build for specific platform and load locally (useful for testing) 28 | docker-build-amd64: 29 | docker buildx build --platform linux/amd64 -t openwebui-content-sync:amd64 --load . 30 | 31 | docker-build-arm64: 32 | docker buildx build --platform linux/arm64 -t openwebui-content-sync:arm64 --load . 33 | 34 | # Build for current platform and load locally (useful for testing) 35 | docker-build-local: 36 | docker buildx build --platform linux/amd64,linux/arm64 -t openwebui-content-sync:latest --load --builder desktop-linux . 
37 | 38 | # Push Docker image (update registry as needed) 39 | docker-push: 40 | docker push openwebui-content-sync:latest 41 | 42 | # Deploy to Kubernetes 43 | deploy: 44 | kubectl apply -f k8s/ 45 | 46 | # Undeploy from Kubernetes 47 | undeploy: 48 | kubectl delete -f k8s/ 49 | 50 | # Run locally with config 51 | run: 52 | ./connector -config config.yaml 53 | 54 | # Run tests with coverage 55 | test-coverage: 56 | go test -coverprofile=coverage.out ./... 57 | go tool cover -html=coverage.out -o coverage.html 58 | 59 | # Format code 60 | fmt: 61 | go fmt ./... 62 | 63 | # Lint code 64 | lint: 65 | golangci-lint run 66 | 67 | # Install dependencies 68 | deps: 69 | go mod download 70 | go mod tidy 71 | 72 | # Setup Docker buildx for multi-arch builds 73 | setup-buildx: 74 | docker buildx create --name multiarch --driver docker-container --use 75 | docker buildx inspect --bootstrap 76 | 77 | # Show available platforms 78 | show-platforms: 79 | docker buildx inspect 80 | -------------------------------------------------------------------------------- /k8s/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: openwebui-content-sync 5 | labels: 6 | app: openwebui-content-sync 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: openwebui-content-sync 12 | template: 13 | metadata: 14 | labels: 15 | app: openwebui-content-sync 16 | spec: 17 | containers: 18 | - name: connector 19 | image: castaiphil/openwebui-content-sync:latest 20 | imagePullPolicy: Always 21 | env: 22 | - name: OPENWEBUI_API_KEY 23 | valueFrom: 24 | secretKeyRef: 25 | name: openwebui-secrets 26 | key: api-key 27 | - name: GITHUB_TOKEN 28 | valueFrom: 29 | secretKeyRef: 30 | name: github-secrets 31 | key: token 32 | - name: CONFLUENCE_API_KEY 33 | valueFrom: 34 | secretKeyRef: 35 | name: confluence-secrets 36 | key: api-key 37 | - name: SLACK_TOKEN 38 | valueFrom: 39 | 
secretKeyRef: 40 | name: slack-secrets 41 | key: token 42 | - name: STORAGE_PATH 43 | value: "/data" 44 | - name: LOG_LEVEL 45 | value: "info" 46 | volumeMounts: 47 | - name: data-storage 48 | mountPath: /data 49 | - name: config 50 | mountPath: /root/config.yaml 51 | subPath: config.yaml 52 | resources: 53 | requests: 54 | memory: "128Mi" 55 | cpu: "100m" 56 | limits: 57 | memory: "512Mi" 58 | cpu: "500m" 59 | livenessProbe: 60 | httpGet: 61 | path: /health 62 | port: 8080 63 | initialDelaySeconds: 30 64 | periodSeconds: 10 65 | timeoutSeconds: 5 66 | failureThreshold: 3 67 | readinessProbe: 68 | httpGet: 69 | path: /ready 70 | port: 8080 71 | initialDelaySeconds: 5 72 | periodSeconds: 5 73 | timeoutSeconds: 3 74 | failureThreshold: 3 75 | volumes: 76 | - name: data-storage 77 | persistentVolumeClaim: 78 | claimName: connector-data-pvc 79 | - name: config 80 | configMap: 81 | name: connector-config 82 | - name: confluence-secrets 83 | secret: 84 | secretName: confluence-secrets 85 | restartPolicy: Always 86 | -------------------------------------------------------------------------------- /internal/adapter/jira_comment.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | 9 | "github.com/sirupsen/logrus" 10 | ) 11 | 12 | // CommentData holds the extracted comment data we want 13 | type CommentData struct { 14 | RenderedBody string `json:"renderedBody"` 15 | AuthorName string `json:"authorName"` 16 | Created string `json:"created"` 17 | } 18 | 19 | // fetchComment fetches a single comment by URL and returns only the renderedBody and author displayName 20 | func (j *JiraAdapter) fetchComment(ctx context.Context, commentURL string) (*CommentData, error) { 21 | // Build URL for individual comment fetch 22 | url := commentURL 23 | url += "?expand=renderedBody&name&fields=summary,description,parent,issuetype,reporter,status" 24 | 25 | req, 
err := http.NewRequestWithContext(ctx, "GET", url, nil) 26 | if err != nil { 27 | return nil, fmt.Errorf("failed to create request: %w", err) 28 | } 29 | 30 | // Set authentication 31 | req.SetBasicAuth(j.config.Username, j.config.APIKey) 32 | req.Header.Set("Accept", "application/json") 33 | 34 | logrus.Debugf("Jira comment API URL: %s", url) 35 | 36 | resp, err := j.client.Do(req) 37 | if err != nil { 38 | return nil, fmt.Errorf("failed to make request: %w", err) 39 | } 40 | 41 | if resp.StatusCode != http.StatusOK { 42 | resp.Body.Close() 43 | return nil, fmt.Errorf("API request failed with status %d: response body omitted", resp.StatusCode) 44 | } 45 | 46 | var comment struct { 47 | Self string `json:"self"` 48 | ID string `json:"id"` 49 | Author JiraUser `json:"author"` 50 | RenderedBody string `json:"renderedBody"` 51 | UpdateAuthor JiraUser `json:"updateAuthor"` 52 | Created string `json:"created"` 53 | Updated string `json:"updated"` 54 | JsdPublic bool `json:"jsdPublic"` 55 | } 56 | 57 | if err := json.NewDecoder(resp.Body).Decode(&comment); err != nil { 58 | resp.Body.Close() 59 | return nil, fmt.Errorf("failed to decode response: %w", err) 60 | } 61 | resp.Body.Close() 62 | 63 | return &CommentData{ 64 | RenderedBody: comment.RenderedBody, 65 | AuthorName: comment.Author.DisplayName, 66 | }, nil 67 | } 68 | 69 | // fetchCommentsForIssue fetches all comments for a specific issue and returns only the renderedBody and author displayName 70 | func (j *JiraAdapter) fetchCommentsForIssue(ctx context.Context, issueID string) ([]CommentData, error) { 71 | var comments []CommentData 72 | 73 | // First fetch the issue to get the comments 74 | issue, err := j.fetchIssue(ctx, issueID) 75 | if err != nil { 76 | return nil, fmt.Errorf("failed to fetch issue %s: %w", issueID, err) 77 | } 78 | 79 | // Extract comments from the issue 80 | for _, comment := range issue.Fields.Comment.Comments { 81 | // Extract rendered body from the comment's body field 82 | 
fetchedComment, err2 := j.fetchComment(ctx, comment.Self) 83 | if err2 != nil { 84 | return comments, fmt.Errorf("failed to Fetch Comment %w", err) 85 | } 86 | 87 | renderedBody := j.HtmlToMarkdown(fetchedComment.RenderedBody) 88 | logrus.Debugf("FetchedComment: %s,renderedBody %s ", fetchedComment, renderedBody) 89 | comments = append(comments, CommentData{ 90 | RenderedBody: j.HtmlToMarkdown(renderedBody), 91 | AuthorName: comment.Author.DisplayName, 92 | Created: comment.Created, 93 | }) 94 | } 95 | 96 | return comments, nil 97 | } 98 | -------------------------------------------------------------------------------- /internal/scheduler/scheduler_simple_test.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/openwebui-content-sync/internal/adapter" 10 | "github.com/openwebui-content-sync/internal/mocks" 11 | ) 12 | 13 | // MockSyncManager is a simple mock for testing 14 | type MockSyncManager struct{} 15 | 16 | func (m *MockSyncManager) SyncFiles(ctx context.Context, adapters []adapter.Adapter) error { 17 | return nil 18 | } 19 | 20 | func (m *MockSyncManager) SetKnowledgeID(knowledgeID string) { 21 | // Mock implementation 22 | } 23 | 24 | func (m *MockSyncManager) InitializeFileIndex(ctx context.Context, adapters []adapter.Adapter) error { 25 | // Mock implementation 26 | return nil 27 | } 28 | 29 | func TestNew(t *testing.T) { 30 | interval := 1 * time.Hour 31 | adapters := []adapter.Adapter{} 32 | syncManager := &MockSyncManager{} 33 | 34 | scheduler := New(interval, adapters, syncManager) 35 | if scheduler == nil { 36 | t.Fatal("Expected scheduler to be created") 37 | } 38 | if scheduler.interval != interval { 39 | t.Errorf("Expected interval %v, got %v", interval, scheduler.interval) 40 | } 41 | if len(scheduler.adapters) != len(adapters) { 42 | t.Errorf("Expected %d adapters, got %d", len(adapters), 
len(scheduler.adapters)) 43 | } 44 | } 45 | 46 | func TestScheduler_RunSync(t *testing.T) { 47 | // Create mock sync manager 48 | syncManager := &MockSyncManager{} 49 | 50 | // Create mock adapters 51 | adapters := []adapter.Adapter{ 52 | &mocks.MockAdapter{}, 53 | &mocks.MockAdapter{}, 54 | } 55 | 56 | scheduler := New(1*time.Hour, adapters, syncManager) 57 | 58 | // Test RunSyncWithContext 59 | ctx := context.Background() 60 | err := scheduler.RunSyncWithContext(ctx) 61 | if err != nil { 62 | t.Errorf("RunSyncWithContext failed: %v", err) 63 | } 64 | } 65 | 66 | func TestScheduler_Start(t *testing.T) { 67 | // Create mock sync manager 68 | syncManager := &MockSyncManager{} 69 | 70 | // Create mock adapters 71 | adapters := []adapter.Adapter{ 72 | &mocks.MockAdapter{}, 73 | } 74 | 75 | scheduler := New(100*time.Millisecond, adapters, syncManager) 76 | 77 | ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) 78 | defer cancel() 79 | 80 | // Start scheduler in goroutine 81 | var wg sync.WaitGroup 82 | wg.Add(1) 83 | go func() { 84 | defer wg.Done() 85 | scheduler.Start(ctx) 86 | }() 87 | 88 | // Wait for context to be cancelled 89 | <-ctx.Done() 90 | wg.Wait() 91 | } 92 | 93 | func TestScheduler_Interval(t *testing.T) { 94 | interval := 2 * time.Hour 95 | scheduler := New(interval, []adapter.Adapter{}, &MockSyncManager{}) 96 | 97 | if scheduler.interval != interval { 98 | t.Errorf("Expected interval %v, got %v", interval, scheduler.interval) 99 | } 100 | } 101 | 102 | func TestScheduler_Adapters(t *testing.T) { 103 | adapters := []adapter.Adapter{ 104 | &mocks.MockAdapter{}, 105 | &mocks.MockAdapter{}, 106 | } 107 | scheduler := New(1*time.Hour, adapters, &MockSyncManager{}) 108 | 109 | if len(scheduler.adapters) != len(adapters) { 110 | t.Errorf("Expected %d adapters, got %d", len(adapters), len(scheduler.adapters)) 111 | } 112 | 113 | for i, expected := range adapters { 114 | if scheduler.adapters[i] != expected { 115 | 
t.Errorf("Expected adapter %d to be %v, got %v", i, expected, scheduler.adapters[i]) 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /internal/adapter/github_test.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/openwebui-content-sync/internal/config" 9 | ) 10 | 11 | func TestGitHubAdapter_Name(t *testing.T) { 12 | adapter := &GitHubAdapter{} 13 | if adapter.Name() != "github" { 14 | t.Errorf("Expected name 'github', got '%s'", adapter.Name()) 15 | } 16 | } 17 | 18 | func TestGitHubAdapter_GetSetLastSync(t *testing.T) { 19 | adapter := &GitHubAdapter{} 20 | now := time.Now() 21 | 22 | adapter.SetLastSync(now) 23 | if !adapter.GetLastSync().Equal(now) { 24 | t.Errorf("Expected last sync time %v, got %v", now, adapter.GetLastSync()) 25 | } 26 | } 27 | 28 | func TestNewGitHubAdapter(t *testing.T) { 29 | tests := []struct { 30 | name string 31 | config config.GitHubConfig 32 | expectError bool 33 | }{ 34 | { 35 | name: "valid config", 36 | config: config.GitHubConfig{ 37 | Token: "test-token", 38 | Mappings: []config.RepositoryMapping{ 39 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"}, 40 | }, 41 | }, 42 | expectError: false, 43 | }, 44 | { 45 | name: "missing token", 46 | config: config.GitHubConfig{ 47 | Token: "", 48 | Mappings: []config.RepositoryMapping{ 49 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"}, 50 | }, 51 | }, 52 | expectError: true, 53 | }, 54 | { 55 | name: "no mappings", 56 | config: config.GitHubConfig{ 57 | Token: "test-token", 58 | Mappings: []config.RepositoryMapping{}, 59 | }, 60 | expectError: true, 61 | }, 62 | { 63 | name: "invalid repository format", 64 | config: config.GitHubConfig{ 65 | Token: "test-token", 66 | Mappings: []config.RepositoryMapping{ 67 | {Repository: "invalid-repo", KnowledgeID: "knowledge-id"}, 68 | }, 69 | }, 
70 | expectError: false, // This will fail later during fetch 71 | }, 72 | } 73 | 74 | for _, tt := range tests { 75 | t.Run(tt.name, func(t *testing.T) { 76 | adapter, err := NewGitHubAdapter(tt.config) 77 | if tt.expectError { 78 | if err == nil { 79 | t.Errorf("Expected error but got none") 80 | } 81 | return 82 | } 83 | if err != nil { 84 | t.Errorf("Unexpected error: %v", err) 85 | return 86 | } 87 | if adapter == nil { 88 | t.Errorf("Expected adapter but got nil") 89 | } 90 | }) 91 | } 92 | } 93 | 94 | func TestIsTextFile(t *testing.T) { 95 | tests := []struct { 96 | filename string 97 | expected bool 98 | }{ 99 | {"test.md", true}, 100 | {"test.txt", true}, 101 | {"test.go", true}, 102 | {"test.py", true}, 103 | {"test.js", true}, 104 | {"test.ts", true}, 105 | {"test.json", true}, 106 | {"test.yaml", true}, 107 | {"test.yml", true}, 108 | {"test.xml", true}, 109 | {"test.html", true}, 110 | {"test.css", true}, 111 | {"test.sh", true}, 112 | {"test.dockerfile", true}, 113 | {"test.gitignore", true}, 114 | {"test.env", true}, 115 | {"test.png", false}, 116 | {"test.jpg", false}, 117 | {"test.jpeg", false}, 118 | {"test.gif", false}, 119 | {"test.exe", false}, 120 | {"test.dll", false}, 121 | {"test.so", false}, 122 | {"test.dylib", false}, 123 | {"test", true}, // No extension should be considered text 124 | {"test.TXT", true}, // Case insensitive 125 | {"test.MD", true}, // Case insensitive 126 | } 127 | 128 | for _, test := range tests { 129 | t.Run(test.filename, func(t *testing.T) { 130 | result := isTextFile(test.filename) 131 | if result != test.expected { 132 | t.Errorf("isTextFile(%s) = %v, expected %v", test.filename, result, test.expected) 133 | } 134 | }) 135 | } 136 | } 137 | 138 | func TestGitHubAdapter_FetchFiles(t *testing.T) { 139 | // This test would require mocking the GitHub API 140 | // For now, we'll test the error cases 141 | config := config.GitHubConfig{ 142 | Token: "invalid-token", 143 | Mappings: []config.RepositoryMapping{ 144 | 
{Repository: "nonexistent/owner", KnowledgeID: "knowledge-id"}, 145 | }, 146 | } 147 | 148 | adapter, err := NewGitHubAdapter(config) 149 | if err != nil { 150 | t.Fatalf("Failed to create adapter: %v", err) 151 | } 152 | 153 | ctx := context.Background() 154 | _, err = adapter.FetchFiles(ctx) 155 | if err == nil { 156 | t.Errorf("Expected error for invalid repository, got none") 157 | } 158 | } 159 | 160 | func TestFile_String(t *testing.T) { 161 | file := &File{ 162 | Path: "test.md", 163 | Hash: "abc123", 164 | Size: 100, 165 | Source: "github", 166 | Modified: time.Now(), 167 | } 168 | 169 | // Test that File struct can be created and accessed 170 | if file.Path != "test.md" { 171 | t.Errorf("Expected path 'test.md', got '%s'", file.Path) 172 | } 173 | if file.Hash != "abc123" { 174 | t.Errorf("Expected hash 'abc123', got '%s'", file.Hash) 175 | } 176 | if file.Size != 100 { 177 | t.Errorf("Expected size 100, got %d", file.Size) 178 | } 179 | if file.Source != "github" { 180 | t.Errorf("Expected source 'github', got '%s'", file.Source) 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /internal/utils/retry.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math" 7 | "math/rand" 8 | "net" 9 | "strings" 10 | "time" 11 | 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | // RetryConfig holds configuration for retry logic 16 | type RetryConfig struct { 17 | MaxRetries int // Maximum number of retries 18 | BaseDelay time.Duration // Base delay between retries 19 | MaxDelay time.Duration // Maximum delay between retries 20 | Multiplier float64 // Exponential backoff multiplier 21 | } 22 | 23 | // DefaultRetryConfig returns a sensible default retry configuration 24 | func DefaultRetryConfig() RetryConfig { 25 | return RetryConfig{ 26 | MaxRetries: 3, 27 | BaseDelay: time.Second, 28 | MaxDelay: time.Minute, 29 | 
// containsAny reports whether s contains at least one of the given substrings.
func containsAny(s string, substrs []string) bool {
	for _, sub := range substrs {
		if strings.Contains(s, sub) {
			return true
		}
	}
	return false
}

// linearDelay returns step*(attempt+1), capped at limit.
// attempt must be non-negative and step must be positive.
func linearDelay(attempt int, step, limit time.Duration) time.Duration {
	// Compare step counts rather than durations so that a very large attempt
	// cannot overflow the multiplication below.
	if int64(attempt) >= int64(limit/step) {
		return limit
	}
	return time.Duration(attempt+1) * step
}

// IsRetryableError reports whether err describes a transient failure that is
// worth retrying.
//
// Classification order:
//  1. A nil error is never retryable.
//  2. A net.Error that reports Timeout or Temporary is retryable.
//  3. Otherwise the lower-cased message is matched against known markers;
//     permanent markers (revoked token, archived channel, ...) win over
//     retryable ones.
//
// A net.Error that is neither a timeout nor temporary is deliberately NOT
// rejected outright: it falls through to the message check, so that network
// errors whose text says "connection refused" or "connection reset" are still
// retried.
func IsRetryableError(err error) bool {
	if err == nil {
		return false
	}

	// Temporary is deprecated in the standard library but is still consulted
	// here so that errors classified as retryable before stay retryable.
	if netErr, ok := err.(net.Error); ok && (netErr.Timeout() || netErr.Temporary()) {
		return true
	}

	// Lower-case once; every marker below is lower-case.
	msg := strings.ToLower(err.Error())

	// Permanent conditions must NOT be retried, even when the message also
	// happens to contain a retryable marker.
	permanentMarkers := []string{
		"is_archived",
		"not_in_channel",
		"channel_not_found",
		"cant_invite_self",
		"invalid_auth",
		"account_inactive",
		"token_revoked",
	}
	if containsAny(msg, permanentMarkers) {
		return false
	}

	// Message fragments that indicate a transient condition.
	retryableMarkers := []string{
		"timeout",
		"connection refused",
		"connection reset",
		"temporary failure",
		"rate limit",
		"too many requests",
		"service unavailable",
		"internal server error",
		"bad gateway",
		"gateway timeout",
		"network is unreachable",
		"slack rate limit",
		"rate_limited",
		"ratelimited",
		"429",
		"too_many_requests",
	}
	return containsAny(msg, retryableMarkers)
}

// GetRetryDelay returns how long to wait before the next retry of an operation
// that failed with err.
//
// attempt is zero-based (0 is the first retry); negative values are treated as
// 0. A nil err yields baseDelay unchanged. Otherwise the lower-cased message
// selects the strategy:
//   - rate limiting: 5s per step, capped at 5 minutes;
//   - timeouts and connection problems: 2s per step, capped at 30 seconds;
//   - anything else: baseDelay * 2^attempt, capped at 2 minutes.
//
// The linear strategies count steps as attempt+1, so the first retry already
// waits one full step instead of firing immediately.
func GetRetryDelay(err error, attempt int, baseDelay time.Duration) time.Duration {
	if err == nil {
		return baseDelay
	}
	if attempt < 0 {
		attempt = 0
	}

	msg := strings.ToLower(err.Error())

	// Kept in sync with the rate-limit markers recognised by IsRetryableError.
	rateLimitMarkers := []string{
		"rate limit",
		"rate_limited",
		"ratelimited",
		"429",
		"too_many_requests",
		"too many requests",
	}

	switch {
	case containsAny(msg, rateLimitMarkers):
		// Rate limits need the longest waits.
		return linearDelay(attempt, 5*time.Second, 5*time.Minute)
	case strings.Contains(msg, "timeout") || strings.Contains(msg, "connection"):
		// Network errors get moderate delays.
		return linearDelay(attempt, 2*time.Second, 30*time.Second)
	}

	// Default: exponential backoff.
	const maxDelay = 2 * time.Minute
	if baseDelay <= 0 {
		// Nothing sensible to scale; also avoids 0 * +Inf = NaN below.
		return 0
	}
	// Compare in floating point before converting: for large attempts the
	// product exceeds the int64 range (or is +Inf) and must be capped first.
	scaled := float64(baseDelay) * math.Pow(2, float64(attempt))
	if scaled >= float64(maxDelay) {
		return maxDelay
	}
	return time.Duration(scaled)
}
err 167 | } 168 | 169 | if attempt == config.MaxRetries { 170 | logrus.Warnf("Max retries (%d) exceeded, giving up. Last error: %v", config.MaxRetries, err) 171 | break 172 | } 173 | 174 | logrus.Debugf("Attempt %d failed with retryable error: %v", attempt+1, err) 175 | } 176 | 177 | return fmt.Errorf("operation failed after %d retries: %w", config.MaxRetries+1, lastErr) 178 | } 179 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // OpenWebUI Content Sync 2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "os" 23 | "os/signal" 24 | "syscall" 25 | "time" 26 | 27 | "github.com/openwebui-content-sync/internal/adapter" 28 | "github.com/openwebui-content-sync/internal/config" 29 | "github.com/openwebui-content-sync/internal/health" 30 | "github.com/openwebui-content-sync/internal/scheduler" 31 | "github.com/openwebui-content-sync/internal/sync" 32 | "github.com/sirupsen/logrus" 33 | ) 34 | 35 | func main() { 36 | var configPath = flag.String("config", "config.yaml", "Path to configuration file") 37 | flag.Parse() 38 | 39 | // Load configuration 40 | cfg, err := config.Load(*configPath) 41 | if err != nil { 42 | logrus.Fatalf("Failed to load configuration: %v", err) 43 | } 44 | 45 | // Set log level 46 | level, err := logrus.ParseLevel(cfg.LogLevel) 47 | if err != nil { 48 | logrus.Fatalf("Invalid log level: %v", err) 49 | } 50 | logrus.SetLevel(level) 51 | 52 | logrus.Info("Starting OpenWebUI Content Sync") 53 | 54 | // Initialize adapters 55 | adapters := make([]adapter.Adapter, 0) 56 | 57 | // Add GitHub adapter if configured 58 | if cfg.GitHub.Enabled { 59 | githubAdapter, err := adapter.NewGitHubAdapter(cfg.GitHub) 60 | if err != nil { 61 | logrus.Fatalf("Failed to create GitHub adapter: %v", err) 62 | } 63 | adapters = append(adapters, githubAdapter) 64 | } 65 | 66 | // Add Confluence adapter if configured 67 | if cfg.Confluence.Enabled { 68 | confluenceAdapter, err := adapter.NewConfluenceAdapter(cfg.Confluence) 69 | if err != nil { 70 | logrus.Fatalf("Failed to create Confluence adapter: %v", err) 71 | } 72 | adapters = append(adapters, confluenceAdapter) 73 | } 74 | 75 | // Add Local Folders adapter if configured 76 | if cfg.LocalFolders.Enabled { 77 | localAdapter, err := adapter.NewLocalFolderAdapter(cfg.LocalFolders) 78 | if err != nil { 79 | logrus.Fatalf("Failed to create Local Folders adapter: %v", err) 80 | } 81 | adapters = append(adapters, localAdapter) 82 | } 83 | 84 | // Add 
Slack adapter if configured 85 | if cfg.Slack.Enabled { 86 | slackAdapter, err := adapter.NewSlackAdapter(cfg.Slack, cfg.Storage.Path) 87 | if err != nil { 88 | logrus.Fatalf("Failed to create Slack adapter: %v", err) 89 | } 90 | adapters = append(adapters, slackAdapter) 91 | } 92 | // Add Jira adapter if configured 93 | if cfg.Jira.Enabled { 94 | jiraAdapter, err := adapter.NewJiraAdapter(cfg.Jira) 95 | if err != nil { 96 | logrus.Fatalf("Failed to create Jira adapter: %v", err) 97 | } 98 | adapters = append(adapters, jiraAdapter) 99 | } 100 | 101 | // Initialize sync manager 102 | syncManager, err := sync.NewManager(cfg.OpenWebUI, cfg.Storage) 103 | if err != nil { 104 | logrus.Fatalf("Failed to create sync manager: %v", err) 105 | } 106 | 107 | // Note: With the mapping system, individual files will have their own knowledge IDs 108 | logrus.Infof("Using mapping-based knowledge ID assignment - files will use their individual knowledge IDs from mappings") 109 | 110 | // Initialize scheduler 111 | sched := scheduler.New(cfg.Schedule.Interval, adapters, syncManager) 112 | 113 | // Start health check server 114 | healthServer := health.NewServer(8080) 115 | go func() { 116 | if err := healthServer.Start(); err != nil { 117 | logrus.Errorf("Health server error: %v", err) 118 | } 119 | }() 120 | 121 | // Create context for graceful shutdown 122 | ctx, cancel := context.WithCancel(context.Background()) 123 | defer cancel() 124 | 125 | // Start scheduler 126 | go sched.Start(ctx) 127 | 128 | // Wait for interrupt signal 129 | sigChan := make(chan os.Signal, 1) 130 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) 131 | 132 | // Initialize file index from OpenWebUI 133 | logrus.Info("Initializing file index from OpenWebUI...") 134 | if err := syncManager.InitializeFileIndex(ctx, adapters); err != nil { 135 | logrus.Errorf("Failed to initialize file index: %v", err) 136 | // Continue even if initialization fails 137 | } 138 | 139 | // Run initial sync 140 | 
logrus.Info("Running initial sync...") 141 | if err := sched.RunSyncWithContext(ctx); err != nil { 142 | logrus.Errorf("Initial sync failed: %v", err) 143 | } 144 | 145 | // Wait for shutdown signal 146 | <-sigChan 147 | logrus.Info("Shutting down gracefully... (press CTRL+C again to force)") 148 | cancel() 149 | 150 | // Create a channel for forced shutdown 151 | forceChan := make(chan os.Signal, 1) 152 | signal.Notify(forceChan, syscall.SIGINT, syscall.SIGTERM) 153 | 154 | // Stop health server with timeout 155 | healthCtx, healthCancel := context.WithTimeout(context.Background(), 5*time.Second) 156 | defer healthCancel() 157 | 158 | // Run shutdown in a goroutine so we can detect double CTRL+C 159 | shutdownDone := make(chan bool, 1) 160 | go func() { 161 | healthServer.Stop(healthCtx) 162 | // Give some time for graceful shutdown 163 | time.Sleep(5 * time.Second) 164 | shutdownDone <- true 165 | }() 166 | 167 | // Wait for either shutdown completion or forced termination 168 | select { 169 | case <-shutdownDone: 170 | logrus.Info("Graceful shutdown completed") 171 | case <-forceChan: 172 | logrus.Warn("Force shutdown requested, exiting immediately") 173 | os.Exit(1) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /internal/adapter/confluence_test.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/openwebui-content-sync/internal/config" 8 | ) 9 | 10 | func TestNewConfluenceAdapter(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | config config.ConfluenceConfig 14 | wantErr bool 15 | }{ 16 | { 17 | name: "valid config", 18 | config: config.ConfluenceConfig{ 19 | BaseURL: "https://test.atlassian.net", 20 | Username: "test@example.com", 21 | APIKey: "test-key", 22 | SpaceMappings: []config.SpaceMapping{ 23 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 24 | }, 25 | }, 26 | 
wantErr: false, 27 | }, 28 | { 29 | name: "missing base URL", 30 | config: config.ConfluenceConfig{ 31 | Username: "test@example.com", 32 | APIKey: "test-key", 33 | SpaceMappings: []config.SpaceMapping{ 34 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 35 | }, 36 | }, 37 | wantErr: true, 38 | }, 39 | { 40 | name: "missing username", 41 | config: config.ConfluenceConfig{ 42 | BaseURL: "https://test.atlassian.net", 43 | APIKey: "test-key", 44 | SpaceMappings: []config.SpaceMapping{ 45 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 46 | }, 47 | }, 48 | wantErr: true, 49 | }, 50 | { 51 | name: "missing API key", 52 | config: config.ConfluenceConfig{ 53 | BaseURL: "https://test.atlassian.net", 54 | Username: "test@example.com", 55 | SpaceMappings: []config.SpaceMapping{ 56 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 57 | }, 58 | }, 59 | wantErr: true, 60 | }, 61 | { 62 | name: "missing mappings", 63 | config: config.ConfluenceConfig{ 64 | BaseURL: "https://test.atlassian.net", 65 | Username: "test@example.com", 66 | APIKey: "test-key", 67 | SpaceMappings: []config.SpaceMapping{}, 68 | }, 69 | wantErr: true, 70 | }, 71 | } 72 | 73 | for _, tt := range tests { 74 | t.Run(tt.name, func(t *testing.T) { 75 | adapter, err := NewConfluenceAdapter(tt.config) 76 | if (err != nil) != tt.wantErr { 77 | t.Errorf("NewConfluenceAdapter() error = %v, wantErr %v", err, tt.wantErr) 78 | return 79 | } 80 | if !tt.wantErr && adapter == nil { 81 | t.Error("NewConfluenceAdapter() returned nil adapter when no error expected") 82 | } 83 | }) 84 | } 85 | } 86 | 87 | func TestConfluenceAdapter_Name(t *testing.T) { 88 | config := config.ConfluenceConfig{ 89 | BaseURL: "https://test.atlassian.net", 90 | Username: "test@example.com", 91 | APIKey: "test-key", 92 | SpaceMappings: []config.SpaceMapping{ 93 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 94 | }, 95 | } 96 | 97 | adapter, err := NewConfluenceAdapter(config) 98 | if err != nil { 99 | t.Fatalf("NewConfluenceAdapter() 
error = %v", err) 100 | } 101 | 102 | if adapter.Name() != "confluence" { 103 | t.Errorf("Name() = %v, want %v", adapter.Name(), "confluence") 104 | } 105 | } 106 | 107 | func TestConfluenceAdapter_GetSetLastSync(t *testing.T) { 108 | config := config.ConfluenceConfig{ 109 | BaseURL: "https://test.atlassian.net", 110 | Username: "test@example.com", 111 | APIKey: "test-key", 112 | SpaceMappings: []config.SpaceMapping{ 113 | {SpaceKey: "TEST", KnowledgeID: "knowledge-id"}, 114 | }, 115 | } 116 | 117 | adapter, err := NewConfluenceAdapter(config) 118 | if err != nil { 119 | t.Fatalf("NewConfluenceAdapter() error = %v", err) 120 | } 121 | 122 | // Test initial last sync 123 | initialSync := adapter.GetLastSync() 124 | if initialSync.IsZero() { 125 | t.Error("GetLastSync() returned zero time") 126 | } 127 | 128 | // Test setting last sync 129 | newTime := time.Now() 130 | adapter.SetLastSync(newTime) 131 | if !adapter.GetLastSync().Equal(newTime) { 132 | t.Errorf("SetLastSync() did not update last sync time") 133 | } 134 | } 135 | 136 | func TestSanitizeFilename(t *testing.T) { 137 | adapter := &ConfluenceAdapter{} 138 | 139 | tests := []struct { 140 | input string 141 | expected string 142 | }{ 143 | {"normal-file.txt", "normal-file.txt"}, 144 | {"file/with/slashes.txt", "file_with_slashes.txt"}, 145 | {"file:with:colons.txt", "file_with_colons.txt"}, 146 | {"file*with*asterisks.txt", "file_with_asterisks.txt"}, 147 | {"file?with?questions.txt", "file_with_questions.txt"}, 148 | {"file\"with\"quotes.txt", "file_with_quotes.txt"}, 149 | {"filebrackets.txt", "file_with_brackets.txt"}, 150 | {"file|with|pipes.txt", "file_with_pipes.txt"}, 151 | {"very-long-filename-that-should-be-truncated-because-it-exceeds-the-maximum-length-limit-of-one-hundred-characters.txt", "very-long-filename-that-should-be-truncated-because-it-exceeds-the-maximum-length-limit-of-one-hundr"}, 152 | } 153 | 154 | for _, tt := range tests { 155 | t.Run(tt.input, func(t *testing.T) { 156 | result := 
adapter.SanitizeFilename(tt.input) 157 | if result != tt.expected { 158 | t.Errorf("SanitizeFilename(%q) = %q, want %q", tt.input, result, tt.expected) 159 | } 160 | }) 161 | } 162 | } 163 | 164 | func TestHtmlToText(t *testing.T) { 165 | adapter := &ConfluenceAdapter{} 166 | 167 | tests := []struct { 168 | input string 169 | expected string 170 | }{ 171 | {"

Hello world

", "Hello world"}, 172 | {"
Line 1
Line 2
", "Line 1\nLine 2"}, 173 | {"

Paragraph 1

Paragraph 2

", "Paragraph 1\n\nParagraph 2"}, 174 | {"

Title

Content

", "Title\nContent"}, 175 | {"Bold text", "Bold text"}, 176 | {"Italic text", "Italic text"}, 177 | {"", ""}, 178 | {"Plain text without HTML", "Plain text without HTML"}, 179 | } 180 | 181 | for _, tt := range tests { 182 | t.Run(tt.input, func(t *testing.T) { 183 | result := adapter.HtmlToText(tt.input) 184 | if result != tt.expected { 185 | t.Errorf("HtmlToText(%q) = %q, want %q", tt.input, result, tt.expected) 186 | } 187 | }) 188 | } 189 | } 190 | 191 | // Note: FetchFiles test would require mocking HTTP requests 192 | // This would be more complex and would typically use a library like httptest 193 | // or a mocking framework like gomock 194 | -------------------------------------------------------------------------------- /adapter_readme/GITHUB_ADAPTER.md: -------------------------------------------------------------------------------- 1 | # GitHub Adapter 2 | 3 | The GitHub adapter allows you to sync content from GitHub repositories into OpenWebUI knowledge bases. It supports multiple repositories and can map each repository to a different knowledge base. 
4 | 5 | ## Features 6 | 7 | - **Multi-repository support**: Sync from multiple GitHub repositories 8 | - **Knowledge base mapping**: Map each repository to a specific OpenWebUI knowledge base 9 | - **Incremental sync**: Only fetches files that have changed since the last sync 10 | - **File filtering**: Automatically filters out binary files and common non-content files 11 | - **Authentication**: Uses GitHub personal access tokens for secure API access 12 | 13 | ## Configuration 14 | 15 | ### Environment Variables 16 | 17 | Set the following environment variable: 18 | 19 | ```bash 20 | export GITHUB_TOKEN="your-github-personal-access-token" 21 | ``` 22 | 23 | ### Configuration File 24 | 25 | Add the following section to your `config.yaml`: 26 | 27 | ```yaml 28 | github: 29 | enabled: true 30 | token: "" # Set via GITHUB_TOKEN environment variable 31 | mappings: 32 | - repository: "owner/repo-name" # GitHub repository in format "owner/repo" 33 | knowledge_id: "repo-knowledge-base" 34 | - repository: "another-owner/another-repo" 35 | knowledge_id: "another-knowledge-base" 36 | ``` 37 | 38 | ### Configuration Options 39 | 40 | | Option | Type | Required | Default | Description | 41 | |--------|------|----------|---------|-------------| 42 | | `enabled` | boolean | Yes | `false` | Enable/disable the GitHub adapter | 43 | | `token` | string | Yes | - | GitHub personal access token (set via `GITHUB_TOKEN` env var) | 44 | | `mappings` | array | Yes | `[]` | List of repository mappings | 45 | 46 | ### Repository Mapping 47 | 48 | Each mapping in the `mappings` array should contain: 49 | 50 | | Field | Type | Required | Description | 51 | |-------|------|----------|-------------| 52 | | `repository` | string | Yes | GitHub repository in format "owner/repo" | 53 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID | 54 | 55 | ## GitHub Token Setup 56 | 57 | 1. Go to GitHub Settings → Developer settings → Personal access tokens → Tokens (classic) 58 | 2. 
Click "Generate new token (classic)" 59 | 3. Give it a descriptive name (e.g., "OpenWebUI Content Sync") 60 | 4. Select the following scopes: 61 | - `repo` (Full control of private repositories) 62 | - `public_repo` (Access public repositories) 63 | 5. Click "Generate token" 64 | 6. Copy the token and set it as the `GITHUB_TOKEN` environment variable 65 | 66 | ## File Processing 67 | 68 | The GitHub adapter processes files as follows: 69 | 70 | ### Supported File Types 71 | 72 | - **Markdown files** (`.md`, `.markdown`) 73 | - **Text files** (`.txt`, `.text`) 74 | - **Documentation files** (`.rst`, `.adoc`) 75 | - **Code files** (`.py`, `.js`, `.ts`, `.go`, `.java`, `.cpp`, `.c`, `.h`, `.hpp`) 76 | - **Configuration files** (`.yaml`, `.yml`, `.json`, `.toml`, `.ini`, `.cfg`) 77 | - **Shell scripts** (`.sh`, `.bash`, `.zsh`) 78 | 79 | ### Excluded Files 80 | 81 | The adapter automatically excludes: 82 | - Binary files (images, videos, executables, etc.) 83 | - Common non-content files (`.gitignore`, `.gitattributes`, etc.) 84 | - Large files (> 1MB) 85 | - Files in common exclusion directories (`.git/`, `node_modules/`, `vendor/`, etc.) 86 | 87 | ### File Path Structure 88 | 89 | Files are stored with paths that include the repository name: 90 | ``` 91 | github/owner-repo-name/path/to/file.md 92 | ``` 93 | 94 | ## Sync Behavior 95 | 96 | - **Initial sync**: Fetches all files from configured repositories 97 | - **Incremental sync**: Only fetches files modified since the last successful sync 98 | - **Error handling**: If a repository fails to sync, other repositories continue processing 99 | - **Rate limiting**: Respects GitHub API rate limits with automatic backoff 100 | 101 | ## Troubleshooting 102 | 103 | ### Common Issues 104 | 105 | 1. **Authentication errors** 106 | - Verify your GitHub token is valid and has the correct permissions 107 | - Check that the token hasn't expired 108 | 109 | 2. 
**Repository not found** 110 | - Verify the repository name format is correct: "owner/repo" 111 | - Ensure the token has access to the repository (for private repos) 112 | 113 | 3. **Rate limit exceeded** 114 | - The adapter automatically handles rate limits with exponential backoff 115 | - Consider reducing the sync frequency if this occurs frequently 116 | 117 | 4. **Empty knowledge base** 118 | - Check that the repository contains supported file types 119 | - Verify the repository has content in the root directory or subdirectories 120 | 121 | ### Debug Logging 122 | 123 | Enable debug logging to see detailed sync information: 124 | 125 | ```yaml 126 | log_level: debug 127 | ``` 128 | 129 | This will show: 130 | - Which repositories are being processed 131 | - File discovery and filtering details 132 | - API request/response information 133 | - Sync progress and timing 134 | 135 | ## Example Configuration 136 | 137 | ```yaml 138 | # Complete example configuration 139 | log_level: info 140 | schedule: 141 | interval: 1h 142 | 143 | storage: 144 | path: "/data" 145 | 146 | openwebui: 147 | base_url: "http://localhost:8080" 148 | api_key: "your-openwebui-api-key" 149 | 150 | github: 151 | enabled: true 152 | token: "" # Set via GITHUB_TOKEN environment variable 153 | mappings: 154 | - repository: "microsoft/vscode" 155 | knowledge_id: "vscode-docs" 156 | - repository: "kubernetes/kubernetes" 157 | knowledge_id: "k8s-docs" 158 | - repository: "your-org/private-repo" 159 | knowledge_id: "private-docs" 160 | ``` 161 | 162 | ## Security Considerations 163 | 164 | - **Token security**: Store your GitHub token securely and never commit it to version control 165 | - **Repository access**: Only grant access to repositories that contain appropriate content 166 | - **Content filtering**: Review the content being synced to ensure it's appropriate for your knowledge base 167 | - **Rate limits**: Be mindful of GitHub API rate limits, especially with large repositories 168 | 169 
| ## Performance Tips 170 | 171 | - **Repository size**: Large repositories with many files may take longer to sync 172 | - **Sync frequency**: Balance sync frequency with API rate limits 173 | - **File filtering**: The adapter automatically filters out unnecessary files to improve performance 174 | - **Incremental sync**: Only changed files are processed after the initial sync 175 | -------------------------------------------------------------------------------- /internal/health/health_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestNewServer(t *testing.T) { 14 | server := NewServer(8080) 15 | if server == nil { 16 | t.Fatal("Expected server to be created") 17 | } 18 | if server.server == nil { 19 | t.Fatal("Expected HTTP server to be created") 20 | } 21 | if server.server.Addr != ":8080" { 22 | t.Errorf("Expected server address ':8080', got '%s'", server.server.Addr) 23 | } 24 | } 25 | 26 | func TestServer_healthHandler(t *testing.T) { 27 | server := NewServer(8080) 28 | 29 | req := httptest.NewRequest("GET", "/health", nil) 30 | w := httptest.NewRecorder() 31 | 32 | server.healthHandler(w, req) 33 | 34 | if w.Code != http.StatusOK { 35 | t.Errorf("Expected status code %d, got %d", http.StatusOK, w.Code) 36 | } 37 | 38 | var response HealthResponse 39 | err := json.NewDecoder(w.Body).Decode(&response) 40 | if err != nil { 41 | t.Fatalf("Failed to decode response: %v", err) 42 | } 43 | 44 | if response.Status != "healthy" { 45 | t.Errorf("Expected status 'healthy', got '%s'", response.Status) 46 | } 47 | if response.Version != "1.0.0" { 48 | t.Errorf("Expected version '1.0.0', got '%s'", response.Version) 49 | } 50 | if response.Timestamp.IsZero() { 51 | t.Error("Expected timestamp to be set") 52 | } 53 | } 54 | 55 | func TestServer_readyHandler(t 
*testing.T) { 56 | server := NewServer(8080) 57 | 58 | req := httptest.NewRequest("GET", "/ready", nil) 59 | w := httptest.NewRecorder() 60 | 61 | server.readyHandler(w, req) 62 | 63 | if w.Code != http.StatusOK { 64 | t.Errorf("Expected status code %d, got %d", http.StatusOK, w.Code) 65 | } 66 | 67 | var response HealthResponse 68 | err := json.NewDecoder(w.Body).Decode(&response) 69 | if err != nil { 70 | t.Fatalf("Failed to decode response: %v", err) 71 | } 72 | 73 | if response.Status != "ready" { 74 | t.Errorf("Expected status 'ready', got '%s'", response.Status) 75 | } 76 | if response.Version != "1.0.0" { 77 | t.Errorf("Expected version '1.0.0', got '%s'", response.Version) 78 | } 79 | if response.Timestamp.IsZero() { 80 | t.Error("Expected timestamp to be set") 81 | } 82 | } 83 | 84 | func TestServer_Start(t *testing.T) { 85 | server := NewServer(8080) // Use port 0 for random port 86 | 87 | // Start server in goroutine 88 | go func() { 89 | err := server.Start() 90 | if err != nil && err != http.ErrServerClosed { 91 | t.Errorf("Server start error: %v", err) 92 | } 93 | }() 94 | 95 | // Give server time to start 96 | time.Sleep(10 * time.Millisecond) 97 | 98 | // Test health endpoint 99 | resp, err := http.Get("http://" + server.server.Addr + "/health") 100 | if err != nil { 101 | t.Fatalf("Failed to make health request: %v", err) 102 | } 103 | defer resp.Body.Close() 104 | 105 | if resp.StatusCode != http.StatusOK { 106 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode) 107 | } 108 | 109 | // Test ready endpoint 110 | resp, err = http.Get("http://" + server.server.Addr + "/ready") 111 | if err != nil { 112 | t.Fatalf("Failed to make ready request: %v", err) 113 | } 114 | defer resp.Body.Close() 115 | 116 | if resp.StatusCode != http.StatusOK { 117 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode) 118 | } 119 | 120 | // Stop server 121 | ctx, cancel := context.WithTimeout(context.Background(), 
1*time.Second) 122 | defer cancel() 123 | server.Stop(ctx) 124 | } 125 | 126 | func TestServer_Stop(t *testing.T) { 127 | server := NewServer(0) 128 | 129 | // Start server in goroutine 130 | go func() { 131 | server.Start() 132 | }() 133 | 134 | // Give server time to start 135 | time.Sleep(10 * time.Millisecond) 136 | 137 | // Stop server 138 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 139 | defer cancel() 140 | 141 | err := server.Stop(ctx) 142 | if err != nil { 143 | t.Errorf("Failed to stop server: %v", err) 144 | } 145 | } 146 | 147 | func TestHealthResponse_JSON(t *testing.T) { 148 | response := HealthResponse{ 149 | Status: "healthy", 150 | Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), 151 | Version: "1.0.0", 152 | } 153 | 154 | // Test JSON marshaling 155 | data, err := json.Marshal(response) 156 | if err != nil { 157 | t.Fatalf("Failed to marshal response: %v", err) 158 | } 159 | 160 | // Test JSON unmarshaling 161 | var unmarshaled HealthResponse 162 | err = json.Unmarshal(data, &unmarshaled) 163 | if err != nil { 164 | t.Fatalf("Failed to unmarshal response: %v", err) 165 | } 166 | 167 | if unmarshaled.Status != response.Status { 168 | t.Errorf("Expected status %s, got %s", response.Status, unmarshaled.Status) 169 | } 170 | if unmarshaled.Version != response.Version { 171 | t.Errorf("Expected version %s, got %s", response.Version, unmarshaled.Version) 172 | } 173 | if !unmarshaled.Timestamp.Equal(response.Timestamp) { 174 | t.Errorf("Expected timestamp %v, got %v", response.Timestamp, unmarshaled.Timestamp) 175 | } 176 | } 177 | 178 | func TestServer_DifferentPorts(t *testing.T) { 179 | ports := []int{8080, 8081, 9000, 0} 180 | 181 | for _, port := range ports { 182 | server := NewServer(port) 183 | if server == nil { 184 | t.Fatalf("Failed to create server on port %d", port) 185 | } 186 | 187 | expectedAddr := ":" + fmt.Sprintf("%d", port) 188 | if port != 0 && server.server.Addr != expectedAddr { 189 | 
t.Errorf("Expected address %s, got %s", expectedAddr, server.server.Addr) 190 | } 191 | } 192 | } 193 | 194 | func TestServer_ConcurrentRequests(t *testing.T) { 195 | server := NewServer(8080) 196 | 197 | // Start server 198 | go func() { 199 | server.Start() 200 | }() 201 | defer func() { 202 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 203 | defer cancel() 204 | server.Stop(ctx) 205 | }() 206 | 207 | // Give server time to start 208 | time.Sleep(10 * time.Millisecond) 209 | 210 | // Make concurrent requests 211 | done := make(chan bool, 10) 212 | for i := 0; i < 10; i++ { 213 | go func() { 214 | defer func() { done <- true }() 215 | 216 | resp, err := http.Get("http://" + server.server.Addr + "/health") 217 | if err != nil { 218 | t.Errorf("Failed to make health request: %v", err) 219 | return 220 | } 221 | defer resp.Body.Close() 222 | 223 | if resp.StatusCode != http.StatusOK { 224 | t.Errorf("Expected status code %d, got %d", http.StatusOK, resp.StatusCode) 225 | } 226 | }() 227 | } 228 | 229 | // Wait for all requests to complete 230 | for i := 0; i < 10; i++ { 231 | <-done 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /adapter_readme/JIRA_ADAPTER.md: -------------------------------------------------------------------------------- 1 | # Jira Adapter 2 | 3 | The Jira adapter allows you to sync content from Atlassian Jira projects into OpenWebUI knowledge bases. This adapter uses the Jira REST API to fetch issues and comments from specified Jira projects and uploads them to OpenWebUI. 
4 | 5 | ## API Compatibility 6 | 7 | This adapter uses Jira REST API v3, which provides: 8 | - Modern cursor-based pagination 9 | - Improved performance and reliability 10 | - Better support for large projects 11 | - Enhanced metadata and content structure 12 | 13 | ## Features 14 | 15 | - **Issue Content Sync**: Fetches all issues from specified Jira projects using Jira API v3 16 | - **HTML to Markdown Conversion**: Converts Jira's HTML content to markdown format 17 | - **Comment Support**: Downloads and syncs issue comments 18 | - **Multi-Project Support**: Can sync from multiple Jira projects 19 | - **Cursor-based Pagination**: Uses modern cursor-based pagination for efficient data retrieval 20 | 21 | ## Configuration 22 | 23 | ### YAML Configuration 24 | 25 | Add the following to your `config.yaml`: 26 | 27 | ```yaml 28 | jira: 29 | enabled: true 30 | base_url: "https://your-domain.atlassian.net" 31 | username: "your-email@example.com" 32 | api_key: "your-jira-api-key" 33 | project_mappings: 34 | - project_key: "PROJ" 35 | knowledge_id: "your-knowledge-base-id" 36 | - project_key: "ANOTHER" 37 | knowledge_id: "another-knowledge-base-id" 38 | page_limit: 100 39 | ``` 40 | 41 | ### Environment Variables 42 | 43 | Only the API key can be configured via an environment variable (for security): 44 | 45 | ```bash 46 | JIRA_API_KEY="your-jira-api-key" 47 | ``` 48 | 49 | All other configuration should be done in the `config.yaml` file. 
50 | 51 | ### Kubernetes Configuration 52 | 53 | #### ConfigMap 54 | 55 | ```yaml 56 | apiVersion: v1 57 | kind: ConfigMap 58 | metadata: 59 | name: connector-config 60 | data: 61 | config.yaml: | 62 | jira: 63 | enabled: true 64 | base_url: "https://your-domain.atlassian.net" 65 | username: "your-email@example.com" 66 | project_mappings: 67 | - project_key: "PROJ" 68 | knowledge_id: "your-knowledge-base-id" 69 | - project_key: "ANOTHER" 70 | knowledge_id: "another-knowledge-base-id" 71 | page_limit: 100 72 | ``` 73 | 74 | ## Authentication 75 | 76 | The Jira adapter uses Basic Authentication with your Jira username and API key. To get an API key: 77 | 78 | 1. Go to [Atlassian Account Settings](https://id.atlassian.com/manage-profile/security/api-tokens) 79 | 2. Click "Create API token" 80 | 3. Give it a label and copy the generated token 81 | 4. Use your email address as the username and the token as the API key 82 | 83 | ## Configuration Parameters 84 | 85 | | Parameter | Type | Required | Default | Description | 86 | |-----------|------|----------|---------|-------------| 87 | | `enabled` | boolean | No | `false` | Enable the Jira adapter | 88 | | `base_url` | string | Yes | - | Your Jira instance URL (e.g., `https://your-domain.atlassian.net`) | 89 | | `username` | string | Yes | - | Your Jira username (usually your email) | 90 | | `api_key` | string | Yes | - | Your Jira API key | 91 | | `project_mappings` | array | Yes | - | List of Jira project keys and their corresponding OpenWebUI knowledge base IDs | 92 | | `page_limit` | integer | No | `100` | Maximum number of issues to fetch per project | 93 | 94 | ## File Processing 95 | 96 | ### Issue Content 97 | 98 | - Jira issues are converted from HTML to markdown format 99 | - Issues are saved as `.md` files with sanitized filenames 100 | - File paths follow the pattern: `{issue-id}.md` 101 | 102 | ### Issue Metadata 103 | 104 | Each issue file includes: 105 | - Issue key 106 | - Reporter name 107 | - Issue type 
108 | - Status 109 | - Resolution status 110 | 111 | ### Comments 112 | 113 | - Comments are fetched and included in the markdown file 114 | - Each comment includes the author's display name and timestamp 115 | - Comments are formatted in markdown 116 | 117 | ## Error Handling 118 | 119 | - **Authentication Errors**: Invalid credentials will cause the adapter to fail initialization 120 | - **API Errors**: HTTP errors from Jira API are logged and may cause individual issue processing to fail 121 | - **File Processing Errors**: Individual file processing errors are logged but don't stop the overall sync 122 | - **Network Errors**: Connection timeouts and network issues are handled gracefully 123 | 124 | ## Limitations 125 | 126 | 1. **API Rate Limits**: Jira has API rate limits that may affect sync performance 127 | 2. **Large Projects**: Very large projects with many issues may take significant time to sync 128 | 3. **HTML Conversion**: The HTML to markdown conversion is basic and may not preserve all formatting 129 | 4. **Comment Limitations**: Comments are limited to the Jira API's available fields 130 | 131 | ## Troubleshooting 132 | 133 | ### Common Issues 134 | 135 | 1. **Authentication Failed** 136 | - Verify your username and API key are correct 137 | - Ensure your API key has the necessary permissions 138 | 139 | 2. **Project Not Found** 140 | - Check that the project key is correct 141 | - Verify you have access to the project 142 | 143 | 3. 
**No Content Synced** 144 | - Check that the project contains issues 145 | - Verify the `page_limit` setting is appropriate 146 | - Check logs for API errors 147 | 148 | ### Debug Mode 149 | 150 | Enable debug logging to see detailed information about the sync process: 151 | 152 | ```yaml 153 | log_level: debug 154 | ``` 155 | 156 | ## Example Usage 157 | 158 | ### Basic Configuration 159 | 160 | ```yaml 161 | jira: 162 | enabled: true 163 | base_url: "https://mycompany.atlassian.net" 164 | username: "john.doe@mycompany.com" 165 | api_key: "ATATT3xFfGF0..." 166 | project_mappings: 167 | - project_key: "DOCS" 168 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583" 169 | - project_key: "PROJ" 170 | knowledge_id: "a1b2c3d4-e5f6-7890-abcd-ef1234567890" 171 | page_limit: 100 172 | ``` 173 | 174 | ### Advanced Configuration 175 | 176 | ```yaml 177 | jira: 178 | enabled: true 179 | base_url: "https://mycompany.atlassian.net" 180 | username: "john.doe@mycompany.com" 181 | api_key: "ATATT3xFfGF0..." 182 | project_mappings: 183 | - project_key: "DOCS" 184 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583" 185 | - project_key: "PROJ" 186 | knowledge_id: "a1b2c3d4-e5f6-7890-abcd-ef1234567890" 187 | - project_key: "OPS" 188 | knowledge_id: "98765432-10fe-dcba-0987-6543210fedcb" 189 | page_limit: 500 190 | ``` 191 | 192 | This configuration will sync up to 500 issues from each of the three specified projects. 193 | -------------------------------------------------------------------------------- /ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # OpenWebUI GitHub Connector - Architecture 2 | 3 | ## Overview 4 | 5 | The OpenWebUI GitHub Connector is a Kubernetes-native application that synchronizes files from GitHub repositories to OpenWebUI knowledge bases using an adapter architecture pattern. 6 | 7 | ## Architecture Components 8 | 9 | ### 1. 
Adapter Layer 10 | - **Interface**: `adapter.Adapter` defines the contract for data source adapters 11 | - **GitHub Adapter**: Implements GitHub API integration for repository file fetching 12 | - **Extensible**: Easy to add new adapters (GitLab, Bitbucket, etc.) 13 | 14 | ### 2. Sync Manager 15 | - **File Diffing**: Uses SHA256 hashing to detect file changes 16 | - **Local Storage**: Maintains files on persistent volumes 17 | - **OpenWebUI Integration**: Handles file uploads and knowledge base association 18 | 19 | ### 3. Scheduler 20 | - **Cron-based**: Uses robfig/cron for scheduled synchronization 21 | - **Configurable**: Supports various interval patterns (1h, 2h, etc.) 22 | - **Graceful Shutdown**: Properly handles termination signals 23 | 24 | ### 4. Configuration Management 25 | - **YAML-based**: Primary configuration via YAML files 26 | - **Environment Override**: Environment variables override file settings 27 | - **Kubernetes Integration**: ConfigMaps and Secrets support 28 | 29 | ### 5. Health Monitoring 30 | - **HTTP Endpoints**: `/health` and `/ready` for Kubernetes probes 31 | - **Structured Logging**: JSON-formatted logs with configurable levels 32 | - **Error Handling**: Comprehensive error handling and recovery 33 | 34 | ## Data Flow 35 | 36 | ``` 37 | GitHub Repository → GitHub Adapter → Sync Manager → OpenWebUI API 38 | ↓ 39 | Local Storage (PVC) 40 | ``` 41 | 42 | ### Detailed Flow: 43 | 44 | 1. **Scheduler Trigger**: Cron job triggers sync process 45 | 2. **Adapter Fetch**: GitHub adapter fetches repository files 46 | 3. **File Processing**: Files are filtered (text files only) and hashed 47 | 4. **Change Detection**: Compare hashes with previously synced files 48 | 5. **Local Storage**: Save files to persistent volume 49 | 6. **OpenWebUI Upload**: Upload new/changed files to OpenWebUI 50 | 7. **Knowledge Association**: Add files to specified knowledge base 51 | 8. 
**Index Update**: Update local file index for future comparisons 52 | 53 | ## API Integration 54 | 55 | ### OpenWebUI APIs Used: 56 | - `POST /api/v1/files/` - Upload files 57 | - `GET /api/v1/knowledge/` - List knowledge sources 58 | - `POST /api/v1/knowledge/{id}/file/add` - Add file to knowledge 59 | - `POST /api/v1/knowledge/{id}/file/remove` - Remove file from knowledge 60 | 61 | ### GitHub APIs Used: 62 | - `GET /repos/{owner}/{repo}/contents` - Fetch repository contents 63 | - File content retrieval via GitHub's content API 64 | 65 | ## File Processing 66 | 67 | ### Supported File Types: 68 | - Markdown (`.md`) 69 | - Text files (`.txt`) 70 | - Code files (`.go`, `.py`, `.js`, `.ts`, etc.) 71 | - Configuration files (`.yaml`, `.json`, `.env`) 72 | - Documentation files (`.rst`, `.adoc`) 73 | - And many more text-based formats 74 | 75 | ### File Filtering: 76 | - Binary files are automatically excluded 77 | - Large files are handled via GitHub's download URLs 78 | - File size limits can be configured 79 | 80 | ## Storage Strategy 81 | 82 | ### Local Storage: 83 | - **Persistent Volume**: Kubernetes PVC for data persistence 84 | - **File Organization**: Files organized by source and path 85 | - **Index Management**: JSON-based file index for change tracking 86 | 87 | ### File Index Structure: 88 | ```json 89 | { 90 | "source:path": { 91 | "path": "file.md", 92 | "hash": "sha256_hash", 93 | "file_id": "openwebui_file_id", 94 | "source": "github", 95 | "synced_at": "2024-01-01T00:00:00Z", 96 | "modified": "2024-01-01T00:00:00Z" 97 | } 98 | } 99 | ``` 100 | 101 | ## Error Handling 102 | 103 | ### Retry Logic: 104 | - Network failures are retried with exponential backoff 105 | - GitHub API rate limits are respected 106 | - OpenWebUI API failures are logged and retried 107 | 108 | ### Recovery: 109 | - Application can recover from crashes 110 | - File index is persisted and restored 111 | - Partial syncs are resumed on restart 112 | 113 | ## Security Considerations 
114 | 115 | ### Authentication: 116 | - GitHub Personal Access Tokens 117 | - OpenWebUI API Keys 118 | - Kubernetes Secrets for credential management 119 | 120 | ### Network Security: 121 | - HTTPS for all external API calls 122 | - Configurable timeouts and retry limits 123 | - No sensitive data in logs 124 | 125 | ## Monitoring and Observability 126 | 127 | ### Logging: 128 | - Structured JSON logging 129 | - Configurable log levels 130 | - Request/response logging for debugging 131 | 132 | ### Health Checks: 133 | - Liveness probe: `/health` 134 | - Readiness probe: `/ready` 135 | - Kubernetes-native health monitoring 136 | 137 | ### Metrics: 138 | - Sync operation counts 139 | - File processing statistics 140 | - Error rates and types 141 | 142 | ## Scalability 143 | 144 | ### Horizontal Scaling: 145 | - Stateless design allows multiple replicas 146 | - File index can be shared via external storage 147 | - Adapter instances can be distributed 148 | 149 | ### Vertical Scaling: 150 | - Configurable resource limits 151 | - Memory usage scales with repository size 152 | - CPU usage scales with sync frequency 153 | 154 | ## Deployment 155 | 156 | ### Kubernetes Manifests: 157 | - Deployment with health checks 158 | - PersistentVolumeClaim for storage 159 | - ConfigMap for configuration 160 | - Secrets for credentials 161 | 162 | ### Docker: 163 | - Multi-stage build for minimal image size 164 | - Alpine Linux base image 165 | - Non-root user for security 166 | 167 | ## Configuration 168 | 169 | ### Environment Variables: 170 | - `OPENWEBUI_BASE_URL`: OpenWebUI instance URL 171 | - `OPENWEBUI_API_KEY`: API authentication 172 | - `GITHUB_TOKEN`: GitHub authentication 173 | - `GITHUB_KNOWLEDGE_ID`: Target knowledge base 174 | - `STORAGE_PATH`: Local storage path 175 | - `LOG_LEVEL`: Logging verbosity 176 | 177 | ### Configuration File: 178 | ```yaml 179 | log_level: info 180 | schedule: 181 | interval: 1h 182 | storage: 183 | path: /data 184 | openwebui: 185 | 
base_url: "http://localhost:8080" 186 | api_key: "" 187 | github: 188 | enabled: true 189 | token: "" 190 | repositories: 191 | - "owner/repo1" 192 | - "owner/repo2" 193 | knowledge_id: "" 194 | ``` 195 | 196 | ## Future Enhancements 197 | 198 | ### Planned Features: 199 | - Additional adapters (GitLab, Bitbucket) 200 | - Webhook-based real-time sync 201 | - File content transformation 202 | - Advanced filtering rules 203 | - Sync status dashboard 204 | - Metrics and alerting 205 | 206 | ### Extensibility: 207 | - Plugin architecture for custom adapters 208 | - Custom file processors 209 | - Integration with CI/CD pipelines 210 | - Multi-tenant support 211 | -------------------------------------------------------------------------------- /config.example.yaml: -------------------------------------------------------------------------------- 1 | # Example configuration for OpenWebUI GitHub Connector 2 | # Copy this file to config.yaml and update the values 3 | 4 | log_level: info 5 | 6 | # Sync schedule configuration 7 | schedule: 8 | interval: 1h # Options: 30m, 1h, 2h, 6h, 12h, 24h 9 | 10 | # Local storage configuration 11 | storage: 12 | path: /data # Path where files will be stored locally 13 | 14 | # OpenWebUI API configuration 15 | openwebui: 16 | base_url: "http://localhost:8080" # OpenWebUI instance URL 17 | api_key: "" # Set via OPENWEBUI_API_KEY environment variable 18 | 19 | # GitHub adapter configuration 20 | github: 21 | enabled: true 22 | token: "" # Set via GITHUB_TOKEN environment variable 23 | mappings: 24 | - repository: "owner/repo1" 25 | knowledge_id: "knowledge-base-1" 26 | - repository: "owner/repo2" 27 | knowledge_id: "knowledge-base-2" 28 | - repository: "microsoft/vscode" 29 | knowledge_id: "vscode-knowledge-base" 30 | 31 | # Confluence adapter configuration 32 | confluence: 33 | enabled: false 34 | base_url: "https://your-domain.atlassian.net" # Your Confluence instance URL 35 | username: "your-email@example.com" # Your Confluence username 
(usually email) 36 | api_key: "" # Set via CONFLUENCE_API_KEY environment variable 37 | 38 | # Space mappings (per-space knowledge IDs) 39 | space_mappings: 40 | - space_key: "SPACEKEY1" 41 | knowledge_id: "space1-knowledge-base" 42 | - space_key: "SPACEKEY2" 43 | knowledge_id: "space2-knowledge-base" 44 | - space_key: "DOCS" 45 | knowledge_id: "docs-knowledge-base" 46 | 47 | # Parent page mappings (per-parent-page knowledge IDs) 48 | parent_page_mappings: 49 | - parent_page_id: "3098214470" 50 | knowledge_id: "parent-page-knowledge-base" 51 | - parent_page_id: "1234567890" 52 | knowledge_id: "another-parent-page-knowledge-base" 53 | 54 | page_limit: 100 # Maximum pages to fetch per space (0 = no limit) 55 | include_attachments: true # Whether to download and sync page attachments 56 | 57 | # Local Folders adapter configuration 58 | local_folders: 59 | enabled: false 60 | mappings: 61 | - folder_path: "/path/to/docs" 62 | knowledge_id: "docs-knowledge-base" 63 | - folder_path: "/path/to/guides" 64 | knowledge_id: "guides-knowledge-base" 65 | - folder_path: "/path/to/notes" 66 | knowledge_id: "notes-knowledge-base" 67 | 68 | # Slack adapter configuration 69 | slack: 70 | enabled: false 71 | token: "" # Set via SLACK_TOKEN environment variable 72 | channel_mappings: 73 | - channel_id: "C1234567890" # Slack channel ID 74 | channel_name: "general" # Channel name for display 75 | knowledge_id: "general-knowledge-base" 76 | - channel_id: "C0987654321" 77 | channel_name: "dev-team" 78 | knowledge_id: "dev-knowledge-base" 79 | - channel_id: "C1122334455" 80 | channel_name: "support" 81 | knowledge_id: "support-knowledge-base" 82 | regex_patterns: 83 | # Auto-discover and join channels matching regex patterns 84 | - pattern: "^sales-.*-internal$" # Matches channels like "sales-team-internal", "sales-west-internal" 85 | knowledge_id: "sales-knowledge-base" 86 | auto_join: true # Automatically join matching channels 87 | - pattern: "^dev-.*" # Matches channels like 
"dev-frontend", "dev-backend", "dev-ops" 88 | knowledge_id: "dev-knowledge-base" 89 | auto_join: true 90 | - pattern: "^support-.*" # Matches channels like "support-tier1", "support-escalation" 91 | knowledge_id: "support-knowledge-base" 92 | auto_join: false # Discover but don't auto-join (manual approval required) 93 | - pattern: "^alert-.*" # Matches channels like "alert-production", "alert-staging" 94 | knowledge_id: "monitoring-knowledge-base" 95 | auto_join: true 96 | days_to_fetch: 30 # Number of days to fetch messages (default: 30) 97 | maintain_history: false # Whether to maintain indefinite history or age off (default: false) 98 | message_limit: 1000 # Max messages per channel per run (default: 1000) 99 | include_threads: true # Whether to include thread messages (default: true) 100 | include_reactions: false # Whether to include reaction data (default: false) 101 | # Jira adapter configuration 102 | jira: 103 | enabled: false 104 | base_url: "https://your-domain.atlassian.net" # Your Jira instance URL 105 | username: "your-email@example.com" # Your Jira username (usually email) 106 | api_key: "" # Set via JIRA_API_KEY environment variable 107 | page_limit: 100 # Maximum issues to fetch per project (0 = no limit) 108 | 109 | project_mappings: 110 | - project_key: "PROJ" 111 | knowledge_id: "your-knowledge-base-id" 112 | - project_key: "ANOTHER" 113 | knowledge_id: "another-knowledge-base-id" 114 | 115 | # Example configurations for different environments: 116 | 117 | # Development 118 | # log_level: debug 119 | # schedule: 120 | # interval: 5m 121 | # openwebui: 122 | # base_url: "http://localhost:8080" 123 | # api_key: "dev-api-key" 124 | # confluence: 125 | # enabled: true 126 | # base_url: "https://dev-company.atlassian.net" 127 | # username: "dev-user@company.com" 128 | # api_key: "dev-confluence-api-key" 129 | # spaces: 130 | # - "DEV" 131 | # knowledge_id: "dev-knowledge-base-id" 132 | # page_limit: 50 133 | # include_attachments: false 134 | 135 | # 
Production 136 | # log_level: info 137 | # schedule: 138 | # interval: 1h 139 | # openwebui: 140 | # base_url: "https://openwebui.yourdomain.com" 141 | # api_key: "prod-api-key" 142 | # confluence: 143 | # enabled: true 144 | # base_url: "https://company.atlassian.net" 145 | # username: "sync-bot@company.com" 146 | # api_key: "prod-confluence-api-key" 147 | # spaces: 148 | # - "DOCS" 149 | # - "WIKI" 150 | # - "PROJECTS" 151 | # knowledge_id: "prod-knowledge-base-id" 152 | # page_limit: 500 153 | # include_attachments: true 154 | 155 | # Kubernetes (using environment variables) 156 | # All values should be set via environment variables or ConfigMap 157 | # log_level: info 158 | # schedule: 159 | # interval: 1h 160 | # storage: 161 | # path: /data 162 | # openwebui: 163 | # base_url: "" # Set via OPENWEBUI_BASE_URL 164 | # api_key: "" # Set via OPENWEBUI_API_KEY 165 | # github: 166 | # enabled: true 167 | # token: "" # Set via GITHUB_TOKEN 168 | # repositories: [] # Set via ConfigMap 169 | # knowledge_id: "" # Set via GITHUB_KNOWLEDGE_ID 170 | # confluence: 171 | # enabled: false 172 | # base_url: "" # Set in ConfigMap 173 | # username: "" # Set in ConfigMap 174 | # api_key: "" # Set via CONFLUENCE_API_KEY 175 | # spaces: [] # Set in ConfigMap 176 | # knowledge_id: "" # Set in ConfigMap 177 | # page_limit: 100 178 | # include_attachments: true 179 | -------------------------------------------------------------------------------- /internal/adapter/local_test.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "time" 9 | 10 | "github.com/openwebui-content-sync/internal/config" 11 | ) 12 | // TestNewLocalFolderAdapter table-tests NewLocalFolderAdapter: valid mappings must succeed; a disabled adapter, a non-existent folder and an empty mapping list must return an error. 13 | func TestNewLocalFolderAdapter(t *testing.T) { 14 | // Create a temporary directory holding one text file 15 | tempDir := t.TempDir() 16 | testFile := filepath.Join(tempDir, "test.txt") 17 | err := os.WriteFile(testFile, []byte("test content"), 0644) 18 | if 
err != nil { 19 | t.Fatalf("Failed to create test file: %v", err) 20 | } 21 | 22 | tests := []struct { 23 | name string 24 | config config.LocalFolderConfig 25 | wantErr bool 26 | }{ 27 | { 28 | name: "valid mapping configuration", 29 | config: config.LocalFolderConfig{ 30 | Enabled: true, 31 | Mappings: []config.LocalFolderMapping{ 32 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"}, 33 | }, 34 | }, 35 | wantErr: false, 36 | }, 37 | { 38 | name: "valid configuration with multiple mappings", 39 | config: func() config.LocalFolderConfig { 40 | // Create the subdirectory used as the second mapping's folder 41 | subDir := filepath.Join(tempDir, "subdir") 42 | os.MkdirAll(subDir, 0755) // NOTE(review): the MkdirAll error is discarded here 43 | return config.LocalFolderConfig{ 44 | Enabled: true, 45 | Mappings: []config.LocalFolderMapping{ 46 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"}, 47 | {FolderPath: subDir, KnowledgeID: "another-knowledge"}, 48 | }, 49 | } 50 | }(), 51 | wantErr: false, 52 | }, 53 | { 54 | name: "disabled adapter", 55 | config: config.LocalFolderConfig{ 56 | Enabled: false, 57 | }, 58 | wantErr: true, 59 | }, 60 | { 61 | name: "non-existent folder", 62 | config: config.LocalFolderConfig{ 63 | Enabled: true, 64 | Mappings: []config.LocalFolderMapping{ 65 | {FolderPath: "/non/existent/path", KnowledgeID: "test-knowledge"}, 66 | }, 67 | }, 68 | wantErr: true, 69 | }, 70 | { 71 | name: "no folders configured", 72 | config: config.LocalFolderConfig{ 73 | Enabled: true, 74 | }, 75 | wantErr: true, 76 | }, 77 | } 78 | 79 | for _, tt := range tests { 80 | t.Run(tt.name, func(t *testing.T) { 81 | adapter, err := NewLocalFolderAdapter(tt.config) 82 | if (err != nil) != tt.wantErr { 83 | t.Errorf("NewLocalFolderAdapter() error = %v, wantErr %v", err, tt.wantErr) 84 | return 85 | } 86 | if !tt.wantErr && adapter == nil { 87 | t.Error("NewLocalFolderAdapter() returned nil adapter when no error expected") 88 | } 89 | }) 90 | } 91 | } 92 | // TestLocalFolderAdapter_Name checks that Name() reports "local". 93 | func TestLocalFolderAdapter_Name(t *testing.T) { 94 | adapter := 
&LocalFolderAdapter{} 95 | if got := adapter.Name(); got != "local" { 96 | t.Errorf("Name() = %v, want %v", got, "local") 97 | } 98 | } 99 | // TestLocalFolderAdapter_GetSetLastSync checks the zero initial value and the SetLastSync/GetLastSync round trip on a zero-value adapter. 100 | func TestLocalFolderAdapter_GetSetLastSync(t *testing.T) { 101 | adapter := &LocalFolderAdapter{} 102 | 103 | // A zero-value adapter must report the zero time 104 | initialSync := adapter.GetLastSync() 105 | if !initialSync.IsZero() { 106 | t.Error("GetLastSync() should return zero time initially") 107 | } 108 | 109 | // A value stored with SetLastSync must be returned by GetLastSync 110 | newTime := time.Now() 111 | adapter.SetLastSync(newTime) 112 | if !adapter.GetLastSync().Equal(newTime) { 113 | t.Errorf("SetLastSync() did not update last sync time") 114 | } 115 | } 116 | // TestLocalFolderAdapter_FetchFiles checks that FetchFiles returns exactly the three text fixtures and tags each with the mapping's knowledge ID and a "local:<folder>" source. 117 | func TestLocalFolderAdapter_FetchFiles(t *testing.T) { 118 | // Create a temporary directory with test files 119 | tempDir := t.TempDir() 120 | 121 | // Fixture files keyed by path relative to tempDir 122 | testFiles := map[string]string{ 123 | "test1.txt": "content 1", 124 | "test2.md": "content 2", 125 | "subdir/test3.txt": "content 3", 126 | ".hidden.txt": "hidden content", // Hidden file: expected to be skipped 127 | "binary.bin": string([]byte{0, 1, 2, 3, 4}), // Binary content: expected to be skipped 128 | } 129 | 130 | for filename, content := range testFiles { 131 | fullPath := filepath.Join(tempDir, filename) 132 | dir := filepath.Dir(fullPath) 133 | if dir != tempDir { 134 | err := os.MkdirAll(dir, 0755) 135 | if err != nil { 136 | t.Fatalf("Failed to create subdirectory: %v", err) 137 | } 138 | } 139 | err := os.WriteFile(fullPath, []byte(content), 0644) 140 | if err != nil { 141 | t.Fatalf("Failed to create test file %s: %v", filename, err) 142 | } 143 | } 144 | // NOTE: the local variable below shadows the imported config package for the rest of this function 145 | config := config.LocalFolderConfig{ 146 | Enabled: true, 147 | Mappings: []config.LocalFolderMapping{ 148 | {FolderPath: tempDir, KnowledgeID: "test-knowledge"}, 149 | }, 150 | } 151 | 152 | adapter, err := NewLocalFolderAdapter(config) 153 | if err != nil { 154 | t.Fatalf("NewLocalFolderAdapter() error = %v", err) 155 | } 156 | 157 | ctx := context.Background() 158 | files, err := 
adapter.FetchFiles(ctx) 159 | if err != nil { 160 | t.Fatalf("FetchFiles() error = %v", err) 161 | } 162 | 163 | // Should find 3 files (test1.txt, test2.md, subdir/test3.txt) 164 | // Hidden and binary files should be ignored 165 | expectedCount := 3 166 | if len(files) != expectedCount { 167 | t.Errorf("FetchFiles() returned %d files, want %d", len(files), expectedCount) 168 | } 169 | 170 | // Check that every file carries the mapping's knowledge ID and a "local:<folder>" source 171 | for _, file := range files { 172 | if file.KnowledgeID != "test-knowledge" { 173 | t.Errorf("File %s has knowledge ID %s, want %s", file.Path, file.KnowledgeID, "test-knowledge") 174 | } 175 | if file.Source != "local:"+tempDir { 176 | t.Errorf("File %s has source %s, want %s", file.Path, file.Source, "local:"+tempDir) 177 | } 178 | } 179 | } 180 | // TestLocalFolderAdapter_shouldIgnoreFile table-tests shouldIgnoreFile: dot-names, node_modules, __pycache__, *.log and Thumbs.db are expected to be ignored; ordinary file names are not. 181 | func TestLocalFolderAdapter_shouldIgnoreFile(t *testing.T) { 182 | adapter := &LocalFolderAdapter{} 183 | 184 | tests := []struct { 185 | filename string 186 | want bool 187 | }{ 188 | {"test.txt", false}, 189 | {".hidden", true}, 190 | {"node_modules", true}, 191 | {"test.log", true}, 192 | {"Thumbs.db", true}, 193 | {"normal_file.py", false}, 194 | {"__pycache__", true}, 195 | } 196 | 197 | for _, tt := range tests { 198 | t.Run(tt.filename, func(t *testing.T) { 199 | if got := adapter.shouldIgnoreFile(tt.filename); got != tt.want { 200 | t.Errorf("shouldIgnoreFile(%q) = %v, want %v", tt.filename, got, tt.want) 201 | } 202 | }) 203 | } 204 | } 205 | // TestLocalFolderAdapter_isBinaryFile table-tests isBinaryFile: empty and plain-text inputs are expected to be text; inputs containing NUL bytes are expected to be binary. 206 | func TestLocalFolderAdapter_isBinaryFile(t *testing.T) { 207 | adapter := &LocalFolderAdapter{} 208 | 209 | tests := []struct { 210 | name string 211 | content []byte 212 | want bool 213 | }{ 214 | {"empty", []byte{}, false}, 215 | {"text", []byte("hello world"), false}, 216 | {"text with newlines", []byte("hello\nworld\r\n"), false}, 217 | {"binary with null", []byte{0, 1, 2, 3}, true}, 218 | {"high non-printable ratio", make([]byte, 1000), true}, 219 | {"normal text", []byte("This is normal text content"), 
false}, 220 | } 221 | 222 | for _, tt := range tests { 223 | t.Run(tt.name, func(t *testing.T) { 224 | if got := adapter.isBinaryFile(tt.content); got != tt.want { 225 | t.Errorf("isBinaryFile(%q) = %v, want %v", tt.name, got, tt.want) 226 | } 227 | }) 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/JohannesKaufmann/dom v0.2.0 h1:1bragmEb19K8lHAqgFgqCpiPCFEZMTXzOIEjuxkUfLQ= 2 | github.com/JohannesKaufmann/dom v0.2.0/go.mod h1:57iSUl5RKric4bUkgos4zu6Xt5LMHUnw3TF1l5CbGZo= 3 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0 h1:C0/TerKdQX9Y9pbYi1EsLr5LDNANsqunyI/btpyfCg8= 4 | github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0/go.mod h1:OLaKh+giepO8j7teevrNwiy/fwf8LXgoc9g7rwaE1jk= 5 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 10 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 11 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 12 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 13 | github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 14 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 15 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 16 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 17 | github.com/google/go-github/v56 v56.0.0 
h1:TysL7dMa/r7wsQi44BjqlwaHvwlFlqkK8CtBWCX3gb4= 18 | github.com/google/go-github/v56 v56.0.0/go.mod h1:D8cdcX98YWJvi7TLo7zM4/h8ZTx6u6fwGEkCdisopo0= 19 | github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= 20 | github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= 21 | github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= 22 | github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 23 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 24 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 25 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 26 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 27 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 28 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 29 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 30 | github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= 31 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 33 | github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= 34 | github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= 35 | github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= 36 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 37 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 38 | github.com/sebdah/goldie/v2 v2.7.1 h1:PkBHymaYdtvEkZV7TmyqKxdmn5/Vcj+8TpATWZjnG5E= 39 | 
github.com/sebdah/goldie/v2 v2.7.1/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= 40 | github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= 41 | github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= 42 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 43 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 44 | github.com/slack-go/slack v0.17.3 h1:zV5qO3Q+WJAQ/XwbGfNFrRMaJ5T/naqaonyPV/1TP4g= 45 | github.com/slack-go/slack v0.17.3/go.mod h1:X+UqOufi3LYQHDnMG1vxf0J8asC6+WllXrVrhl8/Prk= 46 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 47 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 48 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 49 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 50 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 51 | github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= 52 | github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= 53 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 54 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 55 | golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= 56 | golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= 57 | golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ= 58 | golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM= 59 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 60 | golang.org/x/sys 
v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 61 | golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= 62 | golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 63 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 64 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 65 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 66 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 67 | google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= 68 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 69 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 70 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 71 | google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= 72 | google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 73 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 74 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 75 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 76 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 77 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 78 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 79 | -------------------------------------------------------------------------------- /internal/sync/manager_simple_test.go: 
-------------------------------------------------------------------------------- 1 | package sync 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "time" 9 | 10 | "github.com/openwebui-content-sync/internal/adapter" 11 | "github.com/openwebui-content-sync/internal/config" 12 | "github.com/openwebui-content-sync/internal/mocks" 13 | "github.com/openwebui-content-sync/internal/openwebui" 14 | ) 15 | // TestNewManager checks that NewManager returns a non-nil manager whose storagePath equals the configured storage path. 16 | func TestNewManager(t *testing.T) { 17 | tempDir := t.TempDir() 18 | defer os.RemoveAll(tempDir) // NOTE(review): redundant, t.TempDir already removes the directory when the test completes 19 | 20 | openwebuiConfig := config.OpenWebUIConfig{ 21 | BaseURL: "http://localhost:8080", 22 | APIKey: "test-key", 23 | } 24 | storageConfig := config.StorageConfig{ 25 | Path: tempDir, 26 | } 27 | 28 | manager, err := NewManager(openwebuiConfig, storageConfig) 29 | if err != nil { 30 | t.Fatalf("Failed to create manager: %v", err) 31 | } 32 | 33 | if manager == nil { 34 | t.Fatal("Expected manager to be created") 35 | } 36 | if manager.storagePath != tempDir { 37 | t.Errorf("Expected storage path %s, got %s", tempDir, manager.storagePath) 38 | } 39 | } 40 | // TestManager_SetKnowledgeID checks that SetKnowledgeID stores its argument in the manager's knowledgeID field. 41 | func TestManager_SetKnowledgeID(t *testing.T) { 42 | tempDir := t.TempDir() 43 | defer os.RemoveAll(tempDir) 44 | 45 | manager := &Manager{ 46 | storagePath: tempDir, 47 | fileIndex: make(map[string]*FileMetadata), 48 | } 49 | 50 | knowledgeID := "test-knowledge-id" 51 | manager.SetKnowledgeID(knowledgeID) 52 | 53 | if manager.knowledgeID != knowledgeID { 54 | t.Errorf("Expected knowledge ID %s, got %s", knowledgeID, manager.knowledgeID) 55 | } 56 | } 57 | // TestManager_syncFile_NewFile checks that syncing a file absent from the index adds it to fileIndex and writes it under <storage>/files/<source>/. 58 | func TestManager_syncFile_NewFile(t *testing.T) { 59 | tempDir := t.TempDir() 60 | defer os.RemoveAll(tempDir) 61 | 62 | mockClient := &mocks.MockOpenWebUIClient{ 63 | UploadFileFunc: func(ctx context.Context, filename string, content []byte) (*openwebui.File, error) { 64 | return &openwebui.File{ 65 | ID: "mock-file-id", 66 | Filename: filename, 67 | }, nil 68 | }, 69 | } 70 | 71 | manager := &Manager{ 72 | openwebuiClient: 
mockClient, 73 | storagePath: tempDir, 74 | fileIndex: make(map[string]*FileMetadata), 75 | } 76 | 77 | file := &adapter.File{ 78 | Path: "new-file.md", 79 | Content: []byte("# New File"), 80 | Hash: "test-hash", 81 | Modified: time.Now(), 82 | Size: 10, 83 | Source: "test", 84 | } 85 | 86 | ctx := context.Background() 87 | err := manager.syncFile(ctx, file, "test-source") 88 | if err != nil { 89 | t.Fatalf("Failed to sync file: %v", err) 90 | } 91 | 92 | // Check that file was added to index 93 | fileKey := "new-file.md" // The index is looked up by the file's path, not by a source-prefixed key 94 | if _, exists := manager.fileIndex[fileKey]; !exists { 95 | t.Errorf("Expected file to be added to index") 96 | } 97 | 98 | // Check that file was saved locally under files/<source argument>/<path> 99 | expectedPath := filepath.Join(tempDir, "files", "test-source", "new-file.md") 100 | if _, err := os.Stat(expectedPath); os.IsNotExist(err) { 101 | t.Errorf("Expected file to be saved locally at %s", expectedPath) 102 | } 103 | } 104 | // TestManager_syncFile_UnchangedFile checks that syncing a file whose hash matches its index entry leaves the recorded FileID untouched. 105 | func TestManager_syncFile_UnchangedFile(t *testing.T) { 106 | tempDir := t.TempDir() 107 | defer os.RemoveAll(tempDir) 108 | 109 | mockClient := &mocks.MockOpenWebUIClient{} 110 | manager := &Manager{ 111 | openwebuiClient: mockClient, 112 | storagePath: tempDir, 113 | fileIndex: make(map[string]*FileMetadata), 114 | } 115 | 116 | // Seed the index with an entry for the file before syncing 117 | fileKey := "unchanged-file.md" // The index is keyed by the file's path 118 | manager.fileIndex[fileKey] = &FileMetadata{ 119 | Path: "unchanged-file.md", 120 | Hash: "same-hash", 121 | FileID: "existing-file-id", 122 | Source: "test-source", 123 | SyncedAt: time.Now(), 124 | Modified: time.Now(), 125 | } 126 | 127 | file := &adapter.File{ 128 | Path: "unchanged-file.md", 129 | Content: []byte("# Unchanged File"), 130 | Hash: "same-hash", // Same hash as in index 131 | Modified: time.Now(), 132 | Size: 17, // NOTE(review): the Content literal above is 16 bytes long 133 | Source: "test", 134 | } 135 | 136 | ctx := context.Background() 137 | err := manager.syncFile(ctx, file, "test-source") 138 | if err != nil { 139 | 
t.Fatalf("Failed to sync file: %v", err) 140 | } 141 | 142 | // File should not be uploaded again (we can't easily test this without more complex mocking) 143 | // But we can verify the file index wasn't updated with a new file ID 144 | if manager.fileIndex[fileKey].FileID != "existing-file-id" { 145 | t.Errorf("Expected file ID to remain unchanged") 146 | } 147 | } 148 | // TestManager_saveFileLocally checks that saveFileLocally writes the given bytes to a path inside not-yet-existing nested directories and that the content reads back unchanged. 149 | func TestManager_saveFileLocally(t *testing.T) { 150 | tempDir := t.TempDir() 151 | defer os.RemoveAll(tempDir) 152 | 153 | manager := &Manager{ 154 | storagePath: tempDir, 155 | } 156 | 157 | filePath := filepath.Join(tempDir, "test", "nested", "file.md") 158 | content := []byte("# Test Content") 159 | 160 | err := manager.saveFileLocally(filePath, content) 161 | if err != nil { 162 | t.Fatalf("Failed to save file locally: %v", err) 163 | } 164 | 165 | if _, err := os.Stat(filePath); os.IsNotExist(err) { 166 | t.Errorf("Expected file to exist at %s", filePath) 167 | } 168 | 169 | // Read the file back and compare it with what was written 170 | readContent, err := os.ReadFile(filePath) 171 | if err != nil { 172 | t.Fatalf("Failed to read file: %v", err) 173 | } 174 | if string(readContent) != string(content) { 175 | t.Errorf("Expected content %s, got %s", string(content), string(readContent)) 176 | } 177 | } 178 | // TestGetFileHash pins GetFileHash to a fixed hex digest for the input "test content". 179 | func TestGetFileHash(t *testing.T) { 180 | content := []byte("test content") 181 | // Precomputed, hard-coded digest of "test content" (presumably SHA-256, per ARCHITECTURE.md; nothing is calculated here) 182 | expectedHash := "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72" 183 | 184 | hash := GetFileHash(content) 185 | if hash != expectedHash { 186 | t.Errorf("Expected hash %s, got %s", expectedHash, hash) 187 | } 188 | } 189 | 190 | func TestManager_loadFileIndex(t *testing.T) { 191 | tempDir := t.TempDir() 192 | defer os.RemoveAll(tempDir) 193 | 194 | manager := &Manager{ 195 | storagePath: tempDir, 196 | fileIndex: make(map[string]*FileMetadata), 197 | indexPath: filepath.Join(tempDir, "file_index.json"), 198 | } 199 | 200 | // Test loading non-existent index (should not 
error) 201 | err := manager.loadFileIndex() 202 | if err != nil { 203 | t.Fatalf("Failed to load non-existent index: %v", err) 204 | } 205 | 206 | // Create a test index file 207 | testIndex := map[string]*FileMetadata{ 208 | "file.md": { // Now using filename as key 209 | Path: "file.md", 210 | Hash: "test-hash", 211 | FileID: "test-file-id", 212 | Source: "test", 213 | SyncedAt: time.Now(), 214 | Modified: time.Now(), 215 | }, 216 | } 217 | 218 | // Save test index 219 | manager.fileIndex = testIndex 220 | err = manager.saveFileIndex() 221 | if err != nil { 222 | t.Fatalf("Failed to save test index: %v", err) 223 | } 224 | 225 | // Create new manager and load index 226 | newManager := &Manager{ 227 | storagePath: tempDir, 228 | fileIndex: make(map[string]*FileMetadata), 229 | indexPath: filepath.Join(tempDir, "file_index.json"), 230 | } 231 | 232 | err = newManager.loadFileIndex() 233 | if err != nil { 234 | t.Fatalf("Failed to load index: %v", err) 235 | } 236 | 237 | if len(newManager.fileIndex) != 1 { 238 | t.Errorf("Expected 1 file in index, got %d", len(newManager.fileIndex)) 239 | } 240 | 241 | fileKey := "file.md" // Now using filename as key 242 | if _, exists := newManager.fileIndex[fileKey]; !exists { 243 | t.Errorf("Expected file %s to be in index", fileKey) 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /internal/mocks/mocks.go: -------------------------------------------------------------------------------- 1 | package mocks 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/openwebui-content-sync/internal/adapter" 8 | "github.com/openwebui-content-sync/internal/openwebui" 9 | ) 10 | 11 | // MockOpenWebUIClient is a mock implementation of OpenWebUI client 12 | type MockOpenWebUIClient struct { 13 | UploadFileFunc func(ctx context.Context, filename string, content []byte) (*openwebui.File, error) 14 | GetFileFunc func(ctx context.Context, fileID string) (*openwebui.File, error) 15 | 
ListKnowledgeFunc func(ctx context.Context) ([]*openwebui.Knowledge, error) 16 | AddFileToKnowledgeFunc func(ctx context.Context, knowledgeID, fileID string) error 17 | RemoveFileFromKnowledgeFunc func(ctx context.Context, knowledgeID, fileID string) error 18 | GetKnowledgeFilesFunc func(ctx context.Context, knowledgeID string) ([]*openwebui.File, error) 19 | DeleteFileFunc func(ctx context.Context, fileID string) error 20 | } 21 | 22 | // UploadFile mocks the UploadFile method 23 | func (m *MockOpenWebUIClient) UploadFile(ctx context.Context, filename string, content []byte) (*openwebui.File, error) { 24 | if m.UploadFileFunc != nil { 25 | return m.UploadFileFunc(ctx, filename, content) 26 | } 27 | return &openwebui.File{ 28 | ID: "mock-file-id", 29 | Filename: filename, 30 | UserID: "test-user", 31 | Hash: "mock-hash", 32 | Data: struct { 33 | Status string `json:"status"` 34 | }{ 35 | Status: "pending", 36 | }, 37 | Meta: struct { 38 | Name string `json:"name"` 39 | ContentType string `json:"content_type"` 40 | Size int64 `json:"size"` 41 | Data map[string]interface{} `json:"data"` 42 | }{ 43 | Name: filename, 44 | ContentType: "text/markdown", 45 | Size: 0, 46 | Data: map[string]interface{}{}, 47 | }, 48 | CreatedAt: time.Now().Unix(), 49 | UpdatedAt: time.Now().Unix(), 50 | Status: true, 51 | Path: "/app/backend/data/uploads/mock-file-id_" + filename, 52 | AccessControl: nil, 53 | }, nil 54 | } 55 | 56 | // GetFile mocks the GetFile method 57 | func (m *MockOpenWebUIClient) GetFile(ctx context.Context, fileID string) (*openwebui.File, error) { 58 | if m.GetFileFunc != nil { 59 | return m.GetFileFunc(ctx, fileID) 60 | } 61 | return &openwebui.File{ 62 | ID: fileID, 63 | Filename: "mock-file.md", 64 | UserID: "test-user", 65 | Hash: "mock-hash", 66 | Data: struct { 67 | Status string `json:"status"` 68 | }{ 69 | Status: "processed", // Default to processed status 70 | }, 71 | Meta: struct { 72 | Name string `json:"name"` 73 | ContentType string 
`json:"content_type"` 74 | Size int64 `json:"size"` 75 | Data map[string]interface{} `json:"data"` 76 | }{ 77 | Name: "mock-file.md", 78 | ContentType: "text/markdown", 79 | Size: 100, 80 | Data: map[string]interface{}{}, 81 | }, 82 | Status: true, 83 | }, nil 84 | } 85 | 86 | // ListKnowledge mocks the ListKnowledge method 87 | func (m *MockOpenWebUIClient) ListKnowledge(ctx context.Context) ([]*openwebui.Knowledge, error) { 88 | if m.ListKnowledgeFunc != nil { 89 | return m.ListKnowledgeFunc(ctx) 90 | } 91 | return []*openwebui.Knowledge{ 92 | { 93 | ID: "mock-knowledge-id", 94 | UserID: "test-user", 95 | Name: "Test Knowledge", 96 | Description: "Mock knowledge base", 97 | Data: nil, 98 | Meta: nil, 99 | AccessControl: map[string]interface{}{}, 100 | CreatedAt: time.Now().Unix(), 101 | UpdatedAt: time.Now().Unix(), 102 | }, 103 | }, nil 104 | } 105 | 106 | // AddFileToKnowledge mocks the AddFileToKnowledge method 107 | func (m *MockOpenWebUIClient) AddFileToKnowledge(ctx context.Context, knowledgeID, fileID string) error { 108 | if m.AddFileToKnowledgeFunc != nil { 109 | return m.AddFileToKnowledgeFunc(ctx, knowledgeID, fileID) 110 | } 111 | return nil 112 | } 113 | 114 | // RemoveFileFromKnowledge mocks the RemoveFileFromKnowledge method 115 | func (m *MockOpenWebUIClient) RemoveFileFromKnowledge(ctx context.Context, knowledgeID, fileID string) error { 116 | if m.RemoveFileFromKnowledgeFunc != nil { 117 | return m.RemoveFileFromKnowledgeFunc(ctx, knowledgeID, fileID) 118 | } 119 | return nil 120 | } 121 | 122 | // GetKnowledgeFiles mocks the GetKnowledgeFiles method 123 | func (m *MockOpenWebUIClient) GetKnowledgeFiles(ctx context.Context, knowledgeID string) ([]*openwebui.File, error) { 124 | if m.GetKnowledgeFilesFunc != nil { 125 | return m.GetKnowledgeFilesFunc(ctx, knowledgeID) 126 | } 127 | return []*openwebui.File{ 128 | { 129 | ID: "existing-file-1", 130 | Filename: "existing-file-1.md", 131 | UserID: "test-user", 132 | Hash: "existing-hash-1", 133 | 
Data: struct { 134 | Status string `json:"status"` 135 | }{ 136 | Status: "processed", 137 | }, 138 | Meta: struct { 139 | Name string `json:"name"` 140 | ContentType string `json:"content_type"` 141 | Size int64 `json:"size"` 142 | Data map[string]interface{} `json:"data"` 143 | }{ 144 | Name: "existing-file-1.md", 145 | ContentType: "text/markdown", 146 | Size: 1000, 147 | Data: map[string]interface{}{"source": "github"}, 148 | }, 149 | CreatedAt: time.Now().Unix() - 3600, // 1 hour ago 150 | UpdatedAt: time.Now().Unix() - 1800, // 30 minutes ago 151 | Status: true, 152 | Path: "existing-file-1.md", 153 | AccessControl: nil, 154 | }, 155 | }, nil 156 | } 157 | 158 | // DeleteFile mocks the DeleteFile method 159 | func (m *MockOpenWebUIClient) DeleteFile(ctx context.Context, fileID string) error { 160 | if m.DeleteFileFunc != nil { 161 | return m.DeleteFileFunc(ctx, fileID) 162 | } 163 | return nil 164 | } 165 | 166 | // MockAdapter is a mock implementation of the Adapter interface 167 | type MockAdapter struct { 168 | NameFunc func() string 169 | FetchFilesFunc func(ctx context.Context) ([]*adapter.File, error) 170 | GetLastSyncFunc func() time.Time 171 | SetLastSyncFunc func(t time.Time) 172 | lastSync time.Time 173 | } 174 | 175 | // Name mocks the Name method 176 | func (m *MockAdapter) Name() string { 177 | if m.NameFunc != nil { 178 | return m.NameFunc() 179 | } 180 | return "mock-adapter" 181 | } 182 | 183 | // FetchFiles mocks the FetchFiles method 184 | func (m *MockAdapter) FetchFiles(ctx context.Context) ([]*adapter.File, error) { 185 | if m.FetchFilesFunc != nil { 186 | return m.FetchFilesFunc(ctx) 187 | } 188 | return []*adapter.File{ 189 | { 190 | Path: "test.md", 191 | Content: []byte("# Test File"), 192 | Hash: "test-hash", 193 | Modified: time.Now(), 194 | Size: 10, 195 | Source: "mock", 196 | }, 197 | }, nil 198 | } 199 | 200 | // GetLastSync mocks the GetLastSync method 201 | func (m *MockAdapter) GetLastSync() time.Time { 202 | if 
m.GetLastSyncFunc != nil { 203 | return m.GetLastSyncFunc() 204 | } 205 | return m.lastSync 206 | } 207 | 208 | // SetLastSync mocks the SetLastSync method 209 | func (m *MockAdapter) SetLastSync(t time.Time) { 210 | if m.SetLastSyncFunc != nil { 211 | m.SetLastSyncFunc(t) 212 | } else { 213 | m.lastSync = t 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | "time" 10 | 11 | "github.com/openwebui-content-sync/internal/config" 12 | ) 13 | 14 | func TestMain_WithConfigFile(t *testing.T) { 15 | // Create temporary config file 16 | tempDir := t.TempDir() 17 | configPath := filepath.Join(tempDir, "test-config.yaml") 18 | 19 | configContent := ` 20 | log_level: debug 21 | schedule: 22 | interval: 1h 23 | storage: 24 | path: /tmp/test-storage 25 | openwebui: 26 | base_url: "http://localhost:8080" 27 | api_key: "test-api-key" 28 | github: 29 | enabled: false 30 | ` 31 | 32 | err := os.WriteFile(configPath, []byte(configContent), 0644) 33 | if err != nil { 34 | t.Fatalf("Failed to write config file: %v", err) 35 | } 36 | 37 | // Test loading config 38 | cfg, err := config.Load(configPath) 39 | if err != nil { 40 | t.Fatalf("Failed to load config: %v", err) 41 | } 42 | 43 | if cfg.LogLevel != "debug" { 44 | t.Errorf("Expected log level 'debug', got '%s'", cfg.LogLevel) 45 | } 46 | if cfg.GitHub.Enabled != false { 47 | t.Errorf("Expected GitHub enabled false, got %v", cfg.GitHub.Enabled) 48 | } 49 | } 50 | 51 | func TestMain_WithInvalidConfigFile(t *testing.T) { 52 | // Create temporary config file with invalid YAML 53 | tempDir := t.TempDir() 54 | configPath := filepath.Join(tempDir, "invalid-config.yaml") 55 | 56 | invalidYAML := ` 57 | log_level: debug 58 | schedule: 59 | interval: 1h 60 | invalid: [unclosed list 61 | ` 62 
| 63 | err := os.WriteFile(configPath, []byte(invalidYAML), 0644) 64 | if err != nil { 65 | t.Fatalf("Failed to write invalid config file: %v", err) 66 | } 67 | 68 | // Test loading invalid config 69 | _, err = config.Load(configPath) 70 | if err == nil { 71 | t.Errorf("Expected error for invalid config, got none") 72 | } 73 | } 74 | 75 | func TestMain_WithNonExistentConfigFile(t *testing.T) { 76 | // Test loading non-existent config file (should use defaults) 77 | cfg, err := config.Load("non-existent-config.yaml") 78 | if err != nil { 79 | t.Fatalf("Failed to load default config: %v", err) 80 | } 81 | 82 | // Check default values 83 | if cfg.LogLevel != "info" { 84 | t.Errorf("Expected log level 'info', got '%s'", cfg.LogLevel) 85 | } 86 | if cfg.Schedule.Interval != 1*time.Hour { 87 | t.Errorf("Expected schedule interval 1h, got %v", cfg.Schedule.Interval) 88 | } 89 | } 90 | 91 | func TestMain_FlagParsing(t *testing.T) { 92 | // Save original command line args 93 | originalArgs := os.Args 94 | defer func() { 95 | os.Args = originalArgs 96 | flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError) 97 | }() 98 | 99 | // Test with custom config path 100 | os.Args = []string{"cmd", "-config", "custom-config.yaml"} 101 | flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError) 102 | 103 | configPath := flag.String("config", "config.yaml", "Path to configuration file") 104 | flag.Parse() 105 | 106 | if *configPath != "custom-config.yaml" { 107 | t.Errorf("Expected config path 'custom-config.yaml', got '%s'", *configPath) 108 | } 109 | } 110 | 111 | func TestMain_DefaultFlagValue(t *testing.T) { 112 | // Save original command line args 113 | originalArgs := os.Args 114 | defer func() { 115 | os.Args = originalArgs 116 | flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError) 117 | }() 118 | 119 | // Test with no flags 120 | os.Args = []string{"cmd"} 121 | flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError) 122 | 123 | configPath := 
flag.String("config", "config.yaml", "Path to configuration file") 124 | flag.Parse() 125 | 126 | if *configPath != "config.yaml" { 127 | t.Errorf("Expected default config path 'config.yaml', got '%s'", *configPath) 128 | } 129 | } 130 | 131 | // Helper function to create test config 132 | func createTestConfig() *config.Config { 133 | return &config.Config{ 134 | LogLevel: "debug", 135 | Schedule: config.ScheduleConfig{ 136 | Interval: 1 * time.Hour, 137 | }, 138 | Storage: config.StorageConfig{ 139 | Path: "/tmp/test-storage", 140 | }, 141 | OpenWebUI: config.OpenWebUIConfig{ 142 | BaseURL: "http://localhost:8080", 143 | APIKey: "test-api-key", 144 | }, 145 | GitHub: config.GitHubConfig{ 146 | Enabled: false, 147 | Token: "test-token", 148 | Mappings: []config.RepositoryMapping{ 149 | {Repository: "owner/repo", KnowledgeID: "test-knowledge-id"}, 150 | }, 151 | }, 152 | } 153 | } 154 | 155 | func TestMain_ContextHandling(t *testing.T) { 156 | // Test context creation and cancellation 157 | ctx, cancel := context.WithCancel(context.Background()) 158 | 159 | // Verify context is not cancelled initially 160 | select { 161 | case <-ctx.Done(): 162 | t.Error("Context should not be cancelled initially") 163 | default: 164 | // Expected 165 | } 166 | 167 | // Cancel context 168 | cancel() 169 | 170 | // Verify context is cancelled 171 | select { 172 | case <-ctx.Done(): 173 | // Expected 174 | default: 175 | t.Error("Context should be cancelled after cancel()") 176 | } 177 | } 178 | 179 | func TestMain_SignalHandling(t *testing.T) { 180 | // Test signal channel creation 181 | sigChan := make(chan os.Signal, 1) 182 | 183 | // Verify channel is empty initially 184 | select { 185 | case <-sigChan: 186 | t.Error("Signal channel should be empty initially") 187 | default: 188 | // Expected 189 | } 190 | 191 | // Test sending a signal 192 | sigChan <- os.Interrupt 193 | 194 | // Verify signal was received 195 | select { 196 | case sig := <-sigChan: 197 | if sig != os.Interrupt 
{ 198 | t.Errorf("Expected os.Interrupt signal, got %v", sig) 199 | } 200 | default: 201 | t.Error("Expected to receive signal") 202 | } 203 | } 204 | 205 | func TestMain_TimeHandling(t *testing.T) { 206 | // Test time operations used in main 207 | now := time.Now() 208 | 209 | // Test time addition 210 | future := now.Add(5 * time.Second) 211 | if future.Before(now) { 212 | t.Error("Future time should be after now") 213 | } 214 | 215 | // Test time comparison 216 | if !now.Before(future) { 217 | t.Error("Now should be before future time") 218 | } 219 | 220 | // Test duration 221 | duration := future.Sub(now) 222 | expectedDuration := 5 * time.Second 223 | if duration < expectedDuration-100*time.Millisecond || duration > expectedDuration+100*time.Millisecond { 224 | t.Errorf("Expected duration around %v, got %v", expectedDuration, duration) 225 | } 226 | } 227 | 228 | func TestMain_ErrorHandling(t *testing.T) { 229 | // Test various error scenarios that might occur in main 230 | 231 | // Test with invalid log level 232 | cfg := createTestConfig() 233 | cfg.LogLevel = "invalid-level" 234 | 235 | // This would normally cause an error in the main function 236 | // For testing, we'll just verify the config was set 237 | if cfg.LogLevel != "invalid-level" { 238 | t.Errorf("Expected log level 'invalid-level', got '%s'", cfg.LogLevel) 239 | } 240 | } 241 | 242 | func TestMain_ResourceCleanup(t *testing.T) { 243 | // Test that resources are properly cleaned up 244 | tempDir := t.TempDir() 245 | defer os.RemoveAll(tempDir) 246 | 247 | // Create a test file 248 | testFile := filepath.Join(tempDir, "test.txt") 249 | err := os.WriteFile(testFile, []byte("test content"), 0644) 250 | if err != nil { 251 | t.Fatalf("Failed to create test file: %v", err) 252 | } 253 | 254 | // Verify file exists 255 | if _, err := os.Stat(testFile); os.IsNotExist(err) { 256 | t.Error("Test file should exist") 257 | } 258 | 259 | // Cleanup 260 | os.RemoveAll(tempDir) 261 | 262 | // Verify file is 
cleaned up 263 | if _, err := os.Stat(testFile); !os.IsNotExist(err) { 264 | t.Error("Test file should be cleaned up") 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /internal/adapter/local.go: -------------------------------------------------------------------------------- 1 | // OpenWebUI Content Sync 2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
16 | 17 | package adapter 18 | 19 | import ( 20 | "context" 21 | "crypto/sha256" 22 | "fmt" 23 | "io/fs" 24 | "os" 25 | "path/filepath" 26 | "strings" 27 | "time" 28 | 29 | "github.com/openwebui-content-sync/internal/config" 30 | "github.com/sirupsen/logrus" 31 | ) 32 | 33 | // LocalFolderAdapter implements the Adapter interface for local folders 34 | type LocalFolderAdapter struct { 35 | config config.LocalFolderConfig 36 | lastSync time.Time 37 | folders []string 38 | mappings map[string]string // folder_path -> knowledge_id mapping 39 | } 40 | 41 | // NewLocalFolderAdapter creates a new local folder adapter 42 | func NewLocalFolderAdapter(cfg config.LocalFolderConfig) (*LocalFolderAdapter, error) { 43 | if !cfg.Enabled { 44 | return nil, fmt.Errorf("local folder adapter is disabled") 45 | } 46 | 47 | // Build folder mappings 48 | mappings := make(map[string]string) 49 | folders := []string{} 50 | 51 | // Process mappings 52 | for _, mapping := range cfg.Mappings { 53 | if mapping.FolderPath != "" && mapping.KnowledgeID != "" { 54 | // Validate folder exists 55 | if _, err := os.Stat(mapping.FolderPath); os.IsNotExist(err) { 56 | return nil, fmt.Errorf("folder does not exist: %s", mapping.FolderPath) 57 | } 58 | mappings[mapping.FolderPath] = mapping.KnowledgeID 59 | folders = append(folders, mapping.FolderPath) 60 | } 61 | } 62 | 63 | if len(folders) == 0 { 64 | return nil, fmt.Errorf("at least one local folder mapping must be configured") 65 | } 66 | 67 | return &LocalFolderAdapter{ 68 | config: cfg, 69 | folders: folders, 70 | mappings: mappings, 71 | lastSync: time.Now().Add(-24 * time.Hour), // Default to 24 hours ago 72 | }, nil 73 | } 74 | 75 | // Name returns the adapter name 76 | func (l *LocalFolderAdapter) Name() string { 77 | return "local" 78 | } 79 | 80 | // FetchFiles retrieves files from local folders 81 | func (l *LocalFolderAdapter) FetchFiles(ctx context.Context) ([]*File, error) { 82 | var files []*File 83 | 84 | for _, folder := range 
l.folders { 85 | logrus.Debugf("Fetching files from local folder: %s", folder) 86 | knowledgeID := l.mappings[folder] 87 | folderFiles, err := l.fetchFolderFiles(ctx, folder, knowledgeID) 88 | if err != nil { 89 | return nil, fmt.Errorf("failed to fetch files from folder %s: %w", folder, err) 90 | } 91 | logrus.Debugf("Found %d files in folder %s (knowledge_id: %s)", len(folderFiles), folder, knowledgeID) 92 | files = append(files, folderFiles...) 93 | } 94 | 95 | logrus.Debugf("Total files fetched: %d", len(files)) 96 | return files, nil 97 | } 98 | 99 | // fetchFolderFiles fetches files from a specific folder recursively 100 | func (l *LocalFolderAdapter) fetchFolderFiles(ctx context.Context, folderPath string, knowledgeID string) ([]*File, error) { 101 | var files []*File 102 | 103 | err := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error { 104 | if err != nil { 105 | logrus.Warnf("Error accessing path %s: %v", path, err) 106 | return nil // Continue walking 107 | } 108 | 109 | // Skip directories 110 | if d.IsDir() { 111 | return nil 112 | } 113 | 114 | // Skip hidden files and common ignore patterns 115 | baseName := filepath.Base(path) 116 | if strings.HasPrefix(baseName, ".") || l.shouldIgnoreFile(baseName) { 117 | return nil 118 | } 119 | 120 | // Read file content 121 | content, err := os.ReadFile(path) 122 | if err != nil { 123 | logrus.Warnf("Failed to read file %s: %v", path, err) 124 | return nil 125 | } 126 | 127 | // Skip binary files (basic check) 128 | if l.isBinaryFile(content) { 129 | logrus.Debugf("Skipping binary file: %s", path) 130 | return nil 131 | } 132 | 133 | // Get file info 134 | info, err := d.Info() 135 | if err != nil { 136 | logrus.Warnf("Failed to get file info for %s: %v", path, err) 137 | return nil 138 | } 139 | 140 | // Calculate relative path from the folder root 141 | relPath, err := filepath.Rel(folderPath, path) 142 | if err != nil { 143 | logrus.Warnf("Failed to calculate relative path for 
%s: %v", path, err) 144 | return nil 145 | } 146 | 147 | // Calculate hash 148 | hash := fmt.Sprintf("%x", sha256.Sum256(content)) 149 | 150 | file := &File{ 151 | Path: relPath, 152 | Content: content, 153 | Hash: hash, 154 | Modified: info.ModTime(), 155 | Size: info.Size(), 156 | Source: fmt.Sprintf("local:%s", folderPath), 157 | KnowledgeID: knowledgeID, 158 | } 159 | 160 | files = append(files, file) 161 | return nil 162 | }) 163 | 164 | if err != nil { 165 | return nil, fmt.Errorf("failed to walk directory %s: %w", folderPath, err) 166 | } 167 | 168 | return files, nil 169 | } 170 | 171 | // shouldIgnoreFile checks if a file should be ignored based on common patterns 172 | func (l *LocalFolderAdapter) shouldIgnoreFile(filename string) bool { 173 | // Check for hidden files (starting with .) 174 | if strings.HasPrefix(filename, ".") { 175 | return true 176 | } 177 | 178 | ignorePatterns := []string{ 179 | "node_modules", "vendor", ".git", ".svn", ".hg", 180 | "__pycache__", ".pytest_cache", ".coverage", 181 | } 182 | 183 | // Check for specific patterns 184 | lowerName := strings.ToLower(filename) 185 | for _, pattern := range ignorePatterns { 186 | if strings.Contains(lowerName, pattern) { 187 | return true 188 | } 189 | } 190 | 191 | // Check for specific filenames 192 | specificFiles := []string{"thumbs.db", ".ds_store", "desktop.ini"} 193 | for _, file := range specificFiles { 194 | if lowerName == file { 195 | return true 196 | } 197 | } 198 | 199 | // Check for file extensions 200 | extensions := []string{".log", ".tmp", ".temp", ".swp", ".swo"} 201 | for _, ext := range extensions { 202 | if strings.HasSuffix(lowerName, ext) { 203 | return true 204 | } 205 | } 206 | 207 | return false 208 | } 209 | 210 | // isBinaryFile checks if content appears to be binary 211 | func (l *LocalFolderAdapter) isBinaryFile(content []byte) bool { 212 | if len(content) == 0 { 213 | return false 214 | } 215 | 216 | // Check for null bytes (common in binary files) 217 | for 
i := 0; i < len(content) && i < 1024; i++ { 218 | if content[i] == 0 { 219 | return true 220 | } 221 | } 222 | 223 | // Check for high ratio of non-printable characters 224 | nonPrintable := 0 225 | checkLen := len(content) 226 | if checkLen > 1024 { 227 | checkLen = 1024 228 | } 229 | 230 | for i := 0; i < checkLen; i++ { 231 | if content[i] < 32 && content[i] != 9 && content[i] != 10 && content[i] != 13 { 232 | nonPrintable++ 233 | } 234 | } 235 | 236 | // If more than 30% of characters are non-printable, consider it binary 237 | return float64(nonPrintable)/float64(checkLen) > 0.3 238 | } 239 | 240 | // GetLastSync returns the last sync time 241 | func (l *LocalFolderAdapter) GetLastSync() time.Time { 242 | return l.lastSync 243 | } 244 | 245 | // SetLastSync sets the last sync time 246 | func (l *LocalFolderAdapter) SetLastSync(t time.Time) { 247 | l.lastSync = t 248 | } 249 | -------------------------------------------------------------------------------- /adapter_readme/CONFLUENCE_ADAPTER.md: -------------------------------------------------------------------------------- 1 | # Confluence Adapter 2 | 3 | The Confluence adapter allows you to sync content from Atlassian Confluence spaces into OpenWebUI knowledge bases. This adapter uses the Confluence REST API v2 to fetch pages and optionally attachments from specified Confluence spaces and uploads them to OpenWebUI. 
4 | 5 | ## API Compatibility 6 | 7 | This adapter uses Confluence REST API v2, which provides: 8 | - Modern cursor-based pagination 9 | - Improved performance and reliability 10 | - Better support for large spaces 11 | - Enhanced metadata and content structure 12 | 13 | ## Features 14 | 15 | - **Page Content Sync**: Fetches all pages from specified Confluence spaces using Confluence API v2 16 | - **Attachment Support**: Optionally downloads and syncs page attachments 17 | - **HTML to Text Conversion**: Converts Confluence's HTML content to plain text 18 | - **Incremental Sync**: Tracks last sync time to avoid re-processing content 19 | - **Multi-Space Support**: Can sync from multiple Confluence spaces 20 | - **Configurable Limits**: Set page limits and control attachment inclusion 21 | - **Cursor-based Pagination**: Uses modern cursor-based pagination for efficient data retrieval 22 | 23 | ## Configuration 24 | 25 | ### YAML Configuration 26 | 27 | Add the following to your `config.yaml`: 28 | 29 | ```yaml 30 | confluence: 31 | enabled: true 32 | base_url: "https://your-domain.atlassian.net" 33 | username: "your-email@example.com" 34 | api_key: "your-confluence-api-key" 35 | spaces: 36 | - "SPACEKEY1" 37 | - "SPACEKEY2" 38 | knowledge_id: "your-knowledge-base-id" 39 | page_limit: 100 40 | include_attachments: true 41 | include_blog_posts: false 42 | ``` 43 | 44 | ### Environment Variables 45 | 46 | Only the API key can be configured via environment variable (for security): 47 | 48 | ```bash 49 | CONFLUENCE_API_KEY="your-confluence-api-key" 50 | ``` 51 | 52 | All other configuration should be done in the `config.yaml` file. 
53 | 54 | ### Kubernetes Configuration 55 | 56 | #### ConfigMap 57 | 58 | ```yaml 59 | apiVersion: v1 60 | kind: ConfigMap 61 | metadata: 62 | name: connector-config 63 | data: 64 | config.yaml: | 65 | confluence: 66 | enabled: true 67 | base_url: "https://your-domain.atlassian.net" 68 | username: "your-email@example.com" 69 | spaces: 70 | - "SPACEKEY1" 71 | - "SPACEKEY2" 72 | knowledge_id: "your-knowledge-base-id" 73 | page_limit: 100 74 | include_attachments: true 75 | use_markdown_parser: true 76 | ``` 77 | 78 | #### Secrets 79 | 80 | ```yaml 81 | apiVersion: v1 82 | kind: Secret 83 | metadata: 84 | name: confluence-secrets 85 | type: Opaque 86 | data: 87 | api-key: <base64-encoded-api-key> 88 | ``` 89 | 90 | ## Authentication 91 | 92 | The Confluence adapter uses Basic Authentication with your Confluence username and API key. To get an API key: 93 | 94 | 1. Go to [Atlassian Account Settings](https://id.atlassian.com/manage-profile/security/api-tokens) 95 | 2. Click "Create API token" 96 | 3. Give it a label and copy the generated token 97 | 4.
Use your email address as the username and the token as the API key 98 | 99 | ## Configuration Parameters 100 | 101 | | Parameter | Type | Required | Default | Description | 102 | |-----------|------|----------|---------|-------------| 103 | | `enabled` | boolean | No | `false` | Enable the Confluence adapter | 104 | | `base_url` | string | Yes | - | Your Confluence instance URL (e.g., `https://your-domain.atlassian.net`) | 105 | | `username` | string | Yes | - | Your Confluence username (usually your email) | 106 | | `api_key` | string | Yes | - | Your Confluence API key | 107 | | `spaces` | array | Yes | - | List of Confluence space keys to sync | 108 | | `knowledge_id` | string | No | - | OpenWebUI knowledge base ID to sync content to | 109 | | `page_limit` | integer | No | `100` | Maximum number of pages to fetch per space | 110 | | `include_attachments` | boolean | No | `true` | Whether to download and sync page attachments | 111 | | `include_blog_posts` | boolean | No | `false` | Whether to download and sync blog posts | 112 | | `use_markdown_parser` | boolean | No | `false` | Whether to use markdown parser for HTML content conversion (true = markdown, false = plain text) | 113 | | `add_additional_data` | boolean | No | `false` | Whether to fetch additional user data (display names) for pages and blog posts | 114 | 115 | ## File Processing 116 | 117 | ### Page Content 118 | 119 | - Confluence pages are converted from HTML to plain text 120 | - Pages are saved as `.md` files with sanitized filenames 121 | - File paths follow the pattern: `{space}/{page-title}.md` 122 | 123 | ### Attachments 124 | 125 | - Only text-based attachments are processed (based on file extension) 126 | - Binary files are skipped 127 | - Attachments are saved in: `{space}/attachments/{filename}` 128 | 129 | ### Supported File Types 130 | 131 | The adapter processes the following file types: 132 | - Markdown (`.md`) 133 | - Text (`.txt`) 134 | - JSON (`.json`) 135 | - YAML (`.yaml`, 
`.yml`) 136 | - Code files (`.go`, `.py`, `.js`, `.ts`, `.java`, etc.) 137 | - Configuration files (`.env`, `.gitignore`, etc.) 138 | - And many more text-based formats 139 | 140 | ## Error Handling 141 | 142 | - **Authentication Errors**: Invalid credentials will cause the adapter to fail initialization 143 | - **API Errors**: HTTP errors from Confluence API are logged and may cause individual page/attachment processing to fail 144 | - **File Processing Errors**: Individual file processing errors are logged but don't stop the overall sync 145 | - **Network Errors**: Connection timeouts and network issues are handled gracefully 146 | 147 | ## Logging 148 | 149 | The adapter provides detailed logging at the debug level: 150 | 151 | ``` 152 | DEBUG: Fetching files from Confluence space: SPACEKEY1 153 | DEBUG: Found 25 files in space SPACEKEY1 154 | DEBUG: Processing page: Page Title 155 | DEBUG: Downloading attachment: document.pdf 156 | ``` 157 | 158 | ## Limitations 159 | 160 | 1. **API Rate Limits**: Confluence has API rate limits that may affect sync performance 161 | 2. **Large Spaces**: Very large spaces with many pages may take significant time to sync 162 | 3. **HTML Conversion**: The HTML to text conversion is basic and may not preserve all formatting 163 | 4. **Attachment Size**: Large attachments may cause memory issues or timeouts 164 | 165 | ## Troubleshooting 166 | 167 | ### Common Issues 168 | 169 | 1. **Authentication Failed** 170 | - Verify your username and API key are correct 171 | - Ensure your API key has the necessary permissions 172 | 173 | 2. **Space Not Found** 174 | - Check that the space key is correct 175 | - Verify you have access to the space 176 | 177 | 3. **No Content Synced** 178 | - Check that the space contains pages 179 | - Verify the `page_limit` setting is appropriate 180 | - Check logs for API errors 181 | 182 | 4. 
**Attachments Not Synced** 183 | - Ensure `include_attachments` is set to `true` 184 | - Check that attachments are text-based files 185 | - Verify you have download permissions for attachments 186 | 187 | ### Debug Mode 188 | 189 | Enable debug logging to see detailed information about the sync process: 190 | 191 | ```yaml 192 | log_level: debug 193 | ``` 194 | 195 | ## Example Usage 196 | 197 | ### Basic Configuration 198 | 199 | ```yaml 200 | confluence: 201 | enabled: true 202 | base_url: "https://mycompany.atlassian.net" 203 | username: "john.doe@mycompany.com" 204 | api_key: "ATATT3xFfGF0..." 205 | spaces: 206 | - "DOCS" 207 | - "WIKI" 208 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583" 209 | ``` 210 | 211 | ### Advanced Configuration 212 | 213 | ```yaml 214 | confluence: 215 | enabled: true 216 | base_url: "https://mycompany.atlassian.net" 217 | username: "john.doe@mycompany.com" 218 | api_key: "ATATT3xFfGF0..." 219 | spaces: 220 | - "DOCS" 221 | - "WIKI" 222 | - "PROJECTS" 223 | knowledge_id: "fbc18bc4-72c1-40f0-84b1-52055368c583" 224 | page_limit: 500 225 | include_attachments: true 226 | ``` 227 | 228 | This configuration will sync up to 500 pages from each of the three specified spaces, including all text-based attachments. 229 | -------------------------------------------------------------------------------- /adapter_readme/LOCAL_ADAPTER.md: -------------------------------------------------------------------------------- 1 | # Local Folder Adapter 2 | 3 | The Local Folder adapter allows you to sync content from local directories on your filesystem into OpenWebUI knowledge bases. This is useful for syncing documentation, notes, or other content stored locally. 
4 | 5 | ## Features 6 | 7 | - **Multi-directory support**: Sync from multiple local directories 8 | - **Knowledge base mapping**: Map each directory to a specific OpenWebUI knowledge base 9 | - **Recursive scanning**: Automatically scans subdirectories for content 10 | - **File filtering**: Automatically filters out binary files and common non-content files 11 | - **Incremental sync**: Only processes files that have changed since the last sync 12 | - **Path preservation**: Maintains directory structure in the knowledge base 13 | 14 | ## Configuration 15 | 16 | ### Configuration File 17 | 18 | Add the following section to your `config.yaml`: 19 | 20 | ```yaml 21 | local_folders: 22 | enabled: true 23 | mappings: 24 | - folder_path: "/path/to/documentation" 25 | knowledge_id: "docs-knowledge-base" 26 | - folder_path: "/path/to/notes" 27 | knowledge_id: "notes-knowledge-base" 28 | - folder_path: "/home/user/projects/docs" 29 | knowledge_id: "project-docs" 30 | ``` 31 | 32 | ### Configuration Options 33 | 34 | | Option | Type | Required | Default | Description | 35 | |--------|------|----------|---------|-------------| 36 | | `enabled` | boolean | Yes | `false` | Enable/disable the local folder adapter | 37 | | `mappings` | array | Yes | `[]` | List of folder mappings | 38 | 39 | ### Folder Mapping 40 | 41 | Each mapping in the `mappings` array should contain: 42 | 43 | | Field | Type | Required | Description | 44 | |-------|------|----------|-------------| 45 | | `folder_path` | string | Yes | Absolute path to the local directory | 46 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID | 47 | 48 | ## Directory Requirements 49 | 50 | ### Path Format 51 | 52 | - Use **absolute paths** for all directory mappings 53 | - Paths must exist and be readable by the application 54 | - Avoid paths with spaces or special characters (use quotes if necessary) 55 | 56 | ### Permissions 57 | 58 | The application must have: 59 | - **Read access** to all configured 
directories 60 | - **Execute access** to traverse subdirectories 61 | - **Read access** to all files within the directories 62 | 63 | ## File Processing 64 | 65 | The Local Folder adapter processes files as follows: 66 | 67 | ### Supported File Types 68 | 69 | - **Markdown files** (`.md`, `.markdown`) 70 | - **Text files** (`.txt`, `.text`) 71 | - **Documentation files** (`.rst`, `.adoc`) 72 | - **Code files** (`.py`, `.js`, `.ts`, `.go`, `.java`, `.cpp`, `.c`, `.h`, `.hpp`) 73 | - **Configuration files** (`.yaml`, `.yml`, `.json`, `.toml`, `.ini`, `.cfg`) 74 | - **Shell scripts** (`.sh`, `.bash`, `.zsh`) 75 | - **HTML files** (`.html`, `.htm`) 76 | 77 | ### Excluded Files 78 | 79 | The adapter automatically excludes: 80 | - Binary files (images, videos, executables, etc.) 81 | - Common non-content files (`.gitignore`, `.gitattributes`, etc.) 82 | - Large files (> 1MB) 83 | - Hidden files and directories (starting with `.`) 84 | - Common exclusion directories (`node_modules/`, `vendor/`, `.git/`, etc.) 
85 | 86 | ### File Path Structure 87 | 88 | Files are stored with paths that preserve the directory structure: 89 | ``` 90 | local/folder-name/subdirectory/file.md 91 | ``` 92 | 93 | ## Sync Behavior 94 | 95 | - **Initial sync**: Scans all configured directories and processes all supported files 96 | - **Incremental sync**: Only processes files modified since the last successful sync 97 | - **Error handling**: If a directory fails to sync, other directories continue processing 98 | - **File monitoring**: Uses file modification timestamps to detect changes 99 | 100 | ## Use Cases 101 | 102 | ### Documentation Sync 103 | 104 | Sync local documentation directories: 105 | 106 | ```yaml 107 | local_folders: 108 | enabled: true 109 | mappings: 110 | - folder_path: "/home/user/docs" 111 | knowledge_id: "user-docs" 112 | - folder_path: "/opt/company/docs" 113 | knowledge_id: "company-docs" 114 | ``` 115 | 116 | ### Project Documentation 117 | 118 | Sync project-specific documentation: 119 | 120 | ```yaml 121 | local_folders: 122 | enabled: true 123 | mappings: 124 | - folder_path: "/home/user/projects/my-app/docs" 125 | knowledge_id: "my-app-docs" 126 | - folder_path: "/home/user/projects/api-docs" 127 | knowledge_id: "api-docs" 128 | ``` 129 | 130 | ### Notes and Knowledge Base 131 | 132 | Sync personal or team notes: 133 | 134 | ```yaml 135 | local_folders: 136 | enabled: true 137 | mappings: 138 | - folder_path: "/home/user/notes" 139 | knowledge_id: "personal-notes" 140 | - folder_path: "/shared/team-notes" 141 | knowledge_id: "team-notes" 142 | ``` 143 | 144 | ## Troubleshooting 145 | 146 | ### Common Issues 147 | 148 | 1. **Directory not found** 149 | - Verify the directory path exists and is accessible 150 | - Check that the path is absolute (starts with `/`) 151 | - Ensure the application has read permissions 152 | 153 | 2. 
**Permission denied** 154 | - Check file and directory permissions 155 | - Ensure the application user can read the directories 156 | - Verify execute permissions on parent directories 157 | 158 | 3. **Empty knowledge base** 159 | - Check that directories contain supported file types 160 | - Verify files are not hidden or in excluded directories 161 | - Check file size limits (files > 1MB are excluded) 162 | 163 | 4. **Sync not updating** 164 | - Verify file modification timestamps are updating 165 | - Check that files are being modified (not just accessed) 166 | - Ensure the application has write access to the storage directory 167 | 168 | ### Debug Logging 169 | 170 | Enable debug logging to see detailed sync information: 171 | 172 | ```yaml 173 | log_level: debug 174 | ``` 175 | 176 | This will show: 177 | - Which directories are being scanned 178 | - File discovery and filtering details 179 | - File processing progress 180 | - Sync timing and statistics 181 | 182 | ## Security Considerations 183 | 184 | - **File access**: Only sync directories that contain appropriate content 185 | - **Path traversal**: The adapter validates paths to prevent directory traversal attacks 186 | - **Content filtering**: Review the content being synced to ensure it's appropriate 187 | - **Permissions**: Run the application with minimal required permissions 188 | 189 | ## Performance Tips 190 | 191 | - **Directory size**: Large directories with many files may take longer to sync 192 | - **File filtering**: The adapter automatically filters out unnecessary files 193 | - **Incremental sync**: Only changed files are processed after the initial sync 194 | - **Storage location**: Use fast storage for the application's data directory 195 | 196 | ## Example Configuration 197 | 198 | ```yaml 199 | # Complete example configuration 200 | log_level: info 201 | schedule: 202 | interval: 30m 203 | 204 | storage: 205 | path: "/data" 206 | 207 | openwebui: 208 | base_url: "http://localhost:8080" 
209 | api_key: "your-openwebui-api-key" 210 | 211 | local_folders: 212 | enabled: true 213 | mappings: 214 | - folder_path: "/home/user/docs" 215 | knowledge_id: "user-docs" 216 | - folder_path: "/opt/company/knowledge-base" 217 | knowledge_id: "company-kb" 218 | - folder_path: "/shared/project-docs" 219 | knowledge_id: "project-docs" 220 | ``` 221 | 222 | ## Docker Considerations 223 | 224 | When running in Docker, ensure that: 225 | 226 | 1. **Volume mounts** are properly configured for local directories 227 | 2. **Permissions** are set correctly for the container user 228 | 3. **Paths** are accessible from within the container 229 | 230 | Example Docker volume mount: 231 | 232 | ```yaml 233 | volumes: 234 | - /host/path/to/docs:/container/path/to/docs:ro 235 | ``` 236 | 237 | ## File System Monitoring 238 | 239 | The adapter uses file modification timestamps to detect changes. For optimal performance: 240 | 241 | - Avoid frequently modifying files unnecessarily 242 | - Use proper file locking when editing files 243 | - Consider using a file system that supports efficient timestamp updates 244 | - Monitor disk space to ensure sufficient storage for the application 245 | -------------------------------------------------------------------------------- /internal/adapter/github.go: -------------------------------------------------------------------------------- 1 | package adapter 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "fmt" 7 | "io" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | 12 | "github.com/google/go-github/v56/github" 13 | "github.com/openwebui-content-sync/internal/config" 14 | "github.com/sirupsen/logrus" 15 | "golang.org/x/oauth2" 16 | ) 17 | 18 | // GitHubAdapter implements the Adapter interface for GitHub repositories. 19 | type GitHubAdapter struct { 20 | client *github.Client // GitHub API client, authenticated with the configured token 21 | config config.GitHubConfig // configuration the adapter was created from 22 | lastSync time.Time // value reported by GetLastSync; starts 24 hours before construction 23 | repositories []string // repositories to fetch, each expected in "owner/repo" form 24 | mappings map[string]string // repository -> knowledge_id mapping 25 | }
26 | 27 | // NewGitHubAdapter creates a new GitHub adapter 28 | func NewGitHubAdapter(cfg config.GitHubConfig) (*GitHubAdapter, error) { 29 | if cfg.Token == "" { 30 | return nil, fmt.Errorf("GitHub token is required") 31 | } 32 | 33 | ctx := context.Background() 34 | ts := oauth2.StaticTokenSource( 35 | &oauth2.Token{AccessToken: cfg.Token}, 36 | ) 37 | tc := oauth2.NewClient(ctx, ts) 38 | 39 | client := github.NewClient(tc) 40 | 41 | // Build repository mappings 42 | mappings := make(map[string]string) 43 | repos := []string{} 44 | 45 | // Process mappings 46 | for _, mapping := range cfg.Mappings { 47 | if mapping.Repository != "" && mapping.KnowledgeID != "" { 48 | mappings[mapping.Repository] = mapping.KnowledgeID 49 | repos = append(repos, mapping.Repository) 50 | } 51 | } 52 | 53 | if len(repos) == 0 { 54 | return nil, fmt.Errorf("at least one repository mapping must be configured") 55 | } 56 | 57 | return &GitHubAdapter{ 58 | client: client, 59 | config: cfg, 60 | repositories: repos, 61 | mappings: mappings, 62 | lastSync: time.Now().Add(-24 * time.Hour), // Default to 24 hours ago 63 | }, nil 64 | } 65 | 66 | // Name returns the adapter name 67 | func (g *GitHubAdapter) Name() string { 68 | return "github" 69 | } 70 | 71 | // FetchFiles retrieves files from GitHub repositories 72 | func (g *GitHubAdapter) FetchFiles(ctx context.Context) ([]*File, error) { 73 | var files []*File 74 | 75 | for _, repo := range g.repositories { 76 | logrus.Debugf("Fetching files from repository: %s", repo) 77 | knowledgeID := g.mappings[repo] 78 | repoFiles, err := g.fetchRepositoryFiles(ctx, repo, knowledgeID) 79 | if err != nil { 80 | return nil, fmt.Errorf("failed to fetch files from repository %s: %w", repo, err) 81 | } 82 | logrus.Debugf("Found %d files in repository %s (knowledge_id: %s)", len(repoFiles), repo, knowledgeID) 83 | files = append(files, repoFiles...) 
84 | } 85 | 86 | logrus.Debugf("Total files fetched: %d", len(files)) 87 | return files, nil 88 | } 89 | 90 | // fetchRepositoryFiles fetches files from a specific repository 91 | func (g *GitHubAdapter) fetchRepositoryFiles(ctx context.Context, repo string, knowledgeID string) ([]*File, error) { 92 | parts := strings.Split(repo, "/") 93 | if len(parts) != 2 { 94 | return nil, fmt.Errorf("invalid repository format, expected 'owner/repo'") 95 | } 96 | 97 | owner, repoName := parts[0], parts[1] 98 | 99 | // Get repository contents 100 | _, contents, _, err := g.client.Repositories.GetContents(ctx, owner, repoName, "", nil) 101 | if err != nil { 102 | return nil, fmt.Errorf("failed to get repository contents: %w", err) 103 | } 104 | 105 | var files []*File 106 | for _, content := range contents { 107 | fileList, err := g.processContent(ctx, owner, repoName, content, "", knowledgeID) 108 | if err != nil { 109 | continue // Skip files that can't be processed 110 | } 111 | if fileList != nil { 112 | files = append(files, fileList...) 
113 | } 114 | } 115 | 116 | return files, nil 117 | } 118 | 119 | // processContent processes a GitHub content item recursively 120 | func (g *GitHubAdapter) processContent(ctx context.Context, owner, repo string, content *github.RepositoryContent, path string, knowledgeID string) ([]*File, error) { 121 | if content == nil { 122 | return nil, nil 123 | } 124 | 125 | currentPath := filepath.Join(path, content.GetName()) 126 | 127 | // Skip binary files and non-text files 128 | if content.GetType() == "file" { 129 | // Check if it's a text file 130 | if !isTextFile(content.GetName()) { 131 | return nil, nil 132 | } 133 | 134 | // Get file content 135 | fileContent, err := g.getFileContent(ctx, owner, repo, content) 136 | if err != nil { 137 | return nil, fmt.Errorf("failed to get file content: %w", err) 138 | } 139 | 140 | // Calculate hash 141 | hash := fmt.Sprintf("%x", sha256.Sum256(fileContent)) 142 | 143 | return []*File{{ 144 | Path: currentPath, 145 | Content: fileContent, 146 | Hash: hash, 147 | Modified: time.Now(), // GitHub API doesn't provide modification time for content 148 | Size: int64(len(fileContent)), 149 | Source: fmt.Sprintf("%s/%s", owner, repo), 150 | KnowledgeID: knowledgeID, 151 | }}, nil 152 | } 153 | 154 | // If it's a directory, recurse 155 | if content.GetType() == "dir" { 156 | _, contents, _, err := g.client.Repositories.GetContents(ctx, owner, repo, content.GetPath(), nil) 157 | if err != nil { 158 | return nil, fmt.Errorf("failed to get directory contents: %w", err) 159 | } 160 | 161 | var allFiles []*File 162 | for _, subContent := range contents { 163 | files, err := g.processContent(ctx, owner, repo, subContent, currentPath, knowledgeID) 164 | if err != nil { 165 | continue 166 | } 167 | if files != nil { 168 | allFiles = append(allFiles, files...) 
169 | } 170 | } 171 | 172 | return allFiles, nil 173 | } 174 | 175 | return nil, nil 176 | } 177 | 178 | // getFileContent retrieves the actual content of a file 179 | func (g *GitHubAdapter) getFileContent(ctx context.Context, owner, repo string, content *github.RepositoryContent) ([]byte, error) { 180 | fileContent, err := content.GetContent() 181 | if err != nil { 182 | return nil, fmt.Errorf("failed to get content: %w", err) 183 | } 184 | 185 | if fileContent != "" { 186 | // Content is already available (for small files) 187 | return []byte(fileContent), nil 188 | } 189 | 190 | // For larger files, we need to download them 191 | url := content.GetDownloadURL() 192 | if url == "" { 193 | return nil, fmt.Errorf("no download URL available for file") 194 | } 195 | 196 | resp, err := g.client.Client().Get(url) 197 | if err != nil { 198 | return nil, fmt.Errorf("failed to download file: %w", err) 199 | } 200 | defer resp.Body.Close() 201 | 202 | return io.ReadAll(resp.Body) 203 | } 204 | 205 | // isTextFile checks if a file is likely to be a text file 206 | func isTextFile(filename string) bool { 207 | ext := strings.ToLower(filepath.Ext(filename)) 208 | 209 | // Common text file extensions 210 | textExts := map[string]bool{ 211 | ".md": true, 212 | ".txt": true, 213 | ".json": true, 214 | ".yaml": true, 215 | ".yml": true, 216 | ".go": true, 217 | ".py": true, 218 | ".js": true, 219 | ".ts": true, 220 | ".java": true, 221 | ".cpp": true, 222 | ".c": true, 223 | ".h": true, 224 | ".hpp": true, 225 | ".cs": true, 226 | ".php": true, 227 | ".rb": true, 228 | ".rs": true, 229 | ".swift": true, 230 | ".kt": true, 231 | ".scala": true, 232 | ".sh": true, 233 | ".bash": true, 234 | ".zsh": true, 235 | ".fish": true, 236 | ".ps1": true, 237 | ".sql": true, 238 | ".xml": true, 239 | ".html": true, 240 | ".css": true, 241 | ".scss": true, 242 | ".sass": true, 243 | ".less": true, 244 | ".dockerfile": true, 245 | ".gitignore": true, 246 | ".gitattributes": true, 247 | 
".editorconfig": true, 248 | ".env": true, 249 | ".env.example": true, 250 | ".env.local": true, 251 | ".env.production": true, 252 | ".env.development": true, 253 | ".env.test": true, 254 | } 255 | 256 | return textExts[ext] || ext == "" 257 | } 258 | 259 | // GetLastSync returns the last sync timestamp 260 | func (g *GitHubAdapter) GetLastSync() time.Time { 261 | return g.lastSync 262 | } 263 | 264 | // SetLastSync updates the last sync timestamp 265 | func (g *GitHubAdapter) SetLastSync(t time.Time) { 266 | g.lastSync = t 267 | } 268 | -------------------------------------------------------------------------------- /internal/adapter/slack_test.go: -------------------------------------------------------------------------------- 1 | // OpenWebUI Content Sync 2 | // Copyright (C) 2025 OpenWebUI Content Sync Contributors 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
16 | 17 | package adapter 18 | 19 | import ( 20 | "context" 21 | "os" 22 | "path/filepath" 23 | "testing" 24 | "time" 25 | 26 | "github.com/openwebui-content-sync/internal/config" 27 | ) 28 | 29 | func TestNewSlackAdapter(t *testing.T) { 30 | // Create a temporary directory for testing 31 | tempDir := t.TempDir() 32 | 33 | tests := []struct { 34 | name string 35 | config config.SlackConfig 36 | expectError bool 37 | }{ 38 | { 39 | name: "valid config", 40 | config: config.SlackConfig{ 41 | Enabled: true, 42 | Token: "xoxb-test-token", 43 | ChannelMappings: []config.ChannelMapping{ 44 | { 45 | ChannelID: "C1234567890", 46 | ChannelName: "test-channel", 47 | KnowledgeID: "test-knowledge", 48 | }, 49 | }, 50 | DaysToFetch: 30, 51 | MaintainHistory: false, 52 | MessageLimit: 1000, 53 | IncludeThreads: true, 54 | IncludeReactions: false, 55 | }, 56 | expectError: false, 57 | }, 58 | { 59 | name: "missing token", 60 | config: config.SlackConfig{ 61 | Enabled: true, 62 | Token: "", 63 | }, 64 | expectError: true, 65 | }, 66 | { 67 | name: "disabled adapter", 68 | config: config.SlackConfig{ 69 | Enabled: false, 70 | Token: "", 71 | }, 72 | expectError: false, // Should not error even without token when disabled 73 | }, 74 | } 75 | 76 | for _, tt := range tests { 77 | t.Run(tt.name, func(t *testing.T) { 78 | adapter, err := NewSlackAdapter(tt.config, tempDir) 79 | 80 | if tt.expectError { 81 | if err == nil { 82 | t.Errorf("Expected error but got none") 83 | } 84 | return 85 | } 86 | 87 | if err != nil { 88 | t.Errorf("Unexpected error: %v", err) 89 | return 90 | } 91 | 92 | if adapter == nil { 93 | t.Errorf("Expected adapter but got nil") 94 | return 95 | } 96 | 97 | // Test basic methods 98 | if adapter.Name() != "slack" { 99 | t.Errorf("Expected adapter name 'slack', got '%s'", adapter.Name()) 100 | } 101 | 102 | // Test GetLastSync returns zero time initially 103 | if !adapter.GetLastSync().IsZero() { 104 | t.Errorf("Expected zero time for GetLastSync, got %v", 
adapter.GetLastSync()) 105 | } 106 | 107 | // Test SetLastSync 108 | testTime := time.Now() 109 | adapter.SetLastSync(testTime) 110 | if !adapter.GetLastSync().Equal(testTime) { 111 | t.Errorf("Expected GetLastSync to return %v, got %v", testTime, adapter.GetLastSync()) 112 | } 113 | }) 114 | } 115 | } 116 | 117 | func TestSlackAdapter_FetchFiles_NoToken(t *testing.T) { 118 | tempDir := t.TempDir() 119 | 120 | config := config.SlackConfig{ 121 | Enabled: true, 122 | Token: "", // No token 123 | } 124 | 125 | _, err := NewSlackAdapter(config, tempDir) 126 | if err == nil { 127 | t.Errorf("Expected error for missing token, got none") 128 | return 129 | } 130 | } 131 | 132 | func TestSlackAdapter_FetchFiles_Disabled(t *testing.T) { 133 | tempDir := t.TempDir() 134 | 135 | config := config.SlackConfig{ 136 | Enabled: false, 137 | Token: "xoxb-test-token", 138 | } 139 | 140 | adapter, err := NewSlackAdapter(config, tempDir) 141 | if err != nil { 142 | t.Errorf("Unexpected error: %v", err) 143 | return 144 | } 145 | 146 | // FetchFiles should return empty slice when disabled 147 | files, err := adapter.FetchFiles(context.Background()) 148 | if err != nil { 149 | t.Errorf("Unexpected error: %v", err) 150 | return 151 | } 152 | 153 | if len(files) != 0 { 154 | t.Errorf("Expected empty files slice when disabled, got %d files", len(files)) 155 | } 156 | } 157 | 158 | func TestSlackAdapter_StorageDirectory(t *testing.T) { 159 | tempDir := t.TempDir() 160 | 161 | config := config.SlackConfig{ 162 | Enabled: true, 163 | Token: "xoxb-test-token", 164 | ChannelMappings: []config.ChannelMapping{ 165 | { 166 | ChannelID: "C1234567890", 167 | ChannelName: "test-channel", 168 | KnowledgeID: "test-knowledge", 169 | }, 170 | }, 171 | } 172 | 173 | adapter, err := NewSlackAdapter(config, tempDir) 174 | if err != nil { 175 | t.Errorf("Unexpected error: %v", err) 176 | return 177 | } 178 | 179 | // Check that adapter was created successfully 180 | if adapter == nil { 181 | 
t.Errorf("Expected adapter but got nil") 182 | } 183 | 184 | // Check that storage directory was created 185 | expectedStorageDir := filepath.Join(tempDir, "slack", "channels") 186 | if _, err := os.Stat(expectedStorageDir); os.IsNotExist(err) { 187 | t.Errorf("Expected storage directory %s to be created", expectedStorageDir) 188 | } 189 | } 190 | 191 | func TestSanitizeChannelName(t *testing.T) { 192 | tests := []struct { 193 | input string 194 | expected string 195 | }{ 196 | {"#general", "general"}, 197 | {"dev-team", "dev-team"}, 198 | {"test channel", "test_channel"}, 199 | {"test@channel", "test_channel"}, 200 | {"test#channel", "test_channel"}, 201 | {"test/channel", "test_channel"}, 202 | {"test\\channel", "test_channel"}, 203 | {"test:channel", "test_channel"}, 204 | {"test*channel", "test_channel"}, 205 | {"test?channel", "test_channel"}, 206 | {"test", "test_channel"}, 207 | {"test|channel", "test_channel"}, 208 | {"test\"channel\"", "test_channel"}, 209 | {"", ""}, 210 | } 211 | 212 | for _, tt := range tests { 213 | t.Run(tt.input, func(t *testing.T) { 214 | result := sanitizeChannelName(tt.input) 215 | if result != tt.expected { 216 | t.Errorf("sanitizeChannelName(%q) = %q, expected %q", tt.input, result, tt.expected) 217 | } 218 | }) 219 | } 220 | } 221 | 222 | func TestSlackAdapter_InterfaceCompliance(t *testing.T) { 223 | tempDir := t.TempDir() 224 | 225 | config := config.SlackConfig{ 226 | Enabled: true, 227 | Token: "xoxb-test-token", 228 | ChannelMappings: []config.ChannelMapping{ 229 | { 230 | ChannelID: "C1234567890", 231 | ChannelName: "test-channel", 232 | KnowledgeID: "test-knowledge", 233 | }, 234 | }, 235 | } 236 | 237 | adapter, err := NewSlackAdapter(config, tempDir) 238 | if err != nil { 239 | t.Errorf("Unexpected error: %v", err) 240 | return 241 | } 242 | 243 | // Test that SlackAdapter implements the Adapter interface 244 | var _ Adapter = adapter 245 | 246 | // Test all interface methods exist and work 247 | ctx := 
context.Background() 248 | 249 | // Name method 250 | name := adapter.Name() 251 | if name != "slack" { 252 | t.Errorf("Expected name 'slack', got '%s'", name) 253 | } 254 | 255 | // GetLastSync method 256 | syncTime := adapter.GetLastSync() 257 | if !syncTime.IsZero() { 258 | t.Errorf("Expected zero time for GetLastSync, got %v", syncTime) 259 | } 260 | 261 | // SetLastSync method 262 | testTime := time.Now() 263 | adapter.SetLastSync(testTime) 264 | if !adapter.GetLastSync().Equal(testTime) { 265 | t.Errorf("Expected GetLastSync to return %v, got %v", testTime, adapter.GetLastSync()) 266 | } 267 | 268 | // FetchFiles method (this will fail with actual API call, but we can test the method exists) 269 | // We'll use a context with timeout to avoid hanging 270 | ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) 271 | defer cancel() 272 | 273 | _, err = adapter.FetchFiles(ctx) 274 | // We expect an error due to timeout or invalid token, but the method should exist 275 | if err == nil { 276 | t.Log("FetchFiles completed without error (unexpected)") 277 | } 278 | } 279 | 280 | // Benchmark tests 281 | func BenchmarkSanitizeChannelName(b *testing.B) { 282 | testName := "#test-channel-with-special-chars!@#$%^&*()" 283 | for i := 0; i < b.N; i++ { 284 | sanitizeChannelName(testName) 285 | } 286 | } 287 | 288 | func BenchmarkSlackAdapter_Creation(b *testing.B) { 289 | tempDir := b.TempDir() 290 | 291 | config := config.SlackConfig{ 292 | Enabled: true, 293 | Token: "xoxb-test-token", 294 | ChannelMappings: []config.ChannelMapping{ 295 | { 296 | ChannelID: "C1234567890", 297 | ChannelName: "test-channel", 298 | KnowledgeID: "test-knowledge", 299 | }, 300 | }, 301 | } 302 | 303 | b.ResetTimer() 304 | for i := 0; i < b.N; i++ { 305 | adapter, err := NewSlackAdapter(config, tempDir) 306 | if err != nil { 307 | b.Errorf("Unexpected error: %v", err) 308 | } 309 | if adapter == nil { 310 | b.Errorf("Expected adapter but got nil") 311 | } 312 | } 313 | } 314 | 
-------------------------------------------------------------------------------- /internal/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestLoad_DefaultConfig(t *testing.T) { 11 | // Test loading with non-existent file (should use defaults) 12 | cfg, err := Load("non-existent-config.yaml") 13 | if err != nil { 14 | t.Fatalf("Failed to load default config: %v", err) 15 | } 16 | 17 | // Check default values 18 | if cfg.LogLevel != "info" { 19 | t.Errorf("Expected log level 'info', got '%s'", cfg.LogLevel) 20 | } 21 | if cfg.Schedule.Interval != 1*time.Hour { 22 | t.Errorf("Expected schedule interval 1h, got %v", cfg.Schedule.Interval) 23 | } 24 | if cfg.Storage.Path != "/data" { 25 | t.Errorf("Expected storage path '/data', got '%s'", cfg.Storage.Path) 26 | } 27 | if cfg.OpenWebUI.BaseURL != "http://localhost:8080" { 28 | t.Errorf("Expected OpenWebUI base URL 'http://localhost:8080', got '%s'", cfg.OpenWebUI.BaseURL) 29 | } 30 | if cfg.GitHub.Enabled != false { 31 | t.Errorf("Expected GitHub enabled false, got %v", cfg.GitHub.Enabled) 32 | } 33 | } 34 | 35 | func TestLoad_FromFile(t *testing.T) { 36 | // Create temporary config file 37 | tempDir := t.TempDir() 38 | configPath := filepath.Join(tempDir, "config.yaml") 39 | 40 | configContent := ` 41 | log_level: debug 42 | schedule: 43 | interval: 2h 44 | storage: 45 | path: /custom/data 46 | openwebui: 47 | base_url: "https://custom.openwebui.com" 48 | api_key: "custom-api-key" 49 | github: 50 | enabled: true 51 | token: "custom-token" 52 | mappings: 53 | - repository: "owner/repo1" 54 | knowledge_id: "custom-knowledge-id" 55 | - repository: "owner/repo2" 56 | knowledge_id: "custom-knowledge-id" 57 | ` 58 | 59 | err := os.WriteFile(configPath, []byte(configContent), 0644) 60 | if err != nil { 61 | t.Fatalf("Failed to write config file: %v", err) 62 | } 
63 | 64 | cfg, err := Load(configPath) 65 | if err != nil { 66 | t.Fatalf("Failed to load config from file: %v", err) 67 | } 68 | 69 | // Check loaded values 70 | if cfg.LogLevel != "debug" { 71 | t.Errorf("Expected log level 'debug', got '%s'", cfg.LogLevel) 72 | } 73 | if cfg.Schedule.Interval != 2*time.Hour { 74 | t.Errorf("Expected schedule interval 2h, got %v", cfg.Schedule.Interval) 75 | } 76 | if cfg.Storage.Path != "/custom/data" { 77 | t.Errorf("Expected storage path '/custom/data', got '%s'", cfg.Storage.Path) 78 | } 79 | if cfg.OpenWebUI.BaseURL != "https://custom.openwebui.com" { 80 | t.Errorf("Expected OpenWebUI base URL 'https://custom.openwebui.com', got '%s'", cfg.OpenWebUI.BaseURL) 81 | } 82 | if cfg.OpenWebUI.APIKey != "custom-api-key" { 83 | t.Errorf("Expected OpenWebUI API key 'custom-api-key', got '%s'", cfg.OpenWebUI.APIKey) 84 | } 85 | if cfg.GitHub.Enabled != true { 86 | t.Errorf("Expected GitHub enabled true, got %v", cfg.GitHub.Enabled) 87 | } 88 | if cfg.GitHub.Token != "custom-token" { 89 | t.Errorf("Expected GitHub token 'custom-token', got '%s'", cfg.GitHub.Token) 90 | } 91 | if len(cfg.GitHub.Mappings) != 2 { 92 | t.Errorf("Expected 2 repository mappings, got %d", len(cfg.GitHub.Mappings)) 93 | } 94 | if cfg.GitHub.Mappings[0].Repository != "owner/repo1" { 95 | t.Errorf("Expected first repository 'owner/repo1', got '%s'", cfg.GitHub.Mappings[0].Repository) 96 | } 97 | if cfg.GitHub.Mappings[0].KnowledgeID != "custom-knowledge-id" { 98 | t.Errorf("Expected first knowledge ID 'custom-knowledge-id', got '%s'", cfg.GitHub.Mappings[0].KnowledgeID) 99 | } 100 | } 101 | 102 | func TestLoad_EnvironmentOverride(t *testing.T) { 103 | // Set environment variables 104 | os.Setenv("OPENWEBUI_BASE_URL", "https://env.openwebui.com") 105 | os.Setenv("OPENWEBUI_API_KEY", "env-api-key") 106 | os.Setenv("GITHUB_TOKEN", "env-github-token") 107 | os.Setenv("GITHUB_KNOWLEDGE_ID", "env-knowledge-id") 108 | os.Setenv("STORAGE_PATH", "/env/storage") 109 | 
defer func() { 110 | os.Unsetenv("OPENWEBUI_BASE_URL") 111 | os.Unsetenv("OPENWEBUI_API_KEY") 112 | os.Unsetenv("GITHUB_TOKEN") 113 | os.Unsetenv("GITHUB_KNOWLEDGE_ID") 114 | os.Unsetenv("STORAGE_PATH") 115 | }() 116 | 117 | cfg, err := Load("non-existent-config.yaml") 118 | if err != nil { 119 | t.Fatalf("Failed to load config: %v", err) 120 | } 121 | 122 | // Check environment overrides 123 | if cfg.OpenWebUI.BaseURL != "https://env.openwebui.com" { 124 | t.Errorf("Expected OpenWebUI base URL 'https://env.openwebui.com', got '%s'", cfg.OpenWebUI.BaseURL) 125 | } 126 | if cfg.OpenWebUI.APIKey != "env-api-key" { 127 | t.Errorf("Expected OpenWebUI API key 'env-api-key', got '%s'", cfg.OpenWebUI.APIKey) 128 | } 129 | if cfg.GitHub.Token != "env-github-token" { 130 | t.Errorf("Expected GitHub token 'env-github-token', got '%s'", cfg.GitHub.Token) 131 | } 132 | // Note: GitHub knowledge ID is now handled via mappings, not environment variables 133 | if cfg.Storage.Path != "/env/storage" { 134 | t.Errorf("Expected storage path '/env/storage', got '%s'", cfg.Storage.Path) 135 | } 136 | } 137 | 138 | func TestLoad_InvalidYAML(t *testing.T) { 139 | // Create temporary config file with invalid YAML 140 | tempDir := t.TempDir() 141 | configPath := filepath.Join(tempDir, "invalid-config.yaml") 142 | 143 | invalidYAML := ` 144 | log_level: debug 145 | schedule: 146 | interval: 2h 147 | invalid: [unclosed list 148 | ` 149 | 150 | err := os.WriteFile(configPath, []byte(invalidYAML), 0644) 151 | if err != nil { 152 | t.Fatalf("Failed to write invalid config file: %v", err) 153 | } 154 | 155 | _, err = Load(configPath) 156 | if err == nil { 157 | t.Errorf("Expected error for invalid YAML, got none") 158 | } 159 | } 160 | 161 | func TestLoad_FileAndEnvironment(t *testing.T) { 162 | // Create temporary config file 163 | tempDir := t.TempDir() 164 | configPath := filepath.Join(tempDir, "config.yaml") 165 | 166 | configContent := ` 167 | log_level: debug 168 | openwebui: 169 | 
base_url: "https://file.openwebui.com" 170 | api_key: "file-api-key" 171 | github: 172 | token: "file-token" 173 | ` 174 | 175 | err := os.WriteFile(configPath, []byte(configContent), 0644) 176 | if err != nil { 177 | t.Fatalf("Failed to write config file: %v", err) 178 | } 179 | 180 | // Set environment variables (should override file values) 181 | os.Setenv("OPENWEBUI_BASE_URL", "https://env.openwebui.com") 182 | os.Setenv("GITHUB_TOKEN", "env-token") 183 | defer func() { 184 | os.Unsetenv("OPENWEBUI_BASE_URL") 185 | os.Unsetenv("GITHUB_TOKEN") 186 | }() 187 | 188 | cfg, err := Load(configPath) 189 | if err != nil { 190 | t.Fatalf("Failed to load config: %v", err) 191 | } 192 | 193 | // Environment should override file values 194 | if cfg.OpenWebUI.BaseURL != "https://env.openwebui.com" { 195 | t.Errorf("Expected environment to override file value, got '%s'", cfg.OpenWebUI.BaseURL) 196 | } 197 | if cfg.GitHub.Token != "env-token" { 198 | t.Errorf("Expected environment to override file value, got '%s'", cfg.GitHub.Token) 199 | } 200 | 201 | // File values should be used where environment is not set 202 | if cfg.OpenWebUI.APIKey != "file-api-key" { 203 | t.Errorf("Expected file value to be used, got '%s'", cfg.OpenWebUI.APIKey) 204 | } 205 | } 206 | 207 | func TestGetEnv(t *testing.T) { 208 | // Test with existing environment variable 209 | os.Setenv("TEST_VAR", "test-value") 210 | defer os.Unsetenv("TEST_VAR") 211 | 212 | result := getEnv("TEST_VAR", "default") 213 | if result != "test-value" { 214 | t.Errorf("Expected 'test-value', got '%s'", result) 215 | } 216 | 217 | // Test with non-existing environment variable 218 | result = getEnv("NON_EXISTING_VAR", "default") 219 | if result != "default" { 220 | t.Errorf("Expected 'default', got '%s'", result) 221 | } 222 | 223 | // Test with empty environment variable 224 | os.Setenv("EMPTY_VAR", "") 225 | defer os.Unsetenv("EMPTY_VAR") 226 | 227 | result = getEnv("EMPTY_VAR", "default") 228 | if result != "default" { 
229 | t.Errorf("Expected 'default' for empty env var, got '%s'", result) 230 | } 231 | } 232 | 233 | func TestConfig_StructFields(t *testing.T) { 234 | cfg := &Config{ 235 | LogLevel: "debug", 236 | Schedule: ScheduleConfig{ 237 | Interval: 2 * time.Hour, 238 | }, 239 | Storage: StorageConfig{ 240 | Path: "/test/path", 241 | }, 242 | OpenWebUI: OpenWebUIConfig{ 243 | BaseURL: "https://test.com", 244 | APIKey: "test-key", 245 | }, 246 | GitHub: GitHubConfig{ 247 | Enabled: true, 248 | Token: "github-token", 249 | Mappings: []RepositoryMapping{ 250 | {Repository: "owner/repo", KnowledgeID: "knowledge-id"}, 251 | }, 252 | }, 253 | } 254 | 255 | // Test that all fields can be set and accessed 256 | if cfg.LogLevel != "debug" { 257 | t.Errorf("LogLevel not set correctly") 258 | } 259 | if cfg.Schedule.Interval != 2*time.Hour { 260 | t.Errorf("Schedule.Interval not set correctly") 261 | } 262 | if cfg.Storage.Path != "/test/path" { 263 | t.Errorf("Storage.Path not set correctly") 264 | } 265 | if cfg.OpenWebUI.BaseURL != "https://test.com" { 266 | t.Errorf("OpenWebUI.BaseURL not set correctly") 267 | } 268 | if cfg.OpenWebUI.APIKey != "test-key" { 269 | t.Errorf("OpenWebUI.APIKey not set correctly") 270 | } 271 | if cfg.GitHub.Enabled != true { 272 | t.Errorf("GitHub.Enabled not set correctly") 273 | } 274 | if cfg.GitHub.Token != "github-token" { 275 | t.Errorf("GitHub.Token not set correctly") 276 | } 277 | if len(cfg.GitHub.Mappings) != 1 { 278 | t.Errorf("GitHub.Mappings not set correctly") 279 | } 280 | if cfg.GitHub.Mappings[0].Repository != "owner/repo" { 281 | t.Errorf("GitHub.Mappings[0].Repository not set correctly") 282 | } 283 | if cfg.GitHub.Mappings[0].KnowledgeID != "knowledge-id" { 284 | t.Errorf("GitHub.Mappings[0].KnowledgeID not set correctly") 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /internal/config/config.go: -------------------------------------------------------------------------------- 
// Package config declares the application's configuration schema and loads it
// from a YAML file combined with environment variables.
package config

import (
	"fmt"
	"os"
	"time"

	"gopkg.in/yaml.v3"
)

// Config represents the application configuration. Each field maps to the
// top-level YAML key named in its struct tag: general settings first, then one
// section per content adapter.
type Config struct {
	LogLevel     string            `yaml:"log_level"`
	Schedule     ScheduleConfig    `yaml:"schedule"`
	Storage      StorageConfig     `yaml:"storage"`
	OpenWebUI    OpenWebUIConfig   `yaml:"openwebui"`
	GitHub       GitHubConfig      `yaml:"github"`
	Confluence   ConfluenceConfig  `yaml:"confluence"`
	Jira         JiraConfig        `yaml:"jira"`
	LocalFolders LocalFolderConfig `yaml:"local_folders"`
	Slack        SlackConfig       `yaml:"slack"`
}

// ScheduleConfig defines the sync schedule. Interval is a time.Duration, so
// the YAML value is written in Go duration syntax (for example "2h"); Load
// defaults it to one hour.
type ScheduleConfig struct {
	Interval time.Duration `yaml:"interval"`
}

// StorageConfig defines local storage settings. Load defaults Path to "/data"
// and lets the STORAGE_PATH environment variable override it.
type StorageConfig struct {
	Path string `yaml:"path"`
}

// OpenWebUIConfig defines OpenWebUI API settings. Load lets the
// OPENWEBUI_BASE_URL and OPENWEBUI_API_KEY environment variables override
// BaseURL and APIKey respectively.
type OpenWebUIConfig struct {
	BaseURL string `yaml:"base_url"`
	APIKey  string `yaml:"api_key"`
}

// RepositoryMapping defines a mapping between a GitHub repository and a knowledge base.
type RepositoryMapping struct {
	Repository  string `yaml:"repository"` // Format: "owner/repo"
	KnowledgeID string `yaml:"knowledge_id"`
}

// SpaceMapping defines a mapping between a Confluence space and a knowledge base.
type SpaceMapping struct {
	SpaceKey    string `yaml:"space_key"`
	KnowledgeID string `yaml:"knowledge_id"`
}

// ParentPageMapping defines a mapping between a Confluence parent page and a knowledge base.
type ParentPageMapping struct {
	ParentPageID string `yaml:"parent_page_id"`
	KnowledgeID  string `yaml:"knowledge_id"`
}

// LocalFolderMapping defines a mapping between a local folder and a knowledge base.
type LocalFolderMapping struct {
	FolderPath  string `yaml:"folder_path"`
	KnowledgeID string `yaml:"knowledge_id"`
}

// GitHubConfig defines GitHub adapter settings 65 | type GitHubConfig struct { 66 | Enabled bool `yaml:"enabled"` 67 | Token string `yaml:"token"` 68 | Mappings []RepositoryMapping `yaml:"mappings"` // Per-repository knowledge mappings 69 | } 70 | 71 | // ConfluenceConfig defines Confluence adapter settings 72 | type ConfluenceConfig struct { 73 | Enabled bool `yaml:"enabled"` 74 | BaseURL string `yaml:"base_url"` 75 | Username string `yaml:"username"` 76 | APIKey string `yaml:"api_key"` 77 | SpaceMappings []SpaceMapping `yaml:"space_mappings"` // Per-space knowledge mappings 78 | ParentPageMappings []ParentPageMapping `yaml:"parent_page_mappings"` // Per-parent-page knowledge mappings 79 | PageLimit int `yaml:"page_limit"` 80 | IncludeAttachments bool `yaml:"include_attachments"` 81 | UseMarkdownParser bool `yaml:"use_markdown_parser"` 82 | IncludeBlogPosts bool `yaml:"include_blog_posts"` 83 | AddAdditionalData bool `yaml:"add_additional_data"` 84 | } 85 | 86 | // LocalFolderConfig defines local folder adapter settings 87 | type LocalFolderConfig struct { 88 | Enabled bool `yaml:"enabled"` 89 | Mappings []LocalFolderMapping `yaml:"mappings"` // Per-folder knowledge mappings 90 | } 91 | 92 | // SlackConfig defines Slack adapter settings 93 | type SlackConfig struct { 94 | Enabled bool `yaml:"enabled"` 95 | Token string `yaml:"token"` 96 | ChannelMappings []ChannelMapping `yaml:"channel_mappings"` // Per-channel knowledge mappings 97 | RegexPatterns []RegexPattern `yaml:"regex_patterns"` // Regex patterns for auto-discovering channels 98 | DaysToFetch int `yaml:"days_to_fetch"` // Number of days to fetch messages 99 | MaintainHistory bool `yaml:"maintain_history"` // Whether to maintain indefinite history or age off 100 | MessageLimit int `yaml:"message_limit"` // Max messages per channel per run 101 | IncludeThreads bool `yaml:"include_threads"` // Whether to include thread messages 102 | IncludeReactions bool `yaml:"include_reactions"` // Whether to include 
reaction data 103 | } 104 | 105 | // ChannelMapping defines mapping between Slack channels and knowledge bases 106 | type ChannelMapping struct { 107 | ChannelID string `yaml:"channel_id"` // Slack channel ID 108 | ChannelName string `yaml:"channel_name"` // Slack channel name (for display) 109 | KnowledgeID string `yaml:"knowledge_id"` // Target knowledge base ID 110 | } 111 | 112 | // RegexPattern defines regex patterns for auto-discovering Slack channels 113 | type RegexPattern struct { 114 | Pattern string `yaml:"pattern"` // Regex pattern to match channel names 115 | KnowledgeID string `yaml:"knowledge_id"` // Target knowledge base ID for matching channels 116 | AutoJoin bool `yaml:"auto_join"` // Whether to automatically join matching channels 117 | } 118 | 119 | // JiraProjectMapping defines a mapping between a Jira project and a knowledge base 120 | type JiraProjectMapping struct { 121 | ProjectKey string `yaml:"project_key"` 122 | KnowledgeID string `yaml:"knowledge_id"` 123 | } 124 | 125 | // JiraConfig defines Jira adapter settings 126 | type JiraConfig struct { 127 | Enabled bool `yaml:"enabled"` 128 | BaseURL string `yaml:"base_url"` 129 | Username string `yaml:"username"` 130 | APIKey string `yaml:"api_key"` 131 | ProjectMappings []JiraProjectMapping `yaml:"project_mappings"` // Per-project knowledge mappings 132 | PageLimit int `yaml:"page_limit"` 133 | } 134 | 135 | // Load loads configuration from file and environment variables 136 | func Load(path string) (*Config, error) { 137 | fmt.Printf("Loading configuration from: %s\n", path) 138 | 139 | cfg := &Config{ 140 | LogLevel: "info", 141 | Schedule: ScheduleConfig{ 142 | Interval: 1 * time.Hour, 143 | }, 144 | Storage: StorageConfig{ 145 | Path: "/data", 146 | }, 147 | OpenWebUI: OpenWebUIConfig{ 148 | BaseURL: getEnv("OPENWEBUI_BASE_URL", "http://localhost:8080"), 149 | APIKey: getEnv("OPENWEBUI_API_KEY", ""), 150 | }, 151 | GitHub: GitHubConfig{ 152 | Enabled: false, 153 | Token: 
getEnv("GITHUB_TOKEN", ""), 154 | Mappings: []RepositoryMapping{}, 155 | }, 156 | Confluence: ConfluenceConfig{ 157 | Enabled: false, 158 | BaseURL: "", 159 | Username: "", 160 | APIKey: getEnv("CONFLUENCE_API_KEY", ""), 161 | SpaceMappings: []SpaceMapping{}, 162 | ParentPageMappings: []ParentPageMapping{}, 163 | PageLimit: 100, 164 | IncludeAttachments: true, 165 | UseMarkdownParser: false, 166 | IncludeBlogPosts: false, 167 | }, 168 | Jira: JiraConfig{ 169 | Enabled: false, 170 | BaseURL: "", 171 | Username: "", 172 | APIKey: getEnv("JIRA_API_KEY", ""), 173 | ProjectMappings: []JiraProjectMapping{}, 174 | }, 175 | LocalFolders: LocalFolderConfig{ 176 | Enabled: false, 177 | Mappings: []LocalFolderMapping{}, 178 | }, 179 | Slack: SlackConfig{ 180 | Enabled: false, 181 | Token: getEnv("SLACK_TOKEN", ""), 182 | ChannelMappings: []ChannelMapping{}, 183 | DaysToFetch: 30, 184 | MaintainHistory: false, 185 | MessageLimit: 1000, 186 | IncludeThreads: true, 187 | IncludeReactions: false, 188 | }, 189 | } 190 | 191 | fmt.Printf("Default OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL) 192 | fmt.Printf("Confluence API Key loaded: %s\n", func() string { 193 | if cfg.Confluence.APIKey != "" { 194 | return "***" + cfg.Confluence.APIKey[len(cfg.Confluence.APIKey)-4:] // Show last 4 chars 195 | } 196 | return "NOT SET" 197 | }()) 198 | 199 | // Load from file if it exists 200 | if _, err := os.Stat(path); err == nil { 201 | fmt.Printf("Config file exists, loading from: %s\n", path) 202 | data, err := os.ReadFile(path) 203 | if err != nil { 204 | return nil, fmt.Errorf("failed to read config file: %w", err) 205 | } 206 | 207 | // fmt.Printf("Config file content:\n%s\n", string(data)) 208 | 209 | if err := yaml.Unmarshal(data, cfg); err != nil { 210 | return nil, fmt.Errorf("failed to parse config file: %w", err) 211 | } 212 | 213 | fmt.Printf("After loading config file - OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL) 214 | } else { 215 | fmt.Printf("Config file does not exist 
at: %s (error: %v)\n", path, err) 216 | } 217 | 218 | // Override with environment variables 219 | cfg.OpenWebUI.BaseURL = getEnv("OPENWEBUI_BASE_URL", cfg.OpenWebUI.BaseURL) 220 | cfg.OpenWebUI.APIKey = getEnv("OPENWEBUI_API_KEY", cfg.OpenWebUI.APIKey) 221 | cfg.GitHub.Token = getEnv("GITHUB_TOKEN", cfg.GitHub.Token) 222 | cfg.Confluence.APIKey = getEnv("CONFLUENCE_API_KEY", cfg.Confluence.APIKey) 223 | cfg.Jira.APIKey = getEnv("CONFLUENCE_API_KEY", cfg.Jira.APIKey) 224 | cfg.Storage.Path = getEnv("STORAGE_PATH", cfg.Storage.Path) 225 | 226 | fmt.Printf("Final OpenWebUI BaseURL: %s\n", cfg.OpenWebUI.BaseURL) 227 | fmt.Printf("Environment OPENWEBUI_BASE_URL: %s\n", os.Getenv("OPENWEBUI_BASE_URL")) 228 | fmt.Printf("Final Confluence API Key: %s\n", func() string { 229 | if cfg.Confluence.APIKey != "" { 230 | return "***" + cfg.Confluence.APIKey[len(cfg.Confluence.APIKey)-4:] // Show last 4 chars 231 | } 232 | return "NOT SET" 233 | }()) 234 | fmt.Printf("Environment CONFLUENCE_API_KEY: %s\n", func() string { 235 | env := os.Getenv("CONFLUENCE_API_KEY") 236 | if env != "" { 237 | return "***" + env[len(env)-4:] // Show last 4 chars 238 | } 239 | return "NOT SET" 240 | }()) 241 | 242 | return cfg, nil 243 | } 244 | 245 | func getEnv(key, defaultValue string) string { 246 | if value := os.Getenv(key); value != "" { 247 | return value 248 | } 249 | return defaultValue 250 | } 251 | -------------------------------------------------------------------------------- /internal/openwebui/client_test.go: -------------------------------------------------------------------------------- 1 | package openwebui 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "net/http" 7 | "net/http/httptest" 8 | "strings" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestNewClient(t *testing.T) { 14 | client := NewClient("http://localhost:8080", "test-api-key") 15 | if client == nil { 16 | t.Fatal("Expected client to be created") 17 | } 18 | if client.baseURL != "http://localhost:8080" { 19 
| t.Errorf("Expected baseURL 'http://localhost:8080', got '%s'", client.baseURL) 20 | } 21 | if client.apiKey != "test-api-key" { 22 | t.Errorf("Expected apiKey 'test-api-key', got '%s'", client.apiKey) 23 | } 24 | } 25 | 26 | func TestClient_UploadFile(t *testing.T) { 27 | tests := []struct { 28 | name string 29 | filename string 30 | content []byte 31 | serverResponse File 32 | serverStatus int 33 | expectError bool 34 | }{ 35 | { 36 | name: "successful upload", 37 | filename: "test.md", 38 | content: []byte("# Test"), 39 | serverResponse: File{ 40 | ID: "file-123", 41 | Filename: "test.md", 42 | UserID: "user-123", 43 | Hash: "hash-123", 44 | Data: struct { 45 | Status string `json:"status"` 46 | }{ 47 | Status: "pending", 48 | }, 49 | Meta: struct { 50 | Name string `json:"name"` 51 | ContentType string `json:"content_type"` 52 | Size int64 `json:"size"` 53 | Data map[string]interface{} `json:"data"` 54 | }{ 55 | Name: "test.md", 56 | ContentType: "text/markdown", 57 | Size: 6, 58 | Data: map[string]interface{}{}, 59 | }, 60 | CreatedAt: time.Now().Unix(), 61 | UpdatedAt: time.Now().Unix(), 62 | Status: true, 63 | Path: "/app/backend/data/uploads/file-123_test.md", 64 | AccessControl: nil, 65 | }, 66 | serverStatus: http.StatusOK, 67 | expectError: false, 68 | }, 69 | { 70 | name: "server error", 71 | filename: "test.md", 72 | content: []byte("# Test"), 73 | serverStatus: http.StatusInternalServerError, 74 | expectError: true, 75 | }, 76 | } 77 | 78 | for _, tt := range tests { 79 | t.Run(tt.name, func(t *testing.T) { 80 | requestCount := 0 81 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 82 | requestCount++ 83 | t.Logf("Request %d: %s %s", requestCount, r.Method, r.URL.Path) 84 | 85 | // Handle POST requests for file uploads 86 | if r.Method == "POST" && strings.Contains(r.URL.Path, "/api/v1/files/") { 87 | if r.Header.Get("Authorization") != "Bearer test-api-key" { 88 | t.Errorf("Expected Authorization header, 
got %s", r.Header.Get("Authorization")) 89 | } 90 | w.WriteHeader(tt.serverStatus) 91 | if tt.serverStatus == http.StatusOK { 92 | json.NewEncoder(w).Encode(tt.serverResponse) 93 | } else { 94 | w.Write([]byte("Server Error")) 95 | } 96 | } else if r.Method == "GET" && strings.Contains(r.URL.Path, "/api/v1/files/") { 97 | // Handle GET requests for file polling (file processing status) 98 | if r.Header.Get("Authorization") != "Bearer test-api-key" { 99 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization")) 100 | } 101 | 102 | // Extract file ID from path 103 | pathParts := strings.Split(r.URL.Path, "/") 104 | fileID := pathParts[len(pathParts)-1] 105 | 106 | // Return file with "processed" status to complete polling quickly 107 | fileResponse := map[string]interface{}{ 108 | "id": fileID, 109 | "filename": "test-file.md", 110 | "user_id": "test-user", 111 | "hash": "test-hash", 112 | "data": map[string]interface{}{ 113 | "status": "processed", 114 | }, 115 | "meta": map[string]interface{}{ 116 | "name": "test-file.md", 117 | "content_type": "text/markdown", 118 | "size": 100, 119 | "data": map[string]interface{}{}, 120 | }, 121 | "status": true, 122 | } 123 | 124 | w.WriteHeader(http.StatusOK) 125 | json.NewEncoder(w).Encode(fileResponse) 126 | } else { 127 | // Handle other requests gracefully 128 | w.WriteHeader(http.StatusOK) 129 | w.Write([]byte("OK")) 130 | } 131 | })) 132 | defer server.Close() 133 | 134 | client := NewClient(server.URL, "test-api-key") 135 | ctx := context.Background() 136 | 137 | result, err := client.UploadFile(ctx, tt.filename, tt.content) 138 | 139 | if tt.expectError { 140 | if err == nil { 141 | t.Errorf("Expected error but got none") 142 | } 143 | return 144 | } 145 | 146 | if err != nil { 147 | t.Errorf("Unexpected error: %v", err) 148 | return 149 | } 150 | 151 | if result.ID != tt.serverResponse.ID { 152 | t.Errorf("Expected ID %s, got %s", tt.serverResponse.ID, result.ID) 153 | } 154 | if 
result.Filename != tt.serverResponse.Filename { 155 | t.Errorf("Expected Filename %s, got %s", tt.serverResponse.Filename, result.Filename) 156 | } 157 | }) 158 | } 159 | } 160 | 161 | func TestClient_ListKnowledge(t *testing.T) { 162 | expectedKnowledge := []*Knowledge{ 163 | { 164 | ID: "knowledge-123", 165 | UserID: "user-123", 166 | Name: "Test Knowledge", 167 | Description: "Test Description", 168 | Data: nil, 169 | Meta: nil, 170 | AccessControl: map[string]interface{}{}, 171 | CreatedAt: time.Now().Unix(), 172 | UpdatedAt: time.Now().Unix(), 173 | }, 174 | } 175 | 176 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 177 | if r.Method != "GET" { 178 | t.Errorf("Expected GET method, got %s", r.Method) 179 | } 180 | if !strings.Contains(r.URL.Path, "/api/v1/knowledge/") { 181 | t.Errorf("Expected path to contain /api/v1/knowledge/, got %s", r.URL.Path) 182 | } 183 | if r.Header.Get("Authorization") != "Bearer test-api-key" { 184 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization")) 185 | } 186 | 187 | w.WriteHeader(http.StatusOK) 188 | json.NewEncoder(w).Encode(expectedKnowledge) 189 | })) 190 | defer server.Close() 191 | 192 | client := NewClient(server.URL, "test-api-key") 193 | ctx := context.Background() 194 | 195 | result, err := client.ListKnowledge(ctx) 196 | if err != nil { 197 | t.Fatalf("Unexpected error: %v", err) 198 | } 199 | 200 | if len(result) != len(expectedKnowledge) { 201 | t.Fatalf("Expected %d knowledge items, got %d", len(expectedKnowledge), len(result)) 202 | } 203 | 204 | if result[0].ID != expectedKnowledge[0].ID { 205 | t.Errorf("Expected ID %s, got %s", expectedKnowledge[0].ID, result[0].ID) 206 | } 207 | } 208 | 209 | func TestClient_AddFileToKnowledge(t *testing.T) { 210 | tests := []struct { 211 | name string 212 | knowledgeID string 213 | fileID string 214 | serverStatus int 215 | expectError bool 216 | }{ 217 | { 218 | name: "successful add", 219 | 
knowledgeID: "knowledge-123", 220 | fileID: "file-123", 221 | serverStatus: http.StatusOK, 222 | expectError: false, 223 | }, 224 | { 225 | name: "server error", 226 | knowledgeID: "knowledge-123", 227 | fileID: "file-123", 228 | serverStatus: http.StatusInternalServerError, 229 | expectError: true, 230 | }, 231 | } 232 | 233 | for _, tt := range tests { 234 | t.Run(tt.name, func(t *testing.T) { 235 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 236 | if r.Method != "POST" { 237 | t.Errorf("Expected POST method, got %s", r.Method) 238 | } 239 | expectedPath := "/api/v1/knowledge/" + tt.knowledgeID + "/file/add" 240 | if !strings.Contains(r.URL.Path, expectedPath) { 241 | t.Errorf("Expected path to contain %s, got %s", expectedPath, r.URL.Path) 242 | } 243 | if r.Header.Get("Authorization") != "Bearer test-api-key" { 244 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization")) 245 | } 246 | 247 | // Check request body 248 | var requestBody map[string]string 249 | if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil { 250 | t.Errorf("Failed to decode request body: %v", err) 251 | } 252 | if requestBody["file_id"] != tt.fileID { 253 | t.Errorf("Expected file_id %s, got %s", tt.fileID, requestBody["file_id"]) 254 | } 255 | 256 | w.WriteHeader(tt.serverStatus) 257 | })) 258 | defer server.Close() 259 | 260 | client := NewClient(server.URL, "test-api-key") 261 | ctx := context.Background() 262 | 263 | err := client.AddFileToKnowledge(ctx, tt.knowledgeID, tt.fileID) 264 | 265 | if tt.expectError { 266 | if err == nil { 267 | t.Errorf("Expected error but got none") 268 | } 269 | return 270 | } 271 | 272 | if err != nil { 273 | t.Errorf("Unexpected error: %v", err) 274 | } 275 | }) 276 | } 277 | } 278 | 279 | func TestClient_RemoveFileFromKnowledge(t *testing.T) { 280 | tests := []struct { 281 | name string 282 | knowledgeID string 283 | fileID string 284 | serverStatus int 285 | 
expectError bool 286 | }{ 287 | { 288 | name: "successful remove", 289 | knowledgeID: "knowledge-123", 290 | fileID: "file-123", 291 | serverStatus: http.StatusOK, 292 | expectError: false, 293 | }, 294 | { 295 | name: "server error", 296 | knowledgeID: "knowledge-123", 297 | fileID: "file-123", 298 | serverStatus: http.StatusInternalServerError, 299 | expectError: true, 300 | }, 301 | } 302 | 303 | for _, tt := range tests { 304 | t.Run(tt.name, func(t *testing.T) { 305 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 306 | if r.Method != "POST" { 307 | t.Errorf("Expected POST method, got %s", r.Method) 308 | } 309 | expectedPath := "/api/v1/knowledge/" + tt.knowledgeID + "/file/remove" 310 | if !strings.Contains(r.URL.Path, expectedPath) { 311 | t.Errorf("Expected path to contain %s, got %s", expectedPath, r.URL.Path) 312 | } 313 | if r.Header.Get("Authorization") != "Bearer test-api-key" { 314 | t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization")) 315 | } 316 | 317 | // Check request body 318 | var requestBody map[string]string 319 | if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil { 320 | t.Errorf("Failed to decode request body: %v", err) 321 | } 322 | if requestBody["file_id"] != tt.fileID { 323 | t.Errorf("Expected file_id %s, got %s", tt.fileID, requestBody["file_id"]) 324 | } 325 | 326 | w.WriteHeader(tt.serverStatus) 327 | })) 328 | defer server.Close() 329 | 330 | client := NewClient(server.URL, "test-api-key") 331 | ctx := context.Background() 332 | 333 | err := client.RemoveFileFromKnowledge(ctx, tt.knowledgeID, tt.fileID) 334 | 335 | if tt.expectError { 336 | if err == nil { 337 | t.Errorf("Expected error but got none") 338 | } 339 | return 340 | } 341 | 342 | if err != nil { 343 | t.Errorf("Unexpected error: %v", err) 344 | } 345 | }) 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /adapter_readme/SLACK_ADAPTER.md: -------------------------------------------------------------------------------- 1 | # Slack Adapter 2 | 3 | The Slack adapter allows you to sync messages from Slack channels into OpenWebUI knowledge bases. This enables you to search and reference Slack conversations, decisions, and discussions within your OpenWebUI interface. 
4 | 5 | ## Features 6 | 7 | - **Multi-channel support**: Sync from multiple Slack channels 8 | - **Knowledge base mapping**: Map each channel to a specific OpenWebUI knowledge base 9 | - **Regex pattern discovery**: Automatically discover and sync channels matching regex patterns 10 | - **Auto-join functionality**: Automatically join channels that match configured patterns 11 | - **Thread support**: Include or exclude thread messages 12 | - **Reaction data**: Optionally include emoji reactions 13 | - **Message filtering**: Control the number of messages and time range 14 | - **History management**: Choose between maintaining indefinite history or aging off old messages 15 | - **Incremental sync**: Only fetches new messages since the last sync 16 | - **Channel caching**: Improved performance with intelligent channel caching 17 | - **Retry logic**: Robust error handling with exponential backoff 18 | - **Join error logging**: Detailed logging of channel join failures 19 | 20 | ## Configuration 21 | 22 | ### Environment Variables 23 | 24 | Set the following environment variable: 25 | 26 | ```bash 27 | export SLACK_TOKEN="xoxb-your-slack-bot-token" 28 | ``` 29 | 30 | ### Configuration File 31 | 32 | Add the following section to your `config.yaml`: 33 | 34 | ```yaml 35 | slack: 36 | enabled: true 37 | token: "" # Set via SLACK_TOKEN environment variable 38 | channel_mappings: 39 | - channel_id: "C1234567890" # Slack channel ID 40 | channel_name: "general" # Channel name for display 41 | knowledge_id: "general-knowledge-base" 42 | - channel_id: "C0987654321" 43 | channel_name: "dev-team" 44 | knowledge_id: "dev-knowledge-base" 45 | - channel_id: "C1122334455" 46 | channel_name: "support" 47 | knowledge_id: "support-knowledge-base" 48 | regex_patterns: 49 | - pattern: "sales-.*-internal.*" # Matches channels like sales-team-internal 50 | knowledge_id: "sales-knowledge-base" 51 | auto_join: true # Automatically join matching channels 52 | - pattern: "dev-.*" # Matches 
channels like dev-frontend, dev-backend 53 | knowledge_id: "dev-knowledge-base" 54 | auto_join: false # Don't auto-join, just sync if already a member 55 | - pattern: "support-.*" # Matches channels like support-tier1, support-tier2 56 | knowledge_id: "support-knowledge-base" 57 | auto_join: true 58 | days_to_fetch: 30 # Number of days to fetch messages (default: 30) 59 | maintain_history: false # Whether to maintain indefinite history or age off (default: false) 60 | message_limit: 1000 # Max messages per channel per run (default: 1000) 61 | include_threads: true # Whether to include thread messages (default: true) 62 | include_reactions: false # Whether to include reaction data (default: false) 63 | ``` 64 | 65 | ### Configuration Options 66 | 67 | | Option | Type | Required | Default | Description | 68 | |--------|------|----------|---------|-------------| 69 | | `enabled` | boolean | Yes | `false` | Enable/disable the Slack adapter | 70 | | `token` | string | Yes | - | Slack bot token (set via `SLACK_TOKEN` env var) | 71 | | `channel_mappings` | array | No | `[]` | List of explicit channel mappings | 72 | | `regex_patterns` | array | No | `[]` | List of regex patterns for auto-discovering channels | 73 | | `days_to_fetch` | integer | No | `30` | Number of days to fetch messages | 74 | | `maintain_history` | boolean | No | `false` | Whether to maintain indefinite history or age off | 75 | | `message_limit` | integer | No | `1000` | Max messages per channel per run | 76 | | `include_threads` | boolean | No | `true` | Whether to include thread messages | 77 | | `include_reactions` | boolean | No | `false` | Whether to include reaction data | 78 | 79 | ### Channel Mapping 80 | 81 | Each mapping in the `channel_mappings` array should contain: 82 | 83 | | Field | Type | Required | Description | 84 | |-------|------|----------|-------------| 85 | | `channel_id` | string | Yes | Slack channel ID (starts with 'C') | 86 | | `channel_name` | string | Yes | Channel name 
for display purposes | 87 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID | 88 | 89 | ### Regex Pattern Discovery 90 | 91 | The `regex_patterns` feature allows you to automatically discover and sync channels that match specific patterns. This is useful for: 92 | 93 | - **Dynamic channel discovery**: Automatically find new channels that match your naming conventions 94 | - **Bulk channel management**: Sync multiple similar channels without manual configuration 95 | - **Auto-joining**: Automatically join channels that match patterns 96 | 97 | Each pattern in the `regex_patterns` array should contain: 98 | 99 | | Field | Type | Required | Description | 100 | |-------|------|----------|-------------| 101 | | `pattern` | string | Yes | Regex pattern to match channel names | 102 | | `knowledge_id` | string | Yes | Target OpenWebUI knowledge base ID for matching channels | 103 | | `auto_join` | boolean | No | Whether to automatically join matching channels (default: `false`) | 104 | 105 | #### Regex Pattern Examples 106 | 107 | ```yaml 108 | regex_patterns: 109 | # Match all sales internal channels 110 | - pattern: "sales-.*-internal.*" 111 | knowledge_id: "sales-knowledge-base" 112 | auto_join: true 113 | 114 | # Match all development channels 115 | - pattern: "dev-.*" 116 | knowledge_id: "dev-knowledge-base" 117 | auto_join: false 118 | 119 | # Match support channels 120 | - pattern: "support-.*" 121 | knowledge_id: "support-knowledge-base" 122 | auto_join: true 123 | 124 | # Match project-specific channels 125 | - pattern: "project-[a-zA-Z0-9]+-.*" 126 | knowledge_id: "project-knowledge-base" 127 | auto_join: true 128 | ``` 129 | 130 | #### How Regex Discovery Works 131 | 132 | 1. **Channel Discovery**: The adapter fetches all channels the bot can access 133 | 2. **Pattern Matching**: Each channel name is tested against configured regex patterns 134 | 3. **Auto-joining**: If `auto_join: true`, the bot attempts to join matching channels 135 | 4. 
**Sync Setup**: Matching channels are added to the sync list with the specified knowledge ID 136 | 5. **Caching**: Channel lists are cached to improve performance and reduce API calls 137 | 138 | #### Important Notes 139 | 140 | - **Channel Access**: The bot can only discover channels it has access to 141 | - **Auto-join Limitations**: Some channels may not allow bots to join (e.g., private channels requiring invitation); auto-joining public channels also requires the `channels:join` bot token scope 142 | - **Performance**: Regex discovery happens once per sync session and results are cached 143 | - **Error Handling**: Failed joins are logged to `data/slack/join_errors.log` for troubleshooting 144 | 145 | ## Slack Bot Setup 146 | 147 | ### 1. Create a Slack App 148 | 149 | 1. Go to [api.slack.com/apps](https://api.slack.com/apps) 150 | 2. Click "Create New App" 151 | 3. Choose "From scratch" 152 | 4. Enter app name and select your workspace 153 | 154 | ### 2. Configure Bot Permissions 155 | 156 | In your app settings, go to "OAuth & Permissions" and add these scopes: 157 | 158 | **Bot Token Scopes:** 159 | - `channels:history` - View messages in public channels 160 | - `channels:read` - View basic information about public channels 161 | - `groups:history` - View messages in private channels 162 | - `groups:read` - View basic information about private channels 163 | - `im:history` - View messages in direct messages 164 | - `im:read` - View basic information about direct messages 165 | - `mpim:history` - View messages in group direct messages 166 | - `mpim:read` - View basic information about group direct messages 167 | - `reactions:read` - View emoji reactions (if including reactions) 168 | 169 | ### 3. Install the App 170 | 171 | 1. Go to "Install App" in your app settings 172 | 2. Click "Install to Workspace" 173 | 3. Review permissions and click "Allow" 174 | 4. Copy the "Bot User OAuth Token" (starts with `xoxb-`) 175 | 176 | ### 4. Get Channel IDs 177 | 178 | To find channel IDs: 179 | 180 | 1. Open Slack in your browser 181 | 2.
Navigate to the channel 182 | 3. Look at the URL: `https://app.slack.com/client/T0123456789/C1234567890` (legacy form: `https://yourworkspace.slack.com/messages/C1234567890`) 183 | 4. The channel ID is the last path segment of the URL (the part after `/messages/` in the legacy form) 184 | 185 | Or use the Slack API: 186 | ```bash 187 | curl -H "Authorization: Bearer xoxb-your-token" \ 188 | "https://slack.com/api/conversations.list" 189 | ``` 190 | 191 | ## Message Processing 192 | 193 | ### Message Format 194 | 195 | Messages are processed and stored in markdown format: 196 | 197 | ```markdown 198 | # Channel: #general 199 | **User:** @john.doe 200 | **Timestamp:** 2024-01-15 10:30:00 201 | **Message:** 202 | This is the message content. 203 | 204 | **Thread Reply:** 205 | - @jane.smith: This is a thread reply 206 | - @bob.wilson: Another thread reply 207 | 208 | **Reactions:** :thumbsup: :heart: :laughing: 209 | ``` 210 | 211 | ### Message Types 212 | 213 | The adapter processes: 214 | - **Regular messages**: Text messages from users 215 | - **Thread messages**: Replies to messages (if enabled) 216 | - **File attachments**: File names and descriptions 217 | - **Reactions**: Emoji reactions (if enabled) 218 | - **System messages**: Channel join/leave notifications 219 | 220 | ### Excluded Content 221 | 222 | The adapter automatically excludes: 223 | - Messages from bots (unless specifically configured) 224 | - Deleted messages 225 | - Messages older than the configured `days_to_fetch` 226 | - Messages exceeding the `message_limit` 227 | 228 | ## Sync Behavior 229 | 230 | ### Initial Sync 231 | 232 | - Fetches messages from the last `days_to_fetch` days 233 | - Respects the `message_limit` per channel 234 | - Processes all configured channels 235 | 236 | ### Incremental Sync 237 | 238 | - Only fetches new messages since the last sync 239 | - Maintains sync state per channel 240 | - Handles rate limiting automatically 241 | 242 | ### History Management 243 | 244 | Two modes are available: 245 | 246 | 1.
**Age-off mode** (`maintain_history: false`): 247 | - Only keeps messages from the last `days_to_fetch` days 248 | - Older messages are automatically removed 249 | - Reduces storage usage 250 | 251 | 2. **Indefinite history** (`maintain_history: true`): 252 | - Keeps all messages indefinitely 253 | - Only fetches new messages on subsequent syncs 254 | - Requires more storage but preserves all history 255 | 256 | ## Use Cases 257 | 258 | ### Team Knowledge Base 259 | 260 | Sync important team discussions: 261 | 262 | ```yaml 263 | slack: 264 | enabled: true 265 | channel_mappings: 266 | - channel_id: "C1234567890" 267 | channel_name: "general" 268 | knowledge_id: "team-general" 269 | - channel_id: "C0987654321" 270 | channel_name: "dev-team" 271 | knowledge_id: "dev-discussions" 272 | days_to_fetch: 90 273 | maintain_history: true 274 | include_threads: true 275 | ``` 276 | 277 | ### Support Documentation 278 | 279 | Sync support channel discussions: 280 | 281 | ```yaml 282 | slack: 283 | enabled: true 284 | channel_mappings: 285 | - channel_id: "C1122334455" 286 | channel_name: "support" 287 | knowledge_id: "support-knowledge" 288 | days_to_fetch: 30 289 | maintain_history: false 290 | include_threads: true 291 | include_reactions: true 292 | ``` 293 | 294 | ### Project Discussions 295 | 296 | Sync project-specific channels: 297 | 298 | ```yaml 299 | slack: 300 | enabled: true 301 | channel_mappings: 302 | - channel_id: "C5555666677" 303 | channel_name: "project-alpha" 304 | knowledge_id: "project-alpha-docs" 305 | - channel_id: "C8888999900" 306 | channel_name: "project-beta" 307 | knowledge_id: "project-beta-docs" 308 | days_to_fetch: 60 309 | maintain_history: true 310 | ``` 311 | 312 | ## Troubleshooting 313 | 314 | ### Common Issues 315 | 316 | 1. **Authentication errors** 317 | - Verify your Slack token is valid and starts with `xoxb-` 318 | - Check that the bot has been installed to your workspace 319 | - Ensure the token hasn't expired 320 | 321 | 2. 
**Channel access denied** 322 | - Verify the bot has been added to the channels you want to sync 323 | - Check that the bot has the required permissions 324 | - Ensure channel IDs are correct 325 | 326 | 3. **No messages synced** 327 | - Check that channels have messages within the `days_to_fetch` range 328 | - Verify the `message_limit` isn't too restrictive 329 | - Ensure channels aren't empty or archived 330 | 331 | 4. **Rate limit exceeded** 332 | - The adapter automatically handles rate limits with exponential backoff 333 | - Consider reducing sync frequency if this occurs frequently 334 | - Check if you're hitting Slack's API rate limits 335 | 336 | 5. **Channel join failures** 337 | - Check `data/slack/join_errors.log` for detailed join failure information 338 | - Common issues: archived channels, permission restrictions, private channel access 339 | - Verify bot permissions and channel settings 340 | 341 | ### Debug Logging 342 | 343 | Enable debug logging to see detailed sync information: 344 | 345 | ```yaml 346 | log_level: debug 347 | ``` 348 | 349 | This will show: 350 | - Which channels are being processed 351 | - Message fetching progress 352 | - API request/response details 353 | - Sync timing and statistics 354 | - Channel discovery and regex matching 355 | - Join attempts and results 356 | 357 | ### Error Logging 358 | 359 | The adapter provides detailed error logging for troubleshooting: 360 | 361 | - **Join Errors**: `data/slack/join_errors.log` - Detailed log of channel join failures 362 | - **Channel Tracking**: `data/slack/channels/channel_tracking.txt` - Overview of all discovered channels and their status 363 | - **Debug Logs**: Console output with detailed processing information 364 | 365 | ## Security Considerations 366 | 367 | - **Token security**: Store your Slack token securely and never commit it to version control 368 | - **Channel access**: Only sync channels that contain appropriate content 369 | - **Content filtering**: Review the 
content being synced to ensure it's appropriate 370 | - **Privacy**: Be mindful of private channels and sensitive information 371 | 372 | ## Performance Tips 373 | 374 | - **Message limits**: Set appropriate `message_limit` values to balance completeness with performance 375 | - **Days to fetch**: Adjust `days_to_fetch` based on your needs 376 | - **Thread inclusion**: Disable `include_threads` if you don't need thread context 377 | - **Reaction inclusion**: Disable `include_reactions` to reduce data volume 378 | 379 | ## Example Configuration 380 | 381 | ```yaml 382 | # Complete example configuration 383 | log_level: info 384 | schedule: 385 | interval: 1h 386 | 387 | storage: 388 | path: "/data" 389 | 390 | openwebui: 391 | base_url: "http://localhost:8080" 392 | api_key: "your-openwebui-api-key" 393 | 394 | slack: 395 | enabled: true 396 | token: "" # Set via SLACK_TOKEN environment variable 397 | channel_mappings: 398 | - channel_id: "C1234567890" 399 | channel_name: "general" 400 | knowledge_id: "general-discussions" 401 | - channel_id: "C0987654321" 402 | channel_name: "dev-team" 403 | knowledge_id: "dev-discussions" 404 | - channel_id: "C1122334455" 405 | channel_name: "support" 406 | knowledge_id: "support-knowledge" 407 | regex_patterns: 408 | - pattern: "sales-.*-internal.*" 409 | knowledge_id: "sales-knowledge-base" 410 | auto_join: true 411 | - pattern: "dev-.*" 412 | knowledge_id: "dev-knowledge-base" 413 | auto_join: false 414 | - pattern: "support-.*" 415 | knowledge_id: "support-knowledge-base" 416 | auto_join: true 417 | days_to_fetch: 30 418 | maintain_history: false 419 | message_limit: 1000 420 | include_threads: true 421 | include_reactions: false 422 | ``` 423 | 424 | ## Rate Limits 425 | 426 | Slack assigns each Web API method to a rate limit tier, which caps how many requests an app can make to that method per minute: 427 | 428 | - **Tier 1**: 1+ requests per minute 429 | - **Tier 2**: 20+ requests per minute 430 | - **Tier 3**: 50+ requests per minute 431 | - **Tier 4**: 100+ requests per minute 432 | 433 | The adapter automatically handles rate limiting
with exponential backoff. For optimal performance: 434 | 435 | - Avoid syncing too many channels simultaneously 436 | - Use appropriate sync intervals 437 | - Monitor your API usage in the Slack app dashboard 438 | --------------------------------------------------------------------------------