├── .dockerignore ├── .env.example ├── .gitignore ├── .vscode └── settings.json ├── CLAUDE.md ├── DOCKER.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── config ├── litellm │ └── config.yaml └── localai │ └── gpt4all-j.yaml ├── dist ├── api │ ├── controllers │ │ ├── crawler.controller.js │ │ └── scraper.controller.js │ ├── middleware │ │ ├── auth.middleware.js │ │ └── validation.js │ └── routes │ │ ├── crawler.js │ │ ├── crawler.routes.js │ │ ├── scraper.js │ │ └── scraper.routes.js ├── index.js ├── scraper │ ├── crawler-processor.js │ ├── crawler.js │ ├── playwright-scraper.js │ └── scraper-manager.js ├── services │ ├── cache.service.js │ ├── llm-service-factory.js │ ├── playwright.service.js │ ├── queue.service.js │ └── redis.service.js ├── transformers │ ├── content-cleaner.js │ ├── html-to-markdown.js │ └── llm-extractor.js ├── types │ ├── crawler.js │ ├── index.js │ └── schema.js └── utils │ ├── html-utils.js │ └── logger.js ├── docker-compose.llm.yml ├── docker-compose.yml ├── docker └── llm-providers │ ├── docker-compose.custom.yml │ ├── docker-compose.litellm.yml │ ├── docker-compose.localai.yml │ ├── docker-compose.ollama-mac.yml │ ├── docker-compose.ollama.yml │ └── docker-compose.vllm.yml ├── docs └── LLM_PROVIDERS.md ├── healthcheck.sh ├── logs ├── .gitkeep ├── combined.log └── error.log ├── package.json ├── src ├── api │ ├── controllers │ │ ├── batch-scrape.controller.ts │ │ ├── crawler.controller.ts │ │ └── scraper.controller.ts │ ├── middleware │ │ ├── auth.middleware.ts │ │ └── validation.ts │ └── routes │ │ ├── batch-scrape.routes.ts │ │ ├── crawler.routes.ts │ │ ├── crawler.ts │ │ ├── scraper.routes.ts │ │ └── scraper.ts ├── index.ts ├── scraper │ ├── crawler-processor.ts │ ├── crawler.ts │ ├── http-scraper.ts │ ├── playwright-scraper.ts │ └── scraper-manager.ts ├── services │ ├── batch-scrape.service.ts │ ├── cache.service.ts │ ├── enhanced-queue.service.ts │ ├── file-export.service.ts │ ├── llm-service-factory.ts │ ├── local-llm.service.ts │ ├── openai.service.ts │ ├── playwright.service.ts │ ├── queue.service.ts │ ├── redis.service.ts │ └── url-normalization.service.ts ├── tests │ └── services │ │ └── llm-providers.test.ts ├── transformers │ ├── content-cleaner.ts │ ├── html-to-markdown.ts │ └── llm-extractor.ts ├── types │ ├── crawler.ts │ ├── index.ts │ ├── llm.types.ts │ └── schema.ts └── utils │ ├── html-utils.ts │ └── logger.ts ├── swagger.yaml └── tsconfig.json /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/.dockerignore -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/.env.example -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/.gitignore -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/CLAUDE.md -------------------------------------------------------------------------------- /DOCKER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/DOCKER.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/README.md -------------------------------------------------------------------------------- /config/litellm/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/config/litellm/config.yaml -------------------------------------------------------------------------------- /config/localai/gpt4all-j.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/config/localai/gpt4all-j.yaml -------------------------------------------------------------------------------- /dist/api/controllers/crawler.controller.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/controllers/crawler.controller.js -------------------------------------------------------------------------------- /dist/api/controllers/scraper.controller.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/controllers/scraper.controller.js -------------------------------------------------------------------------------- /dist/api/middleware/auth.middleware.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/middleware/auth.middleware.js -------------------------------------------------------------------------------- /dist/api/middleware/validation.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/middleware/validation.js -------------------------------------------------------------------------------- /dist/api/routes/crawler.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/routes/crawler.js -------------------------------------------------------------------------------- /dist/api/routes/crawler.routes.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/routes/crawler.routes.js -------------------------------------------------------------------------------- /dist/api/routes/scraper.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/routes/scraper.js -------------------------------------------------------------------------------- /dist/api/routes/scraper.routes.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/api/routes/scraper.routes.js -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/index.js -------------------------------------------------------------------------------- /dist/scraper/crawler-processor.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/scraper/crawler-processor.js -------------------------------------------------------------------------------- /dist/scraper/crawler.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/scraper/crawler.js -------------------------------------------------------------------------------- /dist/scraper/playwright-scraper.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/scraper/playwright-scraper.js -------------------------------------------------------------------------------- /dist/scraper/scraper-manager.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/scraper/scraper-manager.js -------------------------------------------------------------------------------- /dist/services/cache.service.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/services/cache.service.js -------------------------------------------------------------------------------- /dist/services/llm-service-factory.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/services/llm-service-factory.js -------------------------------------------------------------------------------- /dist/services/playwright.service.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/services/playwright.service.js -------------------------------------------------------------------------------- /dist/services/queue.service.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/services/queue.service.js -------------------------------------------------------------------------------- /dist/services/redis.service.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/services/redis.service.js -------------------------------------------------------------------------------- /dist/transformers/content-cleaner.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/transformers/content-cleaner.js -------------------------------------------------------------------------------- /dist/transformers/html-to-markdown.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/transformers/html-to-markdown.js -------------------------------------------------------------------------------- /dist/transformers/llm-extractor.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/transformers/llm-extractor.js -------------------------------------------------------------------------------- /dist/types/crawler.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/types/crawler.js -------------------------------------------------------------------------------- /dist/types/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/types/index.js -------------------------------------------------------------------------------- /dist/types/schema.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/types/schema.js -------------------------------------------------------------------------------- /dist/utils/html-utils.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/utils/html-utils.js -------------------------------------------------------------------------------- /dist/utils/logger.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/dist/utils/logger.js -------------------------------------------------------------------------------- /docker-compose.llm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker-compose.llm.yml -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.custom.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.custom.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.litellm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.litellm.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.localai.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.localai.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.ollama-mac.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.ollama-mac.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.ollama.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.ollama.yml -------------------------------------------------------------------------------- /docker/llm-providers/docker-compose.vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docker/llm-providers/docker-compose.vllm.yml -------------------------------------------------------------------------------- /docs/LLM_PROVIDERS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/docs/LLM_PROVIDERS.md -------------------------------------------------------------------------------- /healthcheck.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/healthcheck.sh -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/logs/.gitkeep -------------------------------------------------------------------------------- /logs/combined.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/logs/combined.log -------------------------------------------------------------------------------- /logs/error.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/logs/error.log -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/package.json -------------------------------------------------------------------------------- /src/api/controllers/batch-scrape.controller.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/controllers/batch-scrape.controller.ts -------------------------------------------------------------------------------- /src/api/controllers/crawler.controller.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/controllers/crawler.controller.ts -------------------------------------------------------------------------------- /src/api/controllers/scraper.controller.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/controllers/scraper.controller.ts -------------------------------------------------------------------------------- /src/api/middleware/auth.middleware.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/middleware/auth.middleware.ts -------------------------------------------------------------------------------- /src/api/middleware/validation.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/middleware/validation.ts -------------------------------------------------------------------------------- /src/api/routes/batch-scrape.routes.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/routes/batch-scrape.routes.ts -------------------------------------------------------------------------------- /src/api/routes/crawler.routes.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/routes/crawler.routes.ts -------------------------------------------------------------------------------- /src/api/routes/crawler.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/routes/crawler.ts -------------------------------------------------------------------------------- /src/api/routes/scraper.routes.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/routes/scraper.routes.ts -------------------------------------------------------------------------------- /src/api/routes/scraper.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/api/routes/scraper.ts -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/index.ts -------------------------------------------------------------------------------- /src/scraper/crawler-processor.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/scraper/crawler-processor.ts -------------------------------------------------------------------------------- /src/scraper/crawler.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/scraper/crawler.ts -------------------------------------------------------------------------------- /src/scraper/http-scraper.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/scraper/http-scraper.ts -------------------------------------------------------------------------------- /src/scraper/playwright-scraper.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/scraper/playwright-scraper.ts -------------------------------------------------------------------------------- /src/scraper/scraper-manager.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/scraper/scraper-manager.ts -------------------------------------------------------------------------------- /src/services/batch-scrape.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/batch-scrape.service.ts -------------------------------------------------------------------------------- /src/services/cache.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/cache.service.ts -------------------------------------------------------------------------------- /src/services/enhanced-queue.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/enhanced-queue.service.ts -------------------------------------------------------------------------------- /src/services/file-export.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/file-export.service.ts -------------------------------------------------------------------------------- /src/services/llm-service-factory.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/llm-service-factory.ts -------------------------------------------------------------------------------- /src/services/local-llm.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/local-llm.service.ts -------------------------------------------------------------------------------- /src/services/openai.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/openai.service.ts -------------------------------------------------------------------------------- /src/services/playwright.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/playwright.service.ts -------------------------------------------------------------------------------- /src/services/queue.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/queue.service.ts -------------------------------------------------------------------------------- /src/services/redis.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/redis.service.ts -------------------------------------------------------------------------------- /src/services/url-normalization.service.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/services/url-normalization.service.ts -------------------------------------------------------------------------------- /src/tests/services/llm-providers.test.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/tests/services/llm-providers.test.ts -------------------------------------------------------------------------------- /src/transformers/content-cleaner.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/transformers/content-cleaner.ts -------------------------------------------------------------------------------- /src/transformers/html-to-markdown.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/transformers/html-to-markdown.ts -------------------------------------------------------------------------------- /src/transformers/llm-extractor.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/transformers/llm-extractor.ts -------------------------------------------------------------------------------- /src/types/crawler.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/types/crawler.ts -------------------------------------------------------------------------------- /src/types/index.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/types/index.ts -------------------------------------------------------------------------------- /src/types/llm.types.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/types/llm.types.ts -------------------------------------------------------------------------------- /src/types/schema.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/types/schema.ts -------------------------------------------------------------------------------- /src/utils/html-utils.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/utils/html-utils.ts -------------------------------------------------------------------------------- /src/utils/logger.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/src/utils/logger.ts -------------------------------------------------------------------------------- /swagger.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/swagger.yaml -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stretchcloud/deepscrape/HEAD/tsconfig.json --------------------------------------------------------------------------------