├── .github ├── FUNDING.yml ├── dependabot_npm.yml └── dependabot_pip.yml ├── .gitignore ├── Dockerfile.core ├── Dockerfile.crawler ├── Dockerfile.frontend ├── LICENSE ├── README.md ├── assets ├── imgs │ ├── aisearch_question_suggestion.png │ ├── aisearch_result.png │ ├── chat_view.png │ ├── code_view.png │ ├── financial-table-1.png │ ├── financial-table-2.png │ └── search_view.png ├── neosearch.png └── neosearch.webp ├── changelog.md ├── deploy_searxng_with_docker.sh ├── docker-compose.yaml ├── neosearch ├── .env.template ├── .gitignore ├── .python-version ├── README.md ├── __init__.py ├── api │ ├── __init__.py │ └── routers │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── health_check.py │ │ ├── query.py │ │ └── search.py ├── app │ ├── __init__.py │ ├── rag.py │ ├── server.py │ └── worker_broker.py ├── config.py ├── config.yaml ├── constants │ ├── __init__.py │ ├── bedrock.py │ ├── circuitbreaker.py │ ├── embeddings.py │ ├── logging.py │ ├── memory.py │ ├── queue.py │ ├── rag_search.py │ ├── retriever.py │ ├── searxng.py │ └── trace.py ├── datastore │ ├── __init__.py │ ├── crud │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── document.py │ │ ├── message.py │ │ └── vote.py │ ├── database.py │ ├── model │ │ ├── __init__.py │ │ ├── base.py │ │ ├── chat.py │ │ ├── document.py │ │ ├── message.py │ │ └── vote.py │ └── vectorstores │ │ ├── __init__.py │ │ ├── base.py │ │ ├── milvus_vector_stores.py │ │ ├── pg_vector_stores.py │ │ ├── pgrs_vector_stores.py │ │ └── qdrant_vector_stores.py ├── engine │ ├── __init__.py │ ├── agents │ │ ├── __init__.py │ │ ├── deep_research.py │ │ ├── research.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── research_tools.py │ │ │ └── web_search.py │ ├── constants.py │ ├── db_utils.py │ ├── index.py │ ├── loader.py │ ├── prompts │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── crag_workflow.py │ │ ├── deep_research.py │ │ └── search_o1.py │ ├── query_filter.py │ ├── rag_engine │ │ ├── __init__.py │ │ ├── chat_engine.py │ │ └── query_engine.py │ ├── reranker │ │ ├── __init__.py │ │ └── cohere.py │ ├── retriever │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bm25_hybrid.py │ │ ├── doc_hybrid.py │ │ ├── keyword_hybrid.py │ │ ├── paradedb.py │ │ └── searxng.py │ ├── search │ │ ├── __init__.py │ │ └── bing_search.py │ ├── tools │ │ ├── __init__.py │ │ ├── artifact.py │ │ ├── document_generator.py │ │ ├── duckduckgo.py │ │ ├── form_filling.py │ │ ├── img_gen.py │ │ ├── interpreter.py │ │ ├── openapi_action.py │ │ └── weather.py │ ├── utils │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── query.py │ │ ├── rag_search.py │ │ └── searxng.py │ └── workflow │ │ ├── __init__.py │ │ ├── crag.py │ │ └── events │ │ ├── __init__.py │ │ └── crag.py ├── exceptions │ ├── __init__.py │ ├── bedrock.py │ └── engine │ │ ├── __init__.py │ │ └── retriever.py ├── export_requirements_txt.sh ├── infrastructure │ ├── __init__.py │ └── aws │ │ ├── __init__.py │ │ ├── bedrock_adapter.py │ │ └── s3_adapter.py ├── main.py ├── middlewares │ ├── __init__.py │ ├── request_id.py │ └── request_logger.py ├── models │ ├── __init__.py │ ├── chat_models.py │ ├── health_check.py │ └── query_models.py ├── mypy.ini ├── pyproject.toml ├── response │ ├── __init__.py │ └── chat.py ├── search_with_lepton.py ├── services │ ├── __init__.py │ ├── file.py │ └── next_question_suggesion.py ├── settings │ ├── __init__.py │ ├── fastembed.py │ ├── gemini.py │ ├── huggingface.py │ ├── llmhub.py │ ├── mistral.py │ ├── ollama.py │ └── openai.py ├── tests │ └── __init__.py ├── utils │ ├── __init__.py │ ├── configs.py │ ├── events.py │ ├── 
gc_tuning.py │ ├── logging.py │ ├── ratelimitter.py │ ├── ray.py │ └── singleton.py ├── uv.lock └── worker.py ├── neosearch_ai ├── README.md ├── configs │ ├── __init__.py │ ├── app.py │ ├── embedding_param_manager.py │ └── reranker_param_manager.py ├── constants │ ├── __init__.py │ └── logging.py ├── embedding.py ├── engine │ ├── __init__.py │ ├── embeddings.py │ └── flash_reranker.py ├── flashrerank.py ├── main.py ├── pyproject.toml ├── utils │ ├── __init__.py │ ├── logger.py │ └── singleton.py └── uv.lock ├── neosearch_crawler ├── .env.sample ├── .gitignore ├── __init__.py ├── constants │ ├── __init__.py │ ├── crawl_seeds.py │ ├── logger.py │ └── modes.py ├── crawlers │ ├── __init__.py │ ├── base.py │ ├── github.py │ ├── linkedin.py │ ├── medium.py │ └── s3 │ │ ├── __init__.py │ │ └── s3_crawler.py ├── datastore │ ├── __init__.py │ └── database.py ├── dispatchers │ ├── __init__.py │ ├── base.py │ └── lib.py ├── engine │ ├── __init__.py │ ├── agent │ │ ├── __init__.py │ │ ├── base.py │ │ ├── web_corpus_collector.py │ │ └── wikidump_parser.py │ ├── base_dispatcher.py │ └── runner │ │ ├── __init__.py │ │ ├── base.py │ │ └── common_crawl.py ├── exception │ ├── __init__.py │ └── dispatcher.py ├── export_requirements_from_poetry.sh ├── main.py ├── mongo_db │ ├── __init__.py │ ├── documents.py │ ├── mongo.py │ └── mongo_config.py ├── pyproject.toml ├── sample_crawler.py ├── sample_offline_rag_for_web_search_agent.py ├── utils │ ├── __init__.py │ ├── domain_name_utils.py │ ├── errors.py │ ├── logger.py │ ├── pdf_util.py │ ├── singleton.py │ └── trafilatura_util.py └── uv.lock ├── neosearch_frontend ├── .env.example ├── .eslintrc.json ├── .gitignore ├── README.md ├── app │ ├── (auth) │ │ ├── actions.ts │ │ ├── api │ │ │ └── auth │ │ │ │ ├── [...nextauth] │ │ │ │ └── route.ts │ │ │ │ └── guest │ │ │ │ └── route.ts │ │ ├── auth.config.ts │ │ ├── auth.ts │ │ ├── login │ │ │ └── page.tsx │ │ └── register │ │ │ └── page.tsx │ ├── (chat) │ │ ├── actions.ts │ │ ├── api │ │ │ ├── chat │ │ │ │ ├── route.ts │ │ │ │ └── schema.ts │ │ │ ├── document │ │ │ │ └── route.ts │ │ │ ├── files │ │ │ │ └── upload │ │ │ │ │ └── route.ts │ │ │ ├── history │ │ │ │ └── route.ts │ │ │ ├── suggestions │ │ │ │ └── route.ts │ │ │ └── vote │ │ │ │ └── route.ts │ │ ├── chat │ │ │ └── [id] │ │ │ │ └── page.tsx │ │ ├── layout.tsx │ │ ├── opengraph-image.png │ │ ├── page.tsx │ │ └── twitter-image.png │ ├── aisearch │ │ ├── page.tsx │ │ ├── search │ │ │ ├── [id] │ │ │ │ └── page.tsx │ │ │ └── page.tsx │ │ └── share │ │ │ └── [id] │ │ │ └── page.tsx │ ├── api │ │ ├── advanced-search │ │ │ └── route.ts │ │ └── search-chat │ │ │ └── route.ts │ ├── favicon.ico │ ├── globals.css │ └── layout.tsx ├── artifacts │ ├── actions.ts │ ├── code │ │ ├── client.tsx │ │ └── server.ts │ ├── image │ │ ├── client.tsx │ │ └── server.ts │ ├── sheet │ │ ├── client.tsx │ │ └── server.ts │ └── text │ │ ├── client.tsx │ │ └── server.ts ├── biome.jsonc ├── components.json ├── components │ ├── Chat.tsx │ ├── action_search_bar.tsx │ ├── app-sidebar.tsx │ ├── artifact-actions.tsx │ ├── artifact-close-button.tsx │ ├── artifact-messages.tsx │ ├── artifact.tsx │ ├── auth-form.tsx │ ├── block-actions.tsx │ ├── block-close-button.tsx │ ├── block-messages.tsx │ ├── block.tsx │ ├── chat-header.tsx │ ├── code-block.tsx │ ├── code-editor.tsx │ ├── console.tsx │ ├── create-artifact.tsx │ ├── create-block.tsx │ ├── data-stream-handler.tsx │ ├── diffview.tsx │ ├── document-preview.tsx │ ├── document-skeleton.tsx │ ├── document.tsx │ ├── editor.tsx │ ├── greeting.tsx │ 
├── icons.tsx │ ├── image-editor.tsx │ ├── markdown.tsx │ ├── message-actions.tsx │ ├── message-editor.tsx │ ├── message-reasoning.tsx │ ├── message.tsx │ ├── messages.tsx │ ├── model-selector.tsx │ ├── multimodal-input.tsx │ ├── overview.tsx │ ├── preview-attachment.tsx │ ├── sheet-editor.tsx │ ├── sidebar-history-item.tsx │ ├── sidebar-history.tsx │ ├── sidebar-toggle.tsx │ ├── sidebar-user-nav.tsx │ ├── sign-out-form.tsx │ ├── styles │ │ └── colors.ts │ ├── submit-button.tsx │ ├── suggested-actions.tsx │ ├── suggestion.tsx │ ├── text-editor.tsx │ ├── theme-provider.tsx │ ├── toast.tsx │ ├── toolbar.tsx │ ├── ui │ │ ├── accordion.tsx │ │ ├── alert-dialog.tsx │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── dropdown-menu.tsx │ │ ├── financials-table.tsx │ │ ├── input.tsx │ │ ├── label.tsx │ │ ├── news.tsx │ │ ├── select.tsx │ │ ├── separator.tsx │ │ ├── sheet.tsx │ │ ├── sidebar.tsx │ │ ├── skeleton.tsx │ │ ├── stock-chart.tsx │ │ ├── stock-screener-table.tsx │ │ ├── table.tsx │ │ ├── textarea.tsx │ │ └── tooltip.tsx │ ├── use-scroll-to-bottom.ts │ ├── version-footer.tsx │ ├── visibility-selector.tsx │ └── weather.tsx ├── docs │ ├── 01-quick-start.md │ ├── 02-update-models.md │ └── 03-blocks.md ├── drizzle.config.ts ├── hooks │ ├── use-artifact.ts │ ├── use-auto-resume.ts │ ├── use-chat-visibility.ts │ ├── use-messages.tsx │ ├── use-mobile.tsx │ └── use-scroll-to-bottom.tsx ├── lib │ ├── actions │ │ └── chat.ts │ ├── agents │ │ ├── generate-related-questions.ts │ │ ├── manual-researcher.ts │ │ └── researcher.ts │ ├── ai │ │ ├── entitlements.ts │ │ ├── models.test.ts │ │ ├── models.ts │ │ ├── prompts.ts │ │ ├── providers.ts │ │ └── tools │ │ │ ├── create-document.ts │ │ │ ├── get-weather.ts │ │ │ ├── request-suggestions.ts │ │ │ └── update-document.ts │ ├── api │ │ └── stock-filters.ts │ ├── artifacts │ │ └── server.ts │ ├── config │ │ ├── default-models.json │ │ └── models.ts │ ├── constants.ts │ ├── db │ │ ├── helpers │ │ │ └── 01-core-to-parts.ts │ │ ├── migrate.ts │ │ ├── migrations │ │ │ ├── 0000_curious_darwin.sql │ │ │ ├── 0000_keen_devos.sql │ │ │ ├── 0001_sparkling_blue_marvel.sql │ │ │ ├── 0002_wandering_riptide.sql │ │ │ ├── 0003_cloudy_glorian.sql │ │ │ ├── 0004_odd_slayback.sql │ │ │ ├── 0005_wooden_whistler.sql │ │ │ ├── 0006_marvelous_frog_thor.sql │ │ │ └── meta │ │ │ │ ├── 0000_snapshot.json │ │ │ │ └── _journal.json │ │ ├── queries.ts │ │ ├── schema.ts │ │ └── utils.ts │ ├── editor │ │ ├── config.ts │ │ ├── diff.js │ │ ├── functions.tsx │ │ ├── react-renderer.tsx │ │ └── suggestions.tsx │ ├── errors.ts │ ├── hooks │ │ └── use-copy-to-clipboard.ts │ ├── redis │ │ └── config.ts │ ├── schema │ │ ├── related.tsx │ │ ├── retrieve.tsx │ │ └── search.tsx │ ├── search_constants │ │ └── index.ts │ ├── search_utils │ │ ├── context-window.ts │ │ ├── cookies.ts │ │ ├── index.ts │ │ └── registry.ts │ ├── streaming │ │ ├── create-manual-tool-stream.ts │ │ ├── create-tool-calling-stream.ts │ │ ├── handle-stream-finish.ts │ │ ├── parse-tool-call.ts │ │ ├── tool-execution.ts │ │ └── types.ts │ ├── tools │ │ ├── finantial-news.ts │ │ ├── retrieve.ts │ │ ├── search.ts │ │ └── video-search.ts │ ├── types.ts │ ├── types │ │ ├── index.ts │ │ └── models.ts │ └── utils.ts ├── middleware.ts ├── next-env.d.ts ├── next.config.ts ├── package.json ├── playwright.config.ts ├── pnpm-lock.yaml ├── postcss.config.mjs ├── public │ ├── config │ │ └── models.json │ ├── fonts │ │ ├── geist-mono.woff2 │ │ └── geist.woff2 │ ├── images │ │ ├── demo-thumbnail.png │ │ └── placeholder-image.png │ └── providers │ │ └── 
logos │ │ ├── anthropic.svg │ │ ├── azure.svg │ │ ├── deepseek.svg │ │ ├── fireworks.svg │ │ ├── google.svg │ │ ├── groq.svg │ │ ├── ollama.svg │ │ ├── openai-compatible.svg │ │ ├── openai.svg │ │ └── xai.svg ├── search_components │ ├── answer-section.tsx │ ├── chat-messages.tsx │ ├── chat-panel.tsx │ ├── chat-share.tsx │ ├── chat.tsx │ ├── clear-history.tsx │ ├── collapsible-message.tsx │ ├── custom-link.tsx │ ├── default-skeleton.tsx │ ├── empty-screen.tsx │ ├── footer.tsx │ ├── header.tsx │ ├── history-container.tsx │ ├── history-item.tsx │ ├── history-list.tsx │ ├── history-skeleton.tsx │ ├── history.tsx │ ├── message-actions.tsx │ ├── message.tsx │ ├── mode-toggle.tsx │ ├── model-selector.tsx │ ├── reasoning-answer-section.tsx │ ├── related-questions.tsx │ ├── render-message.tsx │ ├── retrieve-section.tsx │ ├── search-mode-toggle.tsx │ ├── search-results-image.tsx │ ├── search-results.tsx │ ├── search-section.tsx │ ├── section.tsx │ ├── sidebar.tsx │ ├── theme-provider.tsx │ ├── tool-badge.tsx │ ├── tool-section.tsx │ ├── ui │ │ ├── alert-dialog.tsx │ │ ├── avatar.tsx │ │ ├── badge.tsx │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── carousel.tsx │ │ ├── checkbox.tsx │ │ ├── codeblock.tsx │ │ ├── collapsible.tsx │ │ ├── command.tsx │ │ ├── dialog.tsx │ │ ├── dropdown-menu.tsx │ │ ├── icons.tsx │ │ ├── input.tsx │ │ ├── label.tsx │ │ ├── markdown.tsx │ │ ├── popover.tsx │ │ ├── select.tsx │ │ ├── separator.tsx │ │ ├── sheet.tsx │ │ ├── skeleton.tsx │ │ ├── slider.tsx │ │ ├── sonner.tsx │ │ ├── spinner.tsx │ │ ├── status-indicator.tsx │ │ ├── switch.tsx │ │ ├── textarea.tsx │ │ ├── toggle.tsx │ │ └── tooltip.tsx │ ├── user-message.tsx │ ├── video-search-results.tsx │ └── video-search-section.tsx ├── tailwind.config.ts ├── tests │ ├── e2e │ │ ├── artifacts.test.ts │ │ ├── chat.test.ts │ │ ├── reasoning.test.ts │ │ └── session.test.ts │ ├── fixtures.ts │ ├── helpers.ts │ ├── pages │ │ ├── artifact.ts │ │ ├── auth.ts │ │ └── chat.ts │ ├── prompts │ │ ├── basic.ts │ │ ├── routes.ts │ │ └── utils.ts │ └── routes │ │ ├── chat.test.ts │ │ └── document.test.ts └── tsconfig.json ├── neosearch_llm ├── sglang │ ├── deploy_sgllm_docker.sh │ ├── export_requirements_from_poetry.sh │ ├── launch_sgllm.sh │ ├── launch_sgllm_tensor_parallel.sh │ └── pyproject.toml └── vllm │ ├── export_requirements_from_poetry.sh │ ├── inference.py │ ├── launch_vllm.sh │ ├── pyproject.toml │ ├── run_cluster.sh │ ├── run_with_ray_cluster.md │ └── uv.lock ├── public_icann_suffix.dat ├── public_suffix_list.dat ├── resources ├── docs │ ├── advanced_rag.md │ ├── crawling.md │ ├── data.md │ ├── deep_research.md │ ├── getting_started.md │ ├── gpt_deep_research_backend.md │ ├── helpful_resources.md │ ├── imgs │ │ ├── search_bench.png │ │ └── yandex_search_architecture.png │ ├── kuberay.md │ ├── migrate_from_poetry_to_uv.md │ ├── prompts │ │ ├── dense_x_prompt.md │ │ └── perplexica.md │ ├── references.md │ └── yandex_search_architecture.md ├── factors │ └── yandex_factors_gen.txt ├── postgres │ ├── .gitignore │ ├── cloudnative_pg │ │ ├── examples │ │ │ ├── auth-prod.yaml │ │ │ ├── backup-od.yaml │ │ │ ├── cluster-prod.yaml │ │ │ ├── cluster-restore.yaml │ │ │ ├── storageclass-gp3.yaml │ │ │ ├── storageclass.yaml │ │ │ └── world.sql │ │ ├── helm-files │ │ │ └── values.yaml │ │ └── monitoring │ │ │ ├── alerts.yaml │ │ │ ├── cnpg-prometheusrule.yaml │ │ │ ├── grafana-configmap.yaml │ │ │ ├── grafana-dashboard.json │ │ │ └── kube-stack-config.yaml │ ├── electric │ │ ├── README.md │ │ └── docker_compose │ │ │ ├── docker-compose.yml │ │ │ 
└── postgres.conf │ ├── hybrid_search.md │ ├── paradedb │ │ ├── README.md │ │ ├── autocomplete_tutorial.md │ │ ├── connect_psql.sh │ │ ├── extract_all_stored_procedures.sh │ │ ├── procedures_paradedb_0.13.1.sql │ │ ├── run_with_docker.sh │ │ ├── search_tutorial.md │ │ ├── values.yaml │ │ └── wikipedia_data.md │ ├── pgvectorscale │ │ └── README.md │ ├── postgis │ │ └── install_postgis_on_mac_with_postgresql16.md │ └── psql │ │ ├── efficient_search_engine.md │ │ ├── fulltext_search_english.sql │ │ ├── korean_dictionary_setup.sql │ │ ├── postgres_fulltext_search.sql │ │ ├── ts_config.sql │ │ └── vector_search.md ├── sample_codes │ ├── claude_contextual_retrieval │ │ ├── inference_adapter.py │ │ ├── lambda_function.py │ │ └── s3_adapter.py │ ├── golden_retriever.py │ ├── late-chunking │ │ ├── README.md │ │ ├── chunked_pooling │ │ │ ├── __init__.py │ │ │ ├── chunked_eval_tasks.py │ │ │ ├── chunking.py │ │ │ ├── mteb_chunked_eval.py │ │ │ └── wrappers.py │ │ ├── examples.ipynb │ │ ├── explanatory_contextual_retrieval.py │ │ ├── img │ │ │ ├── context-problem.png │ │ │ ├── method.png │ │ │ └── rag.png │ │ ├── pyproject.toml │ │ ├── run_chunked_eval.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_api.py │ │ │ ├── test_chunking_methods.py │ │ │ └── test_v3.py │ ├── pgvector_python │ │ ├── citus_with_pgvector.py │ │ ├── colbert_exact_match.py │ │ ├── hybrid_search │ │ │ ├── cross_encoder.py │ │ │ └── hybrid_search.py │ │ └── image_search.py │ ├── vectordb │ │ ├── __init__.py │ │ ├── chunking.py │ │ ├── embedding.py │ │ ├── memory.py │ │ ├── storage.py │ │ └── vector_search.py │ └── web-crawler │ │ ├── README.md │ │ ├── anacron │ │ ├── backupSearch Engine-20200705T185356Z-001.zip │ │ ├── build_db.py │ │ ├── main.py │ │ ├── readme.txt │ │ ├── run.sh │ │ └── tempclean.py ├── stopwords │ ├── Afrikaans.txt │ ├── Albanian.txt │ ├── Arabic.txt │ ├── Aragonese.txt │ ├── Armenian.txt │ ├── Aromanian.txt │ ├── Asturian.txt │ ├── Azerbaijani.txt │ ├── Basque.txt │ ├── Belarusian.txt │ ├── Belarusian_Taraskievica.txt │ ├── Bengali.txt │ ├── Bishnupriya_Manipuri.txt │ ├── Bosnian.txt │ ├── Breton.txt │ ├── Bulgarian.txt │ ├── Catalan.txt │ ├── Cebuano.txt │ ├── Chuvash.txt │ ├── Croatian.txt │ ├── Czech.txt │ ├── Danish.txt │ ├── Dutch.txt │ ├── English.txt │ ├── Esperanto.txt │ ├── Estonian.txt │ ├── Finnish.txt │ ├── French.txt │ ├── Galician.txt │ ├── Georgian.txt │ ├── German.txt │ ├── Greek.txt │ ├── Gujarati.txt │ ├── Haitian.txt │ ├── Hebrew.txt │ ├── Hindi.txt │ ├── Hungarian.txt │ ├── Icelandic.txt │ ├── Ido.txt │ ├── Igbo.txt │ ├── Indonesian.txt │ ├── Irish.txt │ ├── Italian.txt │ ├── Japanese.txt │ ├── Javanese.txt │ ├── Kannada.txt │ ├── Kazakh.txt │ ├── Korean.txt │ ├── Kurdish.txt │ ├── Kyrgyz.txt │ ├── Latin.txt │ ├── Latvian.txt │ ├── Lithuanian.txt │ ├── Lombard.txt │ ├── Low_Saxon.txt │ ├── Luxembourgish.txt │ ├── Macedonian.txt │ ├── Malay.txt │ ├── Malayalam.txt │ ├── Maltese.txt │ ├── Marathi.txt │ ├── Neapolitan.txt │ ├── Nepali.txt │ ├── Newar.txt │ ├── Norwegian_Bokmal.txt │ ├── Norwegian_Nynorsk.txt │ ├── Occitan.txt │ ├── Persian.txt │ ├── Piedmontese.txt │ ├── Polish.txt │ ├── Portuguese.txt │ ├── Quechua.txt │ ├── Romanian.txt │ ├── Russian.txt │ ├── Samogitian.txt │ ├── Serbian.txt │ ├── Serbo_Croatian.txt │ ├── Sicilian.txt │ ├── Simple_English.txt │ ├── Slovak.txt │ ├── Slovenian.txt │ ├── Spanish.txt │ ├── Sundanese.txt │ ├── Swahili.txt │ ├── Swedish.txt │ ├── Tagalog.txt │ ├── Tamil.txt │ ├── Telugu.txt │ ├── Turkish.txt │ ├── Turkmen.txt │ ├── 
Ukrainian.txt │ ├── Urdu.txt │ ├── Uzbek.txt │ ├── Vietnamese.txt │ ├── Volapuk.txt │ ├── Walloon.txt │ ├── Waray_Waray.txt │ ├── Welsh.txt │ ├── West_Frisian.txt │ ├── Western_Panjabi.txt │ └── Yoruba.txt └── system_prompts_leaks │ ├── ChatGPT-4o-image-safety-policies.md │ ├── ChatGPT-Advanced-voice-mode.md │ ├── chatgpt-4o-latest-injection │ ├── chatgpt-automation-tool.md │ ├── claude-3.7-full-system-message-with-all-tools.md │ ├── claude-3.7-sonnet-full-system-message-humanreadable.md │ ├── o3-o4-mini-api.md │ └── o4-mini-chatgpt.com.md └── searxng ├── limiter.toml ├── settings.yml └── uwsgi.ini
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: YeonwooSung
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12 | polar: # Replace with a single Polar username
13 | buy_me_a_coffee: blackbeenie
14 | thanks_dev: # Replace with a single thanks.dev username
15 | custom:
--------------------------------------------------------------------------------
/.github/dependabot_npm.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 | 
6 | version: 2
7 | updates:
8 |   - package-ecosystem: "npm" # See documentation for possible values
9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 | 
--------------------------------------------------------------------------------
/.github/dependabot_pip.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 | 
6 | version: 2
7 | updates:
8 |   - package-ecosystem: "pip" # See documentation for possible values
9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 | 
--------------------------------------------------------------------------------
/Dockerfile.core:
--------------------------------------------------------------------------------
1 | FROM python:3.11
2 | 
3 | ENV VERSION=0.1.0
4 | 
5 | WORKDIR /app
6 | 
7 | COPY neosearch ./neosearch
8 | 
9 | # run from the package directory; keep /app on PYTHONPATH so "neosearch" imports resolve
10 | WORKDIR /app/neosearch
11 | ENV PYTHONPATH=/app
12 | RUN pip install uv
13 | RUN sh export_requirements_txt.sh
14 | RUN pip uninstall -y uv
15 | 
16 | # Install dependencies
17 | RUN pip install --no-cache-dir -r requirements.txt
18 | 
19 | # Expose port for networking
20 | EXPOSE 8000
21 | 
22 | # Run the server
23 | CMD ["python", "main.py"]
24 | 
--------------------------------------------------------------------------------
/Dockerfile.crawler:
--------------------------------------------------------------------------------
1 | FROM python:3.11
2 | 
3 | ENV VERSION=0.1.2
4 | 
5 | WORKDIR /app
6 | 
7 | COPY neosearch_crawler ./neosearch_crawler
8 | 
9 | # run from the crawler directory; keep /app on PYTHONPATH so "neosearch_crawler" imports resolve
10 | WORKDIR /app/neosearch_crawler
11 | ENV PYTHONPATH=/app
12 | RUN pip install poetry
13 | RUN sh export_requirements_from_poetry.sh
14 | RUN pip uninstall -y poetry
15 | 
16 | # Install dependencies
17 | RUN pip install --no-cache-dir -r requirements.txt
18 | 
19 | # run the crawler (Ray must be started at runtime, not at image-build time)
20 | CMD ["sh", "-c", "ray start --head && python main.py"]
21 | 
22 | 
23 | # Expose port for networking
24 | EXPOSE 8265
25 | 
--------------------------------------------------------------------------------
/Dockerfile.frontend:
--------------------------------------------------------------------------------
1 | FROM node:20.18.0-alpine
2 | 
3 | ENV VERSION=0.1.0
4 | ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
5 | ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
6 | ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
7 | ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
8 | 
9 | WORKDIR /app
10 | 
11 | COPY neosearch_frontend ./neosearch_frontend
12 | 
13 | # move to the neosearch_frontend directory
14 | WORKDIR /app/neosearch_frontend
15 | 
16 | # Install dependencies (pnpm is not bundled with the base image)
17 | RUN npm install -g pnpm && pnpm install
18 | 
19 | # Expose port for networking
20 | EXPOSE 3000
21 | 
22 | # run the frontend
23 | CMD ["npm", "run", "dev"]
24 | 
--------------------------------------------------------------------------------
/assets/imgs/aisearch_question_suggestion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/aisearch_question_suggestion.png
--------------------------------------------------------------------------------
/assets/imgs/aisearch_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/aisearch_result.png
--------------------------------------------------------------------------------
/assets/imgs/chat_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/chat_view.png
--------------------------------------------------------------------------------
/assets/imgs/code_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/code_view.png
--------------------------------------------------------------------------------
/assets/imgs/financial-table-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/financial-table-1.png
--------------------------------------------------------------------------------
/assets/imgs/financial-table-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/financial-table-2.png
--------------------------------------------------------------------------------
/assets/imgs/search_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/imgs/search_view.png
--------------------------------------------------------------------------------
/assets/neosearch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/neosearch.png
--------------------------------------------------------------------------------
/assets/neosearch.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/assets/neosearch.webp
--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ## ~ 2025.03.13
4 | 
5 | - [x] Implement the AISearch view
6 | - [x] Add Tavily support to the AISearch view
7 | - [x] Add SearXNG support to the AISearch view
8 | 
9 | - [x] Replace poetry with uv
10 | - [x] Replace poetry with uv for `neosearch`
11 | - [x] Replace poetry with uv for `neosearch_ai`
12 | - [x] Replace poetry with uv for `neosearch_llm`
13 | - [x] Replace poetry with uv for `neosearch_crawler`
14 | 
15 | - [x] Implement the batch system for the spider
16 | - [x] Implement the spider with Trafilatura
17 | - [x] Implement continuous batching for the spider
18 | 
19 | - [x] Update the RAG retriever to use the SearXNG engine
20 | 
21 | - [x] Implement the CRAG workflow for the RAG retriever
22 | - [x] Add support for a CRAG API that runs the CRAG workflow on the user's query
23 | 
24 | - [x] Implement the reranker
25 | - [x] Add support for the Cohere reranker
26 | - [x] Add support for the [FlashRank](https://github.com/PrithivirajDamodaran/FlashRank) reranker
27 | 
--------------------------------------------------------------------------------
/deploy_searxng_with_docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | PORT=8080
3 | docker pull searxng/searxng
4 | docker run --rm \
5 |   -d -p ${PORT}:8080 \
6 |   -v "${PWD}/searxng:/etc/searxng:rw" \
7 |   -e "BASE_URL=http://localhost:$PORT/" \
8 |   -e "INSTANCE_NAME=searxng-instance" \
9 |   searxng/searxng
10 | 
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # This is a Docker Compose file for setting up the neosearch-stack environment.
2 | 
3 | name: neosearch-stack
4 | services:
5 |   neosearch:
6 |     build:
7 |       context: . # The build context is the current directory
8 |       dockerfile: Dockerfile.frontend
9 |     command: npm run dev
10 |     env_file: neosearch_frontend/.env
11 |     ports:
12 |       - '3000:3000' # Maps port 3000 on the host to port 3000 in the container.
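    # start redis and searxng before the frontend (start order only; depends_on does not wait for readiness)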
13 | depends_on: 14 | - redis 15 | - searxng 16 | 17 | redis: 18 | image: redis:alpine 19 | ports: 20 | - '6379:6379' 21 | volumes: 22 | - redis_data:/data 23 | command: redis-server --appendonly yes 24 | 25 | searxng: 26 | image: searxng/searxng 27 | ports: 28 | - '${SEARXNG_PORT:-8080}:8080' 29 | volumes: 30 | - ./searxng/limiter.toml:/etc/searxng/limiter.toml 31 | - ./searxng/settings.yml:/etc/searxng/settings.yml 32 | - searxng_data:/data 33 | 34 | volumes: 35 | redis_data: 36 | searxng_data: 37 | -------------------------------------------------------------------------------- /neosearch/.env.template: -------------------------------------------------------------------------------- 1 | LLM_TEMPERATURE= 2 | LLM_MAX_TOKENS= 3 | 4 | # anthropic 5 | ANTHROPIC_MODEL=claude-3.7 6 | ANTHROPIC_API_KEY=sk-2xX3 7 | 8 | # openai 9 | OPENAI_MODEL=gpt-4 10 | OPENAI_API_KEY= 11 | 12 | # ollama 13 | OLLAMA_MODEL=llama3.1:latest 14 | OLLAMA_EMBEDDING_MODEL=bge-m3 15 | 16 | # pg_vector 17 | PG_CONNECTION_STRING= 18 | 19 | # qdrant 20 | QDRANT_URL=http://localhost:6333 21 | QDRANT_API_KEY= # Optional, if not set, it will be ignored 22 | 23 | # Web search API 24 | WEB_SEARCH_API="tavily" # tavily, searxng 25 | 26 | # Tavily 27 | TAVILY_API_KEY=tvly-... 28 | -------------------------------------------------------------------------------- /neosearch/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | storage 3 | .env 4 | 5 | # sample query response 6 | sample_query_response.txt 7 | -------------------------------------------------------------------------------- /neosearch/.python-version: -------------------------------------------------------------------------------- 1 | 3.11 -------------------------------------------------------------------------------- /neosearch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/__init__.py -------------------------------------------------------------------------------- /neosearch/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/api/__init__.py -------------------------------------------------------------------------------- /neosearch/api/routers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/api/routers/__init__.py -------------------------------------------------------------------------------- /neosearch/api/routers/health_check.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, status 2 | 3 | # custom module 4 | from neosearch.models.health_check import HealthCheck 5 | from neosearch.utils.logging import Logger 6 | 7 | 8 | logger = Logger() 9 | 10 | # Create a router for the chat endpoint 11 | health_router = r = APIRouter() 12 | 13 | 14 | @r.get( 15 | "", 16 | summary="Perform a Health Check", 17 | response_description="Return HTTP Status Code 200 (OK)", 18 | status_code=status.HTTP_200_OK, 19 | response_model=HealthCheck, 20 | ) 21 | async def health_check() -> dict: 22 | """ 23 | Health check endpoint to verify the API is running. 
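    Returns a static HealthCheck(status="OK") payload with HTTP 200; no downstream dependencies are checked.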
24 | """ 25 | return HealthCheck(status="OK") 26 | -------------------------------------------------------------------------------- /neosearch/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/app/__init__.py -------------------------------------------------------------------------------- /neosearch/app/worker_broker.py: -------------------------------------------------------------------------------- 1 | from faststream.kafka import KafkaBroker 2 | from faststream.redis import RedisBroker 3 | 4 | # custom modules 5 | from neosearch.constants.queue import USE_QUEUE, QUEUE_TYPE 6 | 7 | 8 | # global singleton 9 | _my_broker = None 10 | 11 | 12 | def get_worker_broker(): 13 | global _my_broker 14 | if _my_broker is not None: 15 | return _my_broker 16 | 17 | if not USE_QUEUE: 18 | raise Exception("Queue is not enabled") 19 | 20 | if QUEUE_TYPE == "redis": 21 | from neosearch.constants.queue import REDIS_URL, REDIS_DB 22 | 23 | broker = RedisBroker( 24 | url=REDIS_URL, db=REDIS_DB 25 | ) 26 | elif QUEUE_TYPE == "kafka": 27 | from neosearch.constants.queue import ( 28 | KAFKA_BOOTSTRAP_SERVERS, 29 | KAFKA_REQUEST_TIMEOUT_MS, 30 | KAFKA_MAX_IDLE_MS, 31 | KAFKA_COMPRESSION_TYPE, 32 | ) 33 | 34 | broker = KafkaBroker( 35 | bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS, 36 | request_timeout_ms=KAFKA_REQUEST_TIMEOUT_MS, 37 | connections_max_idle_ms=KAFKA_MAX_IDLE_MS, 38 | compression_type=KAFKA_COMPRESSION_TYPE, 39 | ) 40 | 41 | else: 42 | raise Exception("Invalid queue type") 43 | 44 | _my_broker = broker 45 | return broker 46 | -------------------------------------------------------------------------------- /neosearch/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DATA_DIR = "data" 4 | STATIC_DIR = os.getenv("STATIC_DIR", "static") 5 | -------------------------------------------------------------------------------- /neosearch/config.yaml: -------------------------------------------------------------------------------- 1 | neosearch: 2 | llm: 3 | # ["anthropic", "openai", "ollama"] 4 | type: ollama 5 | -------------------------------------------------------------------------------- /neosearch/constants/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/constants/__init__.py -------------------------------------------------------------------------------- /neosearch/constants/circuitbreaker.py: -------------------------------------------------------------------------------- 1 | CB_FAIL_MAX = 5 2 | CB_RESET_TIMEOUT = 60 -------------------------------------------------------------------------------- /neosearch/constants/embeddings.py: -------------------------------------------------------------------------------- 1 | # Use Ollama embeddings for true, otherwise use fastembed 2 | # Claude, Grok does not support embeddings, so we use either Ollama or FastEmbed 3 | USE_OLLAMA_FOR_DEFAULT_EMBEDDING = True 4 | 5 | OLLAMA_EMBEDDING_MODEL_BASE = "bge-m3" 6 | -------------------------------------------------------------------------------- /neosearch/constants/logging.py: -------------------------------------------------------------------------------- 1 | LOG_DEFAULT_LOG_NAME = "neosearch" 2 | LOG_DEFAULT_LOG_LEVEL = "DEBUG" 3 | LOG_DEFAULT_CONSOLE_LOG_LEVEL 
= "WARNING" 4 | LOG_DEFAULT_MAX_BYTES = 10485760 5 | LOG_DEFAULT_BACKUP_COUNT = 10 6 | LOG_DEFAULT_LOGGING_WORKERS = 1 7 | -------------------------------------------------------------------------------- /neosearch/constants/memory.py: -------------------------------------------------------------------------------- 1 | MAX_MEMORY_TOKEN_SIZE = 8000 2 | -------------------------------------------------------------------------------- /neosearch/constants/queue.py: -------------------------------------------------------------------------------- 1 | USE_QUEUE = True 2 | QUEUE_TYPE = "redis" # "redis" or "kafka" 3 | 4 | # redis 5 | REDIS_URL = "redis://localhost:6379" 6 | REDIS_DB = "redis" 7 | 8 | # kafka 9 | KAFKA_BOOTSTRAP_SERVERS = "localhost:9092" 10 | KAFKA_REQUEST_TIMEOUT_MS = 3000 11 | KAFKA_MAX_IDLE_MS = 540000 12 | KAFKA_COMPRESSION_TYPE = "zstd" # 'gzip', 'snappy', 'lz4', 'zstd' 13 | -------------------------------------------------------------------------------- /neosearch/constants/retriever.py: -------------------------------------------------------------------------------- 1 | VECTOR_INDEX_SIM_TOP_K=5 2 | VECTOR_INDEX_EMPTY_QUERY_TOP_K=10 3 | VECTOR_INDEX_VERBOSE=False 4 | -------------------------------------------------------------------------------- /neosearch/constants/searxng.py: -------------------------------------------------------------------------------- 1 | SEARXNG_BASE_URL = "https://searx.example.com" 2 | -------------------------------------------------------------------------------- /neosearch/constants/trace.py: -------------------------------------------------------------------------------- 1 | USE_TRACELOOP = False 2 | -------------------------------------------------------------------------------- /neosearch/datastore/__init__.py: -------------------------------------------------------------------------------- 1 | from .database import get_async_session, get_session, engine, async_engine 2 | 3 | 4 | __all__ = [ 5 | "get_async_session", 6 | "get_session", 7 | "engine", 8 | "async_engine", 9 | ] -------------------------------------------------------------------------------- /neosearch/datastore/crud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/datastore/crud/__init__.py -------------------------------------------------------------------------------- /neosearch/datastore/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/datastore/model/__init__.py -------------------------------------------------------------------------------- /neosearch/datastore/model/chat.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Text, ForeignKey, TIMESTAMP 2 | from sqlalchemy.dialects.postgresql import UUID, ENUM 3 | from sqlalchemy.sql import func 4 | 5 | # custom modules 6 | from .base import Base 7 | 8 | 9 | visibility_enum = ENUM('public', 'private', name='visibility_enum', create_type=False) 10 | 11 | class Chat(Base): 12 | __tablename__ = 'Chat' 13 | 14 | id = Column( 15 | UUID(as_uuid=True), 16 | primary_key=True, 17 | server_default=func.gen_random_uuid(), 18 | nullable=False, 19 | ) 20 | created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) 21 | title = Column(Text, nullable=False) 22 | 
    user_id = Column(UUID(as_uuid=True), ForeignKey('User.id'), nullable=False)
23 |     visibility = Column(
24 |         visibility_enum,
25 |         nullable=False,
26 |         server_default="private"
27 |     )
28 | 
--------------------------------------------------------------------------------
/neosearch/datastore/model/document.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Text, ForeignKey, TIMESTAMP, PrimaryKeyConstraint
2 | from sqlalchemy.dialects.postgresql import UUID, ENUM
3 | from sqlalchemy.sql import func
4 | from uuid_extensions import uuid7str
5 | 
6 | # custom modules
7 | from .base import Base
8 | 
9 | 
10 | # Define ENUM for the 'kind' field
11 | kind_enum = ENUM('text', 'code', name='kind_enum', create_type=False)
12 | 
13 | 
14 | class Document(Base):
15 |     __tablename__ = 'Document'
16 | 
17 |     id = Column(UUID(as_uuid=True), nullable=False, default=uuid7str)  # callable default: a fresh UUIDv7 per row
18 |     created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
19 |     title = Column(Text, nullable=False)
20 |     content = Column(Text, nullable=True)
21 |     kind = Column(kind_enum, nullable=False, server_default='text')
22 |     user_id = Column(UUID(as_uuid=True), ForeignKey('User.id'), nullable=False)
23 | 
24 |     # Composite primary key
25 |     __table_args__ = (
26 |         PrimaryKeyConstraint('id', 'created_at', name='document_pk'),
27 |     )
28 | 
--------------------------------------------------------------------------------
/neosearch/datastore/model/message.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, String, JSON, ForeignKey, TIMESTAMP
2 | from sqlalchemy.dialects.postgresql import UUID
3 | from sqlalchemy.sql import func
4 | from uuid_extensions import uuid7str
5 | 
6 | # custom modules
7 | from .base import Base
8 | 
9 | 
10 | class Message(Base):
11 |     __tablename__ = 'Message'
12 | 
13 |     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid7str, nullable=False)  # callable default, evaluated per insert
14 |     chat_id = Column(UUID(as_uuid=True), ForeignKey('Chat.id'), nullable=False)
15 |     role = Column(String, nullable=False)
16 |     content = Column(JSON, nullable=False)
17 |     created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
18 | 
19 |     def __repr__(self):
20 |         return f"<Message(id={self.id}, chat_id={self.chat_id}, role={self.role})>"
21 | 
--------------------------------------------------------------------------------
/neosearch/datastore/model/vote.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Boolean, ForeignKey, PrimaryKeyConstraint
2 | from sqlalchemy.dialects.postgresql import UUID
3 | 
4 | # custom modules
5 | from .base import Base
6 | 
7 | 
8 | class Vote(Base):
9 |     __tablename__ = 'Vote'
10 | 
11 |     chat_id = Column(UUID(as_uuid=True), ForeignKey('Chat.id'), nullable=False)
12 |     message_id = Column(UUID(as_uuid=True), ForeignKey('Message.id'), nullable=False)
13 |     is_upvoted = Column(Boolean, nullable=False)
14 | 
15 |     # Composite primary key
16 |     __table_args__ = (
17 |         PrimaryKeyConstraint('chat_id', 'message_id', name='vote_pk'),
18 |     )
19 | 
--------------------------------------------------------------------------------
/neosearch/datastore/vectorstores/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/datastore/vectorstores/__init__.py
--------------------------------------------------------------------------------
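A minimal persistence sketch for the models above (assumptions: get_async_session(async_engine) returns a usable AsyncSession, mirroring the call shape in engine/retriever/paradedb.py, and the tables already exist in PostgreSQL):

import asyncio

from neosearch.datastore import async_engine, get_async_session
from neosearch.datastore.model.chat import Chat
from neosearch.datastore.model.message import Message


async def seed_example(user_id: str) -> None:
    # assumption: get_async_session(async_engine) returns an AsyncSession
    session = get_async_session(async_engine)
    async with session.begin():
        chat = Chat(title="First chat", user_id=user_id, visibility="private")
        session.add(chat)
        await session.flush()  # lets PostgreSQL assign chat.id via gen_random_uuid()
        session.add(Message(chat_id=chat.id, role="user", content={"text": "hello"}))


if __name__ == "__main__":
    asyncio.run(seed_example("00000000-0000-0000-0000-000000000000"))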
/neosearch/datastore/vectorstores/base.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | 
3 | # custom module
4 | from neosearch.utils.singleton import Singleton
5 | 
6 | 
7 | class BaseVectorStore(metaclass=Singleton):
8 |     @abstractmethod
9 |     def _build_vector_store(self):
10 |         pass
11 | 
12 |     @abstractmethod
13 |     def get_store(self):
14 |         pass
15 | 
16 |     @abstractmethod
17 |     def refresh(self):
18 |         pass
19 | 
--------------------------------------------------------------------------------
/neosearch/datastore/vectorstores/qdrant_vector_stores.py:
--------------------------------------------------------------------------------
1 | import os
2 | from qdrant_client import QdrantClient
3 | from llama_index.vector_stores.qdrant import QdrantVectorStore
4 | 
5 | # custom module
6 | from neosearch.utils.singleton import Singleton
7 | 
8 | from .base import BaseVectorStore
9 | 
10 | 
11 | class QdrantVectorStoreContainer(BaseVectorStore, metaclass=Singleton):
12 |     def __init__(self):
13 |         self._build_vector_store()
14 | 
15 |     def _build_vector_store(self):
16 |         self.vec_db_client = QdrantClient(
17 |             url=os.environ.get("QDRANT_URL"),
18 |             api_key=os.environ.get("QDRANT_API_KEY")
19 |         )
20 |         self.store = QdrantVectorStore(client=self.vec_db_client, collection_name=os.environ.get("QDRANT_COLLECTION", "neosearch"))  # a collection name is required; QDRANT_COLLECTION is an assumed env var
21 | 
22 |     def get_store(self):
23 |         return self.store
24 | 
25 |     def refresh(self):
26 |         self._build_vector_store()
27 |         return self.store
28 | 
--------------------------------------------------------------------------------
/neosearch/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/__init__.py
--------------------------------------------------------------------------------
/neosearch/engine/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/agents/__init__.py
--------------------------------------------------------------------------------
/neosearch/engine/agents/tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .research_tools import record_notes, review_report, write_report, save_generate_questions
2 | from .web_search import search_web
3 | 
4 | 
5 | __all__ = [
6 |     # web_search.py
7 |     "search_web",
8 |     # research_tools.py
9 |     "record_notes",
10 |     "review_report",
11 |     "write_report",
12 |     "save_generate_questions",
13 | ]
--------------------------------------------------------------------------------
/neosearch/engine/agents/tools/web_search.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tavily import AsyncTavilyClient
3 | from llama_index.core.workflow import Context
4 | 
5 | # custom modules
6 | from neosearch.engine.utils.searxng import SearxngAdaptor
7 | 
8 | 
9 | TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "tvly-...")
10 | WEB_SEARCH_API = os.getenv("WEB_SEARCH_API", "tavily")
11 | SEARXNG_BASE_URL = os.getenv("SEARXNG_BASE_URL", "http://localhost:8888")
12 | 
13 | 
14 | async def search_web(ctx: Context, query: str) -> str:
15 |     """Useful for using the web to answer questions."""
16 |     search_result = ""
17 | 
18 |     if WEB_SEARCH_API == "tavily":
19 |         client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
20 |         search_result = await client.search(
21 |             query,
22 | 
search_depth="basic", # "basic", advanced 23 | topic="general", # "general", "news" 24 | max_results=20, 25 | ) 26 | elif WEB_SEARCH_API == "searxng": 27 | # searxng_search_result = await searxng_search(query) 28 | adaptor = SearxngAdaptor(SEARXNG_BASE_URL) 29 | search_result = await adaptor.asearch(query) 30 | 31 | if search_result != "": 32 | current_state = await ctx.get("state") 33 | current_state["web_search_result"] = search_result 34 | await ctx.set("state", current_state) 35 | 36 | return str(search_result) 37 | -------------------------------------------------------------------------------- /neosearch/engine/constants.py: -------------------------------------------------------------------------------- 1 | PGVECTOR_SCHEMA = "public" 2 | PGVECTOR_TABLE = "llamaindex_embedding" -------------------------------------------------------------------------------- /neosearch/engine/db_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # custom module 3 | from neosearch.datastore.vectorstores.pg_vector_stores import PgVectorStoreContainer 4 | from neosearch.datastore.vectorstores.pgrs_vector_stores import PgRsVectorStoreContainer 5 | 6 | 7 | def init_pg_vector_store_from_env(): 8 | # use singleton to ensure only one instance of the vector store is created 9 | vectorstore = PgVectorStoreContainer() 10 | return vectorstore.get_store() 11 | 12 | 13 | def init_pg_vecto_rs_store_from_env(): 14 | # use singleton to ensure only one instance of the vector store is created 15 | vectorstore = PgRsVectorStoreContainer() 16 | return vectorstore.get_store() 17 | -------------------------------------------------------------------------------- /neosearch/engine/index.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.indices.vector_store import VectorStoreIndex 2 | 3 | # custom module 4 | from neosearch.engine.db_utils import init_pg_vector_store_from_env 5 | from neosearch.utils.logging import Logger 6 | 7 | logger = Logger() 8 | 9 | 10 | def get_pg_index(): 11 | logger.log_info("Connecting to index from PGVector...") 12 | store = init_pg_vector_store_from_env() 13 | index = VectorStoreIndex.from_vector_store(store, use_async=True) 14 | logger.log_info("Finished connecting to index from PGVector.") 15 | return index 16 | 17 | def get_index(vector_store_type: str = "pg") -> VectorStoreIndex: 18 | if vector_store_type == "pg": 19 | return get_pg_index() 20 | else: 21 | raise ValueError(f"Invalid vector store type: {vector_store_type}") 22 | -------------------------------------------------------------------------------- /neosearch/engine/loader.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.readers import SimpleDirectoryReader 2 | 3 | DATA_DIR = "data" # directory to cache the generated index 4 | 5 | 6 | def get_documents(): 7 | return SimpleDirectoryReader(DATA_DIR).load_data() 8 | -------------------------------------------------------------------------------- /neosearch/engine/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/prompts/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/prompts/crag_workflow.py: -------------------------------------------------------------------------------- 1 | 
CRAG_RELEVANCY_PROMPT_TEMPLATE = """As a grader, your task is to evaluate the relevance of a document retrieved in response to a user's question. 2 | 3 | Retrieved Document: 4 | ------------------- 5 | {context_str} 6 | 7 | User Question: 8 | -------------- 9 | {query_str} 10 | 11 | Evaluation Criteria: 12 | - Consider whether the document contains keywords or topics related to the user's question. 13 | - The evaluation should not be overly stringent; the primary objective is to identify and filter out clearly irrelevant retrievals. 14 | 15 | Decision: 16 | - Assign a binary score to indicate the document's relevance. 17 | - Use 'yes' if the document is relevant to the question, or 'no' if it is not. 18 | 19 | Please provide your binary score ('yes' or 'no') below to indicate the document's relevance to the user question.""" 20 | 21 | 22 | CRAG_TRANSFORM_QUERY_TEMPLATE = """Your task is to refine a query to ensure it is highly effective for retrieving relevant search results. \n 23 | Analyze the given input to grasp the core semantic intent or meaning. \n 24 | Original Query: 25 | \n ------- \n 26 | {query_str} 27 | \n ------- \n 28 | Your goal is to rephrase or enhance this query to improve its search performance. Ensure the revised query is concise and directly aligned with the intended search objective. \n 29 | Respond with the optimized query only:""" 30 | -------------------------------------------------------------------------------- /neosearch/engine/query_filter.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters 2 | 3 | 4 | def generate_filters(doc_ids): 5 | """ 6 | Generate public/private document filters based on the doc_ids and the vector store. 
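    When doc_ids is non-empty, the filters match public documents OR the selected documents; otherwise only public documents are matched.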
7 | """ 8 | public_doc_filter = MetadataFilter( 9 | key="private", 10 | value="true", 11 | operator="!=", # type: ignore 12 | ) 13 | selected_doc_filter = MetadataFilter( 14 | key="doc_id", 15 | value=doc_ids, 16 | operator="in", # type: ignore 17 | ) 18 | if len(doc_ids) > 0: 19 | # If doc_ids are provided, we will select both public and selected documents 20 | filters = MetadataFilters( 21 | filters=[ 22 | public_doc_filter, 23 | selected_doc_filter, 24 | ], 25 | condition="or", # type: ignore 26 | ) 27 | else: 28 | filters = MetadataFilters( 29 | filters=[ 30 | public_doc_filter, 31 | ] 32 | ) 33 | 34 | return filters 35 | -------------------------------------------------------------------------------- /neosearch/engine/rag_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/rag_engine/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/reranker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/reranker/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/reranker/cohere.py: -------------------------------------------------------------------------------- 1 | import os 2 | from llama_index.postprocessor.cohere_rerank import CohereRerank 3 | 4 | 5 | api_key = os.getenv("COHERE_API_KEY", None) 6 | 7 | 8 | def get_cohere_rerank(top_n: int = 2): 9 | if api_key is None: 10 | raise ValueError("COHERE_API_KEY is not set") 11 | return CohereRerank(api_key=api_key, top_n=top_n) 12 | -------------------------------------------------------------------------------- /neosearch/engine/retriever/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/retriever/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/retriever/base.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.retrievers import BaseRetriever 2 | 3 | # custom modules 4 | from neosearch.engine.index import get_index 5 | 6 | 7 | def get_base_retriever() -> BaseRetriever: 8 | return get_index().as_retriever() 9 | -------------------------------------------------------------------------------- /neosearch/engine/retriever/paradedb.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.retrievers import ( 2 | BaseRetriever, 3 | VectorIndexRetriever, 4 | RouterRetriever, 5 | ) 6 | from llama_index.core.tools import RetrieverTool 7 | from llama_index.core.settings import Settings 8 | 9 | # custom modules 10 | from neosearch.datastore import engine, async_engine, get_session, get_async_session 11 | 12 | 13 | class ParadeDBRetriever(BaseRetriever): 14 | def __init__(self): 15 | super().__init__() 16 | self.engine = engine 17 | self.async_engine = async_engine 18 | 19 | def _retrieve(self, query: str, **kwargs) -> list: 20 | session = get_session(self.engine) 21 | return [] 22 | 23 | 24 | async def _aretrieve(self, query, **kwargs) -> list: 25 | session = get_async_session(self.async_engine) 26 | return [] 27 
| 28 | 29 | def create_router_retriever(self): 30 | retriever_tools = [ 31 | RetrieverTool.from_defaults( 32 | retriever=self, 33 | description="Useful in most cases", 34 | ), 35 | ] 36 | 37 | # load settings 38 | llm = Settings.llm 39 | 40 | return RouterRetriever.from_defaults( 41 | retriever_tools=retriever_tools, 42 | llm=llm, 43 | select_multi=True, 44 | ) 45 | -------------------------------------------------------------------------------- /neosearch/engine/search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/search/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/utils/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/utils/chat.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException, status 2 | from llama_index.core.llms import MessageRole 3 | 4 | # custom imports 5 | from neosearch.models.chat_models import ChatData 6 | 7 | 8 | async def validate_chat_data(data: ChatData): 9 | # check preconditions and get last message 10 | if len(data.messages) == 0: 11 | raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No messages provided",) # noqa: E501 12 | lastMessage = data.messages.pop() 13 | if lastMessage.role != MessageRole.USER: 14 | raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Last message must be from user",) # noqa: E501 15 | return lastMessage 16 | -------------------------------------------------------------------------------- /neosearch/engine/utils/query.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException, status 2 | 3 | # custom imports 4 | from neosearch.models.query_models import QueryData 5 | 6 | 7 | async def validate_query_data(data: QueryData): 8 | query_data = data.query 9 | if query_data is None: 10 | raise HTTPException( 11 | status_code=status.HTTP_400_BAD_REQUEST, detail="No query provided", 12 | ) 13 | return query_data 14 | -------------------------------------------------------------------------------- /neosearch/engine/workflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/workflow/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/workflow/events/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/engine/workflow/events/__init__.py -------------------------------------------------------------------------------- /neosearch/engine/workflow/events/crag.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.workflow import Event 2 | from llama_index.core.schema import NodeWithScore 3 | 4 | 5 | class PrepEvent(Event): 6 | """Prep event (prepares for retrieval).""" 7 | 8 | pass 9 | 10 | 11 | class 
RetrieveEvent(Event): 12 | """Retrieve event (gets retrieved nodes).""" 13 | 14 | retrieved_nodes: list[NodeWithScore] 15 | 16 | 17 | class RelevanceEvalEvent(Event): 18 | """Relevance evaluation event (gets results of relevance evaluation).""" 19 | 20 | relevant_results: list[str] 21 | 22 | 23 | class TextExtractEvent(Event): 24 | """Text extract event. Extracts relevant text and concatenates.""" 25 | 26 | relevant_text: str 27 | 28 | 29 | class QueryEvent(Event): 30 | """Query event. Queries given relevant text and search text.""" 31 | 32 | relevant_text: str 33 | search_text: str 34 | 35 | 36 | # streaming events 37 | 38 | class CragStreamingEvents(Event): 39 | msg: str 40 | 41 | class RetrieveSuccessEvent(CragStreamingEvents): 42 | pass 43 | 44 | class RetrieveFailureEvent(CragStreamingEvents): 45 | pass 46 | 47 | class TransformQueryResultEvent(CragStreamingEvents): 48 | pass 49 | -------------------------------------------------------------------------------- /neosearch/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/exceptions/__init__.py -------------------------------------------------------------------------------- /neosearch/exceptions/bedrock.py: -------------------------------------------------------------------------------- 1 | class BedrockInvalidModelIdException(Exception): 2 | ... 3 | -------------------------------------------------------------------------------- /neosearch/exceptions/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/exceptions/engine/__init__.py -------------------------------------------------------------------------------- /neosearch/exceptions/engine/retriever.py: -------------------------------------------------------------------------------- 1 | class VectorStoreIsNullError(Exception): 2 | ... 3 | -------------------------------------------------------------------------------- /neosearch/export_requirements_txt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check if requirements.txt exists 4 | if [ -f requirements.txt ]; then 5 | echo "requirements.txt already exists. Removing it."
6 | rm requirements.txt 7 | fi 8 | 9 | # poetry export --without-hashes --format=requirements.txt > requirements.txt 10 | uv export --no-hashes --format requirements-txt > requirements.txt -------------------------------------------------------------------------------- /neosearch/infrastructure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/infrastructure/__init__.py -------------------------------------------------------------------------------- /neosearch/infrastructure/aws/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/infrastructure/aws/__init__.py -------------------------------------------------------------------------------- /neosearch/middlewares/__init__.py: -------------------------------------------------------------------------------- 1 | from .request_id import RequestID 2 | from .request_logger import RequestLogger 3 | 4 | __all__ = ["RequestID", "RequestLogger"] 5 | -------------------------------------------------------------------------------- /neosearch/middlewares/request_id.py: -------------------------------------------------------------------------------- 1 | """Adds a uuid to the request header for debugging.""" 2 | 3 | from uuid import uuid4 4 | from fastapi import Request 5 | from starlette.middleware.base import BaseHTTPMiddleware 6 | from starlette.responses import JSONResponse 7 | 8 | # custom modules 9 | from neosearch.utils.logging import Logger 10 | 11 | logger = Logger() 12 | 13 | 14 | class RequestID(BaseHTTPMiddleware): 15 | """Add a uuid to the request header. 16 | 17 | Args: 18 | app (FastAPI): The FastAPI application instance to wrap. 19 | """ 20 | 21 | def __init__(self, app): 22 | super().__init__(app) 23 | 24 | async def dispatch(self, request: Request, call_next): 25 | """ 26 | Implement the dispatch method. 27 | 28 | Args: 29 | request (fastapi.Request): The incoming request instance. 30 | call_next (function): Function that calls the next middleware in the chain.
31 | """ 32 | 33 | try: 34 | request_id = uuid4() 35 | request.state.request_id = request_id 36 | response = await call_next(request) 37 | response.headers["request_id"] = str(request_id) 38 | return response 39 | except Exception as e: 40 | logger.log_warning( 41 | f"method={request.method} | {request.url} | {request.state.request_id} | {e}" 42 | ) 43 | return JSONResponse(status_code=500, content={"reason": str(e)}) 44 | -------------------------------------------------------------------------------- /neosearch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat_models import ( 2 | AnnotationFileData, 3 | AgentAnnotation, 4 | ArtifactAnnotation, 5 | Annotation, 6 | ChatData, 7 | ChatConfig, 8 | Message, 9 | SourceNodes, 10 | Result, 11 | ) 12 | from .health_check import HealthCheck 13 | 14 | 15 | __all__ = [ 16 | # health check 17 | "HealthCheck", 18 | # chat models 19 | "AnnotationFileData", 20 | "AgentAnnotation", 21 | "ArtifactAnnotation", 22 | "Annotation", 23 | "ChatData", 24 | "ChatConfig", 25 | "Message", 26 | "SourceNodes", 27 | "Result", 28 | ] -------------------------------------------------------------------------------- /neosearch/models/health_check.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class HealthCheck(BaseModel): 5 | """Response model to validate and return when performing a health check.""" 6 | 7 | status: str = "OK" 8 | -------------------------------------------------------------------------------- /neosearch/models/query_models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from datetime import datetime 3 | 4 | # custom module 5 | from neosearch.constants.memory import MAX_MEMORY_TOKEN_SIZE 6 | 7 | 8 | class QueryData(BaseModel): 9 | query: str 10 | timezone: str = "UTC" 11 | 12 | 13 | class Memory(BaseModel): 14 | """ 15 | Memory model. 16 | 17 | Attributes: 18 | id (str): The memory ID. (ULID based) 19 | updated_at (str): The updated timestamp. 20 | content (str): The content of the memory. 21 | """ 22 | id: str 23 | updated_at: str = Field(default_factory=lambda: datetime.now().isoformat())  # evaluated per instance, not once at import time 24 | content: str 25 | 26 | class MemoryResponse(BaseModel): 27 | """ 28 | Memory data model. 29 | This represents the memory data model, which contains the additional memory data. 30 | 31 | Attributes: 32 | messages (list[Memory]): The list of messages. 33 | memory_max_tokens (int): The maximum memory tokens. 34 | memory_num_tokens (int): The number of memory tokens that are currently in use (cannot exceed memory_max_tokens).
35 | """ 36 | messages: list[Memory] 37 | memory_max_tokens: int = MAX_MEMORY_TOKEN_SIZE 38 | memory_num_tokens: int = 0 39 | -------------------------------------------------------------------------------- /neosearch/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | follow_imports = skip 3 | check_untyped_defs = True 4 | disallow_untyped_defs = True 5 | files = tests/challenges/**/*.py 6 | 7 | [mypy-requests.*] 8 | ignore_missing_imports = True 9 | [mypy-yaml.*] 10 | ignore_missing_imports = True -------------------------------------------------------------------------------- /neosearch/response/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/response/__init__.py -------------------------------------------------------------------------------- /neosearch/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/services/__init__.py -------------------------------------------------------------------------------- /neosearch/settings/fastembed.py: -------------------------------------------------------------------------------- 1 | import os 2 | from llama_index.core.settings import Settings 3 | 4 | 5 | def init_fastembed( 6 | model_name: str = "BAAI/bge-m3", 7 | max_length: int = 512, 8 | threads: int = 4, 9 | ): 10 | try: 11 | from llama_index.embeddings.fastembed import FastEmbedEmbedding 12 | except ImportError: 13 | raise ImportError( 14 | "FastEmbed support is not installed. Please install it with `poetry add llama-index-embeddings-fastembed`" 15 | ) 16 | 17 | embedding_model = os.getenv("FASTEMBED_EMBEDDING_MODEL", model_name) 18 | if embedding_model is None: 19 | raise ValueError("FASTEMBED_EMBEDDING_MODEL environment variable is not set and no default model name was given") 20 | 21 | # This will download the model automatically if it is not already downloaded 22 | Settings.embed_model = FastEmbedEmbedding( 23 | model_name=embedding_model, 24 | max_length=max_length, 25 | threads=threads, 26 | ) 27 | -------------------------------------------------------------------------------- /neosearch/settings/gemini.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.settings import Settings 2 | import os 3 | 4 | 5 | def init_gemini(): 6 | try: 7 | from llama_index.embeddings.gemini import GeminiEmbedding 8 | from llama_index.llms.gemini import Gemini 9 | except ImportError: 10 | raise ImportError( 11 | "Gemini support is not installed. 
Please install it with `poetry add llama-index-llms-gemini` and `poetry add llama-index-embeddings-gemini`" 12 | ) 13 | 14 | model_name = f"models/{os.getenv('MODEL')}" 15 | embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" 16 | 17 | Settings.llm = Gemini(model=model_name) 18 | Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) 19 | -------------------------------------------------------------------------------- /neosearch/settings/huggingface.py: -------------------------------------------------------------------------------- 1 | import os 2 | from llama_index.core.settings import Settings 3 | 4 | 5 | def init_huggingface(): 6 | try: 7 | from llama_index.llms.huggingface import HuggingFaceLLM 8 | except ImportError: 9 | raise ImportError( 10 | "Hugging Face support is not installed. Please install it with `poetry add llama-index-llms-huggingface` and `poetry add llama-index-embeddings-huggingface`" 11 | ) 12 | 13 | Settings.llm = HuggingFaceLLM( 14 | model_name=os.getenv("MODEL"), 15 | tokenizer_name=os.getenv("MODEL"), 16 | ) 17 | init_huggingface_embedding() 18 | 19 | 20 | def init_huggingface_embedding(): 21 | try: 22 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding 23 | except ImportError: 24 | raise ImportError( 25 | "Hugging Face support is not installed. Please install it with `poetry add llama-index-embeddings-huggingface`" 26 | ) 27 | 28 | embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") 29 | backend = os.getenv("EMBEDDING_BACKEND", "onnx") # "torch", "onnx", or "openvino" 30 | trust_remote_code = ( 31 | os.getenv("EMBEDDING_TRUST_REMOTE_CODE", "false").lower() == "true" 32 | ) 33 | 34 | Settings.embed_model = HuggingFaceEmbedding( 35 | model_name=embedding_model, 36 | trust_remote_code=trust_remote_code, 37 | backend=backend, 38 | ) 39 | -------------------------------------------------------------------------------- /neosearch/settings/mistral.py: -------------------------------------------------------------------------------- 1 | import os 2 | from llama_index.core.settings import Settings 3 | 4 | 5 | def init_mistral(): 6 | from llama_index.embeddings.mistralai import MistralAIEmbedding 7 | from llama_index.llms.mistralai import MistralAI 8 | 9 | Settings.llm = MistralAI(model=os.getenv("MODEL")) 10 | Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) 11 | -------------------------------------------------------------------------------- /neosearch/settings/ollama.py: -------------------------------------------------------------------------------- 1 | from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama 2 | from llama_index.core.settings import Settings 3 | import os 4 | 5 | # custom modules 6 | from neosearch.constants.embeddings import OLLAMA_EMBEDDING_MODEL_BASE 7 | 8 | 9 | def init_ollama_embedding(): 10 | try: 11 | from llama_index.embeddings.ollama import OllamaEmbedding 12 | except ImportError: 13 | raise ImportError( 14 | "Ollama support is not installed. 
Please install it with `poetry add llama-index-llms-ollama` and `poetry add llama-index-embeddings-ollama`" 15 | ) 16 | base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" 17 | Settings.embed_model = OllamaEmbedding( 18 | base_url=base_url, 19 | model_name=os.getenv("OLLAMA_EMBEDDING_MODEL", OLLAMA_EMBEDDING_MODEL_BASE), 20 | ) 21 | 22 | 23 | def init_ollama(): 24 | base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" 25 | request_timeout = float( 26 | os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) 27 | ) 28 | init_ollama_embedding() 29 | Settings.llm = Ollama( 30 | base_url=base_url, model=os.getenv("OLLAMA_MODEL"), request_timeout=request_timeout 31 | ) 32 | -------------------------------------------------------------------------------- /neosearch/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch/tests/__init__.py -------------------------------------------------------------------------------- /neosearch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import Logger 2 | from .ratelimitter import limiter 3 | from .singleton import Singleton 4 | 5 | 6 | __all__ = [ 7 | "Logger", 8 | "limiter", 9 | "Singleton", 10 | ] 11 | -------------------------------------------------------------------------------- /neosearch/utils/configs.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | # custom imports 4 | from neosearch.utils.singleton import Singleton 5 | 6 | 7 | def get_config(): 8 | with open("config.yaml", "r") as f: 9 | return yaml.safe_load(f) 10 | 11 | 12 | class Config(metaclass=Singleton): 13 | def __init__(self): 14 | self.config = get_config() 15 | 16 | def get(self, key): 17 | return self.config.get(key) 18 | 19 | def get_llm_configs(self): 20 | llm_config = self.config.get("neosearch", {}).get("llm", {}) 21 | return llm_config 22 | -------------------------------------------------------------------------------- /neosearch/utils/gc_tuning.py: -------------------------------------------------------------------------------- 1 | import gc 2 | 3 | 4 | def get_current_gc_threshold(): 5 | return gc.get_threshold() 6 | 7 | 8 | def gc_optimization_on_startup(debug:bool=False, disable_gc:bool=False): 9 | if debug: 10 | # gc.DEBUG_STATS: print statistics 11 | # gc.DEBUG_LEAK: print objects that are likely to be leaked 12 | # gc.DEBUG_UNCOLLECTABLE: print objects that cannot be collected 13 | gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK | gc.DEBUG_UNCOLLECTABLE) 14 | 15 | if disable_gc: 16 | gc.disable() 17 | return 18 | 19 | # Libraries like numpy and torch allocate many internal objects when they are initialized at import time. 20 | # Those objects add reference-counting work and make the gc run more often, so freeze them out of collection. 21 | gc.freeze() 22 | 23 | # Running the gc too frequently can also hurt performance, so raise the collection thresholds.
24 | gc.set_threshold(80_000, 20, 20) 25 | -------------------------------------------------------------------------------- /neosearch/utils/ratelimitter.py: -------------------------------------------------------------------------------- 1 | from slowapi import Limiter 2 | from slowapi.util import get_remote_address 3 | 4 | 5 | limiter = Limiter(key_func=get_remote_address) 6 | -------------------------------------------------------------------------------- /neosearch/utils/ray.py: -------------------------------------------------------------------------------- 1 | import ray 2 | 3 | # custom modules 4 | from neosearch.constants.queue import USE_QUEUE 5 | 6 | 7 | # decorator for ray remote 8 | def ray_remote_if_enabled(func): 9 | if not USE_QUEUE: 10 | return ray.remote(func) 11 | return func 12 | -------------------------------------------------------------------------------- /neosearch/utils/singleton.py: -------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | """The singleton metaclass.""" 3 | 4 | _instances: dict = {} 5 | 6 | def __call__(cls, *args, **kwargs): 7 | """Override to create only one instance ever. 8 | 9 | Returns: 10 | object: Instance of the class initialized. 11 | """ 12 | if cls not in cls._instances: 13 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 14 | return cls._instances[cls] 15 | -------------------------------------------------------------------------------- /neosearch/worker.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import sys 3 | import warnings 4 | from faststream import FastStream 5 | 6 | # Load environment variables 7 | load_dotenv() 8 | 9 | # Ignore warnings 10 | warnings.filterwarnings("ignore") 11 | 12 | # Add the root directory to the path so that we can import the settings 13 | sys.path.append("..") 14 | 15 | # custom module 16 | from neosearch.constants.queue import USE_QUEUE # noqa: E402 17 | from neosearch.app.worker_broker import get_worker_broker # noqa: E402 18 | 19 | if not USE_QUEUE: 20 | raise Exception("Queue is not enabled") 21 | 22 | # init broker 23 | broker = get_worker_broker() 24 | 25 | # init faststream app 26 | app = FastStream(broker) 27 | 28 | 29 | if __name__ == "__main__": 30 | from neosearch.engine.agents.deep_research import background_research_task # noqa: E402 31 | # app.run() 32 | background_research_task("task_id", "How to build a google-level search engine?") 33 | -------------------------------------------------------------------------------- /neosearch_ai/README.md: -------------------------------------------------------------------------------- 1 | # Neosearch AI 2 | 3 | Run AI models for RAG search. 
4 | 5 | ## Embeddings 6 | 7 | ```bash 8 | # 9 | # export environment variables 10 | # 11 | 12 | # huggingface sentence transformers model 13 | export MODEL_NAME=answerdotai/ModernBERT-large 14 | # device type (cpu, gpu, etc) 15 | export DEVICE=cpu 16 | # precision (float32, float16, bfloat16, etc) 17 | export PRECISION=float32 18 | # retriever batch size 19 | export RETRIEVER_BATCH_SIZE=8 20 | # reader batch size 21 | export READER_BATCH_SIZE=8 22 | # max batch size 23 | export MAX_BATCH_SIZE=8 24 | 25 | # if you use gpu, then set the num of gpus (otherwise, torch.cuda.device_count() is used) 26 | export NUM_GPUS=1 27 | 28 | # 29 | # run ray serve 30 | # 31 | 32 | serve run embedding:embedding_deployment 33 | ``` 34 | 35 | ## Reranker 36 | 37 | ### FlashRerank 38 | 39 | ```bash 40 | # 41 | # export environment variables 42 | # 43 | 44 | # flashrank reranker model 45 | export MODEL_NAME=rank_zephyr_7b_v1_full 46 | # device type (cpu, gpu, etc) 47 | export DEVICE=cpu 48 | # precision (float32, float16, bfloat16, etc) 49 | export PRECISION=float32 50 | # retriever batch size 51 | export RETRIEVER_BATCH_SIZE=8 52 | # reader batch size 53 | export READER_BATCH_SIZE=8 54 | # max batch size 55 | export MAX_BATCH_SIZE=8 56 | 57 | # if you use gpu, then set the num of gpus (otherwise, torch.cuda.device_count() is used) 58 | export NUM_GPUS=1 59 | 60 | # 61 | # run ray serve 62 | # 63 | 64 | serve run flashrerank:rerank_deployment 65 | ``` 66 | -------------------------------------------------------------------------------- /neosearch_ai/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_ai/configs/__init__.py -------------------------------------------------------------------------------- /neosearch_ai/configs/app.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import torch 3 | import multiprocessing 4 | 5 | 6 | @dataclass 7 | class NeosAiConfig: 8 | num_of_cpus: int = multiprocessing.cpu_count() 9 | cuda_available: bool = torch.cuda.is_available() 10 | use_llm2vec: bool = False 11 | avoid_thread_contention: bool = True 12 | run_monitoring: bool = True 13 | monitoring_port: int = 8518 14 | -------------------------------------------------------------------------------- /neosearch_ai/configs/embedding_param_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | import torch 4 | 5 | 6 | @dataclass 7 | class ServerParameterManager: 8 | model_name: str = os.environ.get("MODEL_NAME", "answerdotai/ModernBERT-large") 9 | device: str = os.environ.get("DEVICE", "cpu") 10 | precision: int | str | None = os.environ.get("PRECISION", "fp32") 11 | retriever_batch_size: int = int(os.environ.get("RETRIEVER_BATCH_SIZE", 32)) 12 | reader_batch_size: int = int(os.environ.get("READER_BATCH_SIZE", 32)) 13 | max_batch_size: int = int(os.environ.get("MAX_BATCH_SIZE", 32)) 14 | 15 | 16 | class RayParameterManager: 17 | def __init__(self) -> None: 18 | self.num_gpus = int(os.environ.get("NUM_GPUS", torch.cuda.device_count())) 19 | self.min_replicas = int(os.environ.get("MIN_REPLICAS", 1)) 20 | self.max_replicas = int(os.environ.get("MAX_REPLICAS", 1)) 21 | -------------------------------------------------------------------------------- /neosearch_ai/configs/reranker_param_manager.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | import torch 4 | 5 | 6 | @dataclass 7 | class RerankServerParameterManager: 8 | model_name: str = os.environ.get("MODEL_NAME", "rank_zephyr_7b_v1_full") 9 | device: str = os.environ.get("DEVICE", "cpu") 10 | precision: int | str | None = os.environ.get("PRECISION", "fp32") 11 | retriever_batch_size: int = int(os.environ.get("RETRIEVER_BATCH_SIZE", 32)) 12 | reader_batch_size: int = int(os.environ.get("READER_BATCH_SIZE", 32)) 13 | max_batch_size: int = int(os.environ.get("MAX_BATCH_SIZE", 32)) 14 | 15 | 16 | class RerankRayParameterManager: 17 | def __init__(self) -> None: 18 | self.num_gpus = int(os.environ.get("NUM_GPUS", torch.cuda.device_count())) 19 | self.min_replicas = int(os.environ.get("MIN_REPLICAS", 1)) 20 | self.max_replicas = int(os.environ.get("MAX_REPLICAS", 1)) 21 | -------------------------------------------------------------------------------- /neosearch_ai/constants/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_ai/constants/__init__.py -------------------------------------------------------------------------------- /neosearch_ai/constants/logging.py: -------------------------------------------------------------------------------- 1 | LOG_DEFAULT_LOG_NAME = "neosearch_ai" 2 | LOG_DEFAULT_LOG_LEVEL = "DEBUG" 3 | LOG_DEFAULT_CONSOLE_LOG_LEVEL = "WARNING" 4 | LOG_DEFAULT_MAX_BYTES = 10485760 5 | LOG_DEFAULT_BACKUP_COUNT = 10 6 | LOG_DEFAULT_LOGGING_WORKERS = 1 -------------------------------------------------------------------------------- /neosearch_ai/embedding.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append(".") 4 | sys.path.append("..") 5 | 6 | # custom modules 7 | from engine.embeddings import EmbeddingDeployment 8 | 9 | # Deploy the Ray Serve application. 10 | embedding_deployment = EmbeddingDeployment.bind() 11 | -------------------------------------------------------------------------------- /neosearch_ai/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_ai/engine/__init__.py -------------------------------------------------------------------------------- /neosearch_ai/flashrerank.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("..") 4 | 5 | # custom modules 6 | from engine.flash_reranker import FlashRerankDeployment 7 | 8 | # Deploy the Ray Serve application. 
9 | rerank_deployment = FlashRerankDeployment.bind() 10 | -------------------------------------------------------------------------------- /neosearch_ai/pyproject.toml: -------------------------------------------------------------------------------- 1 | # [tool.pdm.build] 2 | # includes = [] 3 | # [build-system] 4 | # requires = ["pdm-backend"] 5 | # build-backend = "pdm.backend" 6 | 7 | 8 | [project] 9 | authors = [ 10 | {name = "YeonwooSung", email = "neos960518@gmail.com"}, 11 | ] 12 | requires-python = "<3.13,>=3.10" 13 | dependencies = [ 14 | "ray[serve]<3.0.0,>=2.34.0", 15 | "fastapi==0.115.6", 16 | "sentence-transformers<4.0.0,>=3.0.1", 17 | "torch<3.0.0,>=2.4.0", 18 | "transformers<5.0.0,>=4.48.3", 19 | "psutil<7.0.0,>=6.0.0", 20 | "vllm<1.0.0,>=0.6.2", 21 | "flashrank[listwise]<1.0.0,>=0.2.9", 22 | "uvloop<1.0.0,>=0.21.0", 23 | ] 24 | name = "neosearch-ai" 25 | version = "0.3.0" 26 | description = "AI components for neosearch" 27 | readme = "README.md" 28 | -------------------------------------------------------------------------------- /neosearch_ai/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_ai/utils/__init__.py -------------------------------------------------------------------------------- /neosearch_ai/utils/singleton.py: -------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | """The singleton metaclass.""" 3 | 4 | _instances: dict = {} 5 | 6 | def __call__(cls, *args, **kwargs): 7 | """Override to create only one instance ever. 8 | 9 | Returns: 10 | object: Instance of the class initialized. 11 | """ 12 | if cls not in cls._instances: 13 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 14 | return cls._instances[cls] -------------------------------------------------------------------------------- /neosearch_crawler/.env.sample: -------------------------------------------------------------------------------- 1 | FOR_TEST=0 2 | PG_CONNECTION_STRING= 3 | -------------------------------------------------------------------------------- /neosearch_crawler/.gitignore: -------------------------------------------------------------------------------- 1 | # commoncrawl data 2 | data/ 3 | 4 | known_urls.txt 5 | web_corpus.parquet 6 | web_corpus.jsonl 7 | -------------------------------------------------------------------------------- /neosearch_crawler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/constants/__init__.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | # custom modules 5 | from .modes import ( 6 | BASE_WEB_CRAWL_AGENT_MODE, 7 | COMMON_CRAWL_RUNNER_MODE, 8 | PARSE_WIKI_TO_PARADEDB_MODE, 9 | ) 10 | 11 | 12 | # Load the environment variables 13 | load_dotenv() 14 | 15 | FOR_TEST = os.getenv("FOR_TEST", "0") == "1" 16 | 17 | __all__ = [ 18 | # __init__.py 19 | "FOR_TEST", 20 | # modes.py 21 | "BASE_WEB_CRAWL_AGENT_MODE", 22 | "COMMON_CRAWL_RUNNER_MODE", 23 | "PARSE_WIKI_TO_PARADEDB_MODE", 24 | ] -------------------------------------------------------------------------------- 
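The `embedding.py` and `flashrerank.py` entry points above only `.bind()` deployment classes that live under `neosearch_ai/engine/` and are launched with `serve run <module>:<deployment>`; the engine classes themselves are not shown in this listing. Below is a minimal, illustrative sketch of what such an env-driven Ray Serve deployment looks like. The class name `ToyEmbeddingDeployment` and the request payload shape are assumptions for illustration, not the repository's actual implementation:

```python
import os

from ray import serve
from sentence_transformers import SentenceTransformer


@serve.deployment(
    num_replicas=int(os.environ.get("MIN_REPLICAS", 1)),
    ray_actor_options={"num_gpus": float(os.environ.get("NUM_GPUS", 0))},
)
class ToyEmbeddingDeployment:
    def __init__(self) -> None:
        # Mirrors the MODEL_NAME / DEVICE defaults read by ServerParameterManager.
        self.model = SentenceTransformer(
            os.environ.get("MODEL_NAME", "answerdotai/ModernBERT-large"),
            device=os.environ.get("DEVICE", "cpu"),
        )

    async def __call__(self, request) -> dict:
        # Ray Serve passes a Starlette Request to __call__ for plain HTTP traffic.
        payload = await request.json()  # assumed shape: {"texts": ["...", ...]}
        embeddings = self.model.encode(payload["texts"]).tolist()
        return {"embeddings": embeddings}


# `serve run <module>:<app>` expects a bound application object, which is what
# embedding.py / flashrerank.py produce with `.bind()`.
toy_embedding_deployment = ToyEmbeddingDeployment.bind()
```

With this sketch saved as `toy_embedding.py`, `serve run toy_embedding:toy_embedding_deployment` would serve it the same way the README commands above serve the real deployments.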
/neosearch_crawler/constants/crawl_seeds.py: -------------------------------------------------------------------------------- 1 | INITIAL_SEEDS = [ 2 | # Wiki english main portals 3 | 'https://en.wikipedia.org/wiki/Main_Page', 4 | 'https://en.wikipedia.org/wiki/Portal:Contents', 5 | 6 | # Korean Wikipedia main portals 7 | 'https://ko.wikipedia.org/wiki/위키백과:대문', 8 | 'https://ko.wikipedia.org/wiki/포털:목차', 9 | 10 | # Naver 지식백과 (encyclopedia) main page 11 | 'https://terms.naver.com/', 12 | 13 | # scholar portals 14 | # academic information portals 15 | 'https://arxiv.org/', 16 | 'https://www.dbpia.co.kr/', 17 | 'https://scholar.google.co.kr/' 18 | ] 19 | 20 | NEWS_SEEDS = [ 21 | 'https://news.naver.com/', 22 | 'https://news.daum.net/', 23 | 'https://news.google.com/' 24 | ] 25 | 26 | OPENSOURCE_DEV_SEEDS = [ 27 | # Open sources 28 | 'https://www.tensorflow.org/', 29 | 'https://pytorch.org/', 30 | 'https://react.dev/', 31 | ] 32 | 33 | # Full seeds (concatenated) 34 | _FULL_SEEDS = INITIAL_SEEDS + NEWS_SEEDS + OPENSOURCE_DEV_SEEDS 35 | FULL_SEEDS = list(set(_FULL_SEEDS)) 36 | -------------------------------------------------------------------------------- /neosearch_crawler/constants/logger.py: -------------------------------------------------------------------------------- 1 | LOG_DEFAULT_LOG_NAME = "neosearch-crawler" 2 | LOG_DEFAULT_LOG_LEVEL = "DEBUG" 3 | LOG_DEFAULT_CONSOLE_LOG_LEVEL = "WARNING" 4 | LOG_DEFAULT_MAX_BYTES = 10485760 5 | LOG_DEFAULT_BACKUP_COUNT = 10 6 | LOG_DEFAULT_LOGGING_WORKERS = 1 7 | -------------------------------------------------------------------------------- /neosearch_crawler/constants/modes.py: -------------------------------------------------------------------------------- 1 | BASE_WEB_CRAWL_AGENT_MODE = "web_crawl_agent" 2 | COMMON_CRAWL_RUNNER_MODE = "cc" 3 | PARSE_WIKI_TO_PARADEDB_MODE = "parse_wiki_to_paradedb" 4 | -------------------------------------------------------------------------------- /neosearch_crawler/crawlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/crawlers/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/crawlers/s3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/crawlers/s3/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/datastore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/datastore/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/dispatchers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/dispatchers/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/dispatchers/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from crawlers.base import BaseCrawler 4 | 5 | 6 | class CrawlerDispatcher: 7 | def __init__(self) -> None: 8 | self._crawlers = {} 9 | 10 | def 
register(self, domain: str, crawler: type[BaseCrawler]) -> None: 11 | self._crawlers[r"https://(www\.)?{}\.com/*".format(re.escape(domain))] = crawler 12 | 13 | def get_crawler(self, url: str) -> BaseCrawler: 14 | for pattern, crawler in self._crawlers.items(): 15 | if re.match(pattern, url): 16 | return crawler() 17 | else: 18 | raise ValueError("No crawler found for the provided link") 19 | -------------------------------------------------------------------------------- /neosearch_crawler/dispatchers/lib.py: -------------------------------------------------------------------------------- 1 | from neosearch_crawler.exception.dispatcher import ImproperlyConfigured 2 | 3 | 4 | def user_to_names(user: str | None) -> tuple[str, str]: 5 | if user is None: 6 | raise ImproperlyConfigured("User name is empty") 7 | 8 | name_tokens = user.split(" ") 9 | if len(name_tokens) == 0: 10 | raise ImproperlyConfigured("User name is empty") 11 | elif len(name_tokens) == 1: 12 | first_name, last_name = name_tokens[0], name_tokens[0] 13 | else: 14 | first_name, last_name = " ".join(name_tokens[:-1]), name_tokens[-1] 15 | 16 | return first_name, last_name 17 | -------------------------------------------------------------------------------- /neosearch_crawler/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/engine/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/engine/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/engine/agent/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/engine/agent/base.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class BaseArgs(BaseModel): 5 | id: str 6 | 7 | 8 | class BaseAgent: 9 | def __init__(self): 10 | pass 11 | 12 | def run(self, args: BaseArgs): 13 | pass 14 | -------------------------------------------------------------------------------- /neosearch_crawler/engine/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | # custom modules 4 | from .base import BaseRunner 5 | 6 | 7 | def step(index):
 8 | """Decorator to mark a method as a DAG step.""" 9 | def decorator(func): 10 | setattr(func, "_step_index", index) 11 | 12 | @wraps(func) 13 | def wrapper(*args, **kwargs): 14 | return func(*args, **kwargs) 15 | return wrapper 16 | return decorator 17 | 18 | 19 | __all__ = [ 20 | 'BaseRunner', 21 | 'step' 22 | ] -------------------------------------------------------------------------------- /neosearch_crawler/exception/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/exception/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/exception/dispatcher.py: -------------------------------------------------------------------------------- 1 | class ScrabbleException(Exception): 2 | pass 3 | 4 | 5 | class ImproperlyConfigured(ScrabbleException): 6 | pass 7 | 
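`CrawlerDispatcher` above maps a per-domain URL regex to a crawler class and instantiates the first match; the `for`/`else` means `ValueError` is raised only when no registered pattern matched. A hedged usage sketch follows; the concrete crawler class names are assumed from the `crawlers/` package layout rather than taken from the modules themselves:

```python
# Illustrative wiring only: MediumCrawler / GithubCrawler are assumed to be
# the BaseCrawler subclasses defined in crawlers/medium.py and crawlers/github.py.
from dispatchers.base import CrawlerDispatcher
from crawlers.github import GithubCrawler
from crawlers.medium import MediumCrawler

dispatcher = CrawlerDispatcher()
dispatcher.register("medium", MediumCrawler)  # -> r"https://(www\.)?medium\.com/*"
dispatcher.register("github", GithubCrawler)  # -> r"https://(www\.)?github\.com/*"

# get_crawler() returns an *instance* of the first crawler whose pattern
# matches the URL, or raises ValueError if no registered pattern fits.
crawler = dispatcher.get_crawler("https://medium.com/@someone/some-post")
print(type(crawler).__name__)  # MediumCrawler
```

Note that `register()` hardcodes a `.com` suffix, so domains on other TLDs would need their own registration scheme.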
-------------------------------------------------------------------------------- /neosearch_crawler/export_requirements_from_poetry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check if requirements.txt exists 4 | if [ -f requirements.txt ]; then 5 | echo "requirements.txt already exists. Removing it." 6 | rm requirements.txt 7 | fi 8 | 9 | # poetry export --without-hashes --format=requirements.txt > requirements.txt 10 | uv export --no-hashes --format requirements-txt > requirements.txt -------------------------------------------------------------------------------- /neosearch_crawler/mongo_db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NEOS-AI/Neosearch/144921c094eae48e3aaac078e6f726c342720b78/neosearch_crawler/mongo_db/__init__.py -------------------------------------------------------------------------------- /neosearch_crawler/mongo_db/mongo.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | from pymongo.errors import ConnectionFailure 3 | 4 | # custom modules 5 | from .mongo_config import settings 6 | 7 | 8 | class MongoDatabaseConnector: 9 | """Singleton class to connect to MongoDB database.""" 10 | 11 | _instance: MongoClient = None 12 | 13 | def __new__(cls, *args, **kwargs): 14 | if cls._instance is None: 15 | try: 16 | cls._instance = MongoClient(settings.MONGO_DATABASE_HOST) 17 | except ConnectionFailure as e: 18 | print(f"Couldn't connect to the database: {str(e)}") 19 | raise 20 | 21 | print( 22 | f"Connection to database with uri: {settings.MONGO_DATABASE_HOST} successful" 23 | ) 24 | return cls._instance 25 | 26 | def get_database(self): 27 | return self._instance[settings.MONGO_DATABASE_NAME] 28 | 29 | def close(self): 30 | if self._instance: 31 | self._instance.close() 32 | print("Connection to database has been closed.") 33 | 34 | 35 | connection = MongoDatabaseConnector() -------------------------------------------------------------------------------- /neosearch_crawler/mongo_db/mongo_config.py: -------------------------------------------------------------------------------- 1 | from pydantic_settings import BaseSettings, SettingsConfigDict 2 | 3 | 4 | class Settings(BaseSettings): 5 | model_config = SettingsConfigDict(env_file="../.env", env_file_encoding="utf-8") 6 | 7 | MONGO_DATABASE_HOST: str = ( 8 | "mongodb://mongo1:30001,mongo2:30002,mongo3:30003/?replicaSet=my-replica-set" 9 | ) 10 | MONGO_DATABASE_NAME: str = "scrabble" 11 | 12 | # Optional LinkedIn credentials for scraping your profile 13 | LINKEDIN_USERNAME: str | None = None 14 | LINKEDIN_PASSWORD: str | None = None 15 | 16 | 17 | settings = Settings() 18 | -------------------------------------------------------------------------------- /neosearch_crawler/pyproject.toml: -------------------------------------------------------------------------------- 1 | # [tool.pdm.build] 2 | # includes = [] 3 | # [build-system] 4 | # requires = ["pdm-backend"] 5 | # build-backend = "pdm.backend" 6 | 7 | 8 | [project] 9 | authors = [ 10 | {name = "YeonwooSung", email = "neos960518@gmail.com"}, 11 | ] 12 | requires-python = "<3.13,>=3.10" 13 | dependencies = [ 14 | "ray[serve]<3.0.0,>=2.10.0", 15 | "trafilatura[all]==1.8.0", 16 | "langchain==0.3.19", 17 | "llama-index-core==0.12.35", 18 | "llama-index==0.12.35", 19 | "llama-index-llms-openai==0.3.20", 20 | "llama-index-llms-replicate==0.4.0", 21 | 
"llama-index-embeddings-huggingface==0.5.1", 22 | "aws-lambda-powertools<4.0.0,>=3.0.0", 23 | "selenium>=4.25.0,<5.0.0", 24 | "pymongo<5.0.0,>=4.9.1", 25 | "pydantic-settings<3.0.0,>=2.6.0", 26 | "scrapy<3.0.0,>=2.12.0", 27 | "mypy-boto3-s3<2.0.0,>=1.35.67", 28 | "polars>=1.18.0,<2.0.0", 29 | "scrapegraphai<2.0.0,>=1.37.1", 30 | "sqlmodel>=0.0.23", 31 | "psycopg2>=2.9.10", 32 | "asyncpg>=0.30.0", 33 | "requests>=2.32.3", 34 | "pypdf2>=3.0.1", 35 | ] 36 | name = "neosearch_crawler" 37 | version = "0.2.1" 38 | description = "" 39 | readme = "README.md" 40 | -------------------------------------------------------------------------------- /neosearch_crawler/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .trafilatura_util import extract_url_content 2 | 3 | 4 | __all__ = ["extract_url_content"] 5 | -------------------------------------------------------------------------------- /neosearch_crawler/utils/domain_name_utils.py: -------------------------------------------------------------------------------- 1 | def reverse_domain(domain: str): 2 | """ 3 | Reverse a domain name in URI format. 4 | 5 | Args: 6 | domain (str): The domain name to reverse. 7 | 8 | Returns: 9 | str: The reversed domain name. 10 | """ 11 | parts = domain.split('.') 12 | return '.'.join(reversed(parts)) 13 | 14 | 15 | if __name__ == "__main__": 16 | domain = "com.naver" 17 | reversed_domain = reverse_domain(domain) 18 | print(reversed_domain) # naver.com 19 | -------------------------------------------------------------------------------- /neosearch_crawler/utils/errors.py: -------------------------------------------------------------------------------- 1 | class ScrabbleException(Exception): 2 | pass 3 | 4 | 5 | class ImproperlyConfigured(ScrabbleException): 6 | pass 7 | -------------------------------------------------------------------------------- /neosearch_crawler/utils/pdf_util.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from io import BytesIO 3 | from PyPDF2 import PdfReader 4 | import orjson 5 | 6 | 7 | def extract_pdf_from_url(url): 8 | response = requests.get(url) 9 | response.raise_for_status() 10 | 11 | with BytesIO(response.content) as pdf_file: 12 | reader = PdfReader(pdf_file) 13 | metadata = reader.metadata 14 | 15 | # 제목(title) 추출 16 | title = metadata.title if metadata.title else "" 17 | description = None 18 | 19 | # 본문(content) 추출 20 | content = "" 21 | for page in reader.pages: 22 | content += page.extract_text() 23 | 24 | # 메타데이터(metadata)를 딕셔너리 형태로 변환 25 | metadata_dict = {key[1:]: value for key, value in metadata.items()} 26 | 27 | metadata_dict_str = orjson.dumps(metadata_dict).decode("utf-8") 28 | 29 | return { 30 | "title": title, 31 | "url":url, 32 | "content":content, 33 | "description":description, 34 | "metadata":metadata_dict_str, 35 | } 36 | -------------------------------------------------------------------------------- /neosearch_crawler/utils/singleton.py: -------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | """The singleton metaclass.""" 3 | 4 | _instances: dict = {} 5 | 6 | def __call__(cls, *args, **kwargs): 7 | """Override to create only one instance ever. 8 | 9 | Returns: 10 | object: Instance of the class initialized. 
11 | """ 12 | if cls not in cls._instances: 13 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 14 | return cls._instances[cls] 15 | -------------------------------------------------------------------------------- /neosearch_frontend/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "next/core-web-vitals", 4 | "plugin:import/recommended", 5 | "plugin:import/typescript", 6 | "prettier", 7 | "plugin:tailwindcss/recommended" 8 | ], 9 | "plugins": ["tailwindcss"], 10 | "rules": { 11 | "tailwindcss/no-custom-classname": "off", 12 | "tailwindcss/classnames-order": "off" 13 | }, 14 | "settings": { 15 | "import/resolver": { 16 | "typescript": { 17 | "alwaysTryTypes": true 18 | } 19 | } 20 | }, 21 | "ignorePatterns": ["**/components/ui/**"] 22 | } 23 | -------------------------------------------------------------------------------- /neosearch_frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | node_modules 5 | .pnp 6 | .pnp.js 7 | 8 | # testing 9 | coverage 10 | 11 | # next.js 12 | .next/ 13 | out/ 14 | build 15 | 16 | # misc 17 | .DS_Store 18 | *.pem 19 | 20 | # debug 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | .pnpm-debug.log* 25 | 26 | # local env files 27 | .env.local 28 | .env.development.local 29 | .env.test.local 30 | .env.production.local 31 | 32 | # turbo 33 | .turbo 34 | 35 | .env 36 | .vercel 37 | .vscode 38 | .env*.local 39 | -------------------------------------------------------------------------------- /neosearch_frontend/app/(auth)/api/auth/[...nextauth]/route.ts: -------------------------------------------------------------------------------- 1 | export { GET, POST } from '@/app/(auth)/auth'; 2 | -------------------------------------------------------------------------------- /neosearch_frontend/app/(auth)/api/auth/guest/route.ts: -------------------------------------------------------------------------------- 1 | import { signIn } from '@/app/(auth)/auth'; 2 | import { isDevelopmentEnvironment } from '@/lib/constants'; 3 | import { getToken } from 'next-auth/jwt'; 4 | import { NextResponse } from 'next/server'; 5 | 6 | export async function GET(request: Request) { 7 | const { searchParams } = new URL(request.url); 8 | const redirectUrl = searchParams.get('redirectUrl') || '/'; 9 | 10 | const token = await getToken({ 11 | req: request, 12 | secret: process.env.AUTH_SECRET, 13 | secureCookie: !isDevelopmentEnvironment, 14 | }); 15 | 16 | if (token) { 17 | return NextResponse.redirect(new URL('/', request.url)); 18 | } 19 | 20 | return signIn('guest', { redirect: true, redirectTo: redirectUrl }); 21 | } 22 | -------------------------------------------------------------------------------- /neosearch_frontend/app/(auth)/auth.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextAuthConfig } from 'next-auth'; 2 | 3 | export const authConfig = { 4 | pages: { 5 | signIn: '/login', 6 | newUser: '/', 7 | }, 8 | providers: [ 9 | // added later in auth.ts since it requires bcrypt which is only compatible with Node.js 10 | // while this file is also used in non-Node.js environments 11 | ], 12 | callbacks: {}, 13 | } satisfies NextAuthConfig; 14 | -------------------------------------------------------------------------------- 
/neosearch_frontend/app/(chat)/api/chat/schema.ts: -------------------------------------------------------------------------------- 1 | import { z } from 'zod'; 2 | 3 | const textPartSchema = z.object({ 4 | text: z.string().min(1).max(2000), 5 | type: z.enum(['text']), 6 | }); 7 | 8 | export const postRequestBodySchema = z.object({ 9 | id: z.string().uuid(), 10 | message: z.object({ 11 | id: z.string().uuid(), 12 | createdAt: z.coerce.date(), 13 | role: z.enum(['user']), 14 | content: z.string().min(1).max(2000), 15 | parts: z.array(textPartSchema), 16 | experimental_attachments: z 17 | .array( 18 | z.object({ 19 | url: z.string().url(), 20 | name: z.string().min(1).max(2000), 21 | contentType: z.enum(['image/png', 'image/jpg', 'image/jpeg']), 22 | }), 23 | ) 24 | .optional(), 25 | }), 26 | selectedChatModel: z.enum(['chat-model', 'chat-model-reasoning']), 27 | selectedVisibilityType: z.enum(['public', 'private']), 28 | }); 29 | 30 | export type PostRequestBody = z.infer<typeof postRequestBodySchema>; 31 | -------------------------------------------------------------------------------- /neosearch_frontend/app/(chat)/api/history/route.ts: -------------------------------------------------------------------------------- 1 | import { auth } from '@/app/(auth)/auth'; 2 | import type { NextRequest } from 'next/server'; 3 | import { getChatsByUserId } from '@/lib/db/queries'; 4 | import { ChatSDKError } from '@/lib/errors'; 5 | 6 | export async function GET(request: NextRequest) { 7 | const { searchParams } = request.nextUrl; 8 | 9 | const limit = Number.parseInt(searchParams.get('limit') || '10'); 10 | const startingAfter = searchParams.get('starting_after'); 11 | const endingBefore = searchParams.get('ending_before'); 12 | 13 | if (startingAfter && endingBefore) { 14 | return new ChatSDKError( 15 | 'bad_request:api', 16 | 'Only one of starting_after or ending_before can be provided.', 17 | ).toResponse(); 18 | } 19 | 20 | const session = await auth(); 21 | 22 | if (!session?.user) { 23 | return new ChatSDKError('unauthorized:chat').toResponse(); 24 | } 25 | 26 | const chats = await getChatsByUserId({ 27 | id: session.user.id, 28 | limit, 29 | startingAfter, 30 | endingBefore, 31 | }); 32 | 33 | return Response.json(chats); 34 | } 35 | -------------------------------------------------------------------------------- /neosearch_frontend/app/(chat)/api/suggestions/route.ts: -------------------------------------------------------------------------------- 1 | import { auth } from '@/app/(auth)/auth'; 2 | import { getSuggestionsByDocumentId } from '@/lib/db/queries'; 3 | import { ChatSDKError } from '@/lib/errors'; 4 | 5 | export async function GET(request: Request) { 6 | const { searchParams } = new URL(request.url); 7 | const documentId = searchParams.get('documentId'); 8 | 9 | if (!documentId) { 10 | return new ChatSDKError( 11 | 'bad_request:api', 12 | 'Parameter documentId is required.', 13 | ).toResponse(); 14 | } 15 | 16 | const session = await auth(); 17 | 18 | if (!session?.user) { 19 | return new ChatSDKError('unauthorized:suggestions').toResponse(); 20 | } 21 | 22 | const suggestions = await getSuggestionsByDocumentId({ 23 | documentId, 24 | }); 25 | 26 | const [suggestion] = suggestions; 27 | 28 | if (!suggestion) { 29 | return Response.json([], { status: 200 }); 30 | } 31 | 32 | if (suggestion.userId !== session.user.id) { 33 | return new ChatSDKError('forbidden:api').toResponse(); 34 | } 35 | 36 | return Response.json(suggestions, { status: 200 }); 37 | } 38 | 
-------------------------------------------------------------------------------- /neosearch_frontend/app/(chat)/layout.tsx: -------------------------------------------------------------------------------- 1 | import { cookies } from 'next/headers'; 2 | 3 | import { AppSidebar } from '@/components/app-sidebar'; 4 | import { SidebarInset, SidebarProvider } from '@/components/ui/sidebar'; 5 | 6 | import { auth } from '../(auth)/auth'; 7 | import Script from 'next/script'; 8 | 9 | export const experimental_ppr = true; 10 | 11 | export default async function Layout({ 12 | children, 13 | }: { 14 | children: React.ReactNode; 15 | }) { 16 | const [session, cookieStore] = await Promise.all([auth(), cookies()]); 17 | const isCollapsed = cookieStore.get('sidebar:state')?.value !== 'true'; 18 | 19 | return ( 20 | <> 21 |