├── .github └── workflows │ ├── comment-action.yml │ ├── deploy-githubpages.yml │ ├── github-release.yml │ ├── python-publish.yml │ ├── python-typecheck.yml │ └── python-unittest.yml ├── .gitignore ├── .python-version ├── CLAUDE.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── build_scripts └── export_docstrings.py ├── config ├── .gitkeep ├── README.md ├── agent_config.yaml ├── cloud_run_urls.json ├── llm_config.yaml ├── loader_config.yaml ├── model_lookup.yaml ├── platform_config.yaml ├── prompt_config.yaml └── users_config.yaml ├── docs ├── .env.example ├── .gitignore ├── README.md ├── babel.config.js ├── blog │ ├── 2024-10-09-cognitive-design.md │ ├── 2024-10-15-dynamic-output-with-mdx.mdx │ ├── 2024-10-22-subconscious-genai.md │ ├── authors.yml │ └── img │ │ ├── agent-e-autogen-setup.png │ │ ├── agentic_patterns.png │ │ ├── alteria-cd.jpg │ │ ├── baby-agi.png │ │ ├── bat-pie.png │ │ ├── bertha-cog-design.png │ │ ├── bertha-cog-design2.png │ │ ├── bertha.webp │ │ ├── chase-sql.png │ │ ├── cognitive-design.webp │ │ ├── dynamic-ui-banner.png │ │ ├── langfuse-demo.png │ │ ├── mdx-prompt-langfuse.png │ │ └── subconscious.png ├── docs │ ├── VACs │ │ ├── eduvac-demo.png │ │ ├── eduvac.md │ │ ├── eduvac_small.png │ │ ├── index.mdx │ │ ├── pirate_talk.md │ │ ├── vac-pirate-speak.png │ │ └── vertex-llamaindex.md │ ├── cli.md │ ├── config.md │ ├── databases │ │ ├── alloydb.md │ │ ├── discovery_engine.md │ │ ├── index.md │ │ ├── lancedb.md │ │ ├── postgres.md │ │ └── supabase.md │ ├── howto │ │ ├── creating_a_vac.md │ │ ├── embedding.md │ │ ├── flask_app.md │ │ ├── grounded_vertex.md │ │ ├── img │ │ │ └── livekit-question.png │ │ ├── index.md │ │ ├── parallel.md │ │ ├── streaming.md │ │ └── talk_to_alloydb.md │ ├── img │ │ ├── browser_tool.gif │ │ ├── browser_tool_files.png │ │ ├── config-list.gif │ │ ├── embed-edmonbrain.gif │ │ ├── install.gif │ │ ├── jan-config.png │ │ ├── jan-gemini-openai.png │ │ ├── jan-image.png │ │ ├── langfuse_eval_pubsub.png │ │ ├── 
sunholo-vac-chat-headless.gif │ │ ├── sunholo-vac-chat-with-files.gif │ │ ├── sunholo-vac-chat.gif │ │ ├── vac-dag.png │ │ └── vac-overview.png │ ├── index.md │ ├── integrations │ │ ├── azure.md │ │ ├── cloudlogging.md │ │ ├── genai.md │ │ ├── index.md │ │ ├── jan.md │ │ ├── langchain.md │ │ ├── langfuse.md │ │ ├── llamaindex.md │ │ ├── ollama.md │ │ ├── playwright.md │ │ └── vertexai.md │ ├── multivac │ │ └── index.md │ └── ui │ │ ├── api.md │ │ ├── chainlit.md │ │ ├── cli.md │ │ ├── discord.md │ │ ├── gchat.md │ │ ├── index.md │ │ ├── jan.md │ │ ├── livekit.md │ │ ├── openai.md │ │ └── slack.md ├── docusaurus.config.js ├── package-lock.json ├── package.json ├── sidebars.js ├── src │ ├── components │ │ ├── HomepageFeatures │ │ │ ├── index.js │ │ │ └── styles.module.css │ │ ├── audio.js │ │ ├── mdxComponents.js │ │ ├── multivacChat.js │ │ └── reactFlow.js │ ├── css │ │ └── custom.css │ ├── pages │ │ ├── index.js │ │ ├── index.module.css │ │ └── markdown-page.md │ ├── plugins │ │ └── proxy.js │ └── theme │ │ ├── Footer.js │ │ ├── Header.js │ │ ├── SitemarkIcon.js │ │ └── getLPTheme.js ├── static │ ├── .nojekyll │ ├── CNAME │ └── img │ │ ├── eclipse1.png │ │ ├── favicon.ico │ │ ├── logo-warm-colors-final.png │ │ ├── multivac-arch.png │ │ ├── multivac-demo.png │ │ ├── multivac-deployments.png │ │ ├── multivac-venn.png │ │ ├── undraw_docusaurus_mountain.svg │ │ ├── undraw_docusaurus_react.svg │ │ └── undraw_docusaurus_tree.svg ├── tapes │ ├── config-list.tape │ ├── embed-demo.tape │ ├── install.tape │ ├── sunholo-vac-chat-headless.tape │ ├── sunholo-vac-chat-with-files.tape │ └── sunholo-vac-chat.tape └── yarn.lock ├── ellipsis.yaml ├── pyproject.toml ├── src └── sunholo │ ├── __init__.py │ ├── agents │ ├── __init__.py │ ├── chat_history.py │ ├── dispatch_to_qa.py │ ├── fastapi │ │ ├── __init__.py │ │ ├── base.py │ │ └── qna_routes.py │ ├── flask │ │ ├── __init__.py │ │ ├── base.py │ │ └── vac_routes.py │ ├── langserve.py │ ├── pubsub.py │ ├── route.py │ ├── 
special_commands.py │ └── swagger.py │ ├── archive │ ├── __init__.py │ └── archive.py │ ├── auth │ ├── __init__.py │ ├── gcloud.py │ ├── refresh.py │ └── run.py │ ├── azure │ ├── __init__.py │ ├── auth.py │ ├── blobs.py │ └── event_grid.py │ ├── bots │ ├── __init__.py │ ├── discord.py │ ├── github_webhook.py │ └── webapp.py │ ├── chunker │ ├── README.md │ ├── __init__.py │ ├── azure.py │ ├── doc_handling.py │ ├── encode_metadata.py │ ├── images.py │ ├── loaders.py │ ├── message_data.py │ ├── pdfs.py │ ├── process_chunker_data.py │ ├── publish.py │ ├── pubsub.py │ └── splitter.py │ ├── cli │ ├── __init__.py │ ├── chat_vac.py │ ├── cli.py │ ├── cli_init.py │ ├── configs.py │ ├── deploy.py │ ├── embedder.py │ ├── merge_texts.py │ ├── run_proxy.py │ ├── sun_rich.py │ ├── swagger.py │ └── vertex.py │ ├── components │ ├── __init__.py │ ├── llm.py │ ├── retriever.py │ └── vectorstore.py │ ├── custom_logging.py │ ├── database │ ├── __init__.py │ ├── alloydb.py │ ├── alloydb_client.py │ ├── database.py │ ├── lancedb.py │ ├── sql │ │ └── sb │ │ │ ├── create_function.sql │ │ │ ├── create_function_time.sql │ │ │ ├── create_table.sql │ │ │ ├── delete_source_row.sql │ │ │ ├── return_sources.sql │ │ │ └── setup.sql │ ├── static_dbs.py │ └── uuid.py │ ├── discovery_engine │ ├── __init__.py │ ├── chunker_handler.py │ ├── cli.py │ ├── create_new.py │ ├── discovery_engine_client.py │ ├── get_ai_search_chunks.py │ └── search_filter_syntax.txt │ ├── embedder │ ├── README.md │ ├── __init__.py │ ├── embed_chunk.py │ └── embed_metadata.py │ ├── excel │ ├── __init__.py │ ├── call_vac.vba.template │ └── plugin.py │ ├── gcs │ ├── __init__.py │ ├── add_file.py │ ├── download_folder.py │ ├── download_gcs_text.py │ ├── download_url.py │ ├── extract_and_sign.py │ └── metadata.py │ ├── genai │ ├── __init__.py │ ├── file_handling.py │ ├── genaiv2.py │ ├── images.py │ ├── init.py │ ├── process_funcs_cls.py │ └── safety.py │ ├── invoke │ ├── __init__.py │ ├── async_class.py │ ├── direct_vac_func.py 
│ └── invoke_vac_utils.py │ ├── langchain_types.py │ ├── langfuse │ ├── __init__.py │ ├── callback.py │ ├── evals.py │ └── prompts.py │ ├── llamaindex │ ├── __init__.py │ ├── get_files.py │ ├── import_files.py │ ├── llamaindex_class.py │ └── user_history.py │ ├── lookup │ ├── __init__.py │ └── model_lookup.yaml │ ├── mcp │ ├── __init__.py │ └── cli.py │ ├── ollama │ ├── __init__.py │ └── ollama_images.py │ ├── pubsub │ ├── __init__.py │ ├── process_pubsub.py │ └── pubsub_manager.py │ ├── qna │ ├── __init__.py │ ├── parsers.py │ └── retry.py │ ├── senses │ ├── README.md │ ├── __init__.py │ └── stream_voice.py │ ├── streaming │ ├── __init__.py │ ├── content_buffer.py │ ├── langserve.py │ ├── stream_lookup.py │ └── streaming.py │ ├── summarise │ ├── __init__.py │ └── summarise.py │ ├── templates │ ├── agent │ │ ├── README.md │ │ ├── __init__.py │ │ ├── agent_service.py │ │ ├── app.py │ │ ├── cloudbuild.yaml │ │ ├── config │ │ │ └── vac_config.yaml │ │ ├── my_log.py │ │ ├── requirements.txt │ │ ├── tools │ │ │ ├── __init__.py │ │ │ └── your_agent.py │ │ └── vac_service.py │ ├── project │ │ ├── README.md │ │ ├── __init__.py │ │ ├── app.py │ │ ├── cloudbuild.yaml │ │ ├── config │ │ │ └── vac_config.yaml │ │ ├── my_log.py │ │ ├── requirements.txt │ │ └── vac_service.py │ └── system_services │ │ ├── README.md │ │ ├── __init__.py │ │ ├── app.py │ │ ├── cloudbuild.yaml │ │ ├── my_log.py │ │ └── requirements.txt │ ├── terraform │ ├── __init__.py │ └── tfvars_editor.py │ ├── tools │ ├── __init__.py │ └── web_browser.py │ ├── utils │ ├── .gitkeep │ ├── __init__.py │ ├── api_key.py │ ├── big_context.py │ ├── config.py │ ├── config_class.py │ ├── config_schema.py │ ├── gcp.py │ ├── gcp_project.py │ ├── mime.py │ ├── parsers.py │ ├── proto_convert.py │ ├── timedelta.py │ ├── user_ids.py │ └── version.py │ └── vertex │ ├── __init__.py │ ├── extension_yaml │ ├── code_interpreter.yaml │ ├── hello.yaml │ └── vertex_ai_search.yaml │ ├── extensions_call.py │ ├── extensions_class.py │ 
├── genai_functions.py │ ├── init.py │ ├── memory_tools.py │ ├── safety.py │ └── type_dict_to_json.py └── tests ├── _test_datastore.py ├── test_async.py ├── test_async_genai2.py ├── test_chat_history.py ├── test_config.py ├── test_genai2.py └── test_unstructured.py /.github/workflows/comment-action.yml: -------------------------------------------------------------------------------- 1 | name: Comment Action Workflow 2 | 3 | on: 4 | pull_request_review_comment: 5 | types: [created] 6 | 7 | jobs: 8 | process-comment: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v2 13 | 14 | - name: Extract Comment and File Context 15 | id: extract 16 | run: | 17 | # Extract PR number and comment 18 | PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH") 19 | COMMENT=$(jq --raw-output .comment.body "$GITHUB_EVENT_PATH") 20 | 21 | # Extract the file name associated with the comment 22 | FILE_NAME=$(jq --raw-output .comment.path "$GITHUB_EVENT_PATH") 23 | 24 | # Set outputs 25 | echo "::set-output name=pr_number::$PR_NUMBER" 26 | echo "::set-output name=comment::$COMMENT" 27 | echo "::set-output name=file_name::$FILE_NAME" 28 | 29 | env: 30 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 31 | GITHUB_EVENT_PATH: ${{ github.event_path }} 32 | 33 | - name: Call External API 34 | run: | 35 | curl -X POST -H "Content-Type: application/json" \ 36 | -H "Authorization: Bearer ${{ secrets.EXTERNAL_API_KEY }}" \ 37 | -d '{"pr_number": "${{ steps.extract.outputs.pr_number }}", "comment": "${{ steps.extract.outputs.comment }}", "file_name": "${{ steps.extract.outputs.file_name }}"}' \ 38 | https://yourapi.example.com/endpoint 39 | -------------------------------------------------------------------------------- /.github/workflows/deploy-githubpages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | # https://jaimestill.github.io/docusaur-gh/config 3 | defaults: 
4 | run: 5 | working-directory: docs 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | paths: 12 | - 'docs/**' 13 | - '.github/workflows/deploy-githubpages.yml' 14 | - 'sunholo/**' 15 | - 'setup.py' 16 | - 'build_scripts/export_docstrings.py' 17 | 18 | # Allow manual trigger 19 | workflow_dispatch: 20 | 21 | jobs: 22 | build: 23 | name: Build Docusaurus 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: actions/setup-node@v4 31 | with: 32 | node-version: 18 33 | cache: yarn 34 | cache-dependency-path: docs/yarn.lock 35 | 36 | - name: Install dependencies 37 | run: yarn install 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v3 41 | with: 42 | python-version: '3.x' 43 | 44 | - name: Export Docstrings 45 | run: pip install --upgrade pip && pip install setuptools && pip install -e .[all] && python build_scripts/export_docstrings.py 46 | working-directory: ${{ github.workspace }} 47 | 48 | # Expose environment variable for the API key (via GitHub Secrets) 49 | - name: Set environment variable 50 | run: echo "REACT_APP_MULTIVAC_API_KEY=${{ secrets.MULTIVAC_API_KEY }}" >> $GITHUB_ENV 51 | 52 | 53 | - name: Build website 54 | run: yarn build 55 | 56 | - name: Deploy to GitHub Pages 57 | uses: peaceiris/actions-gh-pages@v3 58 | with: 59 | github_token: ${{ secrets.GITHUB_TOKEN }} 60 | publish_dir: docs/build 61 | user_name: github-actions[bot] 62 | user_email: 41898282+github-actions[bot]@users.noreply.github.com -------------------------------------------------------------------------------- /.github/workflows/github-release.yml: -------------------------------------------------------------------------------- 1 | name: Create GitHub Release on Tag Push 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' # Listen for any tag starting with 'v' 7 | 8 | jobs: 9 | create_release: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v3 15 | 16 | 
- name: Create GitHub Release 17 | id: create_release 18 | uses: actions/create-release@v1 19 | with: 20 | tag_name: ${{ github.ref }} # The tag that triggered the workflow 21 | release_name: Release ${{ github.ref }} 22 | body: | 23 | Automated release for tag ${{ github.ref }}. 24 | draft: false 25 | prerelease: false 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | push: 15 | tags: 16 | - 'v*' # Listen for any tag starting with 'v' 17 | 18 | permissions: 19 | contents: write 20 | 21 | jobs: 22 | deploy: 23 | 24 | runs-on: ubuntu-latest 25 | 26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python 29 | uses: actions/setup-python@v3 30 | with: 31 | python-version: '3.x' 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install build pytest 36 | - name: Build package 37 | run: python -m build 38 | - name: Test minimal install 39 | run: | 40 | python -m venv venv 41 | source venv/bin/activate 42 | pip install dist/sunholo*.whl # Install the built package 43 | python -c "import sunholo" # Test that the package is importable 44 | deactivate 45 | - name: Run unit tests 46 | run: | 47 | pip install .[test] # Install the current package 48 | pytest tests 49 | - name: Publish package 50 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 51 | with: 52 | user: __token__ 53 | password: ${{ secrets.PYPI_API_TOKEN }} 54 | -------------------------------------------------------------------------------- /.github/workflows/python-typecheck.yml: -------------------------------------------------------------------------------- 1 | name: Type Check Python Package 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'sunholo/**' 7 | - 'setup.py' 8 | - 'tests/**' 9 | pull_request: 10 | paths: 11 | - 'sunholo/**' 12 | - 'setup.py' 13 | - 'tests/**' 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | type-check: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install mypy 33 | - name: Install package 34 | run: pip install . 
35 | - name: Run mypy type checking 36 | continue-on-error: true # This allows the job to continue even if mypy finds type errors 37 | run: | 38 | mypy sunholo --show-error-codes 39 | - name: Archive mypy results 40 | if: failure() 41 | run: | 42 | mkdir -p mypy-results 43 | mypy sunholo --show-error-codes > mypy-results/results.txt 44 | - name: Upload mypy results 45 | uses: actions/upload-artifact@v3 46 | with: 47 | name: mypy-results 48 | path: mypy-results/results.txt -------------------------------------------------------------------------------- /.github/workflows/python-unittest.yml: -------------------------------------------------------------------------------- 1 | name: Test Python Package 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'sunholo/**' 7 | - 'setup.py' 8 | - 'tests/**' 9 | pull_request: 10 | paths: 11 | - 'sunholo/**' 12 | - 'setup.py' 13 | - 'tests/**' 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | test: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install pytest 33 | - name: Install package 34 | run: pip install . 
35 | - name: Run unit tests 36 | run: pytest tests -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include sunholo/database/sql/sb/*.sql 2 | include sunholo/lookup/*.yaml 3 | exclude *.md 4 | exclude docs/* 5 | include README.md 6 | -------------------------------------------------------------------------------- /config/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/config/.gitkeep -------------------------------------------------------------------------------- /config/README.md: -------------------------------------------------------------------------------- 1 | # Configuration files 2 | 3 | This folder contains examples of the configuration files that the `sunholo` library supports.
-------------------------------------------------------------------------------- /config/cloud_run_urls.json: -------------------------------------------------------------------------------- 1 | {"chunker":"https://chunker-xxxxxxxxxxxxx.a.run.app", 2 | "crewai":"https://crewai-xxxxxxxxxxxxx.a.run.app", 3 | "eduvac":"https://eduvac-xxxxxxxxxxxxx.a.run.app", 4 | "discord-server":"https://discord-server-xxxxxxxxxxxxx.a.run.app", 5 | "autogen":"https://autogen-xxxxxxxxxxxxx.a.run.app", 6 | "dreamer":"https://dreamer-xxxxxxxxxxxxx.a.run.app", 7 | "edmonbrain":"https://edmonbrain-xxxxxxxxxxxxx.a.run.app", 8 | "edmonbrain-agent":"https://edmonbrain-agent-xxxxxxxxxxxxx.a.run.app", 9 | "embedder":"https://embedder-xxxxxxxxxxxxx.a.run.app", 10 | "image-talk":"https://image-talk-xxxxxxxxxxxxx.a.run.app", 11 | "langserve":"https://langserve-xxxxxxxxxxxxx.a.run.app", 12 | "litellm":"https://litellm-xxxxxxxxxxxxx.a.run.app", 13 | "openinterpreter":"https://openinterpreter-xxxxxxxxxxxxx.a.run.app", 14 | "rags":"https://rags-xxxxxxxxxxxxx.a.run.app", 15 | "reactapp":"https://reactapp-xxxxxxxxxxxxx.a.run.app", 16 | "slack":"https://slack-xxxxxxxxxxxxx.a.run.app", 17 | "unstructured":"https://unstructured-xxxxxxxxxxxxx.a.run.app", 18 | "webapp":"https://webapp-xxxxxxxxxxxxx.a.run.app"} 19 | -------------------------------------------------------------------------------- /config/loader_config.yaml: -------------------------------------------------------------------------------- 1 | kind: loaderConfig 2 | apiVersion: v1 3 | loader: 4 | code: 5 | extensions: 6 | - ".py" 7 | - ".js" 8 | - ".java" 9 | - ".c" 10 | - ".cpp" 11 | - ".cs" 12 | - ".rb" 13 | - ".php" 14 | - ".txt" 15 | - ".md" 16 | - ".yaml" 17 | - ".sql" 18 | - ".r" 19 | - ".sh" -------------------------------------------------------------------------------- /config/model_lookup.yaml: -------------------------------------------------------------------------------- 1 | kind: modelLookup 2 | apiVersion: v1 3 | models: 4 | 
gpt-4: 5 | max_tokens: 8192 6 | gpt-4-32k: 7 | max_tokens: 32768 8 | gpt-4-1106-preview: 9 | max_tokens: 128000 10 | gpt-4-vision-preview: 11 | max_tokens: 128000 12 | gpt-3.5-turbo: 13 | max_tokens: 4096 14 | gpt-3.5-turbo-16k: 15 | max_tokens: 16385 16 | gemini-pro: 17 | max_tokens: 32760 18 | gemini-pro-vision: 19 | max_tokens: 16384 20 | max_images_per_prompt: 16 21 | max_video_length: 2min 22 | max_videos_per_prompt: 1 23 | text-bison: 24 | max_tokens: 8192 25 | text-unicorn: 26 | max_tokens: 8192 27 | text-bison-32k: 28 | max_tokens: 32768 29 | chat-bison: 30 | max_tokens: 8192 31 | chat-bison-32k: 32 | max_tokens: 32768 33 | code-bison: 34 | max_tokens: 6144 35 | code-bison-32k: 36 | max_tokens: 32768 37 | codechat-bison: 38 | max_tokens: 6144 39 | codechat-bison-32k: 40 | max_tokens: 32768 41 | medlm-medium: 42 | max_tokens: 32768 43 | medlm-large: 44 | max_tokens: 8192 -------------------------------------------------------------------------------- /config/platform_config.yaml: -------------------------------------------------------------------------------- 1 | kind: platformConfig 2 | apiVersion: v1 3 | platforms: 4 | slack: 5 | teams: 6 | XXXXX: 7 | bots: 8 | XXXXX: 9 | llm: edmonbrain 10 | 11 | discord: 12 | users: 13 | "Guild1": 14 | id: "1234467" 15 | llm: edmonbrain 16 | "Guild2": 17 | id: "123432424" 18 | llm: fnd 19 | "Guilde3": 20 | id: "1107554583192031232" 21 | llm: sanne 22 | 23 | bot_settings: 24 | edmonbrain: 25 | stream: true 26 | fnd: {} 27 | edmonbrain_agent: 28 | agent: true 29 | sanne: {} 30 | jesper: {} 31 | -------------------------------------------------------------------------------- /config/users_config.yaml: -------------------------------------------------------------------------------- 1 | kind: userConfig 2 | apiVersion: v1 3 | user_groups: 4 | - name: "admin" 5 | domain: "sunholo.com" 6 | role: "ADMIN" 7 | tags: 8 | - "admin_user" 9 | 10 | - name: "eduvac" 11 | emails: 12 | - "multivac@sunholo.com" 13 | role: "eduvac" 14 | 
tags: 15 | - "eduvac" 16 | 17 | # Example of another firm using both domain and specific emails 18 | - name: "another_firm" 19 | domain: "anotherfirm.com" 20 | emails: 21 | - "specialcase@anotherfirm.com" 22 | role: "partner" 23 | tags: 24 | - "partner" 25 | 26 | default_user: 27 | role: "USER" 28 | tags: 29 | - "user" 30 | 31 | free_user: 32 | role: "USER-FREE" 33 | tags: 34 | - "user" 35 | - "free" 36 | -------------------------------------------------------------------------------- /docs/.env.example: -------------------------------------------------------------------------------- 1 | REACT_APP_MULTIVAC_API_KEY=your_api_key_here 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Any dependencies 12 | 13 | ``` 14 | $ yarn add @docusaurus/plugin-google-tag-manager 15 | ``` 16 | 17 | ### Local Development 18 | 19 | ``` 20 | $ yarn start 21 | ``` 22 | 23 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 
24 | 25 | ### Build 26 | 27 | ``` 28 | $ yarn build 29 | ``` 30 | 31 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 32 | 33 | ### Deployment 34 | 35 | Using SSH: 36 | 37 | ``` 38 | $ USE_SSH=true yarn deploy 39 | ``` 40 | 41 | Not using SSH: 42 | 43 | ``` 44 | $ GIT_USER= yarn deploy 45 | ``` 46 | 47 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 48 | -------------------------------------------------------------------------------- /docs/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /docs/blog/authors.yml: -------------------------------------------------------------------------------- 1 | me: 2 | name: Mark Edmondson 3 | title: Founder 4 | url: https://sunholo.com/ 5 | imageURL: https://code.markedmondson.me/images/gde_avatar.jpg 6 | socials: 7 | github: MarkEdmondson1234 8 | linkedin: https://www.linkedin.com/in/markpeteredmondson/ -------------------------------------------------------------------------------- /docs/blog/img/agent-e-autogen-setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/agent-e-autogen-setup.png -------------------------------------------------------------------------------- /docs/blog/img/agentic_patterns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/agentic_patterns.png 
-------------------------------------------------------------------------------- /docs/blog/img/alteria-cd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/alteria-cd.jpg -------------------------------------------------------------------------------- /docs/blog/img/baby-agi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/baby-agi.png -------------------------------------------------------------------------------- /docs/blog/img/bat-pie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/bat-pie.png -------------------------------------------------------------------------------- /docs/blog/img/bertha-cog-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/bertha-cog-design.png -------------------------------------------------------------------------------- /docs/blog/img/bertha-cog-design2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/bertha-cog-design2.png -------------------------------------------------------------------------------- /docs/blog/img/bertha.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/bertha.webp 
-------------------------------------------------------------------------------- /docs/blog/img/chase-sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/chase-sql.png -------------------------------------------------------------------------------- /docs/blog/img/cognitive-design.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/cognitive-design.webp -------------------------------------------------------------------------------- /docs/blog/img/dynamic-ui-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/dynamic-ui-banner.png -------------------------------------------------------------------------------- /docs/blog/img/langfuse-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/langfuse-demo.png -------------------------------------------------------------------------------- /docs/blog/img/mdx-prompt-langfuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/mdx-prompt-langfuse.png -------------------------------------------------------------------------------- /docs/blog/img/subconscious.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/blog/img/subconscious.png 
-------------------------------------------------------------------------------- /docs/docs/VACs/eduvac-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/VACs/eduvac-demo.png -------------------------------------------------------------------------------- /docs/docs/VACs/eduvac_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/VACs/eduvac_small.png -------------------------------------------------------------------------------- /docs/docs/VACs/index.mdx: -------------------------------------------------------------------------------- 1 | import IdealImage from '@theme/IdealImage'; 2 | import vacOverview from '@site/docs/img/vac-overview.png'; 3 | import vacDag from '@site/docs/img/vac-dag.png'; 4 | 5 | 6 | # Virtual Agent Computers (VAC) 7 | 8 | Virtual Agent Computers (VAC) is an abstraction that packages up GenAI applications so they can benefit from wider meta-services for those applications. 9 | 10 | 11 | 12 | Some examples are within this section and some free demo VACs are available at the [GitHub](https://github.com/sunholo-data/vacs-public) 13 | 14 | ## VAC benefits 15 | 16 | The abstraction provides a way to coordinate different GenAI applications together, whilst benefiting from shared resources such as databases, security and authentication services and user interfaces. 
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/docs/VACs/pirate_talk.md: -------------------------------------------------------------------------------- 1 | # Pirate Talk 2 | 3 | This VAC is a 'hello world' Langserve app that is taken from the official [pirate_talk Langserve template](https://templates.langchain.com/?integration_name=pirate-speak). 4 | 5 | It demonstrates how to deploy a Langserve application on Multivac, and the configuration needed. Its a good starter VAC to try first. 6 | 7 | ## Summary 8 | 9 | This VAC application translates your questions into pirate speak! Ohh arr. 10 | 11 | ![](vac-pirate-speak.png) 12 | 13 | ## Config yaml 14 | 15 | An explanation of the configuration is below: 16 | 17 | * `vac.pirate_speak` - this is the key that all other configurations are derived from, referred to as "vector_name" 18 | * `llm`: The configuration specifies an LLM model. You can swap this for any model supported by `sunholo` so that it can work with the `pick_llm()` function via `model = pick_llm("pirate_speak")`. 19 | * `agent`: Required to specify what type of agent this VAC is, which determines which Cloud Run or other runtime is queried via the endpoints 20 | * `display_name`: Used by end clients such as the webapp for the UI. 21 | * `avatar_url`: Used by end clients such as the webapp for the UI. 22 | * `description`: Used by end clients such as the webapp for the UI. 
23 | * `tags`: Used to specify which users are authorized to see this VAC, defined via `users_config.yaml` 24 | 25 | ```yaml 26 | kind: vacConfig 27 | apiVersion: v1 28 | vac: 29 | pirate_speak: 30 | llm: openai 31 | agent: langserve 32 | #agent_url: you can specify manually your URL endpoint here, or on Multivac it will be populated automatically 33 | display_name: Pirate Speak 34 | tags: ["free"] # for user access, matches users_config.yaml 35 | avatar_url: https://avatars.githubusercontent.com/u/126733545?s=48&v=4 36 | description: A Langserve demo using a demo [Langchain Template](https://templates.langchain.com/) that will repeat back what you say but in a pirate accent. Ooh argh me hearties! Langchain templates cover many different GenAI use cases and all can be streamed to Multivac clients. 37 | ``` 38 | -------------------------------------------------------------------------------- /docs/docs/VACs/vac-pirate-speak.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/VACs/vac-pirate-speak.png -------------------------------------------------------------------------------- /docs/docs/databases/alloydb.md: -------------------------------------------------------------------------------- 1 | # AlloyDB 2 | 3 | AlloyDB is a managed PostgreSQL compatible database with AI features such as `pgvector` and the ability to call Vertex AI endpoitns within its SQL. It also comes with indexes that are high performant over millions of rows. 4 | 5 | ## Configuration 6 | 7 | Below is an example of a `vacConfig` that uses AlloyDB as a vectorstore and docstore 8 | 9 | ```yaml 10 | ... 
11 | docstore: 12 | - alloydb-docstore: 13 | type: alloydb 14 | memory: 15 | - edmonbrain-vectorstore: 16 | vectorstore: alloydb 17 | k: 20 18 | alloydb_config: 19 | project_id: multivac-alloydb 20 | region: europe-west1 21 | cluster: multivac-alloydb-cluster 22 | instance: your-instance 23 | ``` -------------------------------------------------------------------------------- /docs/docs/databases/discovery_engine.md: -------------------------------------------------------------------------------- 1 | # Vertex AI Search / Discovery Engine 2 | 3 | This has had a few names etc. Vertex AI Search and Conversation but its this service: https://cloud.google.com/enterprise-search 4 | 5 | Not to be confused with [Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview) which is the new name for Matching Engine (confused yet?) 6 | 7 | ## Serverless chunking 8 | 9 | Regardless, it is a very low code way to create RAG apps. It takes care of the chunking and indexing meaning you can just send in your documents and use them within your GenAI apps smoothly. 10 | 11 | An example config is shown below: 12 | 13 | ```yaml 14 | kind: vacConfig 15 | apiVersion: v1 16 | vac: 17 | sample_vector: 18 | llm: vertex 19 | agent: edmonbrain 20 | memory: 21 | - discovery_engine_vertex_ai_search: 22 | vectorstore: vertex_ai_search # or discovery_engine 23 | ``` 24 | 25 | Since no chunking is necessary, documents are not indexed via the embedding service, but directly sent to the Vertex AI Search data store, with the same id as the VAC name e.g. sample_vector in above example. Make new Data Stores by creating new VACs. -------------------------------------------------------------------------------- /docs/docs/databases/index.md: -------------------------------------------------------------------------------- 1 | # Databases 2 | 3 | This section covers the various databases the `sunholo` library helps interact with to support your GenAI application VAC. 
-------------------------------------------------------------------------------- /docs/docs/databases/lancedb.md: -------------------------------------------------------------------------------- 1 | # LanceDB -------------------------------------------------------------------------------- /docs/docs/databases/postgres.md: -------------------------------------------------------------------------------- 1 | # PostgreSQL databases 2 | 3 | When connecting to a PostgreSQL database you can use them for several different GenAI services, such as embeddings, document stores, chat history or analytics. 4 | 5 | An alternative to running your own PostgreSQL database is to use the AlloyDB managed service on Google Cloud Platform. 6 | 7 | ## Embedding 8 | 9 | When connecting to a PostgreSQL database for vector embeddings, the `pgvector` extension needs to be installed within the database - refer to the documentation for the specific provider. 10 | 11 | When you have the requisite details, then you need a username, password, ip of the PostgreSQL database and the database used to be put into a connection string and set to the `PGVECTOR_CONNECTION_STRING` environment variable - an example is shown below: 12 | 13 | ```bash 14 | PGVECTOR_CONNECTION_STRING=postgresql://user:password@1.2.3.4:5432/database 15 | ``` 16 | 17 | To use within sunholo, you can use the [`pick_retriever()`](../sunholo/components/retriever) function to pull in the configuration dynamically according to the `vector_name` argument. 18 | 19 | A configuration can be set that will send embeddings after chunking to the database. 
20 | 21 | An example config is shown below: 22 | 23 | ```yaml 24 | kind: vacConfig 25 | apiVersion: v1 26 | vac: 27 | sample_vector: 28 | llm: openai 29 | agent: langserve 30 | memory: 31 | - azure_postgres: 32 | vectorstore: "postgres" 33 | ``` 34 | 35 | This will then return the retriever via 36 | 37 | ```python 38 | retriever = pick_retriever("sample_vector") 39 | ``` 40 | 41 | -------------------------------------------------------------------------------- /docs/docs/databases/supabase.md: -------------------------------------------------------------------------------- 1 | # Supabase 2 | 3 | [Supabase](https://supabase.com/) is a popular GenAI database that has many great GenAI features build in. 4 | 5 | ## Usage 6 | 7 | To start using Supabase, set your configuration to use it as a memory: 8 | 9 | ```yaml 10 | memory: 11 | - supabase-vectorstore: 12 | vectorstore: supabase 13 | ``` 14 | 15 | When you create your Supabse account, you will receive these values that need to be added as an environment variable: 16 | 17 | - SUPABASE_URL 18 | - SUPABASE_KEY 19 | 20 | Supabase also requires a `DB_CONNECTION_STRING` environment variable with the connection string to your deployed Supabase instance. 21 | This will look something like this: 22 | 23 | `postgres://postgres.@aws-0-eu-central-1.pooler.supabase.com:6543/postgres` 24 | 25 | ## Auto-creation of tables 26 | 27 | On first embed, if no table is specified the name of the `vector_name`, it will attempt to setup and create a vector store database, using the [SQL within this github folder](https://github.com/sunholo-data/sunholo-py/tree/main/sunholo/database/sql/sb). 
28 | 29 | -------------------------------------------------------------------------------- /docs/docs/howto/img/livekit-question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/howto/img/livekit-question.png -------------------------------------------------------------------------------- /docs/docs/howto/index.md: -------------------------------------------------------------------------------- 1 | # How To 2 | 3 | This section covers various use cases for the `sunholo` library. -------------------------------------------------------------------------------- /docs/docs/howto/streaming.md: -------------------------------------------------------------------------------- 1 | # Streaming 2 | -------------------------------------------------------------------------------- /docs/docs/img/browser_tool.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/browser_tool.gif -------------------------------------------------------------------------------- /docs/docs/img/browser_tool_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/browser_tool_files.png -------------------------------------------------------------------------------- /docs/docs/img/config-list.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/config-list.gif -------------------------------------------------------------------------------- /docs/docs/img/embed-edmonbrain.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/embed-edmonbrain.gif -------------------------------------------------------------------------------- /docs/docs/img/install.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/install.gif -------------------------------------------------------------------------------- /docs/docs/img/jan-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/jan-config.png -------------------------------------------------------------------------------- /docs/docs/img/jan-gemini-openai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/jan-gemini-openai.png -------------------------------------------------------------------------------- /docs/docs/img/jan-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/jan-image.png -------------------------------------------------------------------------------- /docs/docs/img/langfuse_eval_pubsub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/langfuse_eval_pubsub.png -------------------------------------------------------------------------------- /docs/docs/img/sunholo-vac-chat-headless.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/sunholo-vac-chat-headless.gif -------------------------------------------------------------------------------- /docs/docs/img/sunholo-vac-chat-with-files.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/sunholo-vac-chat-with-files.gif -------------------------------------------------------------------------------- /docs/docs/img/sunholo-vac-chat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/sunholo-vac-chat.gif -------------------------------------------------------------------------------- /docs/docs/img/vac-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/vac-dag.png -------------------------------------------------------------------------------- /docs/docs/img/vac-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/img/vac-overview.png -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | slug: / 4 | --- 5 | import AudioPlayer from '@site/src/components/audio'; 6 | 7 | # Introduction 8 | 9 | Welcome to the dev portal for the `sunholo` project, which is the open-source component for the Sunholo Multivac. 
10 | 11 | `sunholo` is a library of helpful utilities for deploying GenAI applications on the cloud. It includes various python modules and functions that have been needed to help develop GenAI applications called VACs (Virtual Agent Computers) on the Multivac system. Whilst its primary purpose is to enable Multivac applications, it may also be useful for more general GenAI applications, for instance if you are looking for ways to manage many GenAI configurations. 12 | 13 | 14 | 15 | 16 | > "Ever wish you could build your own AI?..." 17 | 18 | 19 | ## Skills needed 20 | 21 | To start using the package, a good background is: 22 | 23 | * Basic Python skills 24 | * Knowledge about GenAI models and components such as vectorstores 25 | * Can deploy a GenAI application, such as a Langchain Langserve template or a LlamaIndex app. 26 | * Familiar with cloud providers, in particular Google Cloud Platform 27 | 28 | If you have the above, then you should be able to get some value from the `sunholo` package. 
29 | 30 | ## Getting started 31 | 32 | `sunholo` is available on pip https://pypi.org/project/sunholo/ 33 | 34 | Minimal deps: 35 | 36 | ```sh 37 | pip install sunholo 38 | ``` 39 | 40 | All dependencies: 41 | 42 | ```sh 43 | pip install sunholo[all] 44 | ``` 45 | 46 | Sunholo [CLI](cli): 47 | 48 | ```sh 49 | pip install sunholo[cli] 50 | ``` 51 | 52 | [Databases](databases): 53 | 54 | ```sh 55 | pip install sunholo[database] 56 | ``` 57 | 58 | Google Cloud Platform: 59 | 60 | ```sh 61 | pip install sunholo[gcp] 62 | ``` 63 | 64 | OpenAI 65 | 66 | ```sh 67 | pip install sunholo[openai] 68 | ``` 69 | 70 | Anthropic 71 | 72 | ```sh 73 | pip install sunholo[anthropic] 74 | ``` 75 | 76 | HTTP tools 77 | 78 | ```sh 79 | pip install sunholo[http] 80 | ``` 81 | 82 | Chunking and embedding pipeline 83 | 84 | ```sh 85 | pip install sunholo[pipeline] 86 | ``` 87 | 88 | ## Tests via pytest 89 | 90 | If loading from GitHub, run tests: 91 | 92 | ```bash 93 | pip install pytest 94 | pip install . --use-feature=in-tree-build 95 | pytest tests 96 | ``` 97 | 98 | ## Legacy 99 | 100 | Sunholo is derived from the Edmonbrain project, the original blog post you can read here: https://code.markedmondson.me/running-llms-on-gcp/ and owes a lot to Langchain ( https://github.com/langchain-ai/langchain ) 101 | -------------------------------------------------------------------------------- /docs/docs/integrations/azure.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/integrations/azure.md -------------------------------------------------------------------------------- /docs/docs/integrations/cloudlogging.md: -------------------------------------------------------------------------------- 1 | Set env var `GOOGLE_CLOUD_LOGGING=1` to activate Google Cloud Logging. 
-------------------------------------------------------------------------------- /docs/docs/integrations/index.md: -------------------------------------------------------------------------------- 1 | # Integrations 2 | 3 | `sunholo` integrates with many other open source and cloud platforms. This section looks to include references to help integrate them with your own GenAI apps. 4 | 5 | See also the [Databases section](../databases/) for integrations with specific database types. 6 | -------------------------------------------------------------------------------- /docs/docs/integrations/jan.md: -------------------------------------------------------------------------------- 1 | # Jan.ai 2 | 3 | [Jan.ai](https://jan.ai/) is a desktop application that is similar to the OpenAI desktop application but you can use it with any local or online model. 4 | 5 | ## /openai/v1/chat/completions 6 | 7 | If you leave the `` blank, then the proxy will attempt to look in the config for the "model" name 8 | 9 | e.g. if calling `/openai/v1/chat/completions/` then in the config you will need a VAC called "gpt-4o" 10 | 11 | ```yaml 12 | ... 13 | gpt-4o: 14 | llm: openai 15 | model: gpt-4o 16 | agent: langserve 17 | ... 18 | ``` 19 | 20 | ### Custom VAC configuration 21 | 22 | Otherwise you can use `/openai/v1/chat/completions/` to tailor the request to the VAC. 23 | 24 | When proxying or using online, you can use this as the OpenAI endpoint configuration: 25 | 26 | ![Configuring JAN to use the local VAC URL](../img/jan-config.png) 27 | 28 | Below is an example of calling Gemini using VAC [`personal_llama`](../VACs/vertex-llamaindex) with Google Search. 
29 | 30 | ![calling a VAC via an OpenAI endpoint](../img/jan-gemini-openai.png) 31 | 32 | ### Image models 33 | 34 | If you specify a vision multimodal model, then the OpenAI request will be parsed to upload to Google Cloud Storage: 35 | 36 | ![specify a vision model and upload an image](../img/jan-image.png) -------------------------------------------------------------------------------- /docs/docs/integrations/langchain.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/integrations/langchain.md -------------------------------------------------------------------------------- /docs/docs/integrations/langfuse.md: -------------------------------------------------------------------------------- 1 | # Langfuse 2 | 3 | [Langfuse](https://langfuse.com/) is a GenAI analytics platform available via its own cloud, Multivac Cloud or self-hosted. 4 | 5 | Langfuse provides traces, evals, prompt management and metrics to debug and improve your LLM application. 
-------------------------------------------------------------------------------- /docs/docs/integrations/llamaindex.md: -------------------------------------------------------------------------------- 1 | # LlamaIndex on VertexAI 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/docs/integrations/ollama.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/integrations/ollama.md -------------------------------------------------------------------------------- /docs/docs/ui/api.md: -------------------------------------------------------------------------------- 1 | # API Access via Cloud Endpoints 2 | 3 | -------------------------------------------------------------------------------- /docs/docs/ui/chainlit.md: -------------------------------------------------------------------------------- 1 | # Web Application via Chainlit 2 | 3 | -------------------------------------------------------------------------------- /docs/docs/ui/cli.md: -------------------------------------------------------------------------------- 1 | # Terminal Access via CLI 2 | 3 | See the [CLI section](../cli.md) for more information about sunholo's terminal CLI. 
4 | -------------------------------------------------------------------------------- /docs/docs/ui/discord.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/ui/discord.md -------------------------------------------------------------------------------- /docs/docs/ui/gchat.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/ui/gchat.md -------------------------------------------------------------------------------- /docs/docs/ui/index.md: -------------------------------------------------------------------------------- 1 | # User Interfaces 2 | 3 | A focus for Multivac is providing flexible user interfaces for GenAI applications. So far this includes: 4 | 5 | * A Web Application using Chainlit found at https://multivac.sunholo.com 6 | * API access to underlying VAC microservices when you have been issued a `MULTIVAC_API_KEY` 7 | * Terminal Command Line Interface (CLI) via the `sunholo[cli]` extension 8 | * Chat bot interfaces such as Discord, GChat and Teams 9 | * Streaming audio/video via LiveKit integrations 10 | * Desktop client applications via tools such as https://jan.ai 11 | * Any OpenAPI compatible tool will work with a VAC API call - even if you are calling a non-OpenAI model. 12 | 13 | An individual VAC could also create its own UI, since its being served via a HTTP container. 14 | 15 | Get in touch if you would like to see other ways to interact with GenAI! 
16 | -------------------------------------------------------------------------------- /docs/docs/ui/jan.md: -------------------------------------------------------------------------------- 1 | # Desktop application via Jan.ai 2 | 3 | -------------------------------------------------------------------------------- /docs/docs/ui/livekit.md: -------------------------------------------------------------------------------- 1 | # Streaming Audio/Video via LiveKit 2 | 3 | -------------------------------------------------------------------------------- /docs/docs/ui/openai.md: -------------------------------------------------------------------------------- 1 | # OpenAI API Compatibility 2 | 3 | Any application that works with the OpenAI API can work with a sunholo VAC, which provides compatible API endpoints. 4 | -------------------------------------------------------------------------------- /docs/docs/ui/slack.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/docs/ui/slack.md -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "docs", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@dagrejs/dagre": "^1.1.4", 18 | "@docusaurus/core": "3.5.2", 19 | "@docusaurus/plugin-content-blog": "3.5.2", 20 | "@docusaurus/plugin-google-tag-manager": "^3.5.2", 21 | "@docusaurus/plugin-ideal-image": 
"^3.5.2", 22 | "@docusaurus/preset-classic": "^3.5.2", 23 | "@emotion/react": "^11.11.4", 24 | "@emotion/styled": "^11.11.5", 25 | "@fortawesome/fontawesome-svg-core": "^6.6.0", 26 | "@fortawesome/free-solid-svg-icons": "^6.6.0", 27 | "@fortawesome/react-fontawesome": "^0.2.2", 28 | "@mdx-js/react": "^3.0.0", 29 | "@mui/icons-material": "^5.15.16", 30 | "@mui/material": "^5.15.16", 31 | "@xyflow/react": "^12.3.2", 32 | "clsx": "^2.0.0", 33 | "dotenv": "^16.4.5", 34 | "http-proxy-middleware": "^3.0.3", 35 | "plotly.js": "^2.35.2", 36 | "prism-react-renderer": "^2.3.0", 37 | "react": "^18.0.0", 38 | "react-dom": "^18.0.0", 39 | "react-jsx-parser": "^2.2.0", 40 | "react-plotly.js": "^2.6.0" 41 | }, 42 | "devDependencies": { 43 | "@docusaurus/module-type-aliases": "3.2.1", 44 | "@docusaurus/types": "3.2.1" 45 | }, 46 | "browserslist": { 47 | "production": [ 48 | ">0.5%", 49 | "not dead", 50 | "not op_mini all" 51 | ], 52 | "development": [ 53 | "last 3 chrome version", 54 | "last 3 firefox version", 55 | "last 5 safari version" 56 | ] 57 | }, 58 | "engines": { 59 | "node": ">=18.0" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /docs/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 
10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | // But you can create a sidebar manually 20 | /* 21 | tutorialSidebar: [ 22 | 'intro', 23 | 'hello', 24 | { 25 | type: 'category', 26 | label: 'Tutorial', 27 | items: ['tutorial-basics/create-a-document'], 28 | }, 29 | ], 30 | */ 31 | }; 32 | 33 | export default sidebars; 34 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/index.js: -------------------------------------------------------------------------------- 1 | import clsx from 'clsx'; 2 | import Heading from '@theme/Heading'; 3 | import styles from './styles.module.css'; 4 | import IdealImage from '@theme/IdealImage'; // Import IdealImage component 5 | import multivacDeployments from '@site/static/img/multivac-deployments.png'; // Use ES module import 6 | import multivacVenn from '@site/static/img/multivac-venn.png'; 7 | import multivacArch from '@site/static/img/multivac-arch.png'; 8 | 9 | const FeatureList = [ 10 | { 11 | title: 'GenAI Experimentation', 12 | Img: multivacDeployments, 13 | description: ( 14 | <> 15 | Update GenAI service dependencies via a config file. 16 | Launch new configurations in minutes, leveraging common resources such 17 | as VPC, IAM, analytics, prompt libraries, model evals and database instances. 18 | 19 | ), 20 | }, 21 | { 22 | title: 'GenAI in the Cloud', 23 | Img: multivacVenn, 24 | description: ( 25 | <> 26 | The Sunholo Multivac system offers an abstraction between your GenAI application 27 | and the Cloud. Deploy applications running Langchain/LlamaIndex or your 28 | custom code to cloud services such as vectorstores and serverless compute. 
29 | 30 | ), 31 | }, 32 | { 33 | title: 'Flexible and Scalable', 34 | Img: multivacArch, 35 | description: ( 36 | <> 37 | Develop Locally and deploy Globally by publishing to the Multivac SaaS, or your own Cloud PaaS. 38 | Event based serverless backend allows flexiblity to use bundled UIs such as webapps or chatbots, or 39 | hook into APIs to create your own user experience. 40 | 41 | ), 42 | }, 43 | ]; 44 | 45 | function Feature({Img, title, description}) { 46 | return ( 47 |
48 |
49 | 50 |
51 |
52 | {title} 53 |

{description}

54 |
55 |
56 | ); 57 | } 58 | 59 | export default function HomepageFeatures() { 60 | return ( 61 |
62 |
63 |
64 | {FeatureList.map((props, idx) => ( 65 | 66 | ))} 67 |
68 |
69 |
70 | ); 71 | } 72 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 2rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | height: 200px; 10 | width: 200px; 11 | } 12 | -------------------------------------------------------------------------------- /docs/src/components/audio.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import '@site/src/css/custom.css'; 3 | import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; 4 | import { faPodcast } from '@fortawesome/free-solid-svg-icons'; 5 | 6 | 7 | const AudioPlayer = ({ src }) => { 8 | return ( 9 |
10 |
11 | 12 |

13 | Listen to a NotebookLM generated podcast about this blogpost: 14 |

15 |
16 | 20 |

21 | 22 | Alternatively, listen to the audio file directly 23 | 24 |

25 |
26 | ); 27 | }; 28 | 29 | export default AudioPlayer; -------------------------------------------------------------------------------- /docs/src/components/mdxComponents.js: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | 3 | export const Highlight = ({ children, color }) => ( 4 | 12 | {children} 13 | 14 | ); 15 | 16 | const CustomPlot = ({ data, layout }) => { 17 | const [Plot, setPlot] = useState(null); 18 | 19 | // Dynamically import `react-plotly.js` on the client side 20 | useEffect(() => { 21 | let isMounted = true; 22 | import('react-plotly.js').then((module) => { 23 | if (isMounted) { 24 | setPlot(() => module.default); 25 | } 26 | }); 27 | 28 | return () => { 29 | isMounted = false; // Cleanup to prevent memory leaks 30 | }; 31 | }, []); 32 | 33 | if (!Plot) { 34 | return
Loading Plot...
; // Show a loading state while Plotly is being imported 35 | } 36 | 37 | return ( 38 | 48 | ); 49 | }; 50 | 51 | export default CustomPlot; 52 | -------------------------------------------------------------------------------- /docs/src/pages/index.js: -------------------------------------------------------------------------------- 1 | import clsx from 'clsx'; 2 | import Link from '@docusaurus/Link'; 3 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 4 | import Layout from '@theme/Layout'; 5 | import HomepageFeatures from '@site/src/components/HomepageFeatures'; 6 | 7 | import Heading from '@theme/Heading'; 8 | import styles from './index.module.css'; 9 | 10 | function HomepageHeader() { 11 | const {siteConfig} = useDocusaurusContext(); 12 | return ( 13 |
14 |
15 | 16 | {siteConfig.title} 17 | 18 |

{siteConfig.tagline}

19 |
20 | 23 | Get Started 24 | 25 |
26 |
27 |
28 | ); 29 | } 30 | 31 | export default function Home() { 32 | const {siteConfig} = useDocusaurusContext(); 33 | return ( 34 | 37 | 38 |
39 | 40 |
41 |
42 | ); 43 | } 44 | -------------------------------------------------------------------------------- /docs/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /** 2 | * CSS files with the .module.css suffix will be treated as CSS modules 3 | * and scoped locally. 4 | */ 5 | 6 | .heroBanner { 7 | padding: 4rem 0; 8 | text-align: center; 9 | position: relative; 10 | overflow: hidden; 11 | } 12 | 13 | @media screen and (max-width: 996px) { 14 | .heroBanner { 15 | padding: 2rem; 16 | } 17 | } 18 | 19 | .buttons { 20 | display: flex; 21 | align-items: center; 22 | justify-content: center; 23 | } 24 | -------------------------------------------------------------------------------- /docs/src/pages/markdown-page.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Markdown page example 3 | --- 4 | 5 | # Markdown page example 6 | 7 | You don't need React to write simple standalone pages. 
8 | -------------------------------------------------------------------------------- /docs/src/plugins/proxy.js: -------------------------------------------------------------------------------- 1 | const { createProxyMiddleware } = require('http-proxy-middleware'); 2 | 3 | module.exports = function (context, options) { 4 | return { 5 | name: 'docusaurus-proxy-plugin', 6 | configureWebpack(config, isServer) { 7 | return { 8 | devServer: { 9 | onBeforeSetupMiddleware(devServer) { 10 | devServer.app.use( 11 | '/api', 12 | createProxyMiddleware({ 13 | target: 'https://multivac-api.sunholo.com', 14 | changeOrigin: true, 15 | pathRewrite: { '^/api': '' }, 16 | }) 17 | ); 18 | }, 19 | }, 20 | }; 21 | }, 22 | }; 23 | }; -------------------------------------------------------------------------------- /docs/src/theme/SitemarkIcon.js: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import SvgIcon from '@mui/material/SvgIcon'; 3 | 4 | export default function SitemarkIcon() { 5 | return ( 6 | Sunholo Multivac 11 | ); 12 | } 13 | -------------------------------------------------------------------------------- /docs/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/.nojekyll -------------------------------------------------------------------------------- /docs/static/CNAME: -------------------------------------------------------------------------------- 1 | dev.sunholo.com 2 | -------------------------------------------------------------------------------- /docs/static/img/eclipse1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/eclipse1.png 
-------------------------------------------------------------------------------- /docs/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/static/img/logo-warm-colors-final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/logo-warm-colors-final.png -------------------------------------------------------------------------------- /docs/static/img/multivac-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/multivac-arch.png -------------------------------------------------------------------------------- /docs/static/img/multivac-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/multivac-demo.png -------------------------------------------------------------------------------- /docs/static/img/multivac-deployments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/multivac-deployments.png -------------------------------------------------------------------------------- /docs/static/img/multivac-venn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/docs/static/img/multivac-venn.png -------------------------------------------------------------------------------- /docs/tapes/config-list.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Set Height 800 3 | Output docs/docs/img/config-list.gif 4 | Sleep 2.5s 5 | Type "sunholo list-configs -h" 6 | Enter 7 | Sleep 5s 8 | Type "sunholo list-configs" 9 | Enter 10 | Sleep 5s 11 | Type "sunholo list-configs --kind 'vacConfig'" 12 | Enter 13 | Sleep 5s 14 | Type "sunholo list-configs --kind=vacConfig --vac=edmonbrain" 15 | Enter 16 | Sleep 5s 17 | Type "sunholo list-configs --kind=vacConfig --vac=edmonbrain --validate" 18 | Enter 19 | Sleep 5s 20 | 21 | -------------------------------------------------------------------------------- /docs/tapes/embed-demo.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Sleep 1s 3 | Output docs/docs/img/embed-edmonbrain.gif 4 | Type "sunholo vac chat edmonbrain" 5 | Enter 6 | Sleep 5s 7 | Type "!saveurl https://a16z.com/how-generative-ai-is-remaking-ui-ux-design/" 8 | Enter 9 | Sleep 3.5s 10 | -------------------------------------------------------------------------------- /docs/tapes/install.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Set Height 1000 3 | Output install.gif 4 | Sleep 2.5s 5 | Type 'pip install sunholo"[' 6 | Sleep 500ms 7 | Type "cli" 8 | Sleep 500ms 9 | Type ']"' 10 | Enter 11 | Sleep 3s 12 | Type "sunholo --help" 13 | Enter 14 | Sleep 10s 15 | 16 | -------------------------------------------------------------------------------- /docs/tapes/sunholo-vac-chat-headless.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Set Height 1000 3 | Output sunholo-vac-chat-headless.gif 4 | Sleep 
1.5s 5 | Type "sunholo vac chat multivac_docs 'What is Sunholo Multivac?' --headless" 6 | Enter 7 | Sleep 10s 8 | -------------------------------------------------------------------------------- /docs/tapes/sunholo-vac-chat-with-files.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Set Height 1000 3 | Output sunholo-vac-chat-with-files.gif 4 | Sleep 1.5s 5 | Type "sunholo vac chat multivac_docs" 6 | Enter 7 | Sleep 3s 8 | Type "!ls" 9 | Enter 10 | Sleep 3s 11 | Type "!tree" 12 | Enter 13 | Sleep 3s 14 | Type "!read reactapp" 15 | Enter 16 | Sleep 3s 17 | Type "can you summarise what this react app does and provide an improvement to App.js" 18 | Enter 19 | Sleep 5s 20 | Type "exit" 21 | Enter 22 | Sleep 2s 23 | 24 | -------------------------------------------------------------------------------- /docs/tapes/sunholo-vac-chat.tape: -------------------------------------------------------------------------------- 1 | Set Shell zsh 2 | Set Height 1000 3 | Output sunholo-vac-chat.gif 4 | Sleep 1.5s 5 | Type "sunholo vac chat multivac_docs" 6 | Enter 7 | Sleep 3s 8 | Type "What is Sunholo Multivac?" 9 | Enter 10 | Sleep 7s 11 | Type "exit" 12 | Enter 13 | Sleep 5s 14 | 15 | -------------------------------------------------------------------------------- /ellipsis.yaml: -------------------------------------------------------------------------------- 1 | version: 1.3 2 | 3 | about: 4 | - "This is a codebase for a code generation tool called Ellipsis. It can review GitHub pull requests, answer questions about code, and even generate bug fixes!" 5 | - "We're migrating away from using AWS CDK. Our infrastructure is no longer part of this repository." 6 | 7 | build: 8 | file: "ellipsis.Dockerfile" 9 | commands: 10 | - name: "lint_fix" 11 | description: "Lints the code in fix mode, which will fix some errors, format some files, and throw and error when there are violations." 
12 | command: "./scripts/lint.sh" 13 | - name: "unit_tests" 14 | description: "Runs the unit tests." 15 | command: ./scripts/unit_test.sh 16 | 17 | pr_review: 18 | confidence_threshold: 0.7 19 | rules: 20 | - "Code should be DRY (Don't Repeat Yourself)" 21 | - "There should be no secrets or credentials in the code" 22 | - "Extremely Complicated Code Needs Comments" 23 | - "Use Descriptive Variable and Constant Names" 24 | - "API routes must have error handling, they shouldn't intentionally return a HTTP 500" 25 | - "Use retries when calling external API services" 26 | - "Don't log sensitive data" 27 | - "Follow the Single Responsibility Principle" 28 | - "Function and Method Naming Should Follow Consistent Patterns" -------------------------------------------------------------------------------- /src/sunholo/__init__.py: -------------------------------------------------------------------------------- 1 | from . import agents 2 | from . import archive 3 | from . import auth 4 | from . import bots 5 | from . import chunker 6 | from . import cli 7 | from . import components 8 | from . import database 9 | from . import discovery_engine 10 | from . import embedder 11 | from . import excel 12 | from . import gcs 13 | from . import genai 14 | from . import invoke 15 | from . import langfuse 16 | from . import llamaindex 17 | from . import lookup 18 | from . import mcp 19 | from . import ollama 20 | from . import pubsub 21 | from . import qna 22 | from . import senses 23 | from . import streaming 24 | from . import terraform 25 | from . import tools 26 | from . import utils 27 | from . 
import vertex 28 | import logging 29 | 30 | 31 | __all__ = ['agents', 32 | 'archive', 33 | 'auth', 34 | 'bots', 35 | 'chunker', 36 | 'cli', 37 | 'components', 38 | 'database', 39 | 'discovery_engine', 40 | 'embedder', 41 | 'excel', 42 | 'gcs', 43 | 'genai', 44 | 'invoke', 45 | 'langfuse', 46 | 'llamaindex', 47 | 'lookup', 48 | 'mcp', 49 | 'ollama', 50 | 'pubsub', 51 | 'qna', 52 | 'senses', 53 | 'streaming', 54 | 'terraform', 55 | 'tools', 56 | 'utils', 57 | 'vertex', 58 | 'logging'] 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/sunholo/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat_history import extract_chat_history 2 | from .dispatch_to_qa import send_to_qa, send_to_qa_async 3 | from .pubsub import process_pubsub 4 | from .special_commands import handle_special_commands, app_to_store, handle_files 5 | from .flask import create_app, VACRoutes 6 | from .fastapi import register_qna_fastapi_routes, create_fastapi_app 7 | from .swagger import config_to_swagger 8 | -------------------------------------------------------------------------------- /src/sunholo/agents/fastapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .qna_routes import register_qna_fastapi_routes 2 | from .base import create_fastapi_app -------------------------------------------------------------------------------- /src/sunholo/agents/fastapi/base.py: -------------------------------------------------------------------------------- 1 | try: 2 | import socketio 3 | except ImportError: 4 | socketio = None 5 | 6 | try: 7 | from fastapi import FastAPI, Request 8 | from fastapi.responses import HTMLResponse 9 | from fastapi.middleware.cors import CORSMiddleware 10 | from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html 11 | except ImportError: 12 | FastAPI = None 13 | 14 | from ...custom_logging import log 15 | 16 
def create_fastapi_app(origins=None,
                       origin_regex=r"https://(.*\.)?sunholo\.com"):
    """Creates and configures a FastAPI app for GenAI with Socket.IO integration.

    Args:
        origins: List of allowed CORS origins. Defaults to ``["*"]`` (all origins).
        origin_regex: Regex matched against request origins for CORS approval.

    Returns:
        FastAPI: The configured FastAPI app instance (with the Socket.IO ASGI
        app mounted at ``/ws/socket.io/``).

    Raises:
        ImportError: If ``python-socketio`` or ``fastapi`` are not installed.
    """
    if not socketio:
        raise ImportError("socketio is not available, please install via `pip install fastapi-socketio`")

    if not FastAPI:
        raise ImportError("FastAPI is not available, please install via `pip install fastapi`")

    # BUG FIX: avoid a mutable default argument shared across calls
    if origins is None:
        origins = ["*"]

    # Create Socket.IO server and mount it on the FastAPI app
    sio = socketio.AsyncServer(async_mode='asgi')
    app = FastAPI()
    socket_app = socketio.ASGIApp(sio, other_asgi_app=app)
    app.mount("/ws/socket.io/", socket_app)

    # CORS Configuration
    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_origin_regex=origin_regex,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Handle Socket.IO events, e.g., a connection
    @sio.event
    async def connect(sid, environ):
        # NOTE(review): custom log signature - stdlib logging would need a
        # placeholder for sid; confirm ..custom_logging accepts extra args.
        log.info("Socket.IO client connected", sid)

    # Homepage Route - simple debug page to confirm the service is running
    @app.get("/", response_class=HTMLResponse)
    async def homepage(request: Request):
        return """
        <html>
          <head><title>Multivac Service Homepage</title></head>
          <body>
            <h1>Welcome to Multivac</h1>
            <p>This is a debug homepage to confirm the service is up and running.</p>
          </body>
        </html>
        """

    # BUG FIX: the two documentation handlers were both named
    # `get_documentation`, shadowing each other and producing duplicate
    # operation ids; give each route a distinct function name.
    @app.get("/docs")
    async def get_swagger_documentation():
        """Endpoint to serve Swagger UI for API documentation"""
        return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")

    @app.get("/redoc")
    async def get_redoc_documentation():
        """Endpoint to serve ReDoc for API documentation"""
        return get_redoc_html(openapi_url="/openapi.json", title="redoc")

    return app
from ...custom_logging import log
import time

def create_app(name):
    """Build a Flask app pre-wired with request-timing instrumentation.

    Each request is stamped with a start time; after the response is built,
    the elapsed time is logged (with extra detail for ``/vac/`` routes) and
    echoed back to the client in an ``X-Response-Time`` header.

    Args:
        name: Passed through as the Flask application name.

    Returns:
        flask.Flask: The configured application instance.
    """
    from flask import Flask, request

    app = Flask(name)

    # Let HTTP exceptions propagate so upstream handlers/loggers see them
    app.config['TRAP_HTTP_EXCEPTIONS'] = True
    app.config['PROPAGATE_EXCEPTIONS'] = True

    @app.before_request
    def _stamp_request_start():
        request.start_time = time.time()

    @app.after_request
    def _report_timing(response):
        if not hasattr(request, 'start_time'):
            return response

        duration = time.time() - request.start_time

        # Log all VAC requests with different detail levels
        if request.path.startswith('/vac/streaming/'):
            log.info(f"🚀 STREAMING: {duration:.3f}s - {request.path}")
        elif request.path.startswith('/vac/'):
            log.info(f"⚡ VAC: {duration:.3f}s - {request.path}")
        elif duration > 1.0:  # any other slow request
            log.warning(f"🐌 SLOW REQUEST: {duration:.3f}s - {request.path}")

        # Expose the timing to clients for debugging
        response.headers['X-Response-Time'] = f"{duration:.3f}s"

        return response

    return app
from ..custom_logging import log


import base64
import json

def process_pubsub(data):
    """Decode a Pub/Sub push envelope into a Python object.

    Args:
        data: The Pub/Sub envelope, e.g. ``{"message": {"data": <base64>, ...}}``.

    Returns:
        The JSON-decoded message body when it parses as JSON, the raw decoded
        string when it does not, or ``''`` when the decoded body is empty.
    """
    log.debug(f'process_pubsub: {data}')
    message_data = base64.b64decode(data['message']['data']).decode('utf-8')
    message_id = data['message'].get('messageId')
    publish_time = data['message'].get('publishTime')

    log.debug(f"This Function was triggered by messageId {message_id} published at {publish_time}")
    # DANGER: logging the full message data can re-trigger this function recursively
    #log.info(f"bot_help.process_pubsub message data: {message_data}")

    try:
        message_data = json.loads(message_data)
    # BUG FIX: was a bare `except:` which swallowed every exception;
    # only a JSON parse failure means "not JSON, return the raw string"
    except json.JSONDecodeError:
        log.debug("Its not a json")

    if message_data:
        return message_data

    log.info("message_data was empty")
    return ''
from ..custom_logging import log
from ..utils import load_config, ConfigManager

def read_cloud_run_url(agent, cloud_run_urls_file='config/cloud_run_urls.json'):
    """Look up the Cloud Run base URL registered for an agent.

    Args:
        agent: Key into the Cloud Run URL mapping file.
        cloud_run_urls_file: Path to the JSON mapping of agent -> URL.

    Returns:
        str: The URL registered for the agent.

    Raises:
        ValueError: If the agent has no entry in the mapping file.
    """
    agent_route, _ = load_config(cloud_run_urls_file)
    log.info(f'agent_route: {agent_route}')

    try:
        agent_url = agent_route[agent]
    except KeyError:
        raise ValueError(f'agent_url not found for {agent}')

    log.info(f'agent_url: {agent_url}')

    return agent_url

def route_vac(vector_name: str=None, config=None) -> str :
    """
    Considers what VAC this vector_name belongs to.

    Prefers an explicit `agent_url` in the VAC config, falling back to the
    Cloud Run URL registered for the VAC's agent.

    Raises:
        ValueError: If neither vector_name nor config is supplied.
    """
    if not vector_name and not config:
        raise ValueError("Must provide config or vector_name argument")

    if not config:
        config = ConfigManager(vector_name)

    agent_url = config.vacConfig('agent_url')
    if agent_url:
        log.info('agent_url found in llm_config.yaml')
        return agent_url

    agent = config.vacConfig('agent')

    return read_cloud_run_url(agent)

def route_endpoint(vector_name=None, method = 'post', override_endpoint=None, config=None):
    """Build the concrete endpoint URLs for a VAC and HTTP method.

    Args:
        vector_name: Name of the VAC (optional when config is given).
        method: 'post' or 'get' section of the agent configuration.
        override_endpoint: Use this URL stem instead of routing the VAC.
        config: Pre-built ConfigManager (optional when vector_name is given).

    Returns:
        dict: Mapping of endpoint name -> fully formatted URL.

    Raises:
        ValueError: When both vector_name and config are None, or when the
            method is missing from the agent configuration.
    """
    if vector_name is None and config is None:
        raise ValueError('vector_name and config can not both be None')

    if config:
        vector_name = config.vector_name

    if not config:
        config = ConfigManager(vector_name)

    agent_type = config.vacConfig('agent_type')
    if not agent_type:
        agent_type = config.vacConfig('agent')

    stem = route_vac(config=config) if not override_endpoint else override_endpoint

    agents_config = config.agentConfig(agent_type)

    log.debug(f"agents_config: {agents_config}")
    if method not in agents_config:
        raise ValueError(f"Invalid method '{method}' for agent configuration.")

    # 'post' or 'get'
    endpoints_config = agents_config[method]

    log.debug(f"endpoints_config: {endpoints_config}")
    # Replace placeholders in each endpoint template
    endpoints = {}
    for key, value in endpoints_config.items():
        format_args = {'stem': stem}

        # BUG FIX: the type check now runs BEFORE the substring test (which
        # raised TypeError for non-strings), and the warning is an f-string
        # (it previously logged the literal placeholder text).
        if not isinstance(value, str):
            log.warning(f'endpoint value not string? format_args: {format_args} - value: {value} - key: {key}')
            continue

        if '{vector_name}' in value and vector_name is not None:
            format_args['vector_name'] = vector_name

        endpoints[key] = value.format(**format_args)

    return endpoints
from ..pubsub import PubSubManager
from ..custom_logging import log



import datetime


def archive_qa(bot_output, vector_name):
    """Best-effort publish of a Q&A exchange to the BigQuery archive topic.

    Failures are logged as warnings and never raised to the caller.
    """
    try:
        publisher = PubSubManager(vector_name, pubsub_topic="qna-to-pubsub-bq-archive")
        payload = {
            "bot_output": bot_output,
            "vector_name": vector_name,
            "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }
        publisher.publish_message(payload)
    except Exception as e:
        log.warning(f"Could not publish message for {vector_name} to qna-to-pubsub-bq-archive - {str(e)}")

from .run import get_header, get_cloud_run_token
from .gcloud import get_local_gcloud_token

import subprocess

def get_local_gcloud_token():
    """Return a Google identity token from the locally configured gcloud CLI."""
    completed = subprocess.run(
        ["gcloud", "auth", "print-identity-token"],
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout.strip().decode()

# needs to be in minimal to check gcp
import os

import google.auth
from google.auth.transport import requests
from ..utils.gcp import is_running_on_gcp
from ..custom_logging import log

def get_default_email():
    """Resolve the service-account email used for signing GCS URLs.

    Returns:
        tuple: (service_account_email, credentials), or (None, None) when
        credentials could not be refreshed or no email could be determined.
    """
    # https://stackoverflow.com/questions/64234214/how-to-generate-a-blob-signed-url-in-google-cloud-run
    gcs_credentials, project_id = refresh_credentials()

    if gcs_credentials is None:
        log.error("Could not refresh the credentials properly.")
        return None, None

    # Service-account credentials embed their email; otherwise fall back to env
    service_account_email = getattr(gcs_credentials, 'service_account_email', None)
    if not service_account_email:
        service_account_email = os.getenv('GCS_MAIL_USER')

    if not service_account_email:
        log.error("Could not create the credentials for signed requests - no credentials.service_account_email or GCS_MAIL_USER with roles/iam.serviceAccountTokenCreator")

        return None, None

    log.info(f"Found default email: {service_account_email=} for {project_id=}")
    return service_account_email, gcs_credentials

def get_default_creds():
    """Return application-default credentials and the active project id."""
    return google.auth.default()

def refresh_credentials():
    """
    Need to refresh to get a valid email/token for signing URLs from a default service account
    """
    if not is_running_on_gcp():
        log.debug("Not running on Google Cloud so no credentials available for GCS.")
        return None, None

    gcs_credentials, project_id = get_default_creds()

    needs_refresh = (not gcs_credentials.token
                     or gcs_credentials.expired
                     or not gcs_credentials.valid)
    if needs_refresh:
        try:
            gcs_credentials.refresh(requests.Request())
            return gcs_credentials, project_id
        except Exception as e:
            log.error(f"Failed to refresh gcs credentials: {e}")

    # NOTE(review): credentials that are already valid (no refresh needed)
    # also fall through to (None, None) here - preserved as-is, but confirm
    # this is intentional for the signing flow.
    return None, None
# from https://github.com/sunholo-data/genai-databases-retrieval-app/blob/main/langchain_tools_demo/agent.py
import inspect

from typing import Dict, Optional
from ..utils.config import load_config
from ..utils import ConfigManager
from ..utils.gcp import is_running_on_cloudrun
from ..utils.api_key import has_multivac_api_key, get_multivac_api_key
from ..custom_logging import log
from ..agents.route import route_vac
from .gcloud import get_local_gcloud_token

def get_run_url(config):
    """Resolve the Cloud Run URL for the agent named in a VAC config.

    Args:
        config: A ConfigManager for the VAC.

    Returns:
        str: The Cloud Run URL for the config's agent.

    Raises:
        ValueError: If config is missing or the agent has no URL entry.
    """
    if not config:
        raise ValueError('Vector name was not specified')

    # NOTE(review): the result of route_vac() was previously assigned and
    # immediately overwritten by load_config() below. The call is kept for
    # its validation/logging side effects, but it looks like it was meant to
    # be the primary URL source - confirm with the author.
    route_vac(config=config)

    cloud_urls, _ = load_config('config/cloud_run_urls.json')
    agent = config.vacConfig("agent")

    try:
        log.info(f'Looking up URL for {agent}')
        url = cloud_urls[agent]
        return url
    except KeyError:
        raise ValueError(f'Could not find cloud_run_url for {agent} within {cloud_urls}')

def get_id_token(url: str) -> str:
    """Helper method to generate ID tokens for authenticated requests"""
    # Use Application Default Credentials on Cloud Run
    if is_running_on_cloudrun():
        import google.auth.transport.requests  # type: ignore
        import google.oauth2.id_token  # type: ignore
        auth_req = google.auth.transport.requests.Request()
        log.info(f'Got id_token for {url}')

        return google.oauth2.id_token.fetch_id_token(auth_req, url)

    # Outside Cloud Run, fall back to the local gcloud CLI
    return get_local_gcloud_token()

def get_cloud_run_token(vector_name):
    """Return credentials for calling a VAC's Cloud Run service.

    Returns:
        Either a ``{"x-api-key": ...}`` header dict when a Multivac API key
        is configured, or a bearer ID-token string for the service URL.
    """
    if has_multivac_api_key():

        return {"x-api-key": get_multivac_api_key()}

    config = ConfigManager(vector_name)
    run_url = get_run_url(config)

    id_token = get_id_token(run_url)
    return id_token

def get_header(vector_name) -> Optional[dict]:
    """Build the auth header(s) for requests to a VAC service."""
    token = get_cloud_run_token(vector_name)

    # BUG FIX: when a Multivac API key is configured, get_cloud_run_token
    # returns a ready-made header dict; previously it was stringified into
    # a meaningless "Authorization: Bearer {...}" header.
    if isinstance(token, dict):
        return token

    return {"Authorization": f"Bearer {token}"}
you are running within Azure Container Apps or similar) 13 | 14 | If default credentials are not available, will attempt to authenticate via env vars - set up via: 15 | 16 | ```bash 17 | az ad sp create-for-rbac --name "myApp" --role contributor \ 18 | --scopes /subscriptions/{subscription-id}/resourceGroups/{resource-group} \ 19 | --sdk-auth 20 | 21 | export AZURE_CLIENT_ID="your-client-id" 22 | export AZURE_CLIENT_SECRET="your-client-secret" 23 | export AZURE_TENANT_ID="your-tenant-id" 24 | ``` 25 | 26 | """ 27 | if DefaultAzureCredential is None: 28 | raise ImportError("Azure identity credentials library needed - install via `pip install sunholo[azure]`") 29 | 30 | # Use DefaultAzureCredential to authenticate 31 | try: 32 | credential = DefaultAzureCredential() 33 | return credential 34 | 35 | except Exception as e: 36 | log.error(f"Failed to authenticate with default credentials: {str(e)}") 37 | log.info("Attempting to authenticate using ClientSecretCredential") 38 | 39 | # Use ClientSecretCredential to authenticate with a service principal 40 | client_id = os.getenv("AZURE_CLIENT_ID") 41 | client_secret = os.getenv("AZURE_CLIENT_SECRET") 42 | tenant_id = os.getenv("AZURE_TENANT_ID") 43 | 44 | if not client_id or not client_secret or not tenant_id: 45 | log.error("Service principal credentials are not set in environment variables") 46 | return None 47 | 48 | if ClientSecretCredential is None: 49 | raise ImportError("Azure identity credentials library needed - install via `pip install sunholo[azure]`") 50 | 51 | try: 52 | credential = ClientSecretCredential( 53 | client_id=client_id, 54 | client_secret=client_secret, 55 | tenant_id=tenant_id 56 | ) 57 | return credential 58 | except Exception as e: 59 | log.error(f"Failed to authenticate with service principal: {str(e)}") 60 | return None 61 | 62 | -------------------------------------------------------------------------------- /src/sunholo/azure/blobs.py: 
import re
from ..custom_logging import log

# Single, precompiled source of truth for the Azure blob URL shape:
# captures (account, container, blob); was duplicated in both functions.
_AZURE_BLOB_URL_PATTERN = re.compile(r"https://(.*).blob.core.windows.net/(.*)/(.*)")


def is_azure_blob(message_data):
    """
    Checks if the provided URL is an Azure Blob Storage URL.

    Args:
        message_data (str): The URL to be checked.

    Returns:
        bool: True if the URL is an Azure Blob Storage URL, False otherwise.
    """
    return _AZURE_BLOB_URL_PATTERN.match(message_data) is not None

def extract_blob_parts(message_data):
    """
    Extracts the account name, container name, and blob name from an Azure Blob Storage URL.

    Args:
        message_data (str): The Azure Blob Storage URL.

    Returns:
        tuple: A tuple containing the account name, container name, and blob name.
               Returns (None, None, None) if the URL is invalid.
    """
    match = _AZURE_BLOB_URL_PATTERN.match(message_data)
    if not match:
        # BUG FIX: one failure path previously returned a 2-tuple, breaking
        # callers that unpack three values.
        return None, None, None

    log.debug("Detected Azure blob storage URL")
    account_name, container_name, blob_name = match.groups()

    return account_name, container_name, blob_name

# process_azure_blob_event.py
from ..custom_logging import log

def process_azure_blob_event(events: list) -> tuple:
    """
    Extracts message data and metadata from an Azure Blob Storage event.

    Only ``Microsoft.Storage.BlobCreated`` events are considered; the first
    matching event is returned.

    Args:
        events (list): The list of Azure Event Grid event data, each an
            Event Grid schema dict with 'eventType', 'subject', 'eventTime',
            'id' and a 'data' dict containing the blob 'url'.

    Returns:
        tuple: (blob_url, attributes, vector_name) for the first BlobCreated
        event, or (None, None, None) when no such event is present.
    """
    storage_blob_created_event = "Microsoft.Storage.BlobCreated"

    for event in events:
        # Skip anything that is not a blob-creation event (previously the
        # type was re-checked redundantly inside the matching branch).
        if event['eventType'] != storage_blob_created_event:
            continue

        blob_url = event['data']['url']
        event_time = event['eventTime']
        event_id = event['id']
        subject = event['subject']
        attributes = {
            'event_type': storage_blob_created_event,
            'event_time': event_time,
            'event_id': event_id,
            'subject': subject,
            'url': blob_url
        }

        # subject looks like /blobServices/default/containers/<container>/blobs/<blob>
        vector_name = subject.split('/')[4]  # Extracting the container name

        log.info(f"Process Azure Blob Event was triggered by eventId {event_id} at {event_time}")
        log.debug(f"Process Azure Blob Event data: {blob_url}")
        log.info(f"Got valid event from Azure Blob Storage: {blob_url}")

        return blob_url, attributes, vector_name

    return None, None, None
def generate_discord_output(bot_output):
    """
    Format a VAC bot answer into a Discord-friendly payload.

    Args:
        bot_output (dict): Bot result; may contain 'answer' and
            'source_documents' (each doc a dict with 'page_content' and
            'metadata' keys).

    Returns:
        dict: {'result': answer, 'source_documents': [...]} where each source
            document's metadata is filtered down to 'source' and 'type'.
    """
    # Previous version initialised source_documents twice and did doubled
    # .get(..., None) lookups; behaviour is unchanged.
    source_documents = []
    for doc in bot_output.get('source_documents') or []:
        metadata = doc.get("metadata", {})
        filtered_metadata = {}
        if metadata.get("source") is not None:
            filtered_metadata["source"] = metadata["source"]
        if metadata.get("type") is not None:
            filtered_metadata["type"] = metadata["type"]
        source_documents.append({
            'page_content': doc["page_content"],
            'metadata': filtered_metadata
        })

    return {
        'result': bot_output.get('answer', "No answer available"),
        'source_documents': source_documents
    }

def discord_webhook(message_data):
    """
    Post a message to the Discord webhook configured via the DISCORD_URL env var.

    Args:
        message_data: Either a plain value (wrapped as {'content': value}) or a
            dict (serialised to a JSON string and sent as the 'content' field).

    Returns:
        requests.Response | None: The POST response, or None when DISCORD_URL
            is not set.
    """
    webhook_url = os.getenv('DISCORD_URL', None)
    if webhook_url is None:
        return None

    # Security fix: a Discord webhook URL embeds a secret token, so do not
    # write the URL itself to the logs.
    log.info('Discord webhook URL configured, sending message')

    # Discord expects a JSON body with a 'content' field: wrap plain values,
    # and serialise dicts into a string payload.
    if not isinstance(message_data, dict):
        message_data = {'content': message_data}
    else:
        message_data = {'content': json.dumps(message_data)}
    #TODO parse out message_data into other discord webhook objects like embed
    # https://birdie0.github.io/discord-webhooks-guide/discord_webhook.html

    data = message_data

    log.info(f'Sending discord this data: {data}')
    response = requests.post(webhook_url, json=data,
                             headers={'Content-Type': 'application/json'})
    log.debug(f'Sent data to discord: {response}')

    return response
def generate_webapp_output(bot_output):
    """
    Format a VAC bot answer into the webapp response payload.

    Args:
        bot_output (dict): Bot result; may contain 'answer' and
            'source_documents' (each doc a dict with 'page_content' and
            'metadata' keys).

    Returns:
        dict: {'result': answer, 'source_documents': [...]} where each source
            document's metadata is filtered down to 'source' and 'type'.
    """
    # NOTE(review): this duplicates generate_discord_output in bots/discord.py
    # - keep the two in sync (or extract a shared helper).
    source_documents = []
    for doc in bot_output.get('source_documents') or []:
        metadata = doc.get("metadata", {})
        filtered_metadata = {}
        if metadata.get("source") is not None:
            filtered_metadata["source"] = metadata["source"]
        if metadata.get("type") is not None:
            filtered_metadata["type"] = metadata["type"]
        source_documents.append({
            'page_content': doc["page_content"],
            'metadata': filtered_metadata
        })

    return {
        'result': bot_output.get('answer', "No answer available"),
        'source_documents': source_documents
    }

def to_proper_case(s):
    """Convert snake_case / kebab-case text into space-separated Proper Case."""
    return ' '.join(word.capitalize() for word in s.replace('_', ' ').replace('-', ' ').split())

def craft_config_description(name, filename="config/llm_config.yaml"):
    """
    Build a human-readable description for a VAC from its config entry.

    Args:
        name (str): VAC key within the config file.
        filename (str): Path to the llm config YAML file.

    Returns:
        str: The configured 'description', or a fallback built from the
            'agent' and 'llm' fields when no description is set.

    Raises:
        KeyError: If `name` is not present in the loaded config.
    """
    configs, filename = load_config(filename)
    description = configs[name].get('description')

    if description is None:
        description = f"Agent vac: {configs[name].get('agent')} LLM: {configs[name].get('llm')}"

    return description
def create_metadata(vac, metadata=None):
    """
    Merge caller-supplied metadata with the default chunker metadata.

    Args:
        vac (str): Vector name / VAC id, stored under 'vector_name'.
        metadata (dict | str | None): Optional extra metadata; a JSON string
            is parsed into a dict. Invalid JSON is logged and ignored.

    Returns:
        dict: Metadata including the 'vector_name', 'source' and 'eventTime'
            defaults. Note the defaults take precedence over caller-supplied
            values for those three keys.
    """
    now_utc = datetime.now(timezone.utc)
    formatted_time = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Default metadata if none provided
    default_metadata = {"vector_name": vac, "source": "sunholo-cli", "eventTime": formatted_time}

    if metadata and not isinstance(metadata, dict):
        try:
            metadata = json.loads(metadata)
        except Exception as err:
            # Bug fix: previously a parse failure left `metadata` as the raw
            # string and the .update() below raised TypeError; now fall back
            # to an empty dict after logging.
            log.error(f"[bold red]ERROR: metadata not parsed: {err} for {metadata}")
            metadata = {}

    if not isinstance(metadata, dict):
        # Covers None, and JSON that parsed to a non-dict (e.g. a list).
        metadata = {}

    # Merge in the defaults (defaults win for their three keys).
    metadata.update(default_metadata)

    return metadata

def encode_data(vac, content, metadata=None, local_chunks=False):
    """
    Wrap content in a Pub/Sub-style message envelope with base64-encoded data.

    Args:
        vac (str): Vector name, set as the 'namespace' attribute.
        content (str): Content (e.g. a URL) to base64-encode as the payload.
        metadata (dict | str | None): Extra attributes merged into the message.
        local_chunks (bool): Sets the 'return_chunks' attribute ('true'/'false').

    Returns:
        dict: {'message': {'data', 'messageId', 'publishTime', 'attributes'}}.

    Raises:
        ValueError: If content is not a string.
    """
    metadata = create_metadata(vac, metadata)

    # Encode the content (URL)
    if isinstance(content, str):
        message_data = base64.b64encode(content.encode('utf-8')).decode('utf-8')
    else:
        raise ValueError(f"Unsupported content type: {type(content)}")

    now_utc = datetime.now(timezone.utc)
    formatted_time = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Construct the Pub/Sub push-style message dictionary
    messageId = str(uuid.uuid4())
    message = {
        "message": {
            "data": message_data,
            "messageId": messageId,
            "publishTime": formatted_time,
            "attributes": {
                "namespace": vac,
                "return_chunks": str(local_chunks).lower()
            },
        }
    }

    # Merge metadata with attributes
    message["message"]["attributes"].update(metadata)

    return message

def upload_doc_images(metadata):
    """
    Upload a base64-encoded image found in document metadata to blob storage.

    Args:
        metadata (dict): May contain 'image_base64'; 'image_mime_type',
            'objectId', 'vector_name' and 'bucket_name' are used when
            uploading. Mutated in place: 'image_base64' is cleared and
            'uploaded_to_bucket' set once the image is written out, so the
            document does not get re-processed in a loop.

    Returns:
        str | None: The GCS URL on GCP, otherwise None (including when no
            usable image is present; non-GCP blob storage is still a TODO).
    """
    image_base64 = metadata.get('image_base64')
    # Only treat payloads longer than 100 chars as a real image.
    if image_base64 and len(image_base64) > 100:
        image_data = base64.b64decode(image_base64)

        # Determine the file extension based on the MIME type
        mime_type = metadata.get("image_mime_type", "")
        object_id = metadata.get("objectId", "image")
        log.info(f"Found image_base64 for {object_id}")
        # images.py uses the module-style `import datetime`
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        image_path = get_image_file_name(object_id, image_name=timestamp, mime_type=mime_type)

        # Write image data to a temporary file
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as temp_image:
            temp_image.write(image_data)
            temp_image.flush()  # Make sure all data is written to the file

            temp_image_path = temp_image.name

        # wipe this so it doesn't get stuck in a loop
        metadata["image_base64"] = None
        metadata["uploaded_to_bucket"] = True

        if is_running_on_gcp():
            # Use the provided function to upload the file to GCS
            image_gsurl = add_file_to_gcs(
                filename=temp_image_path,
                vector_name=metadata["vector_name"],
                bucket_name=metadata["bucket_name"],
                metadata=metadata,
                bucket_filepath=image_path
            )
            os.remove(temp_image.name)
            log.info(f"Uploaded image to GCS: {image_gsurl}")

            return image_gsurl

        else:
            # TODO: other blob storage
            # NOTE(review): the temp file is not removed on this path - confirm
            # whether that is intentional before cleaning it up.
            return None
def split_pdf_to_pages(pdf_path, temp_dir):
    """
    Split a PDF into one single-page PDF file per page.

    Args:
        pdf_path: Path to the source PDF.
        temp_dir: Directory where the per-page PDFs are written.

    Returns:
        list[str]: Paths of the created page files; when the PDF has a single
            page the original path is returned unchanged in a one-item list.
    """
    log.info(f"Splitting PDF {pdf_path} into pages...")

    pdf_path = pathlib.Path(pdf_path)
    from pypdf import PdfReader, PdfWriter
    pdf = PdfReader(pdf_path)

    log.info(f"PDF file {pdf_path} contains {len(pdf.pages)} pages")

    # Get base name without extension, used to name the per-page files.
    basename = os.path.splitext(os.path.basename(pdf_path))[0]

    page_files = []

    if len(pdf.pages) == 1:
        # Nothing to split - hand the original file back.
        return [str(pdf_path)]

    for page in range(len(pdf.pages)):
        pdf_writer = PdfWriter()
        pdf_writer.add_page(pdf.pages[page])

        page_str = f"{page + 1:02d}"  # zero-padded page number
        output_filename = pathlib.Path(temp_dir, f'{basename}_p{page_str}.pdf')

        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)

        log.info(f'Created PDF page: {output_filename}')
        page_files.append(str(output_filename))

    log.info(f"Split PDF {pdf_path} into {len(page_files)} pages...")
    return page_files

def read_pdf_file(pdf_path, metadata):
    """
    Extract text from a PDF locally via pypdf.

    Args:
        pdf_path: Path to the PDF file.
        metadata (dict): Metadata attached to the returned Document.

    Returns:
        Document | None: Document with the extracted text, or None when
            extraction fails or yields fewer than 10 characters.
    """
    from pypdf import PdfReader
    log.info(f"Trying local PDF parsing. Reading PDF {pdf_path}...")

    pdf_path = pathlib.Path(pdf_path)

    pdf = PdfReader(pdf_path)

    try:
        # join is linear; += concatenation in a loop can be quadratic.
        # extract_text() may return None (e.g. image-only pages), which raises
        # TypeError and is handled below - same outcome as the original loop.
        text = "".join(page.extract_text() + "\n" for page in pdf.pages)
    except Exception as err:
        log.warning(f"Could not extract PDF via pypdf ERROR - {str(err)}")
        return None

    if len(text) < 10:
        log.info(f"Could not read PDF {pdf_path} via pypdf - too short, only got {text}")
        return None

    log.info(f"Successfully read PDF {pdf_path}...")
    return Document(page_content=text, metadata=metadata)

def publish_if_urls(the_content, vector_name):
    """
    Extracts URLs and puts them in a queue for processing on PubSub
    """
    if contains_url(the_content):
        log.info("Detected http://")

        urls = extract_urls(the_content)

        for url in urls:
            publish_text(url, vector_name)

def publish_chunks(chunks: "list[Document]", vector_name: str):
    """
    Publish document chunks to the 'chunk-to-pubsub-embed' topic.

    Chunks whose serialised form is shorter than 10 characters are skipped.
    Returns None when no GCP project is configured or the PubSub manager
    cannot be created. (The annotation is quoted so it is evaluated lazily.)
    """
    project = get_gcp_project()
    if not project:
        log.warning("No GCP project found for PubSub, no message sent")

        return None

    log.info("Publishing chunks to embed_chunk")

    try:
        pubsub_manager = PubSubManager(vector_name,
                                       pubsub_topic="chunk-to-pubsub-embed",
                                       project_id=project)
    except Exception as err:
        log.error(f"PubSubManager init error: Could not publish chunks to {project} {vector_name} pubsub_topic chunk-to-pubsub-embed - {str(err)}")

        return None

    for chunk in chunks:
        # Convert chunk to string, as Pub/Sub messages must be strings or bytes.
        # NOTE(review): .json() is the pydantic v1 API; if Document moves to
        # pydantic v2 this becomes model_dump_json() - confirm before upgrading.
        chunk_str = chunk.json()
        if len(chunk_str) < 10:
            log.warning(f"Not publishing {chunk_str} as too small < 10 chars")
            continue
        log.info(f"Publishing chunk: {chunk_str}")
        pubsub_manager.publish_message(chunk_str)

def publish_text(text: str, vector_name: str):
    """
    Publish raw text to the 'app-to-pubsub-chunk' topic for chunking.
    """
    project = get_gcp_project()
    if not project:
        log.warning("No GCP project found for PubSub, no message sent")

        return None

    log.info(f"Publishing text: {text} to app-to-pubsub-chunk")
    pubsub_manager = PubSubManager(vector_name,
                                   pubsub_topic="app-to-pubsub-chunk",
                                   project_id=project)

    pubsub_manager.publish_message(text)

def process_docs_chunks_vector_name(chunks, vector_name, metadata):
    """
    Publish chunks for embedding and report progress on the state topic.

    Returns:
        The metadata dict on success; None when there is no GCP project or
        when `chunks` is None.
    """
    project = get_gcp_project()
    if not project:
        log.warning("No GCP project found for PubSub, no message sent")

        return None

    pubsub_manager = PubSubManager(vector_name,
                                   pubsub_topic="pubsub_state_messages",
                                   project_id=project)
    if chunks is None:
        log.info("No chunks found")
        pubsub_manager.publish_message(f"No chunks for: {metadata} to {vector_name} embedding")
        return None

    publish_chunks(chunks, vector_name=vector_name)

    msg = f"data_to_embed_pubsub published chunks with metadata: {metadata}"

    log.info(msg)

    pubsub_manager.publish_message(f"Sent doc chunks with metadata: {metadata} to {vector_name} embedding")

    return metadata
def data_to_embed_pubsub(data: dict):
    """Triggered from a message on a Cloud Pub/Sub topic.

    Decodes the Pub/Sub envelope and forwards the payload to the chunker.

    Args:
        data (dict): Pub/Sub push message JSON.

    Returns:
        Whatever process_chunker_data returns for the decoded message.
    """
    message_data, metadata, vector_name = process_pubsub_message(data)

    return process_chunker_data(message_data, metadata, vector_name)

def deploy_vac(args):
    """
    Deploys the VAC by running a Flask app locally.

    Args:
        args: argparse namespace with ``vac_name`` and ``config_path``.

    Raises:
        ValueError: If no configuration exists for the VAC, or app.py is
            missing from the config path.
    """
    import sys  # local import: only needed when actually deploying

    print(f"Deploying VAC: {args.vac_name} locally")

    # Load the vacConfig and check the VAC is actually defined.
    configs_by_kind = load_all_configs()
    vac_config = configs_by_kind.get('vacConfig', {}).get('vac', {}).get(args.vac_name)

    if not vac_config:
        raise ValueError(f"No configuration found for VAC: {args.vac_name}")

    # Assuming the Flask app is in 'app.py' within the config path
    app_path = os.path.join(args.config_path, 'app.py')
    if not os.path.exists(app_path):
        raise ValueError(f"app.py not found in {args.config_path}")

    print(f"Running Flask app from {app_path}")

    # Fix: use the current interpreter rather than whatever 'python' is on
    # PATH, so the app runs inside the same virtualenv as the CLI.
    command = [sys.executable, app_path]
    print(f"Running Flask app with command: {' '.join(command)}")
    process = Popen(command)
    process.communicate()

def setup_deploy_subparser(subparsers):
    """
    Sets up an argparse subparser for the 'deploy' command.

    Example command:
    ```bash
    sunholo deploy "vac_name" --config_path .
    ```
    """
    deploy_parser = subparsers.add_parser('deploy', help='Triggers a deployment of a VAC.')
    deploy_parser.add_argument('vac_name', help='The name of the VAC to deploy.')
    deploy_parser.add_argument('--config_path', default='.', help='Path to the directory containing the config folder `config/` and Flask app `app.py`, defaults to current directory. Set _CONFIG_FOLDER env var to change config location.')
    deploy_parser.set_defaults(func=deploy_vac)
def setup_merge_text_subparser(subparsers):
    """
    Sets up an argparse subparser for the 'merge-text' command.

    Args:
        subparsers: The subparsers object from argparse.ArgumentParser().
    """
    merge_text_parser = subparsers.add_parser('merge-text', help='Merge text files from a source folder into a single output file.')
    merge_text_parser.add_argument('source_folder', help='Folder containing the text files.')
    merge_text_parser.add_argument('output_file', help='Output file to write the merged text.')
    merge_text_parser.add_argument('--gitignore', help='Path to .gitignore file to exclude patterns.', default=None)
    # Fix: store_true flags should default to False, not None, so the
    # attribute is always a bool (truthiness behaviour is unchanged).
    merge_text_parser.add_argument('--output_tree', action='store_true', help='Set to output the file tree in the console after merging')

    merge_text_parser.set_defaults(func=merge_text_files_command)

def merge_text_files_command(args):
    """
    Command to merge text files based on the provided arguments.

    Args:
        args: Command-line arguments (source_folder, output_file, gitignore,
            output_tree).
    """
    # Prefer an explicit --gitignore; otherwise look for one in the source folder.
    gitignore_path = os.path.join(args.source_folder, '.gitignore') if not args.gitignore else args.gitignore

    if os.path.exists(gitignore_path):
        patterns = load_gitignore_patterns(gitignore_path)
        print(f"Ignoring patterns from {gitignore_path}")
    else:
        patterns = []  # Empty list if no .gitignore

    print(f"Merging text files within {args.source_folder} to {args.output_file}")
    file_tree = merge_text_files(args.source_folder, args.output_file, patterns)
    print(f"OK: Merged files available in {args.output_file}")
    if args.output_tree:
        print(f"==File Tree for {args.source_folder}")
        pprint(file_tree)
def cli_swagger(args):
    """
    CLI handler: generate a swagger spec from vacConfig/agentConfig and print it.

    Args:
        args: argparse namespace with optional vac_config_path and
            agent_config_path overrides.

    Returns:
        The generated swagger specification.

    Raises:
        ValueError: If either config cannot be resolved.
    """
    # Lazy import: ruamel.yaml is only needed for this command, matching the
    # lazy-import pattern used elsewhere in this package (e.g. lancedb.py).
    from ruamel.yaml import YAML

    configs = load_all_configs()

    vac_config = args.vac_config_path or configs.get('vacConfig')
    agent_config = args.agent_config_path or configs.get('agentConfig')
    if not agent_config:
        raise ValueError('Need an agentConfig path')

    if not vac_config:
        raise ValueError('Need a vacConfig path')

    swag = generate_swagger(vac_config, agent_config)

    yaml = YAML()
    yaml.width = 4096  # to avoid breaking urls
    yaml.indent(mapping=2, sequence=4, offset=2)  # Set indentation levels

    yaml.dump(yaml.load(swag), sys.stdout)

    return swag

def setup_swagger_subparser(subparsers):
    """
    Sets up an argparse subparser for the 'swagger' command.

    By default will use the 'vacConfig' configuration within the folder specified by '_CONFIG_FOLDER'

    Example command:
    ```bash
    sunholo swagger --config .
    ```
    """
    # Renamed from 'deploy_parser' (copy-paste from the deploy subcommand).
    swagger_parser = subparsers.add_parser('swagger', help='Create a swagger specification based off a "vacConfig" configuration')
    swagger_parser.add_argument('--vac_config_path', help='Path to the vacConfig file. Set _CONFIG_FOLDER env var and place file in there to change default config location.')
    swagger_parser.add_argument('--agent_config_path', help='Path to agentConfig file. Set _CONFIG_FOLDER env var and place file in there to change default config location.')
    swagger_parser.set_defaults(func=cli_swagger)

def deploy_extension(args):
    """Create a Vertex AI extension from CLI args and print the updated list.

    NOTE(review): args.project is not added by setup_vertex_subparser, so it
    is presumably provided by the top-level CLI parser - confirm.
    """
    vex = VertexAIExtensions(args.project)
    console.rule(f"Creating Vertex extension '{args.display_name}' within '{args.project}'")

    vex.create_extension(
        args.display_name,
        description=args.description,
        tool_example_file=args.tool_example_file,
        open_api_file=args.open_api_file,
        service_account=args.service_account,
        bucket_name=args.bucket_name
    )
    extensions = vex.list_extensions()
    console.print(extensions)

def list_extensions(args):
    """Print all Vertex AI extensions for the given project."""
    vex = VertexAIExtensions(args.project)
    extensions = vex.list_extensions()
    console.print(extensions)

def setup_vertex_subparser(subparsers):
    """
    Sets up an argparse subparser for the 'vertex' command.

    Args:
        subparsers: The subparsers object to add the 'vertex' subcommand to.
    """
    vertex_parser = subparsers.add_parser('vertex', help='Work with Google Vertex AI')
    vertex_subparsers = vertex_parser.add_subparsers(dest='subcommand', help='Vertex AI subcommands')

    create_parser = vertex_subparsers.add_parser('create-extension', help='Create a Vertex AI extension')
    create_parser.add_argument('--display_name', required=True, help='Display name of the extension')
    create_parser.add_argument('--description', required=True, help='Description of the extension')
    create_parser.add_argument('--tool_example_file', required=True, help='Tool example file path')
    create_parser.add_argument('--open_api_file', required=True, help='OpenAPI file path')
    create_parser.add_argument('--service_account', required=True, help='Service account email')
    create_parser.add_argument('--bucket_name', help='Bucket name to upload files to. Uses EXTENSION_BUCKET env var if not specified')
    create_parser.set_defaults(func=deploy_extension)

    list_parser = vertex_subparsers.add_parser('list-extensions', help='List all Vertex AI extensions')
    list_parser.set_defaults(func=list_extensions)
def create_lancedb_index(bucket, vector_name, num_partitions=256, num_sub_vectors=96, text_key="text"):
    """
    Build the vector (IVF-PQ style partitions/sub-vectors) and full-text
    indexes for a LanceDB table.

    Args:
        bucket: LanceDB connection URI (e.g. a gs:// bucket path).
        vector_name: Name of the table to index.
        num_partitions: Partition count passed to create_index.
        num_sub_vectors: Sub-vector count passed to create_index.
        text_key: Column used for the full-text search index.

    Raises:
        ValueError: When the optional lancedb dependency is not installed.

    Note: index-creation failures are caught and logged rather than raised.
    """
    try:
        import lancedb
        #import tantivy
    except ImportError:
        raise ValueError("Could not import lancedb module, install via `pip install sunholo[database]`")

    try:
        connection = lancedb.connect(bucket)
        table = connection.open_table(vector_name)

        table.create_index(num_partitions=num_partitions, num_sub_vectors=num_sub_vectors)
        table.create_fts_index(text_key)
        log.info(f'Index creation for {vector_name} success')
    except Exception as e:
        log.info(f'Index creation for {vector_name} failed: {e}')
END;
$$;

-- ============================================================
-- create_function_time.sql
-- Time-weighted similarity search: scores recent documents higher and only
-- considers the latest version of each objectId. Placeholders such as
-- {vector_name} / {vector_size} are filled in by Python str.format.
-- ============================================================

CREATE OR REPLACE FUNCTION calculate_age_in_days(objectId text, eventTime text)
RETURNS float
LANGUAGE plpgsql
AS $$
BEGIN
  -- Age in days: parsed from eventTime when present, otherwise from the
  -- timestamp embedded in objectId (characters 14-26).
  RETURN EXTRACT(EPOCH FROM NOW() - TO_TIMESTAMP(COALESCE(SUBSTRING(eventTime FROM 1 FOR 19), SUBSTRING(objectId FROM 14 FOR 13)), 'YYYY-MM-DD"T"HH24:MI:SS')) / (60*60*24);
END;
$$;

CREATE OR REPLACE FUNCTION match_documents_{vector_name}(query_embedding vector({vector_size}), match_count int)
RETURNS TABLE(
  id bigint,
  content text,
  metadata jsonb,
  embedding vector({vector_size}),
  similarity float)
LANGUAGE plpgsql
AS $$
# variable_conflict use_column
BEGIN
  RETURN query
  -- Keep only the newest row per objectId before scoring.
  WITH latest_documents AS (
    SELECT *
    FROM {vector_name}
    WHERE (metadata->>'objectId', TO_TIMESTAMP(COALESCE(SUBSTRING(metadata->>'eventTime' FROM 1 FOR 19), SUBSTRING(metadata->>'objectId' FROM 14 FOR 13)), 'YYYY-MM-DD"T"HH24:MI:SS')) IN (
      SELECT metadata->>'objectId', MAX(TO_TIMESTAMP(COALESCE(SUBSTRING(metadata->>'eventTime' FROM 1 FOR 19), SUBSTRING(metadata->>'objectId' FROM 14 FOR 13)), 'YYYY-MM-DD"T"HH24:MI:SS'))
      FROM {vector_name}
      GROUP BY metadata->>'objectId'
    )
  )
  SELECT
    id,
    content,
    metadata,
    embedding,
    -- reported similarity = cosine similarity minus an age penalty in days
    1 -(latest_documents.embedding <=> query_embedding) - calculate_age_in_days(metadata->>'objectId', metadata->>'eventTime') AS similarity
  FROM
    latest_documents
  ORDER BY
    -- ranking doubles the similarity weight relative to the age penalty,
    -- breaking ties by document recency
    2 * (1 -(latest_documents.embedding <=> query_embedding)) - calculate_age_in_days(metadata->>'objectId', metadata->>'eventTime') DESC,
    TO_TIMESTAMP(COALESCE(SUBSTRING(metadata->>'eventTime' FROM 1 FOR 19), SUBSTRING(metadata->>'objectId' FROM 14 FOR 13)), 'YYYY-MM-DD"T"HH24:MI:SS') DESC
  LIMIT match_count;
END;
$$;

-- create_table.sql: table backing one vector store per {vector_name}
-- Create a table to store your documents
create table {vector_name} (
  id bigserial primary key,
  content text, -- corresponds to Document.pageContent
  metadata jsonb, -- corresponds to Document.metadata
  embedding vector({vector_size}) -- 1536 works for OpenAI embeddings, change if needed
);

-- delete_source_row.sql: remove all chunks for a given source document
DELETE FROM {vector_name}
WHERE metadata->>'source' = '{source_delete}'

-- return_sources.sql: distinct sources ingested within {time_period}
SELECT DISTINCT metadata->>'source' AS source
FROM {vector_name}
WHERE TO_TIMESTAMP(COALESCE(SUBSTRING(metadata->>'eventTime' FROM 1 FOR 19), SUBSTRING(metadata->>'objectId' FROM 14 FOR 13)), 'YYYY-MM-DD"T"HH24:MI:SS') > NOW() - INTERVAL '{time_period}';

-- setup.sql
-- Enable the pgvector extension to work with embedding vectors
create extension vector;
def generate_uuid_from_object_id(object_id):
    """Derive a deterministic UUIDv5 string for a storage object id.

    The same ``object_id`` always maps to the same UUID, so the result can
    serve as a stable document key across runs.
    """
    # uuid5 hashes (namespace, name) - with the fixed NAMESPACE_URL this is
    # reproducible, unlike the random uuid4.
    return str(uuid.uuid5(uuid.NAMESPACE_URL, object_id))
gcp_config.get("project_id") or get_gcp_project() 21 | if not project_id: 22 | raise ValueError("Could not find project_id in gcp_config or global") 23 | 24 | #location = gcp_config.get('location') 25 | 26 | de = DiscoveryEngineClient( 27 | data_store_id=config.vector_name, 28 | project_id=project_id, 29 | # location needs to be 'eu' or 'us' which doesn't work with other configurations 30 | #location=location 31 | ) 32 | 33 | new_store = de.create_data_store(chunk_size=chunk_size) 34 | 35 | return new_store -------------------------------------------------------------------------------- /src/sunholo/discovery_engine/search_filter_syntax.txt: -------------------------------------------------------------------------------- 1 | # A single expression or multiple expressions that are joined by "AND" or "OR". 2 | filter = expression, { " AND " | "OR", expression }; 3 | # Expressions can be prefixed with "-" or "NOT" to express a negation. 4 | expression = [ "-" | "NOT " ], 5 | # A parenthetical expression. 6 | | "(", expression, ")" 7 | # A simple expression applying to a text field. 8 | # Function "ANY" returns true if the field exactly matches any of the literals. 9 | ( text_field, ":", "ANY", "(", literal, { ",", literal }, ")" 10 | # A simple expression applying to a numerical field. Function "IN" returns true 11 | # if a field value is within the range. By default, lower_bound is inclusive and 12 | # upper_bound is exclusive. 13 | | numerical_field, ":", "IN", "(", lower_bound, ",", upper_bound, ")" 14 | # A simple expression that applies to a numerical field and compares with a double value. 15 | | numerical_field, comparison, double 16 | # An expression that applies to a geolocation field with text/street/postal address. 17 | | geolocation_field, ":", "GEO_DISTANCE(", literal, ",", distance_in_meters, ")" 18 | # An expression that applies to a geolocation field with latitude and longitude. 
19 | | geolocation_field, ":", "GEO_DISTANCE(", latitude_double, ",", longitude_double, ",", distance_in_meters, ")" 20 | # Datetime field 21 | | datetime_field, comparison, literal_iso_8601_datetime_format); 22 | # A lower_bound is either a double or "*", which represents negative infinity. 23 | # Explicitly specify inclusive bound with the character 'i' or exclusive bound 24 | # with the character 'e'. 25 | lower_bound = ( double, [ "e" | "i" ] ) | "*"; 26 | # An upper_bound is either a double or "*", which represents infinity. 27 | # Explicitly specify inclusive bound with the character 'i' or exclusive bound 28 | # with the character 'e'. 29 | upper_bound = ( double, [ "e" | "i" ] ) | "*"; 30 | # Supported comparison operators. 31 | comparison = "<=" | "<" | ">=" | ">" | "="; 32 | # A literal is any double quoted string. You must escape backslash (\) and 33 | # quote (") characters. 34 | literal = double quoted string; 35 | text_field = text field - for example, category; 36 | numerical_field = numerical field - for example, score; 37 | geolocation_field = field of geolocation data type - for example home_address, location; 38 | datetime_field = field of datetime data type - for example creation_date, expires_on; 39 | literal_iso_8601_datetime_format = either a double quoted string representing ISO 8601 datetime or a numerical field representing microseconds from unix epoch. -------------------------------------------------------------------------------- /src/sunholo/embedder/README.md: -------------------------------------------------------------------------------- 1 | # Embedder 2 | 3 | Turn documents into embedded vectors 4 | 5 | 6 | ## test 7 | 8 | Sample: 9 | 10 | ``` 11 | {"page_content": "This is a sample page content. 
It needs to be at least 100 characters long to pass the test validation.", "metadata": {"vector_name": "sample_vector", "source": "unknown", "eventTime": "2024-04-24T12:00:00Z", "doc_id": "sample_doc_id"}} 12 | ``` 13 | That encodes as: 14 | 15 | ```json 16 | { 17 | "message": { 18 | "data": "eyJwYWdlX2NvbnRlbnQiOiAiVGhpcyBpcyBhIHNhbXBsZSBwYWdlIGNvbnRlbnQuIEl0IG5lZWRzIHRvIGJlIGF0IGxlYXN0IDEwMCBjaGFyYWN0ZXJzIGxvbmcgdG8gcGFzcyB0aGUgdGVzdCB2YWxpZGF0aW9uLiIsICJtZXRhZGF0YSI6IHsidmVjdG9yX25hbWUiOiAic2FtcGxlX3ZlY3RvciIsICJzb3VyY2UiOiAidW5rbm93biIsICJldmVudFRpbWUiOiAiMjAyNC0wNC0yNFQxMjowMDowMFoiLCAiZG9jX2lkIjogInNhbXBsZV9kb2NfaWQifX0=", 19 | "messageId": "123456789", 20 | "publishTime": "2024-04-24T12:00:00Z" 21 | } 22 | } 23 | ``` 24 | 25 | ## curl 26 | 27 | ```sh 28 | export FLASK_URL=https://embedder-url 29 | curl -X POST ${FLASK_URL}/embed_chunk \ 30 | -H "Content-Type: application/json" \ 31 | -d '{ 32 | "message": { 33 | "data": "eyJwYWdlX2NvbnRlbnQiOiAiVGhpcyBpcyBhIHNhbXBsZSBwYWdlIGNvbnRlbnQuIEl0IG5lZWRzIHRvIGJlIGF0IGxlYXN0IDEwMCBjaGFyYWN0ZXJzIGxvbmcgdG8gcGFzcyB0aGUgdGVzdCB2YWxpZGF0aW9uLiIsICJtZXRhZGF0YSI6IHsidmVjdG9yX25hbWUiOiAic2FtcGxlX3ZlY3RvciIsICJzb3VyY2UiOiAidW5rbm93biIsICJldmVudFRpbWUiOiAiMjAyNC0wNC0yNFQxMjowMDowMFoiLCAiZG9jX2lkIjogInNhbXBsZV9kb2NfaWQifX0=", 34 | "messageId": "123456789", 35 | "publishTime": "2024-04-24T12:00:00Z" 36 | } 37 | }' 38 | ``` -------------------------------------------------------------------------------- /src/sunholo/embedder/__init__.py: -------------------------------------------------------------------------------- 1 | from .embed_chunk import embed_pubsub_chunk 2 | -------------------------------------------------------------------------------- /src/sunholo/excel/__init__.py: -------------------------------------------------------------------------------- 1 | from .plugin import excel_plugin, setup_excel_subparser 2 | -------------------------------------------------------------------------------- 
def download_files_from_gcs(bucket_name: str, source_folder: str, destination_folder: str=None):
    """
    Download all files from a specified folder in a Google Cloud Storage bucket to a local directory.

    Args:
        bucket_name: The name of the GCS bucket.
        source_folder: The folder (prefix) in the GCS bucket to download files from.
        destination_folder: The local directory to save the downloaded files, or os.getcwd() if None.

    Returns:
        None. Files are written beneath ``destination_folder``; returns early
        if the storage client is unavailable or cannot be created.
    """
    # BUG FIX: if the optional google-cloud-storage import failed at module
    # level, `storage` is None and `storage.Client()` raised AttributeError,
    # which the except below misreported as a client-creation error.
    # Guard explicitly, consistent with the rest of the gcs package.
    if storage is None:
        log.error("google.cloud.storage is not installed - cannot download files")
        return None

    try:
        storage_client = storage.Client()
    except Exception as err:
        log.error(f"Error creating storage client: {str(err)}")
        return None

    # Get the bucket and list blobs under the given prefix
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=source_folder)

    if not destination_folder:
        destination_folder = os.getcwd()

    # Ensure the destination folder exists
    os.makedirs(destination_folder, exist_ok=True)

    for blob in blobs:
        # GCS represents "directories" as zero-byte objects ending in '/'
        if blob.name.endswith('/'):
            continue

        # Mirror the object's path relative to source_folder locally
        local_path = os.path.join(destination_folder, os.path.relpath(blob.name, source_folder))
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        blob.download_to_filename(local_path)
        log.info(f"Downloaded {blob.name} to {local_path}")
def get_object_metadata(bucket_name, object_name):
    """Fetch the custom metadata dict attached to a GCS object.

    Returns None when the storage library is unavailable, the arguments are
    invalid, or the storage client cannot be created.
    """
    if not storage:
        return None

    if bucket_name is None or object_name is None:
        log.warning("Got invalid bucket name and object name")
        return None

    try:
        storage_client = storage.Client()
    except Exception as e:
        log.warning(f"Could not connect to Google Cloud Storage for metadata: {str(e)}")
        return None

    blob = storage_client.bucket(bucket_name).blob(object_name)

    # reload() pulls the freshest metadata from the server before reading it
    blob.reload()

    custom_metadata = blob.metadata
    log.info(f"Custom Metadata for {object_name}: {custom_metadata}")
    return custom_metadata
def extract_gs_images_and_genai_upload(content: str, limit:int=20):
    """Find gs:// image/pdf URIs in *content* and upload each to the genai File API.

    Returns a list of uploaded file handles - at most *limit* (default 20),
    deduplicated; URIs with unknown mime types or unreadable bytes are skipped.
    """
    # Regular expression to find gs:// URLs with a supported extension
    pattern = r'gs://[^ ]+\.(?:png|jpg|jpeg|pdf)'

    # Deduplicate and cap at `limit`; set order is arbitrary, so no particular
    # URI ordering is guaranteed (matches the original behaviour).
    candidates = list(set(re.findall(pattern, content)))[:limit]

    uploads = []
    for uri in candidates:
        if guess_mime_type(uri) is None:
            continue

        log.info(f"Getting bytes from GCS: {uri}")
        payload = get_bytes_from_gcs(uri)
        if payload is None:
            continue

        # Stage the bytes under the object's basename so the uploaded file
        # keeps a recognisable name, then hand the path to genai.
        basename = os.path.basename(uri)
        with tempfile.TemporaryDirectory() as workdir:
            staged = os.path.join(workdir, basename)
            with open(staged, 'wb') as fh:
                fh.write(payload)

            try:
                uploads.append(genai.upload_file(staged))
            except Exception as e:
                log.error(f"Error adding {uri} to base64: {str(e)}")

    return uploads
def genai_safety(threshold: str = "BLOCK_ONLY_HIGH"):
    """
    BLOCK_ONLY_HIGH - block when high probability of unsafe content is detected
    BLOCK_MEDIUM_AND_ABOVE - block when medium or high probability of content is detected
    BLOCK_LOW_AND_ABOVE - block when low, medium, or high probability of unsafe content is detected
    BLOCK_NONE - no block, but need to be on an allow list to use
    """
    from google.generativeai.types import (
        HarmCategory,
        HarmBlockThreshold
    )

    # Dispatch table instead of an if/elif chain; unknown names are rejected.
    name_to_threshold = {
        'BLOCK_ONLY_HIGH': HarmBlockThreshold.BLOCK_ONLY_HIGH,
        'BLOCK_MEDIUM_AND_ABOVE': HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        'BLOCK_LOW_AND_ABOVE': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        'BLOCK_NONE': HarmBlockThreshold.BLOCK_NONE,
    }
    if threshold not in name_to_threshold:
        raise ValueError("Invalid threshold")
    thresh = name_to_threshold[threshold]

    # Apply the same threshold uniformly across all four harm categories
    return {
        HarmCategory.HARM_CATEGORY_HARASSMENT: thresh,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: thresh,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: thresh,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: thresh,
    }
.async_class import AsyncTaskRunner 4 | -------------------------------------------------------------------------------- /src/sunholo/invoke/invoke_vac_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | from pathlib import Path 5 | 6 | from ..custom_logging import log 7 | 8 | def invoke_vac(service_url, data, vector_name=None, metadata=None, is_file=False): 9 | """ 10 | This lets a VAC be invoked by directly calling its URL, used for file uploads 11 | """ 12 | try: 13 | if is_file: 14 | log.info("Uploading file...") 15 | # Handle file upload 16 | if not isinstance(data, Path) or not data.is_file(): 17 | raise ValueError("For file uploads, 'data' must be a Path object pointing to a valid file.") 18 | 19 | files = { 20 | 'file': (data.name, open(data, 'rb')), 21 | } 22 | form_data = { 23 | 'vector_name': vector_name, 24 | 'metadata': json.dumps(metadata) if metadata else '', 25 | } 26 | 27 | response = requests.post(service_url, files=files, data=form_data) 28 | else: 29 | log.info("Uploading JSON...") 30 | try: 31 | if isinstance(data, dict): 32 | json_data = data 33 | else: 34 | json_data = json.loads(data) 35 | except json.JSONDecodeError as err: 36 | log.error(f"ERROR: invalid JSON: {str(err)}") 37 | raise err 38 | except Exception as err: 39 | log.error(f"ERROR: could not parse JSON: {str(err)}") 40 | raise err 41 | 42 | log.debug(f"Sending data: {data} or json_data: {json.dumps(json_data)}") 43 | # Handle JSON data 44 | headers = {"Content-Type": "application/json"} 45 | response = requests.post(service_url, headers=headers, data=json.dumps(json_data)) 46 | 47 | response.raise_for_status() 48 | 49 | the_data = response.json() 50 | log.info(the_data) 51 | 52 | return the_data 53 | 54 | except requests.exceptions.RequestException as e: 55 | log.error(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]") 56 | raise e 57 | except Exception as e: 58 | log.error(f"[bold red]ERROR: 
@dataclass
class Document:
    """A minimal stand-in for LangChain's Document.

    Holds text plus a metadata mapping, exposing the same attribute names
    (``page_content`` / ``metadata``) so it can be swapped in when LangChain
    is not installed. Being a dataclass keeps it trivially serializable.
    """
    page_content: str
    metadata: Dict[str, Any] = None  # normalised to {} in __post_init__

    def __post_init__(self) -> None:
        """Replace a missing metadata mapping with a fresh empty dict."""
        self.metadata = {} if self.metadata is None else self.metadata

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (asdict deep-copies nested values)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Document":
        """Build a Document from a dict as produced by ``to_dict``."""
        return cls(**data)

    def json(self) -> str:
        """JSON string form - mirrors LangChain's ``Document.json()``."""
        return json.dumps(asdict(self))
44 | """ 45 | try: 46 | from langchain.schema import Document as LangchainDocument 47 | return LangchainDocument( 48 | page_content=doc.page_content, 49 | metadata=doc.metadata 50 | ) 51 | except ImportError: 52 | raise ImportError("LangChain is required for this conversion. Please install langchain.") -------------------------------------------------------------------------------- /src/sunholo/langfuse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/langfuse/__init__.py -------------------------------------------------------------------------------- /src/sunholo/langfuse/callback.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Dict, Any 3 | from ..custom_logging import log 4 | 5 | try: 6 | from langfuse.callback import CallbackHandler 7 | except ImportError: 8 | CallbackHandler = None 9 | 10 | from ..utils.version import sunholo_version 11 | 12 | def create_langfuse_callback(**kwargs): 13 | 14 | if not CallbackHandler: 15 | log.warning("No CallbackHandler found, install langfuse? 
def add_langfuse_tracing(
    config: Dict[str, Any],
    request) -> Dict[str, Any]:
    """
    Config modifier function to add a tracing callback
    By @jmaness https://github.com/langchain-ai/langserve/issues/311

    :param config: config dict
    :param request: HTTP request
    :return: updated config
    """

    log.debug(f"add_langfuse_tracing config: {config} {request}")

    if "callbacks" not in config:
        config["callbacks"] = []

    # Per-request identity/source headers are attached to the trace
    user_id = request.headers.get("X-User-ID")
    session_id = request.headers.get("X-Session-ID")
    message_source = request.headers.get("X-Message-Source")

    tags = [sunholo_version()]
    if message_source:
        tags.append(message_source)

    log.info(f"Adding langfuse tags to trace: {tags}")
    langfuse_handler = create_langfuse_callback(
        user_id = user_id,
        session_id = session_id,
        tags = tags
    )
    # BUG FIX: create_langfuse_callback returns None when langfuse is not
    # installed; appending None to the callbacks list would break LangChain's
    # callback dispatch downstream. Only append a real handler.
    if langfuse_handler is not None:
        config["callbacks"].append(langfuse_handler)

    log.debug(f"add_langfuse_tracing modified config {config}")
    return config
14 | 15 | If load_from_file=False, by default it will try to load from Langfuse, if fails (which is laggy so not ideal) then load from file. 16 | 17 | Prompts on Langfuse should be specified with a name with {prefix}-{key} e.g. "sunholo-hello" 18 | 19 | Prompts in files will use yaml: 20 | 21 | ```yaml 22 | kind: promptConfig 23 | apiVersion: v1 24 | prompts: 25 | sunholo: 26 | hello: | 27 | Say hello to {name} 28 | ``` 29 | 30 | And load via utils.ConfigManager: 31 | 32 | ```python 33 | # equivalent to load_prompt_from_yaml("hello", load_from_file=True) 34 | config = ConfigManager("sunholo") 35 | config.promptConfig("hello") 36 | ``` 37 | 38 | If f_string is True will be in a Langchain style prompt e.g. { one brace } 39 | If f_string is False will be Langfuse style prompt e.g. {{ two braces }} - see https://langfuse.com/docs/prompts/get-started 40 | 41 | Example: 42 | 43 | ```python 44 | from sunholo.langfuse.prompts import load_prompt_from_yaml 45 | # f_string 46 | hello_template = load_prompt_from_yaml("hello") 47 | hello_template.format(name="Bob") 48 | 49 | #langfuse style 50 | hello_template = load_prompt_from_yaml("hello", f_string=False) 51 | hello_template.compile(name="Bob") 52 | 53 | # if prompt not available on langfuse, will attempt to load from local promptConfig file 54 | hello_template = load_prompt_from_yaml("hello", load_from_file=True) 55 | 56 | ``` 57 | 58 | """ 59 | # Initialize Langfuse client 60 | if load_from_file: 61 | config = ConfigManager(prefix) 62 | 63 | return config.promptConfig(key) 64 | 65 | if langfuse is None: 66 | log.warning("No Langfuse import available - install via sunholo[http]") 67 | else: 68 | langfuse_result = [None] 69 | 70 | def langfuse_load(): 71 | try: 72 | template = f"{prefix}-{key}" if prefix else key 73 | prompt = langfuse.get_prompt(template, cache_ttl_seconds=300) 74 | langfuse_result[0] = prompt.get_langchain_prompt() if f_string else prompt 75 | except Exception as err: 76 | log.warning(f"Langfuse error: 
# Create a RAG Corpus, Import Files
def fetch_corpus(project_id, location, rag_id):
    """Return the Vertex AI RAG corpus for project/location/rag_id.

    Raises:
        ValueError: if the corpus cannot be fetched.
    """
    corpus_name = f"projects/{project_id}/locations/{location}/ragCorpora/{rag_id}"

    try:
        return rag.get_corpus(name=corpus_name)
    except Exception as err:
        # NOTE: corpora are deliberately NOT auto-created on a miss -
        # display_name is not unique across rag_ids, so creating here could
        # silently produce duplicates.
        raise ValueError(f"Failed to get or create corpus: {str(err)}")
def add_user_history_rag(
        user_id:str,
        config:ConfigManager,
        question:str,
        answer:str,
        metadata:dict=None,
        user_history_template:str=None):
    """Store a question/answer exchange in the user's personal RAG corpus.

    Args:
        user_id: Identifier of the user; also used as the corpus display name.
        config: ConfigManager for the active VAC.
        question: The user's question.
        answer: The generated answer.
        metadata: Optional extra metadata rendered into the history entry.
        user_history_template: Optional format string with placeholders
            {user_id}, {the_date}, {the_question}, {the_answer}, {the_metadata}.

    Returns:
        The formatted history string (upload failures are logged, not raised).
    """
    # BUG FIX: avoid the mutable-default-argument pitfall (`metadata={}`);
    # callers passing nothing still get an empty dict, rendered as "{}".
    if metadata is None:
        metadata = {}

    # add user history to its own RAG store
    log.info(f"Adding user history to RAG store: {question} and {answer}")

    manager = LlamaIndexVertexCorpusManager(config)

    corpus = manager.create_corpus(user_id, description=f"Personal user history for {user_id}")

    current_datetime = datetime.datetime.now()

    # Convert to string with desired format
    current_datetime_str = current_datetime.strftime('%Y-%m-%d %H:%M:%S')

    if user_history_template is None:
        user_history_template="""Question from {user_id} at {the_date}: {the_question}\nAnswer: {the_answer}\nMetadata:{the_metadata}"""

    log.info(f"Found corpus for {user_id}: {corpus}")
    user_history = user_history_template.format(
        user_id=user_id,
        the_date=current_datetime_str,
        the_question=question,
        the_answer=answer,
        the_metadata=metadata
    )

    try:
        manager.upload_text(
            text=user_history,
            corpus_display_name=user_id,
            description=f"{user_id} chat history for {current_datetime}"
        )
    except Exception as err:
        # Best-effort: history upload failure should not break the main flow
        log.error(f"Could not upload LlamaIndex QNA RAG history: {str(err)}")

    return user_history
{str(err)}") 68 | 69 | return f"No user history available due to error: {str(err)}" 70 | -------------------------------------------------------------------------------- /src/sunholo/lookup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/lookup/__init__.py -------------------------------------------------------------------------------- /src/sunholo/lookup/model_lookup.yaml: -------------------------------------------------------------------------------- 1 | gpt-4: 2 | max_tokens: 8192 3 | gpt-4-32k: 4 | max_tokens: 32768 5 | gpt-4-1106-preview: 6 | max_tokens: 128000 7 | gpt-4-vision-preview: 8 | max_tokens: 128000 9 | gpt-3.5-turbo: 10 | max_tokens: 4096 11 | gpt-3.5-turbo-16k: 12 | max_tokens: 16385 13 | gemini-pro: 14 | max_tokens: 32760 15 | gemini-pro-vision: 16 | max_tokens: 16384 17 | max_images_per_prompt: 16 18 | max_video_length: 2min 19 | max_videos_per_prompt: 1 20 | text-bison: 21 | max_tokens: 8192 22 | text-unicorn: 23 | max_tokens: 8192 24 | text-bison-32k: 25 | max_tokens: 32768 26 | chat-bison: 27 | max_tokens: 8192 28 | chat-bison-32k: 29 | max_tokens: 32768 30 | code-bison: 31 | max_tokens: 6144 32 | code-bison-32k: 33 | max_tokens: 32768 34 | codechat-bison: 35 | max_tokens: 6144 36 | codechat-bison-32k: 37 | max_tokens: 32768 38 | medlm-medium: 39 | max_tokens: 32768 40 | medlm-large: 41 | max_tokens: 8192 -------------------------------------------------------------------------------- /src/sunholo/mcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/mcp/__init__.py -------------------------------------------------------------------------------- /src/sunholo/ollama/__init__.py: 
def chat_ollama(msg, model_name, the_images=None):
    """Stream a chat response from a local Ollama model to stdout.

    Args:
        msg: The prompt text to send to the model.
        model_name: Ollama model name, e.g. 'gemma3:12b'.
        the_images: Optional list of image file paths to attach to the prompt.

    Raises:
        ImportError: If the optional ``ollama`` package is not installed.
    """
    if not generate:
        raise ImportError("Import ollama via `pip install ollama`")

    chat_images = []
    if the_images:
        for the_image in the_images:
            # Validate that the file really is a readable image, but send the
            # *path* to ollama: the client library accepts file paths or raw
            # bytes (doing its own base64 encoding) - PIL.Image objects are
            # not an accepted payload type.
            with PIL.Image.open(the_image) as img:
                img.verify()
            chat_images.append(the_image)

    log.info(f"Ollama [{model_name}]: Chatting...{msg=}")
    for response in generate(model_name, msg, images=chat_images, stream=True):
        print(response['response'], end='', flush=True)

def main():
    """CLI entry point: parse arguments and stream a chat response to stdout."""
    parser = argparse.ArgumentParser(description='Chat with Ollama models from the command line')
    parser.add_argument('--model', '-m', type=str, default=CHAT_MODEL_NAME,
                        help='Model name to use (defaults to MODEL_NAME_LATEST env var)')
    parser.add_argument('--images', '-i', type=str, nargs='+',
                        help='Image file paths to include in the prompt')
    parser.add_argument('--message', '-p', type=str,
                        help='Message to send')

    args = parser.parse_args()

    if not args.model:
        print("Error: No model specified. Either set MODEL_NAME_LATEST environment variable or use --model flag.")
        sys.exit(1)

    # If no message provided via args, read from stdin (interactive use)
    if not args.message:
        print(f"Enter your message to {args.model} (Ctrl+D to send):")
        user_input = sys.stdin.read().strip()
    else:
        user_input = args.message

    if not user_input:
        print("Error: Empty message. Exiting.")
        sys.exit(1)

    try:
        chat_ollama(user_input, args.model, args.images)
        print()  # Add a newline after the response
    except Exception as e:
        # Covers ImportError (ollama not installed) and runtime failures alike;
        # the two original arms had identical bodies so they are merged.
        print(f"Error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    # uv run src/sunholo/ollama/ollama_images.py --model=gemma3:12b - chat and then CTRL+D
    # uv run src/sunholo/ollama/ollama_images.py --model gemma3:12b --message "Tell me about quantum computing"
    main()
def decode_pubsub_message(data: dict) -> tuple:
    """Extracts message data and metadata from a Pub/Sub message.

    Args:
        data (dict): The Pub/Sub push message body (``data['message']``).

    Returns:
        tuple: ``(message_data, attributes, vector_name)`` where
            ``message_data`` is the base64-decoded payload string,
            ``attributes`` is the attribute dict (with ``vector_name`` added
            when a ``namespace`` attribute is present) and ``vector_name`` is
            that namespace or None.
    """
    message_data = base64.b64decode(data['message']['data']).decode('utf-8')
    attributes = data['message'].get('attributes', {})
    messageId = data['message'].get('messageId')
    publishTime = data['message'].get('publishTime')
    vector_name = attributes.get('namespace', None)
    if vector_name:
        # Mirror the namespace into attributes so downstream consumers that
        # only see attributes can still resolve the vector_name.
        attributes['vector_name'] = vector_name

    log.info(f"Process Pub/Sub was triggered by messageId {messageId} published at {publishTime}")
    log.debug("Processing Pub/Sub data", log_struct=message_data)

    return message_data, attributes, vector_name

def process_pubsub_message(data: dict) -> tuple:
    """Extracts message data and metadata from a Pub/Sub message for a Cloud Storage event.

    Args:
        data (dict): The Pub/Sub push message body.

    Returns:
        tuple: ``(message_data, attributes, vector_name)``. For GCS
            OBJECT_FINALIZE events ``message_data`` is rewritten to the
            ``gs://`` URI of the object and object metadata is merged into
            ``attributes``; config files yield ``(None, None, None)``.
    """
    # Decode the message data
    message_data, attributes, vector_name = decode_pubsub_message(data)

    # Check for a valid GCS event type and payload format
    if attributes.get("eventType") == "OBJECT_FINALIZE" and attributes.get("payloadFormat") == "JSON_API_V1":
        objectId = attributes.get("objectId")
        if not objectId:
            # Malformed GCS event without an objectId: previously this crashed
            # with AttributeError on .startswith - fall back to the raw payload.
            log.warning("GCS OBJECT_FINALIZE event missing objectId attribute")
            return message_data, attributes, vector_name

        log.info(f"Got valid event from Google Cloud Storage: {objectId}")

        # Ignore config files
        if objectId.startswith("config"):
            log.info("Ignoring config file")
            return None, None, None

        # Construct the message_data as the object's gs:// URI
        message_data = 'gs://' + attributes.get("bucketId") + '/' + objectId

        # The first folder of the object path overrides the namespace vector_name
        if '/' in objectId:
            bucket_vector_name = objectId.split('/')[0]
            if len(bucket_vector_name) > 0 and vector_name != bucket_vector_name:
                log.info(f"Overwriting vector_name {vector_name} with {bucket_vector_name}")
                vector_name = bucket_vector_name

        # get metadata for object and merge the message attributes over it
        metadata = get_object_metadata(attributes.get("bucketId"), objectId)
        if metadata:
            metadata.update(attributes)
            attributes = metadata

    return message_data, attributes, vector_name
def document_to_dict(document):
    """Serialise a langchain-style Document into a plain dict of its fields."""
    return {
        "page_content": document.page_content,
        "metadata": document.metadata,
    }

def _ensure_answer(output: dict) -> dict:
    """Fill in a placeholder when the VAC returned an empty or missing answer."""
    if not output.get("answer"):
        output["answer"] = "(No text was returned)"
    return output

def parse_output(bot_output):
    """
    Parses VAC output assuming it has an 'answer' and an optional 'source_documents' key

    """
    # Plain string responses are wrapped into the standard answer dict.
    if isinstance(bot_output, str):
        return {"answer": bot_output}

    if isinstance(bot_output, dict):
        # Top-level source documents: serialise them in place.
        if 'source_documents' in bot_output:
            docs = bot_output['source_documents']
            bot_output['source_documents'] = [document_to_dict(doc) for doc in docs]
            return _ensure_answer(bot_output)

        # Source documents nested under a metadata dict: lift serialised
        # copies to the top level (the nested originals are left untouched).
        metadata = bot_output.get('metadata')
        if isinstance(metadata, dict) and 'source_documents' in metadata:
            bot_output['source_documents'] = [document_to_dict(doc) for doc in metadata['source_documents']]
            return _ensure_answer(bot_output)

        # Agent-style {'output': {'content': ..., 'metadata': ...}} shape.
        inner = bot_output.get('output')
        if isinstance(inner, dict) and 'content' in inner:
            return {
                'answer': inner.get('content'),
                'metadata': inner.get('metadata')
            }

        # Any other dict must carry a non-empty 'answer'.
        if not bot_output.get("answer"):
            raise ValueError(f"VAC output was not a string or a dict with the key 'answer' - got: {bot_output} {type(bot_output)}")
        return bot_output

    # Unrecognised type: log and (implicitly) return None, as before.
    log.error(f"Couldn't parse output for:\n {bot_output}")
Error: {str(err)}") 28 | time.sleep(delay) 29 | try: 30 | result = qa_function(question) 31 | result["answer"] = result["answer"] + " (Sorry for the delay, brain was a bit slow - should be quicker next time)" 32 | return result 33 | except ReadTimeout: 34 | if retry == max_retries - 1: 35 | raise 36 | except Exception: 37 | delay = initial_delay * (retry + 1) 38 | log.error(f"General error: {traceback.format_exc()}") 39 | time.sleep(delay) 40 | try: 41 | result = qa_function(question) 42 | result["answer"] = result["answer"] + " (Sorry for the delay, had to warm up the brain - should be quicker next time)" 43 | return result 44 | except Exception: 45 | if retry == max_retries - 1: 46 | raise 47 | 48 | raise Exception(f"Max retries exceeded for question: {question}") -------------------------------------------------------------------------------- /src/sunholo/senses/README.md: -------------------------------------------------------------------------------- 1 | # Senses 2 | 3 | Helping models see and hear and talk 4 | 5 | ## Livekit 6 | 7 | https://docs.livekit.io/home/cli/cli-setup/ 8 | 9 | ```sh 10 | brew install livekit livekit-cli 11 | ``` 12 | 13 | Start local server: 14 | 15 | ```sh 16 | livekit-server --dev --bind 0.0.0.0 17 | ``` 18 | 19 | Note URL: wss://127.0.0.1:7881 20 | 21 | Create token: 22 | 23 | ```sh 24 | livekit-cli create-token \ 25 | --api-key devkey --api-secret secret \ 26 | --join --room my-first-room --identity user1 \ 27 | --valid-for 24h 28 | ``` -------------------------------------------------------------------------------- /src/sunholo/senses/__init__.py: -------------------------------------------------------------------------------- 1 | from .stream_voice import StreamingTTS 2 | -------------------------------------------------------------------------------- /src/sunholo/streaming/__init__.py: -------------------------------------------------------------------------------- 1 | from .streaming import start_streaming_chat, 
def can_agent_stream(agent_name: str) -> bool:
    """Check whether an agent's configured endpoints include a streaming POST route.

    Args:
        agent_name: The agent type as configured in the agentConfig file.

    Returns:
        bool: True when the agent's config declares a 'stream' POST endpoint,
        False when it does not (including when the config has no 'post'
        section at all, which previously raised KeyError/TypeError).
    """
    log.debug(f"agent_type: {agent_name} checking streaming...")
    endpoints_config = load_config_key(agent_name, "dummy_value", kind="agentConfig")
    # Guard against a missing config or a config without a 'post' section
    post_endpoints = (endpoints_config or {}).get('post') or []

    return 'stream' in post_endpoints
21 | }' 22 | ``` -------------------------------------------------------------------------------- /src/sunholo/templates/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/templates/agent/__init__.py -------------------------------------------------------------------------------- /src/sunholo/templates/agent/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from sunholo.agents import VACRoutes, create_app 4 | 5 | from vac_service import vac_stream, vac 6 | 7 | app = create_app(__name__) 8 | 9 | # Register the Q&A routes with the specific interpreter functions 10 | # creates /vac/ and /vac/streaming/ 11 | VACRoutes(app, vac_stream, vac) 12 | 13 | if __name__ == "__main__": 14 | import os 15 | app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True) 16 | 17 | -------------------------------------------------------------------------------- /src/sunholo/templates/agent/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | substitutions: 2 | _SERVICE_NAME: CHANGE_ME 3 | _BUILD_FOLDER: CHANGE_ME 4 | _IMAGE_NAME: terraform_managed 5 | _REGION: terraform_managed 6 | _ARTIFACT_REGISTRY_REPO_URL: terraform_managed 7 | _GCS_BUCKET: terraform_managed 8 | _CONFIG_FOLDER: terraform_managed 9 | _CONFIG_BUCKET: terraform_managed 10 | _PROJECT_ID: terraform_managed 11 | _LANCEDB_BUCKET: terraform_managed 12 | _ALLOYDB_DB: terraform_managed 13 | 14 | tags: ['${_PROJECT_ID}', '${_SERVICE_NAME}'] 15 | 16 | options: 17 | logging: GCS_ONLY 18 | logsBucket: gs://multivac-deploy-logging-bucket 19 | 20 | steps: 21 | - name: 'gcr.io/cloud-builders/docker' 22 | entrypoint: 'bash' 23 | dir: ${_BUILD_FOLDER} 24 | args: 25 | - '-c' 26 | - | 27 | cat <Dockerfile_cloudrun 28 | FROM 
${_ARTIFACT_REGISTRY_REPO_URL}/prebuild/${_IMAGE_NAME}:${BRANCH_NAME} 29 | 30 | COPY . . 31 | 32 | EXPOSE 8080 33 | 34 | RUN pip install --upgrade -r requirements.txt 35 | 36 | RUN pip list && python --version 37 | 38 | ENV _CONFIG_FOLDER=/gcs_config 39 | ENV VAC_CONFIG_FOLDER=/gcs_config/config 40 | ENV ALLOYDB_DB=${_ALLOYDB_DB} 41 | 42 | CMD exec gunicorn --bind :\$$PORT --workers 4 --timeout 0 app:app 43 | EOF 44 | 45 | - name: 'gcr.io/cloud-builders/docker' 46 | dir: ${_BUILD_FOLDER} 47 | args: ['build', '-t', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}', '-f', 'Dockerfile_cloudrun', '.', '--network=cloudbuild'] 48 | 49 | - name: 'gcr.io/cloud-builders/docker' 50 | dir: ${_BUILD_FOLDER} 51 | args: ['push', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}'] 52 | 53 | # Deploy the image to Cloud Run 54 | - name: "gcr.io/cloud-builders/gcloud" 55 | id: deploy cloud run 56 | entrypoint: bash 57 | dir: ${_BUILD_FOLDER} 58 | args: 59 | - "-c" 60 | - | 61 | gcloud beta run deploy ${_SERVICE_NAME} --image ${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME} \ 62 | --region ${_REGION} \ 63 | --project ${_PROJECT_ID} \ 64 | --platform managed \ 65 | --allow-unauthenticated \ 66 | --memory 2Gi \ 67 | --cpu 1 \ 68 | --max-instances 3 \ 69 | --update-secrets=LANGFUSE_HOST=LANGFUSE_URL:latest \ 70 | --update-secrets=LANGFUSE_SECRET_KEY=LANGFUSE_API_KEY:latest \ 71 | --update-secrets=LANGFUSE_PUBLIC_KEY=LANGFUSE_PUBLIC_KEY:latest \ 72 | --session-affinity \ 73 | --add-volume name=gcs_config,type=cloud-storage,bucket=${_CONFIG_BUCKET},readonly=true \ 74 | --add-volume-mount volume=gcs_config,mount-path=/gcs_config 75 | 76 | - name: 'python:3.9' 77 | id: validate config 78 | entrypoint: 'bash' 79 | dir: ${_BUILD_FOLDER} 80 | waitFor: ["-"] 81 | args: 82 | - '-c' 83 | - | 84 | pip install --no-cache sunholo[cli] 85 | sunholo list-configs --validate || exit 1 86 | sunholo list-configs 
--kind=vacConfig --vac=${_SERVICE_NAME} --validate || exit 1 87 | 88 | -------------------------------------------------------------------------------- /src/sunholo/templates/agent/config/vac_config.yaml: -------------------------------------------------------------------------------- 1 | kind: vacConfig 2 | apiVersion: v1 3 | vac: 4 | template_vac: #TODO: update to your own vac configuration 5 | llm: vertex 6 | model: gemini-1.5-flash 7 | agent: vertex-genai 8 | display_name: Template VAC 9 | memory: 10 | - llamaindex-native: 11 | vectorstore: llamaindex 12 | -------------------------------------------------------------------------------- /src/sunholo/templates/agent/my_log.py: -------------------------------------------------------------------------------- 1 | from sunholo.custom_logging import setup_logging 2 | 3 | log = setup_logging("sunholo") 4 | -------------------------------------------------------------------------------- /src/sunholo/templates/agent/requirements.txt: -------------------------------------------------------------------------------- 1 | sunholo[gcp,http]>=0.77.3 2 | -------------------------------------------------------------------------------- /src/sunholo/templates/agent/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/templates/agent/tools/__init__.py -------------------------------------------------------------------------------- /src/sunholo/templates/agent/tools/your_agent.py: -------------------------------------------------------------------------------- 1 | from sunholo.genai import GenAIFunctionProcessor 2 | from sunholo.utils import ConfigManager 3 | 4 | from my_log import log 5 | 6 | 7 | class QuartoProcessor(GenAIFunctionProcessor): 8 | def construct_tools(self) -> dict: 9 | tools = self.config.vacConfig("tools") 10 | quarto_config = tools.get("quarto") 11 | 12 | def 
decide_to_go_on(go_on: bool): 13 | """ 14 | Examine the chat history. If the answer to the user's question has been answered, then go_on=False. 15 | If the chat history indicates the answer is still being looked for, then go_on=True. 16 | If there is no chat history, then go_on=True. 17 | If there is an error that can't be corrected or solved by you, then go_on=False. 18 | If there is an error but you think you can solve it by correcting your function arguments (such as an incorrect source), then go_on=True 19 | If you want to ask the user a question or for some more feedback, then go_on=False. 20 | 21 | Args: 22 | go_on: boolean Whether to continue searching or fetching from the AlloyDB database 23 | 24 | Returns: 25 | boolean: True to carry on, False to continue 26 | """ 27 | return go_on 28 | 29 | def quarto_render() -> dict: 30 | """ 31 | ... 32 | 33 | Args: 34 | 35 | 36 | Returns: 37 | 38 | """ 39 | pass 40 | 41 | return { 42 | "quarto_render": quarto_render, 43 | "decide_to_go_on": decide_to_go_on 44 | } 45 | 46 | def quarto_content(question: str, chat_history=[]) -> str: 47 | prompt_config = ConfigManager("quarto") 48 | alloydb_template = prompt_config.promptConfig("quarto_template") 49 | 50 | conversation_text = "" 51 | for human, ai in chat_history: 52 | conversation_text += f"Human: {human}\nAI: {ai}\n" 53 | 54 | return alloydb_template.format(the_question=question, chat_history=conversation_text[-10000:]) 55 | 56 | 57 | def get_quarto(config:ConfigManager, processor:QuartoProcessor): 58 | 59 | tools = config.vacConfig('tools') 60 | 61 | if tools and tools.get('quarto'): 62 | model_name = None 63 | if config.vacConfig('llm') != "vertex": 64 | model_name = 'gemini-1.5-flash' 65 | alloydb_model = processor.get_model( 66 | system_instruction=( 67 | "You are a helpful Quarto agent that helps users create and render Quarto documents. 
" 68 | "When you think the answer has been given to the satisfaction of the user, or you think no answer is possible, or you need user confirmation or input, you MUST use the decide_to_go_on(go_on=False) function" 69 | "When you want to ask the question to the user, mark the go_on=False in the function" 70 | ), 71 | model_name=model_name 72 | ) 73 | 74 | if alloydb_model: 75 | return alloydb_model 76 | 77 | log.error("Error initializing quarto model") 78 | return None -------------------------------------------------------------------------------- /src/sunholo/templates/agent/vac_service.py: -------------------------------------------------------------------------------- 1 | from my_log import log 2 | from sunholo.utils import ConfigManager 3 | 4 | # VAC specific imports 5 | 6 | #TODO: Developer to update to their own implementation 7 | from sunholo.vertex import init_vertex, get_vertex_memories 8 | from vertexai.preview.generative_models import GenerativeModel 9 | 10 | #TODO: change this to a streaming VAC function 11 | def vac_stream(question: str, vector_name, chat_history=[], callback=None, **kwargs): 12 | 13 | rag_model = create_model(vector_name) 14 | 15 | # streaming model calls 16 | response = rag_model.generate_content(question, stream=True) 17 | for chunk in response: 18 | try: 19 | callback.on_llm_new_token(token=chunk.text) 20 | except ValueError as err: 21 | callback.on_llm_new_token(token=str(err)) 22 | 23 | callback.on_llm_end(response=response) 24 | log.info(f"rag_model.response: {response}") 25 | 26 | metadata = { 27 | "chat_history": chat_history 28 | } 29 | 30 | return {"answer": response.text, "metadata": metadata} 31 | 32 | 33 | 34 | #TODO: change this to a batch VAC function 35 | def vac(question: str, vector_name: str, chat_history=[], **kwargs): 36 | # Create a callback that does nothing for streaming if you don't want intermediate outputs 37 | class NoOpCallback: 38 | def on_llm_new_token(self, token): 39 | pass 40 | def on_llm_end(self, 
response): 41 | pass 42 | 43 | # Use the NoOpCallback for non-streaming behavior 44 | callback = NoOpCallback() 45 | 46 | # Pass all arguments to vac_stream and use the final return 47 | result = vac_stream( 48 | question=question, 49 | vector_name=vector_name, 50 | chat_history=chat_history, 51 | callback=callback, 52 | **kwargs 53 | ) 54 | 55 | return result 56 | 57 | 58 | # TODO: common model setup to both batching and streaming 59 | def create_model(vac): 60 | config = ConfigManager(vac) 61 | 62 | init_vertex() 63 | corpus_tools = get_vertex_memories(config) 64 | 65 | model = config.vacConfig("model") 66 | 67 | # Create a gemini-pro model instance 68 | # https://ai.google.dev/api/python/google/generativeai/GenerativeModel#streaming 69 | rag_model = GenerativeModel( 70 | model_name=model or "gemini-1.5-flash", tools=[corpus_tools] 71 | ) 72 | 73 | return rag_model -------------------------------------------------------------------------------- /src/sunholo/templates/project/README.md: -------------------------------------------------------------------------------- 1 | # Template VAC Project 2 | 3 | This is a template VAC project created with `sunholo init my_vac_project` 4 | 5 | 6 | ## Test calls 7 | 8 | 9 | ```shell 10 | export FLASK_URL=https://template-url 11 | curl -X POST ${FLASK_URL}/vac/template \ 12 | -H "Content-Type: application/json" \ 13 | -d '{ 14 | "user_input": "What do you know about MLOps?" 15 | }' 16 | 17 | curl $VAC_URL/vac/streaming/template \ 18 | -H "Content-Type: application/json" \ 19 | -d '{ 20 | "user_input": "What do you know about MLOps?" 21 | }' 22 | 23 | curl $VAC_URL/vac/streaming/template \ 24 | -H "Content-Type: application/json" \ 25 | -d '{ 26 | "user_input": "Can you summarise what the white house executive order will enable in the regulation of LLMs and AI?", 27 | "chat_history": [{"name": "Human", "content":"Hi! 
"}, {"name": "AI", "content": "Hi!"}, ] 28 | }' 29 | ``` -------------------------------------------------------------------------------- /src/sunholo/templates/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/templates/project/__init__.py -------------------------------------------------------------------------------- /src/sunholo/templates/project/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from sunholo.agents import VACRoutes, create_app 4 | 5 | from vac_service import vac_stream 6 | 7 | app = create_app(__name__) 8 | 9 | # Register the Q&A routes with the specific interpreter functions 10 | # creates endpoints /vac/streaming/ and /vac/ etc. 11 | VACRoutes(app, vac_stream) 12 | 13 | # start via `python app.py` 14 | if __name__ == "__main__": 15 | import os 16 | app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True) 17 | 18 | -------------------------------------------------------------------------------- /src/sunholo/templates/project/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | substitutions: 2 | _SERVICE_NAME: CHANGE_ME 3 | _BUILD_FOLDER: CHANGE_ME 4 | _IMAGE_NAME: terraform_managed 5 | _REGION: terraform_managed 6 | _ARTIFACT_REGISTRY_REPO_URL: terraform_managed 7 | _GCS_BUCKET: terraform_managed 8 | _CONFIG_FOLDER: terraform_managed 9 | _CONFIG_BUCKET: terraform_managed 10 | _PROJECT_ID: terraform_managed 11 | _LANCEDB_BUCKET: terraform_managed 12 | _ALLOYDB_DB: terraform_managed 13 | 14 | tags: ['${_PROJECT_ID}', '${_SERVICE_NAME}'] 15 | 16 | options: 17 | logging: GCS_ONLY 18 | logsBucket: gs://multivac-deploy-logging-bucket 19 | 20 | steps: 21 | - name: 'gcr.io/cloud-builders/docker' 22 | entrypoint: 'bash' 23 | dir: ${_BUILD_FOLDER} 24 | args: 25 | - 
'-c' 26 | - | 27 | cat <Dockerfile_cloudrun 28 | FROM ${_ARTIFACT_REGISTRY_REPO_URL}/prebuild/${_IMAGE_NAME}:${BRANCH_NAME} 29 | 30 | COPY . . 31 | 32 | EXPOSE 8080 33 | 34 | RUN pip install --upgrade -r requirements.txt 35 | 36 | RUN pip list && python --version 37 | 38 | ENV _CONFIG_FOLDER=/gcs_config 39 | ENV VAC_CONFIG_FOLDER=/gcs_config/config 40 | ENV ALLOYDB_DB=${_ALLOYDB_DB} 41 | 42 | CMD exec gunicorn --bind :\$$PORT --workers 4 --timeout 0 app:app 43 | EOF 44 | 45 | - name: 'gcr.io/cloud-builders/docker' 46 | dir: ${_BUILD_FOLDER} 47 | args: ['build', '-t', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}', '-f', 'Dockerfile_cloudrun', '.', '--network=cloudbuild'] 48 | 49 | - name: 'gcr.io/cloud-builders/docker' 50 | dir: ${_BUILD_FOLDER} 51 | args: ['push', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}'] 52 | 53 | # Deploy the image to Cloud Run 54 | - name: "gcr.io/cloud-builders/gcloud" 55 | id: deploy cloud run 56 | entrypoint: bash 57 | dir: ${_BUILD_FOLDER} 58 | args: 59 | - "-c" 60 | - | 61 | gcloud beta run deploy ${_SERVICE_NAME} --image ${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME} \ 62 | --region ${_REGION} \ 63 | --project ${_PROJECT_ID} \ 64 | --platform managed \ 65 | --allow-unauthenticated \ 66 | --memory 2Gi \ 67 | --cpu 1 \ 68 | --max-instances 3 \ 69 | --update-secrets=LANGFUSE_HOST=LANGFUSE_URL:latest \ 70 | --update-secrets=LANGFUSE_SECRET_KEY=LANGFUSE_API_KEY:latest \ 71 | --update-secrets=LANGFUSE_PUBLIC_KEY=LANGFUSE_PUBLIC_KEY:latest \ 72 | --update-secrets=OPENAI_API_KEY=OPENAI_API_KEY:latest \ 73 | --update-secrets=ANTHROPIC_API_KEY=ANTHROPIC_API_KEY:latest \ 74 | --update-secrets=GOOGLE_API_KEY=GOOGLE_API_KEY:latest \ 75 | --session-affinity \ 76 | --add-volume name=gcs_config,type=cloud-storage,bucket=${_CONFIG_BUCKET},readonly=true \ 77 | --add-volume-mount volume=gcs_config,mount-path=/gcs_config 78 | 79 | - name: 'python:3.9' 80 | 
from my_log import log
from sunholo.utils import ConfigManager

# VAC specific imports

# TODO: Developer to update to their own implementation
from sunholo.genai import init_genai, genai_safety
import google.generativeai as genai


# TODO: change this to a streaming VAC function for your use case
def vac_stream(question: str, vector_name: str, chat_history=None, callback=None, **kwargs):
    """
    Stream a Gemini answer for `question`, emitting tokens via `callback`.

    Args:
        question: The user question; appended as the final "user" turn.
        vector_name: Name of the VAC configuration used to build the model.
        chat_history: Optional list of (human, ai) message pairs. Defaults to
            an empty history. NOTE: the previous `chat_history=[]` default was
            a shared mutable default argument (one list reused across calls);
            `None` plus in-function normalisation fixes that.
        callback: Object receiving `on_llm_new_token(token=...)` per streamed
            chunk and `on_llm_end(response=...)` when the stream finishes.
        **kwargs: Unused; accepted for interface compatibility.

    Returns:
        dict: {"answer": full answer string, "metadata": request metadata}.
        To not return this dict at the end of the stream, pass
        `stream_only: true` in the request.
    """
    chat_history = chat_history if chat_history is not None else []

    model = create_model(vector_name)

    # create chat history contents for the genai model
    # https://ai.google.dev/api/generate-content
    contents = []
    for human, ai in chat_history:
        if human:
            contents.append({"role": "user", "parts": [{"text": human}]})
        if ai:
            contents.append({"role": "model", "parts": [{"text": ai}]})

    # the user question goes at the end of the contents list
    contents.append({"role": "user", "parts": [{"text": question}]})

    log.info(contents)

    # streaming model call
    response = model.generate_content(contents, stream=True)
    chunks = ""
    for chunk in response:
        if chunk and chunk.text:
            try:
                callback.on_llm_new_token(token=chunk.text)
                chunks += chunk.text
            except ValueError as err:
                # surface the error text to the stream rather than aborting
                callback.on_llm_new_token(token=str(err))

    # stream has finished, full response is also returned
    callback.on_llm_end(response=response)
    log.info(f"model.response: {response}")

    metadata = {
        "question": question,
        "vector_name": vector_name,
        "chat_history": chat_history,
    }

    return {"answer": chunks, "metadata": metadata}


# TODO: example model setup function
def create_model(vac):
    """
    Create a Gemini model instance configured for the named VAC.

    Args:
        vac: VAC configuration name passed to ConfigManager.

    Returns:
        google.generativeai.GenerativeModel configured with the model name
        from vacConfig (falling back to "gemini-1.5-flash") and the default
        safety settings.
    """
    config = ConfigManager(vac)

    init_genai()

    # get a setting from the config vacConfig object (returns None if not found)
    model = config.vacConfig("model")

    # https://ai.google.dev/api/python/google/generativeai/GenerativeModel#streaming
    genai_model = genai.GenerativeModel(
        model_name=model or "gemini-1.5-flash",
        safety_settings=genai_safety()
    )

    return genai_model
-------------------------------------------------------------------------------- 1 | # Evals 2 | 3 | A Cloud Run service that is sent Langfuse IDs, performs evals based on customisable evaluation functions then adds it to the Langfuse database. 4 | 5 | ## Local 6 | 7 | ```sh 8 | cd application/system_services/evals 9 | python app.py 10 | ``` 11 | 12 | ```sh 13 | curl http://127.0.0.1:8080 14 | 15 | curl http://127.0.0.1:8080/direct_evals \ 16 | -H "Content-Type: application/json" \ 17 | -d '{ 18 | "trace_id": "9f43dd30-e3d9-4299-9e10-464cae352c7c" 19 | }' 20 | 21 | # only trigger 5% of the time eval_percent=0.05 22 | curl http://127.0.0.1:8080/direct_evals \ 23 | -H "Content-Type: application/json" \ 24 | -d '{ 25 | "trace_id": "9f43dd30-e3d9-4299-9e10-464cae352c7c", 26 | "eval_percent": 0.05 27 | }' 28 | ``` 29 | 30 | ## Test calls 31 | 32 | ```sh 33 | curl https://evals-blqtqfexwa-ew.a.run.app 34 | # {"message":"Hello, evals!"} 35 | 36 | curl https://evals-blqtqfexwa-ew.a.run.app/direct_evals \ 37 | -H "Content-Type: application/json" \ 38 | -d '{ 39 | "trace_id": "9f43dd30-e3d9-4299-9e10-464cae352c7c" 40 | }' 41 | ``` -------------------------------------------------------------------------------- /src/sunholo/templates/system_services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/templates/system_services/__init__.py -------------------------------------------------------------------------------- /src/sunholo/templates/system_services/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | 4 | # app.py 5 | from fastapi import FastAPI, Request 6 | from fastapi.responses import JSONResponse 7 | 8 | from my_log import log 9 | 10 | app = FastAPI() 11 | 12 | @app.get("/") 13 | def home(): 14 | """Simple endpoint to indicate that the app is 
running.""" 15 | return {"message": "Hello, service!"} 16 | 17 | @app.post("/system_service/") 18 | async def system_service(request: Request): 19 | """ 20 | Pubsub message parsed and sent to Langfuse ID server 21 | """ 22 | data = await request.json() 23 | 24 | try: 25 | #TODO: add stuff here 26 | meta = "" 27 | return {"status": "success", "message": meta} 28 | except Exception as err: 29 | log.error(f'EVAL_ERROR: Error when sending {data} to /pubsub_to_langfuse: {str(err)} traceback: {traceback.format_exc()}') 30 | return JSONResponse(status_code=200, content={"status": "error", "message": f'{str(err)} traceback: {traceback.format_exc()}'}) 31 | 32 | @app.post("/test_endpoint") 33 | async def test_me(request: Request): 34 | """ 35 | Endpoint to send trace_ids directly for evals then sent to Langfuse ID server 36 | """ 37 | data = await request.json() 38 | 39 | try: 40 | #TODO: do something here 41 | meta = "" 42 | return {"status": "success", "message": meta} 43 | except Exception as err: 44 | log.error(f'EVAL_ERROR: Error when sending {data} to /direct_evals: {str(err)} traceback: {traceback.format_exc()}') 45 | return JSONResponse(status_code=500, content={"status": "error", "message": f'{str(err)} traceback: {traceback.format_exc()}'}) 46 | 47 | if __name__ == "__main__": 48 | import uvicorn 49 | uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True) -------------------------------------------------------------------------------- /src/sunholo/templates/system_services/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | substitutions: 2 | _SERVICE_NAME: CHANGE_ME 3 | _BUILD_FOLDER: CHANGE_ME 4 | _IMAGE_NAME: terraform_managed 5 | _REGION: terraform_managed 6 | _ARTIFACT_REGISTRY_REPO_URL: terraform_managed 7 | _GCS_BUCKET: terraform_managed 8 | _CONFIG_FOLDER: terraform_managed 9 | _CONFIG_BUCKET: terraform_managed 10 | _PROJECT_ID: terraform_managed 11 | _LANCEDB_BUCKET: 
terraform_managed 12 | _ALLOYDB_DB: terraform_managed 13 | 14 | tags: ['${_PROJECT_ID}', '${_SERVICE_NAME}'] 15 | 16 | options: 17 | logging: GCS_ONLY 18 | logsBucket: gs://multivac-deploy-logging-bucket 19 | 20 | steps: 21 | - name: 'gcr.io/cloud-builders/docker' 22 | entrypoint: 'bash' 23 | dir: ${_BUILD_FOLDER} 24 | args: 25 | - '-c' 26 | - | 27 | cat <Dockerfile_cloudrun 28 | FROM ${_ARTIFACT_REGISTRY_REPO_URL}/prebuild/${_IMAGE_NAME}:${BRANCH_NAME} 29 | 30 | COPY . . 31 | 32 | EXPOSE 8080 33 | 34 | RUN pip install --upgrade -r requirements.txt 35 | 36 | RUN pip list && python --version 37 | 38 | ENV _CONFIG_FOLDER=/gcs_config 39 | ENV VAC_CONFIG_FOLDER=/gcs_config/config 40 | ENV ALLOYDB_DB=${_ALLOYDB_DB} 41 | 42 | CMD exec uvicorn app:app --host 0.0.0.0 --port \$$PORT --workers 4 --loop asyncio 43 | EOF 44 | 45 | - name: 'gcr.io/cloud-builders/docker' 46 | dir: ${_BUILD_FOLDER} 47 | args: ['build', '-t', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}', '-f', 'Dockerfile_cloudrun', '.', '--network=cloudbuild'] 48 | 49 | - name: 'gcr.io/cloud-builders/docker' 50 | dir: ${_BUILD_FOLDER} 51 | args: ['push', '${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME}'] 52 | 53 | # Deploy the image to Cloud Run 54 | - name: "gcr.io/cloud-builders/gcloud" 55 | id: deploy cloud run 56 | entrypoint: bash 57 | dir: ${_BUILD_FOLDER} 58 | args: 59 | - "-c" 60 | - | 61 | gcloud beta run deploy ${_SERVICE_NAME} --image ${_ARTIFACT_REGISTRY_REPO_URL}/${_IMAGE_NAME}/${_SERVICE_NAME}:${BRANCH_NAME} \ 62 | --region ${_REGION} \ 63 | --project ${_PROJECT_ID} \ 64 | --platform managed \ 65 | --allow-unauthenticated \ 66 | --memory 2Gi \ 67 | --cpu 1 \ 68 | --max-instances 3 \ 69 | --update-secrets=LANGFUSE_HOST=LANGFUSE_URL:latest \ 70 | --update-secrets=LANGFUSE_SECRET_KEY=LANGFUSE_API_KEY:latest \ 71 | --update-secrets=LANGFUSE_PUBLIC_KEY=LANGFUSE_PUBLIC_KEY:latest \ 72 | --update-secrets=OPENAI_API_KEY=OPENAI_API_KEY:latest \ 
73 | --update-secrets=ANTHROPIC_API_KEY=ANTHROPIC_API_KEY:latest \ 74 | --session-affinity \ 75 | --add-volume name=gcs_config,type=cloud-storage,bucket=${_CONFIG_BUCKET},readonly=true \ 76 | --add-volume-mount volume=gcs_config,mount-path=/gcs_config 77 | -------------------------------------------------------------------------------- /src/sunholo/templates/system_services/my_log.py: -------------------------------------------------------------------------------- 1 | from sunholo.custom_logging import setup_logging 2 | 3 | log = setup_logging("system") 4 | -------------------------------------------------------------------------------- /src/sunholo/templates/system_services/requirements.txt: -------------------------------------------------------------------------------- 1 | sunholo[gcp,http]==0.89.9 2 | deepeval 3 | -------------------------------------------------------------------------------- /src/sunholo/terraform/__init__.py: -------------------------------------------------------------------------------- 1 | from .tfvars_editor import setup_tfvarseditor_subparser, TerraformVarsEditor -------------------------------------------------------------------------------- /src/sunholo/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .web_browser import BrowseWebWithImagePromptsBot -------------------------------------------------------------------------------- /src/sunholo/utils/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunholo-data/sunholo-py/c8b1a7830358356fa54797f327001d6187030a63/src/sunholo/utils/.gitkeep -------------------------------------------------------------------------------- /src/sunholo/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import load_config_key, load_config 2 | from .config_class import ConfigManager 3 | 
import os


def has_multivac_api_key():
    """Return True when a non-empty MULTIVAC_API_KEY environment variable is set."""
    return bool(os.getenv('MULTIVAC_API_KEY'))


def get_multivac_api_key():
    """Return the MULTIVAC_API_KEY environment variable value, or None when unset or empty."""
    return os.getenv('MULTIVAC_API_KEY') or None
def format_timedelta(td):
    """Render a datetime.timedelta as a human-readable string.

    Leading zero-valued units are omitted: once the most significant non-zero
    unit is found, it and all smaller units are printed, e.g.
    "2 hour(s), 0 minute(s), 5 second(s)". A zero delta yields "0 second(s)".
    """
    hours, rem = divmod(td.seconds, 3600)
    minutes, secs = divmod(rem, 60)

    # Units ordered most- to least-significant; td.days may exceed 1 unit,
    # td.seconds is always normalised into [0, 86400) by timedelta itself.
    parts = [
        (td.days, "day"),
        (hours, "hour"),
        (minutes, "minute"),
        (secs, "second"),
    ]
    # Drop leading non-positive units, but always keep at least the seconds entry.
    while len(parts) > 1 and parts[0][0] <= 0:
        parts.pop(0)

    return ", ".join(f"{value} {unit}(s)" for value, unit in parts)
def generate_user_id():
    """Derive a stable, anonymous identifier for the current machine.

    A machine fingerprint (hostname, platform string, processor) is hashed
    with SHA-256, and the hex digest is folded into a deterministic
    version-5 UUID under the DNS namespace. Repeated calls on the same
    machine return the same id; raw machine details are never exposed.
    """
    fingerprint = "-".join([
        socket.gethostname(),
        platform.platform(),
        platform.processor(),
    ])
    digest = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, digest))
language." 13 | parameters: 14 | - name: "apiServicePrompt" 15 | in: query 16 | description: "Language" 17 | required: true 18 | schema: 19 | type: string 20 | responses: 21 | '200': 22 | description: Successful operation. 23 | content: 24 | application/json: 25 | schema: 26 | $ref: "#/components/schemas/Result" 27 | components: 28 | schemas: 29 | Result: 30 | description: "Hello in the requested language." 31 | properties: 32 | apiServiceOutput: 33 | type: string 34 | -------------------------------------------------------------------------------- /src/sunholo/vertex/extension_yaml/vertex_ai_search.yaml: -------------------------------------------------------------------------------- 1 | openapi: "3.0.0" 2 | info: 3 | title: Vertex AI Search 4 | version: v1alpha 5 | description: > 6 | Performs search on user ingested data including website and unstructured data type. 7 | 8 | This extension is used when user wants to search or retrieve meaningful results from their ingested data in the Vertex AI Search service. 9 | 10 | User needs to create a data store in global region and specify search scope in Vertex AI Search service first. For quality purpose, we encourage to turn on advanced indexing for website data and turn on enterprise edition for unstructured data. 11 | 12 | The search results can be controlled by the serving config provided in the runtime config. 13 | 14 | Supported AuthTypes: 15 | - GOOGLE_SERVICE_ACCOUNT_AUTH: (only supports using Vertex AI Extension Service Agent). 16 | paths: 17 | /search: 18 | get: 19 | operationId: search 20 | description: Performs a search operation based on user's natural language query 21 | parameters: 22 | - name: query 23 | in: query 24 | schema: 25 | type: string 26 | description: User natural language instructions for search. 27 | required: true 28 | responses: 29 | default: 30 | description: Search execution result. 
31 | content: 32 | application/json: 33 | schema: 34 | $ref: "#/components/schemas/SearchResult" 35 | 36 | components: 37 | schemas: 38 | SearchResult: 39 | description: Top results from search response. 40 | type: object 41 | properties: 42 | results: 43 | type: array 44 | items: 45 | type: object 46 | properties: 47 | title: 48 | type: string 49 | description: Retrieved document title. 50 | display_link: 51 | type: string 52 | description: Retrieved document link to display. 53 | link: 54 | type: string 55 | description: Retrieved document link. 56 | extractive_segments: 57 | type: array 58 | description: Extractive segments from the retrieved file. 59 | items: 60 | type: string 61 | extractive_answers: 62 | type: array 63 | description: Extractive answers from the retrieved file. These are generated from the extractive segments. 64 | items: 65 | type: string 66 | -------------------------------------------------------------------------------- /src/sunholo/vertex/genai_functions.py: -------------------------------------------------------------------------------- 1 | try: 2 | import google.generativeai as genai 3 | except ImportError: 4 | genai = None 5 | 6 | from .init import init_genai 7 | from .safety import genai_safety 8 | from ..custom_logging import log 9 | import json 10 | from .type_dict_to_json import describe_typed_dict, openapi_to_typed_dict, is_typed_dict 11 | 12 | def genai_structured_output( 13 | openapi_spec, 14 | system_prompt: str = "", 15 | model_name: str = "models/gemini-1.5-pro", 16 | **kwargs): 17 | """ 18 | Generate AI function output with the specified configuration. 19 | 20 | Parameters: 21 | - output_schema: The schema for the response output. 22 | - system_prompt: Optional system prompt to guide the generation. 23 | - model_name: The name of the model to use (default is 'models/gemini-1.5-flash'). 24 | - output_schema_json: The JSON schema with descriptions. 
def init_genai():
    """
    Configure the google.generativeai client from the GOOGLE_API_KEY env var.

    There are some features that come to the google.generativeai library first,
    which needs to be authenticated via a GOOGLE_API_KEY environment variable,
    created via the Google AI Console at https://aistudio.google.com/app/apikey

    Raises:
        ImportError: If google.generativeai is not installed.
        ValueError: If GOOGLE_API_KEY is not set in the environment.
    """
    try:
        import google.generativeai as genai
    except ImportError:
        # Fixed misplaced quote in the install hint: was "'pip install sunholo'[gcp]'"
        raise ImportError("google.generativeai not installed, please install via 'pip install sunholo[gcp]'")

    google_api_key = os.getenv('GOOGLE_API_KEY')
    if not google_api_key:
        raise ValueError("google.generativeai needs GOOGLE_API_KEY set in environment variable")

    genai.configure(api_key=google_api_key)
53 | """ 54 | try: 55 | import vertexai 56 | except ImportError: 57 | log.error("Need to install vertexai module via `pip install sunholo[gcp]`") 58 | 59 | return None 60 | 61 | if gcp_config: 62 | # Initialize Vertex AI API once per session 63 | project_id = gcp_config.get('project_id') 64 | location = gcp_config.get('location') or location 65 | else: 66 | project_id = project_id or get_gcp_project() 67 | 68 | log.info(f"Auth with email: {get_default_email()} in {project_id}") 69 | 70 | vertexai.init(project=project_id, location=location) 71 | 72 | 73 | -------------------------------------------------------------------------------- /src/sunholo/vertex/safety.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def genai_safety(threshold: str = "BLOCK_ONLY_HIGH"): 4 | """ 5 | BLOCK_ONLY_HIGH - block when high probability of unsafe content is detected 6 | BLOCK_MEDIUM_AND_ABOVE - block when medium or high probability of content is detected 7 | BLOCK_LOW_AND_ABOVE - block when low, medium, or high probability of unsafe content is detected 8 | BLOCK_NONE - no block, but need to be on an allow list to use 9 | """ 10 | from google.generativeai.types import ( 11 | HarmCategory, 12 | HarmBlockThreshold 13 | ) 14 | 15 | if threshold == 'BLOCK_ONLY_HIGH': 16 | thresh = HarmBlockThreshold.BLOCK_ONLY_HIGH 17 | elif threshold == 'BLOCK_MEDIUM_AND_ABOVE': 18 | thresh = HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE 19 | elif threshold == 'BLOCK_LOW_AND_ABOVE': 20 | thresh = HarmBlockThreshold.BLOCK_LOW_AND_ABOVE 21 | elif threshold == 'BLOCK_NONE': 22 | thresh = HarmBlockThreshold.BLOCK_NONE 23 | else: 24 | raise ValueError("Invalid threshold") 25 | 26 | safety_settings = { 27 | HarmCategory.HARM_CATEGORY_HARASSMENT: thresh, 28 | HarmCategory.HARM_CATEGORY_HATE_SPEECH: thresh, 29 | HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: thresh, 30 | HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: thresh, 31 | } 32 | 33 | return safety_settings 34 | 
def vertex_safety(threshold: str = "BLOCK_ONLY_HIGH"):
    """
    Build a Vertex AI safety-settings dict applying one threshold to every harm category.

    Supported thresholds:
        BLOCK_ONLY_HIGH - block when high probability of unsafe content is detected
        BLOCK_MEDIUM_AND_ABOVE - block when medium or high probability of content is detected
        BLOCK_LOW_AND_ABOVE - block when low, medium, or high probability of unsafe content is detected
        BLOCK_NONE - no block, but need to be on an allow list to use

    Raises:
        ValueError: If `threshold` is not one of the supported names.
    """
    from vertexai.generative_models import (
        HarmCategory,
        HarmBlockThreshold,
    )

    # Explicit allow-list so unsupported enum names still raise, matching the
    # original if/elif chain's behaviour.
    allowed = {
        'BLOCK_ONLY_HIGH': HarmBlockThreshold.BLOCK_ONLY_HIGH,
        'BLOCK_MEDIUM_AND_ABOVE': HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        'BLOCK_LOW_AND_ABOVE': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        'BLOCK_NONE': HarmBlockThreshold.BLOCK_NONE,
    }
    try:
        thresh = allowed[threshold]
    except KeyError:
        raise ValueError("Invalid threshold") from None

    return {
        category: thresh
        for category in (
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    }
15 | ] 16 | 17 | for prompt in prompts: 18 | print(f"\nTesting prompt: {prompt}") 19 | try: 20 | await ai.live_async(prompt) 21 | except Exception as e: 22 | print(f"Error: {e}") 23 | 24 | #print("## Speak NOW for 5 seconds...") 25 | #response = await ai.live_async( 26 | # input_type="audio", 27 | # duration=5.0 # Record for 5 seconds 28 | #) 29 | #print(response) 30 | 31 | #print("## Video NOW for 3 seconds...") 32 | #response = await ai.live_async( 33 | # input_type="video", 34 | # duration=3.0 # Record for 3 seconds 35 | #) 36 | #print(response) 37 | 38 | if __name__ == "__main__": 39 | asyncio.run(main()) -------------------------------------------------------------------------------- /tests/test_chat_history.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sunholo.agents.chat_history import extract_chat_history, embeds_to_json, create_message_element, is_human, is_bot, is_ai 3 | 4 | # Test cases for extract_chat_history function 5 | @pytest.mark.parametrize("chat_history,expected", [ 6 | ([], []), 7 | ([{"name": "Human", "text": "Hello, AI!"}, {"name": "AI", "text": "Hello, Human! How can I help you today?"}], [("Hello, AI!", "Hello, Human! 
How can I help you today?")]) 8 | ]) 9 | def test_extract_chat_history(chat_history, expected): 10 | assert extract_chat_history(chat_history) == expected 11 | 12 | # Test cases for embeds_to_json function 13 | @pytest.mark.parametrize("message,expected", [ 14 | ({"embeds": []}, ""), 15 | ({"embeds": [{"type": "image", "url": "https://example.com/image.png"}]}, '[{"type": "image", "url": "https://example.com/image.png"}]') 16 | ]) 17 | def test_embeds_to_json(message, expected): 18 | assert embeds_to_json(message) == expected 19 | 20 | # Test cases for create_message_element function 21 | @pytest.mark.parametrize("message,expected", [ 22 | ({"text": "Hello, AI!"}, "Hello, AI!"), 23 | ({"content": "Hello, AI!"}, "Hello, AI!") 24 | ]) 25 | def test_create_message_element(message, expected): 26 | assert create_message_element(message) == expected 27 | 28 | # Test cases for is_human function 29 | @pytest.mark.parametrize("message,expected", [ 30 | ({"name": "Human"}, True), 31 | ({"name": "AI"}, False) 32 | ]) 33 | def test_is_human(message, expected): 34 | assert is_human(message) == expected 35 | 36 | # Test cases for is_bot function 37 | @pytest.mark.parametrize("message,expected", [ 38 | ({"name": "AI"}, True), 39 | ({"name": "Human"}, False) 40 | ]) 41 | def test_is_bot(message, expected): 42 | assert is_bot(message) == expected 43 | 44 | # Test cases for is_ai function 45 | @pytest.mark.parametrize("message,expected", [ 46 | ({"name": "AI"}, True), 47 | ({"name": "Human"}, False) 48 | ]) 49 | def test_is_ai(message, expected): 50 | assert is_ai(message) == expected 51 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch, mock_open 3 | from sunholo.utils import config 4 | 5 | def test_load_config(): 6 | expected_config = {"key": "value"} 7 | with pytest.raises(FileNotFoundError): 
8 | config.load_config("non_existent_file") 9 | with patch("builtins.open", mock_open(read_data='{"key": "value"}'), create=True): 10 | result, _ = config.load_config("mock_file.json") 11 | assert result == expected_config 12 | 13 | # Test cases for load_config_key function 14 | @patch("sunholo.utils.config.load_all_configs") 15 | def test_load_config_key(mock_load_all_configs): 16 | mock_load_all_configs.return_value = { 17 | "vacConfig": { 18 | "apiVersion": "test", 19 | "vac": { 20 | "test_vector": { 21 | "key1": "value1", 22 | "key2": "value2" 23 | } 24 | } 25 | } 26 | } 27 | # Test existing key 28 | assert config.load_config_key("key1", "test_vector", "vacConfig") == "value1" 29 | # Test non-existing key 30 | result = config.load_config_key("non_existing_key", "test_vector", "vacConfig") 31 | assert result is None 32 | 33 | # Test invalid configuration 34 | with pytest.raises(KeyError): 35 | config.load_config_key("key1", "test_vector", "invalidConfig") 36 | -------------------------------------------------------------------------------- /tests/test_unstructured.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | from sunholo.chunker.loaders import read_file_to_documents 3 | 4 | result = read_file_to_documents("README.md") 5 | print(result) 6 | 7 | 8 | if __name__ == "__main__": 9 | main() --------------------------------------------------------------------------------