├── .dockerignore ├── .env.docker ├── .env.local ├── .gitignore ├── .gitmodules ├── Dockerfile ├── README.md ├── build_docker.ps1 ├── chainlit.md ├── data ├── questionnaire.pkl ├── questionnaire_all.pkl ├── report_doc_classification_01JDYZ52AD3KYM2Q7VF5NHV3ER.json ├── report_item_count_01JDZBAA5T6Z0MHZ10KY0HF938.json ├── sample_questionnaire1.md ├── sample_questionnaire2.md └── sample_questionnaire3.md ├── data_questionnaire_agent ├── __init__.py ├── bootstrap │ ├── __init__.py │ ├── consultant_loader.py │ └── sql_script.py ├── build.py ├── cli │ └── data_questionnaire_agent_cli.py ├── config.py ├── config_support.py ├── log_init.py ├── model │ ├── application_schema.py │ ├── confidence_schema.py │ ├── consultant_rating.py │ ├── global_configuration.py │ ├── initial_question_data.py │ ├── jwt_token.py │ ├── languages.py │ ├── mail_data.py │ ├── ontology_schema.py │ ├── openai_schema.py │ ├── question_suggestion.py │ ├── questionnaire_status.py │ ├── report_aggregation_schema.py │ ├── server_model.py │ └── session_configuration.py ├── server │ ├── __init__.py │ ├── agent_session.py │ ├── questionnaire_server.py │ ├── questionnaire_server_backend.py │ ├── questionnaire_server_consultants.py │ ├── questionnaire_server_main.py │ └── server_support.py ├── service │ ├── advice_service.py │ ├── clarifications_agent.py │ ├── confidence_service.py │ ├── consultant_service.py │ ├── db_bootstrap_service.py │ ├── embedding_service.py │ ├── graph_service.py │ ├── graphrag_service.py │ ├── html_generator.py │ ├── initial_question_service.py │ ├── jwt_token_service.py │ ├── knowledge_base_service.py │ ├── language_adapter.py │ ├── mail_sender.py │ ├── ontology_service.py │ ├── persistence_service_async.py │ ├── persistence_service_consultants_async.py │ ├── persistence_service_questions_async.py │ ├── prompt_support.py │ ├── query_support.py │ ├── question_clarifications.py │ ├── question_generation_service.py │ ├── report_aggregation_main_service.py │ ├── 
report_aggregation_service.py │ ├── report_aggregation_summarization_service.py │ ├── report_enhancement_service.py │ ├── report_interactions_service.py │ ├── secondary_question_processor.py │ ├── similarity_search.py │ └── tagging_service.py ├── test │ ├── __init__.py │ ├── advice_service_test.py │ ├── conditional_advice_test.py │ ├── confidence_schema_test.py │ ├── confidence_service_test.py │ ├── consultant_service_test.py │ ├── graph_service_test.py │ ├── graphrag_service_test.py │ ├── initial_question_service_test.py │ ├── jwt_token_service_test.py │ ├── mail_sender_test.py │ ├── ontology_service_test.py │ ├── persistence_service_async_test.py │ ├── persistence_service_consultants_async_test.py │ ├── persistence_service_questions_async_test.py │ ├── provider │ │ ├── __init__.py │ │ ├── advice_provider.py │ │ ├── confidence_provider.py │ │ ├── consultant_provider.py │ │ ├── jwt_token_data_provider.py │ │ ├── knowledge_base_provider.py │ │ ├── multipart_provider.py │ │ ├── ontology_provider.py │ │ ├── question_answer_provider.py │ │ ├── question_answers_provider.py │ │ ├── questionnaire_provider.py │ │ ├── questionnaire_status_provider.py │ │ ├── session_configuration_provider.py │ │ └── session_properties_provider.py │ ├── question_answer_test.py │ ├── question_generation_service_test.py │ ├── questionnaire_server_test.py │ ├── questionnaire_status_test.py │ ├── report_aggregation_main_service_test.py │ ├── report_aggregation_service_test.py │ ├── report_aggregation_summarization_service_test.py │ ├── report_enhancement_service_test.py │ ├── server_message_test.py │ ├── session_configuration_test.py │ └── toml_support_test.py ├── toml_support.py ├── translation.py ├── ui │ ├── advice_processor.py │ ├── avatar_factory.py │ ├── chat_settings_factory.py │ ├── clarifications_chainlit.py │ ├── data_questionnaire_chainlit.py │ ├── image_support.py │ ├── mail_processor.py │ ├── model │ │ └── session_number_container.py │ ├── pdf_processor.py │ └── 
session_chainlit_playground.py └── utils │ ├── advice_parser.py │ ├── tracker_db_init.py │ ├── tracker_db_lister.py │ └── tracker_db_server.py ├── deploy_ui.sh ├── docs ├── advice_agent.bpmn ├── agent_interaction_flow.bpmn ├── data_wellness_workflow.bpmn ├── data_wellness_workflow.png ├── low_level_protocol │ ├── clarification.json │ ├── confidence_rating.json │ ├── generate_first_question.json │ ├── generate_secondary_question.json │ ├── ontology_creation.json │ └── report_generation.json └── prompt_examples │ ├── clarification.txt │ ├── confidence_rating.txt │ ├── ontology_creation.txt │ ├── question_generation.txt │ └── report_generation.txt ├── format.ps1 ├── i18n ├── messages.de.yml ├── messages.en.yml └── messages.fa.yml ├── notes.md ├── package-lock.json ├── poetry.lock ├── prompts_de.toml ├── prompts_en.toml ├── prompts_fa.toml ├── public ├── css │ └── styles.css ├── images │ ├── D-Well_Report_Banner.png │ ├── Hero_Image_with_Logo_and_Titles.jpg │ ├── banner_blank.png │ ├── banner_with_titles.png │ ├── companion_icon.png │ ├── data_ecosystem_areas.png │ └── natural-language-processing.png ├── js │ └── onepoint.js ├── logo_dark.png └── logo_light.png ├── pyproject.toml ├── run_app.sh ├── sql ├── db_creation_default.sql ├── db_creation_hypergility.sql ├── db_setup.sql ├── initial_data.sql ├── other_queries.sql └── verification_queries.sql ├── start.ps1 ├── start.sh └── templates ├── consultants-template.md ├── mail-template.html └── results-template.html /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | .chainlit 3 | .pytest_cache 4 | .env 5 | ./wheels 6 | .gitignore 7 | .gitmodules 8 | chainlit.md 9 | ./*.ps1 10 | ./report.csv 11 | src/phoenix/server/static/ 12 | .env.local -------------------------------------------------------------------------------- /.env.docker: -------------------------------------------------------------------------------- 1 | # OPENAI_API_KEY= 2 | OPENAI_MODEL=gpt-4o 3 
| REQUEST_TIMEOUT=300 4 | 5 | VERBOSE_LLM=true 6 | LANGCHAIN_CACHE=false 7 | CHATGPT_STREAMING=false 8 | 9 | UI_TIMEOUT = 60 10 | 11 | # Email related 12 | # Production Settings 13 | MAIL_FROM_PERSON=Onepoint Data Wellness 14 | # MAIL_USER= 15 | # MAIL_PASSWORD= 16 | # MAIL_FROM= 17 | # MAIL_SERVER= 18 | MAIL_SUBJECT=Your recommendations from the Onepoint Data Wellness Companion™ 19 | FEEDBACK_EMAIL= 20 | 21 | # General stuff 22 | PROJECT_ROOT=/app 23 | QUESTION_CACHE_FOLDER=/tmp/data-questionnaire-agent/cache 24 | 25 | # PDF Related 26 | WKHTMLTOPDF_BINARY=/usr/bin/wkhtmltopdf 27 | TEMPLATE_LOCATION=/app/templates 28 | PDF_FOLDER=/tmp/data_questionnaire_agent/pdfs 29 | PDF_BANNER=/app/public/images/Hero_Image_with_Logo_and_Titles.jpg 30 | 31 | # Embedding related 32 | RAW_TEXT_FOLDER=/app/docs/raw_text 33 | EMBEDDINGS_PERSISTENCE_DIR=/app/embeddings 34 | EMBEDDINGS_CHUNK_SIZE=2500 35 | SEARCH_RESULTS_HOW_MANY=2 36 | 37 | # Question generation related 38 | QUESTIONS_PER_BATCH=1 39 | MINIMUM_QUESTIONNAIRE_SIZE=4 40 | 41 | # Token limit for chatgpt 4. 
Important to extend the context as much as possible using the vector DB search 42 | TOKEN_LIMIT=6000 43 | 44 | IMAGE_LLM_TEMPERATURE=0.9 45 | 46 | # Show session cost 47 | SHOW_SESSION_COST=false 48 | OPENAI_RETRY_ATTEMPTS=3 49 | OPENAI_WAIT_FIXED=30 50 | 51 | # Related to the tracker DB download 52 | # TRACKER_DB_LOGS_PASSWORD= 53 | 54 | # Database related 55 | # DB_NAME=data_wellness_companion 56 | # DB_USER= 57 | # DB_HOST=127.0.0.1 58 | # DB_PORT=5432 59 | # DB_PASSWORD= 60 | # DB_CREATE=true 61 | 62 | # Websocket server related 63 | WEBSOCKET_SERVER=0.0.0.0 64 | WEBSOCKET_PORT=8085 65 | WEBSOCKET_CORS_ALLOWED_ORIGINS=* 66 | 67 | # Webserver related 68 | UI_FOLDER=/app/ui 69 | 70 | # Langsmith related 71 | LANGCHAIN_TRACING_V2=false 72 | LANGCHAIN_PROJECT=Data Questionnaire Agent 73 | LANGCHAIN_ENDPOINT=https://api.smith.langchain.com 74 | # LANGCHAIN_API_KEY= 75 | 76 | USE_GRAPHRAG=true 77 | GRAPHRAG_BASE_URL=http://176.34.128.143:8087 78 | GRAPHRAG_MODE=all 79 | GRAPHRAG_CONTEXT_SIZE=5000 -------------------------------------------------------------------------------- /.env.local: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | # Other options 3 | # OPENAI_MODEL=gpt-4-1106-preview 4 | # OPENAI_MODEL=gpt-4-turbo-2024-04-09 5 | # OPENAI_MODEL=gpt-4-0125-preview 6 | OPENAI_MODEL=gpt-4o 7 | OPENAI_API_TEMPERATURE=0.5 8 | REQUEST_TIMEOUT=300 9 | 10 | VERBOSE_LLM=true 11 | LANGCHAIN_CACHE=false 12 | CHATGPT_STREAMING=false 13 | 14 | UI_TIMEOUT = 60 15 | 16 | # Email related 17 | # Production Settings 18 | MAIL_FROM_PERSON=Onepoint Data Wellness 19 | MAIL_USER= 20 | MAIL_PASSWORD= 21 | MAIL_FROM= 22 | MAIL_SERVER= 23 | MAIL_SUBJECT=Your recommendations from the Onepoint Data Wellness Companion™ 24 | FEEDBACK_EMAIL= 25 | 26 | # General stuff 27 | PROJECT_ROOT=/development/playground/langchain/data_questionnaire_agent 28 | QUESTION_CACHE_FOLDER=/tmp/data_questionnaire_agent/cache 29 | 
TRANSLATION_PATH=/development/playground/langchain/data_questionnaire_agent/i18n 30 | 31 | # PDF Related 32 | WKHTMLTOPDF_BINARY=/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe 33 | TEMPLATE_LOCATION=/development/playground/langchain/data_questionnaire_agent/templates 34 | PDF_FOLDER=/tmp/data_questionnaire_agent/pdfs 35 | PDF_BANNER=/development/playground/langchain/data_questionnaire_agent/public/images/Hero_Image_with_Logo_and_Titles.jpg 36 | 37 | # Embedding related 38 | RAW_TEXT_FOLDER=/development/playground/langchain/data_questionnaire_agent/docs/raw_text 39 | EMBEDDINGS_PERSISTENCE_DIR=/development/playground/langchain/data_questionnaire_agent/embeddings 40 | EMBEDDINGS_CHUNK_SIZE=2500 41 | SEARCH_RESULTS_HOW_MANY=2 42 | 43 | # Question generation related 44 | QUESTIONS_PER_BATCH=1 45 | MINIMUM_QUESTIONNAIRE_SIZE=4 46 | 47 | # Token limit for chatgpt 4. Important to extend the context as much as possible using the vector DB search 48 | TOKEN_LIMIT=6000 49 | 50 | IMAGE_LLM_TEMPERATURE=0.9 51 | 52 | # Show session cost 53 | SHOW_SESSION_COST=false 54 | OPENAI_RETRY_ATTEMPTS=3 55 | OPENAI_WAIT_FIXED=30 56 | 57 | # Related to the tracker DB download 58 | TRACKER_DB_LOGS_PASSWORD= 59 | 60 | # Database related 61 | DB_NAME=data_wellness_languages 62 | DB_USER= 63 | DB_HOST=127.0.0.1 64 | DB_PORT=5432 65 | DB_PASSWORD= 66 | DB_CREATE=true 67 | 68 | # Websocket server related 69 | WEBSOCKET_SERVER=0.0.0.0 70 | WEBSOCKET_PORT=8085 71 | WEBSOCKET_CORS_ALLOWED_ORIGINS=* 72 | 73 | # Webserver related 74 | UI_FOLDER=/development/playground/langchain/data_questionnaire_agent/ui 75 | IMAGES_FOLDER=/development/playground/langchain/data_questionnaire_agent/public/images 76 | 77 | # Langsmith 78 | LANGCHAIN_TRACING_V2=true 79 | LANGCHAIN_PROJECT=Data Questionnaire Agent 80 | LANGCHAIN_ENDPOINT=https://api.smith.langchain.com 81 | LANGCHAIN_API_KEY= 82 | 83 | # JWT token related 84 | JWT_SECRET= 85 | JWT_ALGORITHM=HS256 86 | JWT_TIME_DELTA_MINUTES=120 87 | 
JWT_GEN_FOLDER=/tmp/data_wellness/jwt_gen 88 | DWELL_URL=http://localhost:5173 89 | DWISE_URL=http://localhost:5174 90 | 91 | 92 | # If Graph RAG being used, then extra parameters are needed 93 | USE_GRAPHRAG=true 94 | GRAPHRAG_BASE_URL=http://176.34.128.143:10000/protected/project 95 | GRAPHRAG_MODE=local 96 | GRAPHRAG_CONTEXT_SIZE=3000 97 | GRAPHRAG_JWT= 98 | GRAPHRAG_PROJECT=dwell 99 | GRAPHRAG_READ_TIMEOUT=20 100 | 101 | # Report Aggregation 102 | REPORT_TOKEN_LIMIT=30000 103 | AGGREGATOR_REPORT_FOLDER=/tmp/data_wellness 104 | 105 | LINKEDIN_USER= 106 | LINKEDIN_PASSWORD= 107 | 108 | GEMINI_API_KEY= 109 | GEMINI_MODEL=gemini-2.0-flash-lite-preview-02-05 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # Avoid pushing the embedding database. 
163 | embeddings/ 164 | 165 | docker_start.bat -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "data_questionnaire_agent/data-wellness-companion-ui"] 2 | path = data_questionnaire_agent/data-wellness-companion-ui 3 | url = https://github.com/onepointconsulting/data-wellness-companion-ui.git 4 | [submodule "data-wellness-companion-ui"] 5 | path = data-wellness-companion-ui 6 | url = https://github.com/onepointconsulting/data-wellness-companion-ui.git 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Please note that this Docker file does not yet produce anything useful. Work In Progress 2 | FROM ubuntu:latest 3 | 4 | SHELL ["/bin/bash", "-c"] 5 | 6 | RUN apt update 7 | RUN apt install python3 -y 8 | RUN apt install python3-pip -y 9 | RUN apt install python3.12-venv -y 10 | RUN apt install swig -y 11 | 12 | WORKDIR /app 13 | 14 | RUN python3 -m venv venv 15 | RUN . venv/bin/activate 16 | RUN ./venv/bin/pip install poetry 17 | 18 | COPY . . 
19 | 20 | RUN ./venv/bin/poetry install 21 | 22 | RUN chmod +x ./start.sh 23 | RUN mv .env.docker .env 24 | 25 | # Install PDF converter 26 | RUN apt-get update && apt-get install -y wkhtmltopdf 27 | 28 | # Install node, npm and yarn for the JS side 29 | RUN apt install nodejs -y 30 | RUN apt install npm -y 31 | RUN npm install --global yarn 32 | 33 | # Make sure the PDF advice folder is available 34 | RUN mkdir -p /tmp/data_questionnaire_agent/pdfs 35 | 36 | CMD ["/bin/bash", "./start.sh"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Wellness Q&A Chatbot 2 | 3 | This is a reverse chatbot that asks the users questions about data integration practices and then gives advice based on a body of knowledge. 4 | This version operates a bit like an agent which tries to gather enough information to be able to give advice. So it may ask an unspecified number of questions. 5 | 6 | ## Cloning the project 7 | 8 | You should clone the project and then initialize the UI module with these commands: 9 | 10 | ``` 11 | git submodule init 12 | git submodule update 13 | ``` 14 | 15 | ## Setup 16 | 17 | We suggest using [Conda](https://docs.conda.io/en/latest/) to manage the virtual environment and then install poetry. 18 | 19 | ``` 20 | conda activate base 21 | conda remove -n data_wellness_agent --all 22 | conda create -n data_wellness_agent python=3.13.1 23 | conda activate data_wellness_agent 24 | pip install poetry 25 | ``` 26 | 27 | ## Installation 28 | 29 | Change the directory to the root folder of the project. 30 | 31 | ``` 32 | poetry install 33 | ``` 34 | 35 | ## UI Installation 36 | 37 | Before you run, you should compile the UI with this command from the root folder of the project. 38 | Please note that this requires the installation of Yarn and node 18.18.0 or later. 
39 | 40 | ``` 41 | poetry run build-ui 42 | ``` 43 | 44 | The UI connects the websocket to port 8085 natively. If the Python server runs on a different port, please change the port accordingly. 45 | 46 | ## Running 47 | 48 | ### Prepare the Postgres DB 49 | 50 | Before you run for the first time, you will need to create the Postgres database. Execute this command to start psql (make sure Postgres is installed) 51 | 52 | On Linux you can start psql like this: 53 | 54 | ``` 55 | sudo -u postgres psql 56 | ``` 57 | 58 | On Windows you will need to download the distribution from here: https://www.postgresql.org/download/ 59 | 60 | Then create the database: 61 | 62 | ``` 63 | CREATE DATABASE data_wellness_companion 64 | WITH 65 | OWNER = postgres 66 | ENCODING = 'UTF8' 67 | CONNECTION LIMIT = -1 68 | IS_TEMPLATE = False; 69 | ``` 70 | 71 | and add the initial tables with some data after exiting psql: 72 | 73 | Run on the console: 74 | 75 | ``` 76 | sudo -u postgres psql data_wellness_companion 77 | ``` 78 | 79 | And run the script sql/db_setup.sql 80 | 81 | 82 | ### Knowledge base 83 | 84 | The knowledge base is based on a directory (see environment variable `RAW_TEXT_FOLDER`). The folder needs to have *.txt documents in it. 85 | 86 | The server fails if there are no documents in this folder. 87 | 88 | ## Configuration 89 | 90 | You will have to have a `.env` file. To get you started you can copy the `.env.local` to `.env` and then change the configuration parameters accordingly. 91 | 92 | You will need to have an OpenAI API key and a Gemini key. 93 | 94 | ### Running the main application 95 | 96 | Make sure the Postgres DB is available. 97 | 98 | ```bash 99 | python ./data_questionnaire_agent/server/questionnaire_server_main.py 100 | ``` 101 | 102 | You can then check the UI on http://localhost:8085/index.html 103 | 104 | On Windows you can use the `.\start.ps1` script to start the server. 
105 | 106 | ## Running Tests 107 | 108 | You can run tests like this: 109 | 110 | ```bash 111 | pytest 112 | ``` -------------------------------------------------------------------------------- /build_docker.ps1: -------------------------------------------------------------------------------- 1 | 2 | docker build -t onepoint-data-wellness-companion . -------------------------------------------------------------------------------- /chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/chainlit.md -------------------------------------------------------------------------------- /data/questionnaire.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/data/questionnaire.pkl -------------------------------------------------------------------------------- /data/questionnaire_all.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/data/questionnaire_all.pkl -------------------------------------------------------------------------------- /data/sample_questionnaire1.md: -------------------------------------------------------------------------------- 1 | # Questionnaire 2 | 3 | Q: Which area of your data ecosystem are you most concerned about? 4 | A: Data silos - Data trapped in departmental silos can be inaccessible to other parts. 5 | 6 | 7 | Q: What specific challenges are you facing with data silos in your organisation? 8 | A: Our departments use different systems that don't communicate well, leading to data being isolated. 
9 | 10 | 11 | Q: What are the main reasons for the lack of communication between the systems used by different departments in your organisation? 12 | A: The systems are built on different technologies that are not compatible with each other. 13 | 14 | 15 | Q: How do you currently manage data integration across the different systems within your organisation? 16 | A: We use manual processes to transfer data between systems, which is time-consuming and prone to errors. 17 | 18 | 19 | Q: What are the main consequences of using manual processes for data integration in your organisation? 20 | A: Manual processes are time-consuming and can lead to delays in accessing necessary data. 21 | Manual data integration is prone to human errors, which can result in data inaccuracies. 22 | 23 | 24 | Q: What are the most significant challenges you face in ensuring data quality during the manual data integration process? 25 | A: The manual nature of the process introduces frequent human errors, leading to inaccuracies. 26 | It is difficult to maintain data consistency across different systems during manual integration. 27 | 28 | 29 | Q: What are the main obstacles your organisation faces when trying to automate data integration processes across different systems? 30 | A: The lack of skilled personnel to design and implement automated processes. 31 | Budget constraints that limit the ability to invest in automation tools. 32 | 33 | 34 | Q: What are the key pain points your organisation faces due to data being trapped in departmental silos? 35 | A: Data silos lead to inefficiencies as teams cannot access the data they need promptly, affecting decision-making. 36 | The isolated data results in duplicated efforts across departments, wasting resources. 37 | 38 | 39 | Q: What are the specific types of data or systems most affected by the lack of integration and communication in your organisation? 
40 | A: Customer data is most affected because it is spread across multiple systems, leading to inconsistencies. 41 | Financial data is often delayed in reporting due to integration issues, impacting decision-making. 42 | 43 | 44 | # Recommendations 45 | 46 | - Consider implementing a **DataOps** strategy to automate data integration processes. This approach can help streamline workflows and reduce the manual errors currently affecting your data quality. By adopting DataOps, your organisation can enhance collaboration between data engineers, data scientists, and business teams, leading to more efficient data handling and integration. 47 | - Invest in **data integration tools** that support interoperability between different systems. These tools can bridge the technological gaps that currently exist between departmental systems, facilitating smoother data flow and reducing data silos. 48 | - Develop a **data governance framework** that includes policies and procedures for data sharing across departments. This framework will ensure that data is managed consistently and securely, reducing the risk of errors and enhancing data quality. 49 | - Implement **data quality training** for employees involved in data handling. Training can equip them with the necessary skills to manage data responsibly, thereby reducing the likelihood of errors during manual integration processes. 50 | - Explore the use of **self-service data platforms** to empower departments to access and manage their data independently. Such platforms can reduce the dependency on IT for data integration tasks, improving efficiency and accessibility. 51 | 52 | # What to avoid 53 | 54 | - Avoid continuing to rely solely on manual data integration processes, as this approach is prone to errors and inefficiencies. 55 | - Do not ignore the importance of data consistency across systems, as inconsistencies can lead to inaccurate reporting and decision-making. 
56 | - Avoid neglecting the need for skilled personnel in data integration and governance roles. Investing in training and hiring can significantly improve your data management capabilities. 57 | 58 | # Positive outcomes 59 | 60 | - Improved data accuracy and consistency, leading to more reliable decision-making processes. 61 | - Increased efficiency in data management and integration, reducing time delays and resource wastage. 62 | - Enhanced collaboration and data sharing across departments, breaking down silos and fostering innovation. 63 | -------------------------------------------------------------------------------- /data/sample_questionnaire2.md: -------------------------------------------------------------------------------- 1 | # Questionnaire 2 | 3 | Q: Which area of your data ecosystem are you most concerned about? 4 | A: Cost and complexity - A robust data analytics infrastructure requires significant investment of resources. 5 | 6 | 7 | Q: What specific challenges are you facing in managing the cost and complexity of your data analytics infrastructure? 8 | A: We struggle with the high costs of data storage and processing. 9 | 10 | 11 | Q: What strategies have you explored or implemented to manage the high costs associated with data storage and processing in your analytics infrastructure? 12 | A: We are considering data compression techniques to manage storage expenses. 13 | We have not yet implemented any specific strategies, as we are still in the research phase. 14 | 15 | 16 | Q: What types of data are you primarily dealing with in your analytics infrastructure, and how does this impact your storage and processing costs? 17 | A: Our infrastructure handles a lot of unstructured data, leading to increased storage costs due to its volume and complexity. 18 | 19 | 20 | Q: How do you prioritise your data governance initiatives within the organisation, especially when dealing with large volumes of unstructured data? 
21 | A: We focus on data quality and security as top priorities to ensure compliance and reliability. 22 | 23 | 24 | Q: What methods or technologies are you currently using to handle and process unstructured data, and how effective have they been in managing costs and complexity? 25 | A: We have implemented data lakes to store unstructured data, but managing them has proven complex. 26 | 27 | 28 | # Recommendations 29 | 30 | - **Implement data compression techniques** to reduce storage expenses. Given the high costs associated with storing unstructured data, employing data compression can significantly decrease the amount of space required, thereby lowering costs. 31 | - **Leverage a Modern Data Strategy** to handle unstructured data more effectively. This strategy can help consolidate unstructured data on a data lake, making it easier to manage and potentially reducing costs. 32 | - **Utilise data lakehouse architecture** to simplify data management and reduce complexity. By integrating the benefits of data lakes and data warehouses, a data lakehouse can provide a more streamlined approach to managing data, which can help in handling large volumes of unstructured data. 33 | - **Adopt DataOps practices** to enhance data processing efficiency. DataOps can streamline workflows and improve the speed and quality of data analytics, helping to manage the complexity and costs of your data infrastructure. 34 | - **Invest in active metadata management** to improve data governance. This can help in maintaining data quality and security, particularly for unstructured data, ensuring compliance and reliability. 35 | 36 | # What to avoid 37 | 38 | - Avoid neglecting the importance of metadata management, as it is crucial for maintaining data quality and governance. 39 | - Do not overlook the potential of a Modern Data Strategy to manage unstructured data effectively, as this can lead to inefficiencies and increased costs. 
40 | - Avoid relying solely on traditional data warehouses for unstructured data, as they may not be cost-effective or efficient for managing such data types. 41 | 42 | # Positive outcomes 43 | 44 | - Reduced storage costs through effective data compression and management strategies. 45 | - Enhanced data governance and quality, leading to improved compliance and reliability. 46 | - Streamlined data management processes that reduce complexity and improve efficiency. 47 | -------------------------------------------------------------------------------- /data/sample_questionnaire3.md: -------------------------------------------------------------------------------- 1 | # Questionnaire 2 | 3 | Q: Which area of your data ecosystem are you most concerned about? 4 | A: Lack of skilled personnel - Missing skills in data science, analytics, AI and ML can impede the effective use of data. 5 | 6 | 7 | Q: How do you currently address the skill gaps in data science, analytics, AI, and ML within your organisation? 8 | A: We do both: 9 | We provide internal training programmes and workshops to upskill our existing staff. 10 | We hire external consultants or temporary staff to fill the gaps in expertise. 11 | 12 | 13 | Q: What specific data quality issues are you currently facing that impact your organisation's decision-making process? 14 | A: We frequently encounter incomplete or outdated data, which affects the accuracy of our reports. 15 | 16 | 17 | Q: How does your organisation currently ensure data integrity across different systems and during various stages of the data lifecycle? 18 | A: We do not have a formal process in place, which sometimes leads to data integrity issues. 19 | We perform manual data checks whenever there is a need. 20 | 21 | 22 | Q: What steps has your organisation taken to address the issue of incomplete or outdated data? 23 | A: We rely on periodic manual reviews to identify and correct data issues. 
24 | 25 | 26 | Q: What are the main challenges you face in maintaining accurate and up-to-date documentation about your data sources and processes? 27 | A: We struggle with keeping documentation current due to rapid changes in our data systems. 28 | 29 | 30 | Q: How does your organisation currently handle data governance, particularly in terms of establishing roles, responsibilities, and processes for data management? 31 | A: We do not right now because we are planning to create a dedicated data governance team. Here we need to find candidates for the job. 32 | 33 | 34 | Q: How do you plan to address the challenges of keeping your data documentation current amidst rapid changes in your data systems? 35 | A: We plan to assign dedicated personnel to regularly update documentation as part of their responsibilities. 36 | 37 | 38 | # Recommendations 39 | 40 | - **Invest in Data Quality Training**: As your organisation is facing issues with incomplete or outdated data, it's vital to provide comprehensive data quality training to your staff. This will equip them with the necessary skills to identify and rectify data quality issues proactively, reducing the reliance on manual reviews. 41 | - **Establish Formal Data Governance Policies**: Since you are planning to create a dedicated data governance team, it is crucial to establish clear data governance policies. These should define roles, responsibilities, and processes for data management, ensuring a consistent approach to data governance across the organisation. 42 | - **Implement Automated Data Integrity Checks**: To address the lack of formal processes for ensuring data integrity, consider implementing automated data integrity checks. This will help maintain data consistency and reliability across different systems and stages of the data lifecycle, reducing the dependency on manual checks. 
43 | - **Utilise Data Cleansing Tools**: Given your challenges with incomplete or outdated data, employing data cleansing tools can be beneficial. These tools can automate the process of identifying and correcting data issues, ensuring that the data remains accurate and up-to-date. 44 | - **Regularly Update Data Documentation**: Assign dedicated personnel to maintain and update data documentation regularly. This will help keep your documentation current amidst rapid changes, ensuring that all stakeholders have access to accurate and up-to-date information about data sources and processes. 45 | 46 | # What to avoid 47 | 48 | - **Avoid Relying Solely on Manual Checks**: Manual checks for data integrity and quality are time-consuming and prone to human error. Relying solely on them can lead to inconsistencies and inaccuracies. 49 | - **Do Not Delay the Formation of a Data Governance Team**: Procrastinating on establishing a data governance team will only prolong the issues related to data management and governance. Swift action is necessary to address these challenges effectively. 50 | - **Avoid Neglecting Data Documentation**: Inadequate or outdated documentation can lead to misunderstandings and errors. Ensure that documentation is not neglected and is regularly updated to reflect any changes in data systems. 51 | 52 | # Positive outcomes 53 | 54 | - **Improved Data Quality and Accuracy**: By investing in training and automated tools, the quality and accuracy of your data will improve, leading to more reliable decision-making. 55 | - **Enhanced Data Governance**: Establishing a dedicated team and clear policies will streamline data management processes, ensuring better governance and compliance. 56 | - **Increased Operational Efficiency**: Automated processes for data integrity checks and cleansing will reduce the time and effort required for manual interventions, enhancing overall operational efficiency. 
57 | -------------------------------------------------------------------------------- /data_questionnaire_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/data_questionnaire_agent/__init__.py -------------------------------------------------------------------------------- /data_questionnaire_agent/bootstrap/__init__.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.config import db_cfg 2 | 3 | __all__ = [db_cfg] 4 | -------------------------------------------------------------------------------- /data_questionnaire_agent/bootstrap/consultant_loader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import click 4 | from consultant_info_generator.consultant_info_tools import extract_consultant 5 | 6 | from data_questionnaire_agent.bootstrap import db_cfg 7 | from data_questionnaire_agent.service.persistence_service_consultants_async import ( 8 | save_consultant, 9 | ) 10 | 11 | 12 | @click.command() 13 | @click.option( 14 | "--linkedin_id", "-lid", help="The linked in ID", multiple=True, required=True 15 | ) 16 | def save_consultants(linkedin_id: list[str]): 17 | for id in linkedin_id: 18 | try: 19 | consultant = extract_consultant(id) 20 | click.echo(f"Processed {id}") 21 | asyncio.run(save_consultant(consultant)) 22 | except Exception as e: 23 | click.echo(f"Failed to retrieve {id}", err=True) 24 | click.echo(f"Error message {e}", err=True) 25 | 26 | 27 | if __name__ == "__main__": 28 | print(db_cfg.db_conn_str) 29 | save_consultants() 30 | -------------------------------------------------------------------------------- /data_questionnaire_agent/bootstrap/sql_script.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 
| import sys 3 | from pathlib import Path 4 | 5 | from data_questionnaire_agent.config import db_cfg 6 | from data_questionnaire_agent.log_init import logger 7 | from data_questionnaire_agent.service.db_bootstrap_service import execute_script 8 | 9 | if __name__ == "__main__": 10 | if len(sys.argv) < 2: 11 | logger.error("Please enter one or more scripts as arguments.") 12 | if db_cfg.db_create: 13 | for script in sys.argv[1:]: 14 | logger.info(f"Processing {script}") 15 | script_path = Path(script) 16 | assert script_path.exists(), f"Cannot find {script}." 17 | res = asyncio.run(execute_script(script_path)) 18 | if isinstance(res, bool): 19 | if res: 20 | logger.info(f"Script {script} executed successfully.") 21 | else: 22 | logger.error(f"Script {script} failed.") 23 | else: 24 | logger.error(f"An error has occurred: {res}") 25 | else: 26 | logger.info("DB creation turned off.") 27 | -------------------------------------------------------------------------------- /data_questionnaire_agent/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | 5 | def ui(): 6 | os.chdir("./data-wellness-companion-ui") 7 | os.system("yarn") 8 | if os.path.exists("./dist"): 9 | shutil.rmtree("./dist") 10 | os.system("yarn run build") 11 | if os.path.exists("../ui"): 12 | shutil.rmtree("../ui") 13 | shutil.copytree("./dist", "../ui", dirs_exist_ok=True) 14 | 15 | 16 | def check(): 17 | os.system("black .") 18 | os.system("ruff check --fix .") 19 | -------------------------------------------------------------------------------- /data_questionnaire_agent/config_support.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def create_db_conn_str() -> str: 5 | db_name = os.getenv("DB_NAME") 6 | assert db_name is not None 7 | db_user = os.getenv("DB_USER") 8 | assert db_user is not None 9 | db_host = os.getenv("DB_HOST") 10 | assert db_host is not None 11 | 
db_port = os.getenv("DB_PORT") 12 | assert db_port is not None 13 | db_port = int(db_port) 14 | db_password = os.getenv("DB_PASSWORD") 15 | assert db_password is not None 16 | 17 | return f"dbname={db_name} user={db_user} password={db_password} host={db_host} port={db_port}" 18 | -------------------------------------------------------------------------------- /data_questionnaire_agent/log_init.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig(level="INFO") 4 | 5 | logger = logging.getLogger("data_integration_agent") 6 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/application_schema.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List 3 | 4 | from data_questionnaire_agent.model.openai_schema import ResponseQuestions 5 | 6 | 7 | @dataclass 8 | class QuestionAnswer: 9 | id: int | None 10 | question: str 11 | answer: str | dict 12 | clarification: str | None 13 | possible_answers: List[str] = field(default_factory=list) 14 | 15 | def answer_str(self): 16 | if not self.answer: 17 | return "" 18 | elif isinstance(self.answer, str): 19 | return self.answer 20 | else: 21 | return self.answer["content"] 22 | 23 | def __repr__(self) -> str: 24 | return f"""{self.question} 25 | {self.answer_str()}""" 26 | 27 | @staticmethod 28 | def question_answer_factory(question: str, answer: dict): 29 | return QuestionAnswer( 30 | id=None, question=question, answer=answer, clarification="" 31 | ) 32 | 33 | @staticmethod 34 | def question_factory(question: str): 35 | return QuestionAnswer(id=None, question=question, answer="", clarification="") 36 | 37 | 38 | @dataclass 39 | class Questionnaire: 40 | questions: List[QuestionAnswer] 41 | 42 | def __repr__(self) -> str: 43 | return "\n\n".join([str(qa) for qa in self.questions]) 44 | 45 | 
def __len__(self): 46 | return len(self.questions) 47 | 48 | def answers_str(self) -> str: 49 | return "\n\n".join( 50 | [ 51 | ( 52 | qa.answer["content"] or "" 53 | if isinstance(qa.answer, dict) 54 | else qa.answer or "" 55 | ) 56 | for qa in self.questions 57 | ] 58 | ) 59 | 60 | def to_html(self) -> str: 61 | html = """ 62 | """ 63 | for qa in self.questions: 64 | answer = qa.answer 65 | html += f""" 66 | 67 | 71 | 72 | 73 | 74 | 75 | """ 76 | html += "
68 |
69 | Q: {qa.question} 70 |
A: {answer}
" 77 | return html 78 | 79 | 80 | def convert_to_question_answers( 81 | response_questions: ResponseQuestions, 82 | ) -> List[QuestionAnswer]: 83 | question_answers = [] 84 | for i, q in enumerate(response_questions.questions): 85 | question_answer = QuestionAnswer.question_factory(q) 86 | question_answers.append(question_answer) 87 | if i == 0: 88 | question_answer.possible_answers = response_questions.possible_answers 89 | return question_answers 90 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/confidence_schema.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import Union 3 | 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | from data_questionnaire_agent.translation import t 7 | 8 | 9 | class ConfidenceDegree(StrEnum): 10 | outstanding = "outstanding" 11 | high = "high" 12 | medium = "medium" 13 | mediocre = "mediocre" 14 | low = "low" 15 | 16 | 17 | CONFIDENCE_DEGREE_DICT = { 18 | ConfidenceDegree.outstanding: 5, 19 | ConfidenceDegree.high: 4, 20 | ConfidenceDegree.medium: 3, 21 | ConfidenceDegree.mediocre: 2, 22 | ConfidenceDegree.low: 1, 23 | } 24 | 25 | 26 | class ConfidenceRating(BaseModel): 27 | """Represents a rating of how confident the model is to give advice to a customer based on a questionnaire""" 28 | 29 | id: Union[int, None] = Field( 30 | default=None, description="The identifier of this session configuration" 31 | ) 32 | 33 | reasoning: str = Field( 34 | ..., description="The models's reasoning behind the confidence rating." 
35 | ) 36 | 37 | rating: ConfidenceDegree = Field( 38 | ..., 39 | description="The confidence rating of the model to give advice to a customer based on a questionnaire", 40 | ) 41 | 42 | def _value(self) -> int: 43 | return CONFIDENCE_DEGREE_DICT[self.rating] 44 | 45 | def __lt__(self, other): 46 | if isinstance(other, ConfidenceRating): 47 | return self._value() < other._value() 48 | return NotImplemented 49 | 50 | def __le__(self, other): 51 | if isinstance(other, ConfidenceRating): 52 | return self._value() <= other._value() 53 | return NotImplemented 54 | 55 | def __gt__(self, other): 56 | if isinstance(other, ConfidenceRating): 57 | return self._value() > other._value() 58 | return NotImplemented 59 | 60 | def __ge__(self, other): 61 | if isinstance(other, ConfidenceRating): 62 | return self._value() >= other._value() 63 | return NotImplemented 64 | 65 | def to_markdown(self, locale: str = "en") -> str: 66 | return f""" 67 | # {t("Confidence Degree", locale=locale)} 68 | 69 | `{t("confidence_degree_" + self.rating, locale=locale)}` 70 | 71 | ## {t("Reasoning", locale=locale)} 72 | 73 | {self.reasoning} 74 | """ 75 | 76 | def to_html(self, language: str = "en") -> str: 77 | return f""" 78 |
{t("confidence_degree_" + self.rating)}
79 | 80 |

{t("Reasoning", locale=language)}

81 | 82 |

{self.reasoning}

83 | """ 84 | 85 | 86 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/consultant_rating.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import Optional 3 | 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | 7 | class AnalystRating(StrEnum): 8 | very_suitable = "very suitable" 9 | suitable = "suitable" 10 | moderately_suitable = "moderately suitable" 11 | hardly_suitable = "hardly suitable" 12 | unsuitable = "unsuitable" 13 | 14 | 15 | SCORES = { 16 | AnalystRating.very_suitable: 5, 17 | AnalystRating.suitable: 4, 18 | AnalystRating.moderately_suitable: 3, 19 | AnalystRating.hardly_suitable: 2, 20 | AnalystRating.unsuitable: 1, 21 | } 22 | 23 | 24 | class ConsultantRating(BaseModel): 25 | """Represents the degree to which a consultant with a specific profile can help a customer that answered a questionnaire.""" 26 | 27 | analyst_name: str = Field(..., description="The analyst's name.") 28 | 29 | analyst_linkedin_url: Optional[str] = Field( 30 | ..., description="The optional analyst LinkedIN URL" 31 | ) 32 | 33 | reasoning: str = Field( 34 | ..., 35 | description="""The models's reasoning behind the analyst rating. 36 | Why is this analyst suitable or not to support the client which answered the questionnnaire. 37 | You can use markdown to highlight the main keywords in the reasoning. 
38 | """, 39 | ) 40 | 41 | rating: AnalystRating = Field( 42 | ..., 43 | description="The rating of the consulting capabilities of the consultant to help the customer", 44 | ) 45 | 46 | 47 | class ScoredConsultantRating(ConsultantRating): 48 | """Rrepresents a scored consultant rating.""" 49 | 50 | score: int = Field( 51 | ..., 52 | description="The score applied to this consultant for the current customer.", 53 | ) 54 | 55 | 56 | class ConsultantRatings(BaseModel): 57 | consultant_ratings: list[ConsultantRating] = Field( 58 | ..., description="The list of consultant ratings" 59 | ) 60 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/global_configuration.py: -------------------------------------------------------------------------------- 1 | from pydantic.v1 import BaseModel, Field 2 | 3 | from data_questionnaire_agent.log_init import logger 4 | 5 | 6 | class GlobalConfigurationProperty(BaseModel): 7 | config_key: str = Field(..., description="The configuration key") 8 | config_value: str = Field(..., description="The configuration value") 9 | 10 | 11 | class GlobalConfiguration(BaseModel): 12 | properties: list[GlobalConfigurationProperty] = Field( 13 | ..., description="The list of properties in the configuration" 14 | ) 15 | 16 | def get_default_session_steps(self, default_steps: int): 17 | for p in self.properties: 18 | if p.config_key == "MESSAGE_LOWER_LIMIT": 19 | try: 20 | return int(p.config_value) 21 | except ValueError as e: 22 | logger.warn( 23 | f"Could not extract default steps from configuration: {e}" 24 | ) 25 | return default_steps 26 | return default_steps 27 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/initial_question_data.py: -------------------------------------------------------------------------------- 1 | special_question_data = [ 2 | { 3 | "img_src": "poor_data_quality.png", 4 | "img_alt": "Poor data 
quality", 5 | "title": "Poor data quality", 6 | "text": "Low-quality data can lead to incorrect insights and poor decision-making.", 7 | }, 8 | { 9 | "img_src": "compliance_risks.png", 10 | "img_alt": "Compliance and security risks", 11 | "title": "Compliance and security risks", 12 | "text": "Mishandling data can lead to legal troubles and reputational damage.", 13 | }, 14 | { 15 | "img_src": "data_silos.png", 16 | "img_alt": "Data silos", 17 | "title": "Data silos", 18 | "text": "Data trapped in departmental silos can be inaccessible to other parts.", 19 | }, 20 | { 21 | "img_src": "lack_of_skilled_personnel.png", 22 | "img_alt": "Lack of skilled personnel", 23 | "title": "Lack of skilled personnel", 24 | "text": "Missing skills in data science, analytics, AI and ML can impede the effective use of data.", 25 | }, 26 | { 27 | "img_src": "data_overload.png", 28 | "img_alt": "Data overload", 29 | "title": "Data overload", 30 | "text": '"Data glut" can slow down processes and make it difficult to identify what data is actually useful.', 31 | }, 32 | { 33 | "img_src": "cost_and_complexity.png", 34 | "img_alt": "Cost and complexity", 35 | "title": "Cost and complexity", 36 | "text": "A robust data analytics infrastructure requires significant investment of resources.", 37 | }, 38 | { 39 | "img_src": "inconsistent_data_strategies.png", 40 | "img_alt": "Inconsistent data strategies", 41 | "title": "Inconsistent data strategies", 42 | "text": "Difficult to align with modern concepts like Data Fabric, Mesh and Generative AI.", 43 | }, 44 | { 45 | "img_src": "resistence_to_change.png", 46 | "img_alt": "Resistance to change", 47 | "title": "Resistance to change", 48 | "text": "Employees need to adapt to new ways of operating to make data-driven transformation work.", 49 | }, 50 | ] 51 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/jwt_token.py: 
-------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic.v1 import BaseModel, Field 4 | 5 | 6 | class JWTToken(BaseModel): 7 | """JWT Token Data""" 8 | 9 | email: str = Field(..., description="The email") 10 | token: str = Field(..., description="The whole token") 11 | 12 | 13 | class JWTTokenData(BaseModel): 14 | name: str = Field(..., description="The subject on the token") 15 | email: str = Field(..., description="The email") 16 | time_delta_minutes: Optional[int] = Field( 17 | ..., description="Determines the expiry date of the token" 18 | ) 19 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/languages.py: -------------------------------------------------------------------------------- 1 | ENGLISH = "en" 2 | FARSI = "fa" 3 | 4 | DEFAULT_LANGUAGE = ENGLISH 5 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/mail_data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class MailData(BaseModel): 7 | person_name: str = Field(..., description="The name of the person") 8 | email: str = Field(..., description="The actual name of the person") 9 | 10 | 11 | class Email(BaseModel): 12 | recipient: str = Field(..., description="The recipient email") 13 | subject: str = Field(..., description="The email subject") 14 | html_body: str = Field(..., description="The html email body") 15 | files: list[Path] = Field(..., description="The attachment files") 16 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/ontology_schema.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | from pydantic.v1 import BaseModel, Field 4 
| 5 | 6 | class Edge(BaseModel): 7 | """Represents single triplet with source, target and relationship names""" 8 | 9 | source: str = Field(..., description="The source node") 10 | relationship: str = Field( 11 | ..., description="The relationship between the source and the target node" 12 | ) 13 | target: str = Field(..., description="The target node") 14 | 15 | 16 | class Ontology(BaseModel): 17 | """Represents a container with a list of source, target and relationship names""" 18 | 19 | relationships: List[Edge] = Field( 20 | ..., 21 | description="The list of all edges in the questionnaire", 22 | ) 23 | 24 | 25 | class AnalyzedOntology(BaseModel): 26 | """Represents a container with a list of source, target and relationship names with extended measurements""" 27 | 28 | relationships: List[Edge] = Field( 29 | ..., 30 | description="The list of all edges with extended information in the questionnaire", 31 | ) 32 | degree_centrality: Dict[str, float] = Field( 33 | ..., 34 | description="The dictionary of node to degree centrality", 35 | ) 36 | betweenness_centrality: Dict[str, float] = Field( 37 | ..., 38 | description="The dictionary of node to degree betweenness", 39 | ) 40 | connected_component_importance_dict: Dict[str, int] = Field( 41 | ..., 42 | description="Dictionary containing the number of nodes in the subgraph of the node which is the key", 43 | ) 44 | 45 | 46 | if __name__ == "__main__": 47 | print(Ontology.schema_json()) 48 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/question_suggestion.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from pydantic.v1 import BaseModel, Field 4 | 5 | 6 | class QuestionSuggestion(BaseModel): 7 | id: Union[int, None] = Field(default=None, description="The unique identifier") 8 | img_src: str = Field(..., description="The image associated to the suggestion") 9 | img_alt: str = 
Field(..., description="The alternative description of the image") 10 | title: str = Field(..., description="The suggestion title") 11 | main_text: str = Field(..., description="The suggestion text") 12 | svg_image: str = Field(..., description="The SVG image") 13 | 14 | 15 | class QuestionAndSuggestions(BaseModel): 16 | id: Union[int, None] = Field(default=None, description="The unique identifier") 17 | question: str = Field(default=None, description="The actual question") 18 | suggestions: list[QuestionSuggestion] = Field( 19 | ..., description="The list of suggested question suggestions" 20 | ) 21 | 22 | 23 | class QuestionInfo(BaseModel): 24 | question_and_suggestions: list[QuestionAndSuggestions] = Field( 25 | ..., description="Theh list of question and suggestions" 26 | ) 27 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/questionnaire_status.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Union 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class QuestionnaireStatus(BaseModel): 8 | id: Union[int, None] = Field(default=None, description="The unique identifier") 9 | session_id: str = Field(..., description="The session identifier") 10 | question: str = Field(..., description="The question") 11 | answer: Union[str, None] = Field(default="", description="The optional answer") 12 | final_report: bool = Field(..., description="Whether this is the final report") 13 | created_at: Union[datetime, None] = Field( 14 | default=None, description="The created timestamp" 15 | ) 16 | updated_at: Union[datetime, None] = Field( 17 | default=None, description="The updated timestamp" 18 | ) 19 | total_cost: float = Field(default=0, description="") 20 | clarification: Union[str, None] = Field( 21 | default=None, description="The clarification or explanation of the question" 22 | ) 23 | question_id: int | None 
= Field( 24 | default=None, 25 | description="The question identifier in case a question is NOT generated", 26 | ) 27 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/server_model.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | 3 | from pydantic.v1 import BaseModel, Field 4 | 5 | from data_questionnaire_agent.model.global_configuration import GlobalConfiguration 6 | from data_questionnaire_agent.model.questionnaire_status import QuestionnaireStatus 7 | from data_questionnaire_agent.model.session_configuration import SessionConfiguration 8 | 9 | 10 | class ServerMessage(BaseModel): 11 | session_id: str = Field(..., description="The application's source identifier") 12 | question: str = Field(..., description="The question") 13 | answer: str = Field( 14 | default="", description="The answer to the question. Should come from the user" 15 | ) 16 | final_report: bool = Field( 17 | default=False, 18 | description="Whether the question is to be seen as a final report", 19 | ) 20 | suggestions: List[Any] = Field( 21 | default=[], description="The list of suggested responses" 22 | ) 23 | clarification: Union[str, None] = Field( 24 | ..., description="The clarification or explanation of the question" 25 | ) 26 | question_id: int | None = Field( 27 | default=None, 28 | description="The question identifier in case a question is NOT generated", 29 | ) 30 | 31 | 32 | class ServerMessages(BaseModel): 33 | session_id: str = Field(..., description="The application's source identifier") 34 | server_messages: List[ServerMessage] = Field( 35 | ..., description="A list with server messages" 36 | ) 37 | session_configuration: Union[SessionConfiguration, None] = Field( 38 | default=None, description="The session configuration" 39 | ) 40 | global_configuration: Union[GlobalConfiguration, None] = Field( 41 | default=None, description="The global 
configuration with key value pairs" 42 | ) 43 | 44 | 45 | class ErrorMessage(BaseModel): 46 | session_id: str = Field(..., description="The application's source identifier") 47 | error: str = Field(..., description="The error message") 48 | 49 | 50 | def assert_server_messages_factory(questionnaire: List[QuestionnaireStatus]): 51 | assert questionnaire is not None, "The questionnaire is none" 52 | assert len(questionnaire) > 0, "Questionnaire is empty" 53 | 54 | 55 | def convert_questionnaire( 56 | questionnaire: List[QuestionnaireStatus], 57 | ) -> List[ServerMessage]: 58 | return [ 59 | ServerMessage( 60 | session_id=q.session_id, 61 | question=q.question, 62 | answer="" if q.answer is None else q.answer, 63 | final_report=q.final_report, 64 | clarification=q.clarification, 65 | question_id=q.question_id, 66 | ) 67 | for q in questionnaire 68 | ] 69 | 70 | 71 | def server_messages_factory(questionnaire: List[QuestionnaireStatus]) -> ServerMessages: 72 | assert_server_messages_factory(questionnaire) 73 | session_id = questionnaire[0].session_id 74 | return ServerMessages( 75 | session_id=session_id, 76 | server_messages=convert_questionnaire(questionnaire), 77 | ) 78 | -------------------------------------------------------------------------------- /data_questionnaire_agent/model/session_configuration.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import List, Union 3 | 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | DEFAULT_SESSION_STEPS = 6 7 | 8 | SESSION_STEPS_CONFIG_KEY = "session-steps" 9 | SESSION_STEPS_LANGUAGE_KEY = "session-language" 10 | SESSION_CHAT_TYPE = "session-chat-type" 11 | CLIENT_ID_KEY = "session-client-id" 12 | 13 | 14 | class ChatType(StrEnum): 15 | DIVERGING = "diverging" 16 | TO_THE_POINT = "to_the_point" 17 | 18 | 19 | DEFAULT_CHAT_TYPE = ChatType.DIVERGING 20 | 21 | 22 | def chat_type_factory(s: str) -> ChatType: 23 | try: 24 | return ChatType(s) 25 | 
except ValueError: 26 | return DEFAULT_CHAT_TYPE 27 | 28 | 29 | class SessionConfigurationEntry(BaseModel): 30 | id: Union[int, None] = Field( 31 | default=None, description="The identifier of this session configuration" 32 | ) 33 | session_id: str = Field(..., description="The application's source identifier") 34 | config_key: str = Field(..., description="The configuration key") 35 | config_value: str = Field(..., description="The configuration value") 36 | 37 | 38 | class SessionConfiguration(BaseModel): 39 | configuration_entries: List[SessionConfigurationEntry] = Field( 40 | ..., description="All session configuration entries" 41 | ) 42 | 43 | 44 | class SessionProperties(BaseModel): 45 | session_steps: int = Field(..., description="The session steps") 46 | session_language: str = Field(..., description="The session language") 47 | chat_type: ChatType = Field(..., description="The type of the chat") 48 | 49 | 50 | def create_session_configurations( 51 | session_id: str, 52 | session_properties: SessionProperties, 53 | client_id: str = "", 54 | ) -> List[SessionConfigurationEntry]: 55 | session_steps = session_properties.session_steps 56 | chat_type = session_properties.chat_type 57 | language = session_properties.session_language 58 | session_configuration_entry = SessionConfigurationEntry( 59 | session_id=session_id, 60 | config_key=SESSION_STEPS_CONFIG_KEY, 61 | config_value=str(session_steps), 62 | ) 63 | session_configuration_language = SessionConfigurationEntry( 64 | session_id=session_id, 65 | config_key=SESSION_STEPS_LANGUAGE_KEY, 66 | config_value=language, 67 | ) 68 | session_configuration_chat_type = SessionConfigurationEntry( 69 | session_id=session_id, 70 | config_key=SESSION_CHAT_TYPE, 71 | config_value=chat_type.value, 72 | ) 73 | session_keys = [ 74 | session_configuration_entry, 75 | session_configuration_language, 76 | session_configuration_chat_type, 77 | ] 78 | if client_id is not None and len(client_id.strip()) > 0: 79 | 
session_keys.append( 80 | SessionConfigurationEntry( 81 | session_id=session_id, 82 | config_key=CLIENT_ID_KEY, 83 | config_value=client_id, 84 | ) 85 | ) 86 | return session_keys 87 | -------------------------------------------------------------------------------- /data_questionnaire_agent/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/data_questionnaire_agent/server/__init__.py -------------------------------------------------------------------------------- /data_questionnaire_agent/server/agent_session.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from ulid import ULID 4 | 5 | 6 | class AgentSession: 7 | def __init__( 8 | self, 9 | # Id from the session cookie 10 | socket_id: str, 11 | session_id: Optional[str], 12 | ): 13 | self.socket_id = socket_id 14 | self.session_id = ( 15 | session_id 16 | if session_id is not None and len(session_id) > 0 17 | else str(ULID()) 18 | ) 19 | agent_sessions[self.session_id] = self 20 | 21 | 22 | agent_sessions: Dict[str, AgentSession] = {} 23 | -------------------------------------------------------------------------------- /data_questionnaire_agent/server/questionnaire_server_consultants.py: -------------------------------------------------------------------------------- 1 | from aiohttp import web 2 | 3 | from data_questionnaire_agent.log_init import logger 4 | from data_questionnaire_agent.server.server_support import ( 5 | CORS_HEADERS, 6 | extract_language, 7 | extract_session, 8 | routes, 9 | ) 10 | from data_questionnaire_agent.service.consultant_service import ( 11 | calculate_consultant_ratings_for, 12 | ) 13 | 14 | 15 | @routes.options("/consultant/ratings/{session_id}") 16 | async def consultant_ratings(_: web.Request) -> web.Response: 17 | return 
@routes.options("/consultant/ratings/{session_id}")
async def consultant_ratings_options(_: web.Request) -> web.Response:
    """CORS pre-flight handler: accept requests from any host."""
    # Renamed from `consultant_ratings`: the GET handler below used the same
    # name and silently shadowed this function at module level (flake8 F811),
    # which confuses tracebacks and tooling even though aiohttp had already
    # registered the route.
    return web.json_response({"message": "Accept all hosts"}, headers=CORS_HEADERS)


@routes.get("/consultant/ratings/{session_id}")
async def consultant_ratings(request: web.Request) -> web.Response:
    """Compute and return the consultant ratings for a session's final report.

    Raises:
        web.HTTPBadRequest: when the session is unknown or has no final report.
    """
    session_id = extract_session(request)
    language = extract_language(request)
    consultant_ratings = await calculate_consultant_ratings_for(session_id, language)
    if not consultant_ratings:
        raise web.HTTPBadRequest(
            text="Cannot find any consultant ratings for that report, because either the session does not exist or does not have a final report.",
            headers=CORS_HEADERS,
        )
    logger.info("Generating consultant advice for: %s", session_id)
    return web.json_response(consultant_ratings.dict(), headers=CORS_HEADERS)
def run_server():
    """Register all routes and static mounts, then start the aiohttp server."""
    # Client-side routing: every numeric step URL and each index link must
    # serve the SPA entry point.
    for step in range(MAX_SESSION_STEPS):
        app.router.add_get(f"/{step}", get_index)
    for link in INDEX_LINKS:
        app.router.add_get(link, get_index)
    app.add_routes(routes)
    app.router.add_static(
        "/images", path=web_server_cfg.images_folder.as_posix(), name="images"
    )
    app.router.add_static("/", path=web_server_cfg.ui_folder.as_posix(), name="ui")
    loop = asyncio.new_event_loop()

    web.run_app(
        app,
        host=websocket_cfg.websocket_server,
        port=websocket_cfg.websocket_port,
        loop=loop,
    )


if __name__ == "__main__":
    run_server()
async def handle_error(fun: "Callable[[web.Request], Awaitable]", **kwargs) -> "Any":
    """Invoke ``fun`` with the request found in ``kwargs`` and convert any
    failure into an HTTP 400 response.

    Parameters:
        fun: An async callable taking the web request. (The previous
            ``Awaitable`` annotation was wrong: a coroutine *function* is
            passed here, not an awaitable instance; likewise ``-> any`` used
            the builtin function instead of ``typing.Any``. String annotations
            avoid importing names not in this module's scope.)
        kwargs: Must contain the key ``request`` with the current web request.

    Raises:
        web.HTTPBadRequest: when the wrapped handler raises any exception.
    """
    try:
        return await fun(kwargs["request"])
    except Exception as e:
        logger.error(f"Error occurred: {e}", exc_info=True)
        raise web.HTTPBadRequest(
            text="Please make sure the JSON body is available and well formatted."
        )


def extract_session(request: web.Request) -> str:
    """Read the ``session_id`` path parameter or fail with 404."""
    session_id = request.match_info.get("session_id", None)
    logger.info("PDF session_id: %s", session_id)
    if session_id is None:
        raise web.HTTPNotFound(text="No session id specified")
    return session_id


def extract_language(request: web.Request) -> str:
    """Return the ``language`` query parameter, defaulting to English."""
    return request.rel_url.query.get("language", "en")
RESULT_LIMIT = 5


class SearchTermsInput(BaseModel):
    """Search terms used for the search"""

    search_terms: List[str] = Field(
        ...,
        description="Search terms used to get more information from the search engine",
    )


class DuckDuckGoTool(BaseTool):
    name = "get_search_duck_duck_go"
    description = "Useful to find search results on all possible topics. It produces searches for information via duck duck go search engine."

    def _run(self, search_terms: List[str]) -> Any:
        """Use the tool.

        Add run_manager: Optional[CallbackManagerForToolRun] = None
        to child implementations to enable tracing,
        """
        # Bug fix: join with a space. "".join(...) glued multiple search terms
        # into one unreadable token (e.g. ["dark", "data"] -> "darkdata"),
        # corrupting the query sent to the search engine.
        search_res = text_search(" ".join(search_terms), limit=RESULT_LIMIT)
        return "\n".join([r["body"] for r in search_res])

    args_schema: Optional[Type[BaseModel]] = SearchTermsInput


def create_clarification_agent() -> AgentExecutor:
    """Create an OpenAI-functions agent equipped with the DuckDuckGo tool."""
    tools = [DuckDuckGoTool()]
    return initialize_agent(
        tools, cfg.llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True
    )


def text_search(input: str, limit: int = 10) -> List[dict]:
    """Run a DuckDuckGo text search and return up to ``limit`` result dicts."""
    from itertools import islice

    res_list = []
    with DDGS() as ddgs:
        for r in islice(
            ddgs.text(input, region="wt-wt", safesearch="off", timelimit="y"), limit
        ):
            res_list.append(r)
    return res_list


def answers(input: str):
    """Print DuckDuckGo instant answers for ``input`` (debug helper)."""
    with DDGS() as ddgs:
        for r in ddgs.answers(input):
            print(r)


if __name__ == "__main__":

    def test_search():
        res_list = text_search("Dark data")
        for res in res_list:
            print(res)

    tool = DuckDuckGoTool(
        name="search_duck_duck_go",
        description="Used to find answers and explanations based on keywords",
    )
    res = tool.run({"search_terms": ["Weather in London tomorrow"]})
    from data_questionnaire_agent.log_init import logger

    logger.info(res)
    agent_executor = create_clarification_agent()
    response = agent_executor.run("What is dark data?")
    logger.info("Agent response: %s", response)
    logger.info("Agent response type: %s", type(response))
def prompt_factory_confidence(language: str) -> ChatPromptTemplate:
    """Build the chat prompt used to rate confidence in a questionnaire."""
    prompts = get_prompts(language)
    assert (
        "confidence_prompt" in prompts
    ), "Make sure that you have the confidence prompt in your prompts file."
    return prompt_factory_generic(
        section=prompts["confidence_prompt"],
        input_variables=[PARAM_QUESTIONS_ANSWERS],
        prompts=prompts,
    )


def create_structured_question_call(language: str) -> RunnableSequence:
    """Compose the confidence prompt with a structured-output model."""
    return prompt_factory_confidence(language) | cfg.llm.with_structured_output(
        ConfidenceRating
    )


def prepare_confidence_chain_call(questionnaire: Questionnaire) -> dict:
    """Map the questionnaire onto the prompt's input variables."""
    return {PARAM_QUESTIONS_ANSWERS: str(questionnaire)}


async def calculate_confidence_rating(
    questionnaire: Questionnaire, language: str
) -> Union[ConfidenceRating, None]:
    """Ask the LLM for a confidence rating on the questionnaire.

    Returns None (after logging) when the chain invocation fails.
    """
    assert questionnaire is not None, "Missing questionnaire"
    try:
        chain = create_structured_question_call(language)
        return await chain.ainvoke(prepare_confidence_chain_call(questionnaire))
    except Exception:
        logger.exception("Failed to calculate confidence rating.")
        return None
async def execute_script(path: Path) -> Union[str, bool]:
    """Execute the SQL bootstrap script at ``path`` against the database.

    The script only runs when the schema is missing (no
    ``tb_session_configuration`` table yet) or when the script itself creates
    a database.

    Returns:
        True when the script ran, False when it was skipped, or the error
        message (str) when execution failed.
    """
    assert path.exists(), f"Path {path} does not exist."
    with open(path, "r") as file:
        sql_script = file.read()
        assert len(sql_script) > 0, "SQL script seems to be empty"

    conn = None
    try:
        conn = await create_connection()
        await conn.set_autocommit(True)
        async with conn.cursor() as cursor:
            exists = await table_exists("tb_session_configuration", cursor)
            if not exists or "create database" in sql_script.lower():
                # If there are no table or no questions the script is executed.
                # (The previous `cursor = await cursor.execute(...)` rebinding
                # was pointless and shadowed the context-managed cursor.)
                await cursor.execute(sql_script)
            else:
                return False
        return True
    except Exception as e:
        logger.exception("Cannot execute script")
        return str(e)
    finally:
        if conn:
            await conn.close()


async def table_exists(table: str, cursor: AsyncCursor) -> bool:
    """Check whether ``table`` exists in the public schema."""
    await cursor.execute(
        """
SELECT EXISTS (
    SELECT FROM information_schema.tables
    WHERE table_schema = 'public'
    AND table_name = %(table)s
);
""",
        {"table": table},
    )
    row = await cursor.fetchone()
    # Defensive: fetchone() may in principle return no row.
    return bool(row) and bool(row[0])
VST = TypeVar("VST", bound="VectorStore")


def load_text(path: Path) -> List["Document"]:
    """
    Loads the text files and extracts chunks of a pre-defined size.
    It simply loads whole documents without any splitting.

    Parameters:
        path (Path): The path where the documents are saved.

    Returns:
        List[Document]: Returns a list of documents
    """
    assert path.exists(), f"Path {path} does not exist"
    all_pages = []
    for text_file in path.glob("*.txt"):
        loader = TextLoader(text_file.as_posix(), encoding="utf-8")
        pages: List["Document"] = loader.load()
        for i, p in enumerate(pages):
            # Strip the directory part, keep only "file page N" as the source.
            file_name = re.sub(r".+[\\/]", "", p.metadata["source"])
            p.metadata["source"] = f"{file_name} page {i + 1}"
        all_pages.extend(pages)
        logger.info(f"Processed {text_file}, all_pages size: {len(all_pages)}")
    log_stats(all_pages)
    return all_pages


def log_stats(documents: List["Document"]):
    """
    Logs statistics about a list of documents.

    Parameters:
        documents (List[Document]): The list of documents with the knowledge base.
    """
    logger.info(f"Total number of documents {len(documents)}")
    counts = [count_words(d) for d in documents]
    logger.info(f"Tokens Max {np.max(counts)}")
    logger.info(f"Tokens Min {np.min(counts)}")
    # Bug fix: this line previously repeated the "Tokens Min" label although
    # it logs the mean.
    logger.info(f"Tokens Mean {np.mean(counts)}")


def count_words(document: "Document") -> int:
    """Count the whitespace/comma/period separated tokens in the page text."""
    # Raw string: "[\s,.]" in a normal literal relies on an invalid escape.
    splits = [s for s in re.split(r"[\s,.]", document.page_content) if len(s) > 0]
    return len(splits)
def generate_embeddings(documents: List["Document"], persist_directory: str) -> "VST":
    """
    Receives a list of documents and generates the embeddings via OpenAI API.

    Parameters:
        documents (List[Document]): The document list with one page per document.
        persist_directory (str): The directory where the FAISS index is saved.

    Returns:
        VST: Returns a reference to the vector store, or None on failure.
    """
    try:
        docsearch = FAISS.from_documents(documents, cfg.embeddings)
        docsearch.save_local(persist_directory)
        logger.info("Vector database persisted")
        return docsearch
    except Exception:
        logger.exception("Failed to process documents")
        # Bug fix: the previous fallback called docsearch.persist(), but the
        # LangChain FAISS store has no persist() method (that is the Chroma
        # API), so a failed save_local() raised AttributeError here instead
        # of returning None as intended.
        return None


if __name__ == "__main__":
    from data_questionnaire_agent.config import cfg

    raw_text_folder = cfg.raw_text_folder
    document_list = load_text(raw_text_folder)
    logger.info("Amount of documents: %d", len(document_list))
    generate_embeddings(document_list, cfg.embeddings_persistence_dir.as_posix())
def extract_nodes(
    relationships: List["Relationship"],
) -> Tuple[Dict[str, int], Dict[int, str]]:
    """Collect the unique node names from the relationships and assign ids.

    Parameters:
        relationships: Objects exposing ``source`` and ``target`` attributes
            (the previous ``List[Dict[str, str]]`` annotation was wrong: the
            code reads ``r.source`` / ``r.target`` as attributes, not keys).

    Returns:
        Tuple[Dict[str, int], Dict[int, str]]: name -> id and id -> name maps.
    """
    node_set = set()
    for r in relationships:
        node_set.add(r.source)
        node_set.add(r.target)
    name_id_dict = {name: i for i, name in enumerate(node_set)}
    id_name_dict = {i: name for i, name in enumerate(node_set)}
    return (name_id_dict, id_name_dict)


def extract_edges(
    relationships: List["Relationship"], node_map: Dict[str, int]
) -> List[Tuple[int, int]]:
    """Translate each relationship into a (source_id, target_id) edge.

    Parameters:
        relationships: Objects exposing ``source`` and ``target`` attributes.
        node_map: Mapping from node name to node id (the previous
            ``Dict[int, str]`` annotation had key and value types swapped).

    Returns:
        List[Tuple[int, int]]: One id pair per relationship, in input order.
    """
    edges = []
    for rel in relationships:
        source_id = node_map[rel.source]
        target_id = node_map[rel.target]
        edges.append((source_id, target_id))
    return edges
async def graphrag_context(question: str) -> Union[str, None]:
    """Fetch supporting context for ``question`` from the graphrag service.

    Returns the ``context_text`` field of the JSON response, or None when the
    request fails, the status is not 200 or the payload lacks that field.
    """
    raw_params = {
        "question": question,
        "use_context_records": False,
        "search": cfg.graphrag_mode,
        "context_size": cfg.graphrag_context_size,
        "project": cfg.graphrag_project,
    }
    # Drop unset parameters so they are not serialised as "None".
    params = {key: value for key, value in raw_params.items() if value is not None}

    headers = {
        "Authorization": f"Bearer {cfg.graphrag_jwt}"  # Add the JWT to the Authorization header
    }

    timeout = httpx.Timeout(
        connect=5.0,  # 5 seconds for connection establishment
        read=cfg.graphrag_read_timeout,  # 20 seconds for reading response
        write=10.0,  # 10 seconds for sending request data
        pool=5.0,  # 5 seconds for acquiring a connection from the pool
    )
    context_url = f"{cfg.graphrag_base_url}/context"
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                context_url, params=params, headers=headers, timeout=timeout
            )
        except httpx.RequestError:
            logger.exception(
                f"Failed to process query ({context_url}) to fetch graphrag context."
            )
            return None
        if response.status_code != 200:
            logger.warning(
                f"Failed with status code {response.status_code}: {response.text}"
            )
            return None
        json_result = response.json()
        if "context_text" not in json_result:
            logger.warning("Could not find context_text field in json")
            return None
        return json_result["context_text"]


if __name__ == "__main__":
    import asyncio

    res = asyncio.run(
        graphrag_context(
            "Which areas of your data ecosystem are you most converned about?"
        )
    )
    print(res)
def generate_html(
    questionnaire: Questionnaire, advices: ConditionalAdvice, language: str = "en"
) -> str:
    """Render the questionnaire results into the HTML report template."""
    template_loader = jinja2.FileSystemLoader(cfg.template_location)
    template_env = jinja2.Environment(loader=template_loader)
    template = template_env.get_template("results-template.html")
    context = {
        "banner": t("banner_link", locale=language),
        "questionnaire": questionnaire.to_html(),
        "advices": replace_bold_markdown(advices.to_advice_html()),
        "avoids": replace_bold_markdown(advices.to_avoid_html()),
        "title_confidence": t("Confidence Degree", locale=language),
        "confidence": replace_bold_markdown(advices.confidence_html(language)),
        "positive_outcomes": replace_bold_markdown(advices.positive_outcomes_html()),
        "timestamp": datetime.today().strftime("%A, %b %d %Y"),
        "big_thank_you": t(
            "A big thank you for completing a session with",
            name=cfg.product_title,
            locale=language,
        ),
        "intro_advice": t("intro_advice", locale=language),
        "offering_long": t("offering_long", locale=language),
        "personal_offer": t("A personal offer for you", locale=language),
        "produced_on": t("Produced on", locale=language),
        "love_feedback": t("We would love your feedback", locale=language),
        "for_more_info": t("for_more_info", locale=language),
        "title_potential_outcomes": t("Potential positive outcomes", locale=language),
        "title_transcript": t("Transcript", locale=language),
        "title_what_to_do": t("What you should do", locale=language),
        "title_what_to_avoid": t("What to avoid", locale=language),
    }
    return template.render(context)


def generate_pdf_from(
    questionnaire: Questionnaire, advices: ConditionalAdvice, language: str = "en"
) -> Path:
    """Render the report HTML and convert it to a PDF via wkhtmltopdf.

    Returns the path of the generated PDF, or None when no questionnaire is
    supplied.
    """
    if questionnaire is None:
        return None
    html = generate_html(questionnaire, advices, language)
    logger.info("PDF html: %s", html)
    target = cfg.pdf_folder / f"generated_advice_{generate_iso()}.pdf"
    logger.info("PDF to be created file name: %s", target)
    pdf_config = pdfkit.configuration(wkhtmltopdf=cfg.wkhtmltopdf_binary.as_posix())
    pdfkit.from_string(
        html,
        target,
        configuration=pdf_config,
        verbose=True,
        options={"--enable-local-file-access": True},
    )
    logger.info("Created PDF: %s", target)
    return target


def generate_iso() -> str:
    """File-name-safe timestamp: ISO format with ':' replaced by '.'."""
    return datetime.now().isoformat().replace(":", ".")


if __name__ == "__main__":
    from data_questionnaire_agent.test.provider.advice_provider import (
        create_simple_advice,
    )
    from data_questionnaire_agent.test.provider.questionnaire_provider import (
        create_questionnaire_2_questions,
    )

    questionnaire: Questionnaire = create_questionnaire_2_questions()
    advices: ConditionalAdvice = create_simple_advice()
    logger.info("PDF Path: %s", generate_pdf_from(questionnaire, advices))
def prompt_factory_initial_questions(language: str) -> ChatPromptTemplate:
    """Build the prompt used to generate the first batch of questions."""
    prompts = get_prompts(language)
    return prompt_factory_generic(
        prompts["questionnaire"]["initial"],
        [
            "knowledge_base",
            "question",
            "answer",
            "questions_per_batch",
        ],
        prompts,
    )


def chain_factory_initial_question(language: str) -> LLMChain:
    """Create the structured-output chain producing ResponseQuestions."""
    prompt = prompt_factory_initial_questions(language)
    return create_structured_output_chain(
        ResponseQuestions,
        cfg.llm,
        prompt,
        verbose=cfg.verbose_llm,
    )


def prepare_initial_question(
    question: str,
    answer: str,
    questions_per_batch: int = 1,
    knowledge_base: str = "",
) -> dict:
    """Assemble the input mapping expected by the initial-question prompt."""
    return {
        "knowledge_base": knowledge_base,
        "question": question,
        "answer": answer,
        "questions_per_batch": questions_per_batch,
    }
# Lazily initialised vector store (only used when graphrag is disabled).
docsearch = None


async def fetch_context(questionnaire: Union[str, Questionnaire]) -> str:
    """Fetch knowledge-base context for a questionnaire or a plain question.

    Uses the graphrag service when enabled, otherwise a local vector
    similarity search. Returns an empty string when nothing is found or any
    error occurs.
    """
    global docsearch

    try:
        if isinstance(questionnaire, Questionnaire):
            questionnaire_str = str(questionnaire)
        else:
            questionnaire_str = questionnaire
        if cfg.use_graphrag:
            knowledge_base = await graphrag_context(questionnaire_str)
        else:
            if docsearch is None:
                # First call: build the vector store once and cache it.
                docsearch = init_vector_search()
            knowledge_base = similarity_search(
                docsearch, questionnaire_str, how_many=cfg.search_results_how_many
            )
        return knowledge_base or ""
    except Exception as e:
        logger.exception("Could not fetch context.")
        logger.error(str(e))
        return ""
ACCEPTED_LANGUAGES = ["en", "de"]


def adapt_language(language: str) -> str:
    """Map a locale string (e.g. "en-GB", "de-AT") to a supported language.

    Matching is case-insensitive, so "DE-AT" also maps to "de" (previously
    upper-case locales always fell back to the default). Unsupported or empty
    locales fall back to the first accepted language.

    Parameters:
        language (str): A locale or language code.

    Returns:
        str: One of ACCEPTED_LANGUAGES.
    """
    # Lower-case first so upper/mixed-case locales are recognised too.
    first_chars = re.sub(r"^([a-z]{2}).*", r"\1", language.lower())
    if first_chars in ACCEPTED_LANGUAGES:
        return first_chars
    return ACCEPTED_LANGUAGES[0]


if __name__ == "__main__":
    assert adapt_language("en-GB") == "en"
    assert adapt_language("en") == "en"
    assert adapt_language("de-DB") == "de"
    assert adapt_language("de-AT") == "de"
    assert adapt_language("pt-PT") == "en"
def prompt_factory_ontology(language: str) -> ChatPromptTemplate:
    """Build the prompt that extracts an ontology from Q&A plus advice."""
    prompts = get_prompts(language)
    assert (
        "extract_ontology" in prompts
    ), "Make sure that you have the ontolgy prompt in your prompts file."
    return prompt_factory_generic(
        section=prompts["extract_ontology"],
        input_variables=[PARAM_ADVICE, PARAM_QUESTIONS_ANSWERS],
        prompts=prompts,
    )


def chain_factory_ontology(language: str) -> LLMChain:
    """Legacy structured-output chain for ontology extraction."""
    return create_structured_output_chain(
        Ontology,
        cfg.llm,
        prompt_factory_ontology(language),
        verbose=cfg.verbose_llm,
    )


def create_structured_question_call(language: str) -> RunnableSequence:
    """Runnable pipeline: ontology prompt piped into a structured-output model."""
    return prompt_factory_ontology(language) | cfg.llm.with_structured_output(Ontology)


def prepare_ontology_chain_call(
    questionnaire: Questionnaire, conditional_advice: ConditionalAdvice
) -> dict:
    """Map the questionnaire and advice onto the prompt input variables."""
    return {
        PARAM_QUESTIONS_ANSWERS: str(questionnaire),
        PARAM_ADVICE: str(conditional_advice),
    }


async def create_ontology(
    questionnaire: Questionnaire, conditional_advice: ConditionalAdvice, language: str
) -> Ontology:
    """Extract an ontology from the questionnaire and the generated advice."""
    assert conditional_advice is not None, "Missing conditional advice"
    assert questionnaire is not None, "Missing questionnaire"
    chain = create_structured_question_call(language)
    return await chain.ainvoke(
        prepare_ontology_chain_call(questionnaire, conditional_advice)
    )
def factory_prompt(
    find_prompt: Callable, params: list[str], language: str = "en"
) -> ChatPromptTemplate:
    """Locate a prompt section via ``find_prompt`` and build a chat prompt."""
    prompts = get_prompts(language)
    return prompt_factory_generic(find_prompt(prompts), params, prompts)


def prompt_factory_generic(
    section: dict,
    input_variables: List[str],
    prompts: object,
    prompt_transform: Union[Callable, None] = None,
) -> ChatPromptTemplate:
    """Assemble the standard four-message chat prompt.

    The prompt consists of the section's system message, its (optionally
    transformed) human message with the given input variables, and the two
    general formatting/language tips.
    """
    human_message = section["human_message"]
    if prompt_transform is not None:
        human_message = prompt_transform(human_message)

    def _fixed(template: str) -> HumanMessagePromptTemplate:
        # Human message without any template variables.
        return HumanMessagePromptTemplate(
            prompt=PromptTemplate(template=template, input_variables=[])
        )

    messages = [
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                template=section["system_message"], input_variables=[]
            )
        ),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                template=human_message,
                input_variables=input_variables,
            )
        ),
        _fixed(prompts["general_messages"]["tip_correct_format"]),
        _fixed(prompts["general_messages"]["tip_language"]),
    ]
    return ChatPromptTemplate(messages=messages)
async def use_connection(func: Coroutine, commit: bool = True) -> Any:
    """Run *func* with a fresh database connection, then commit and close.

    Parameters:
        func: awaitable callable receiving the open AsyncConnection.
        commit: whether to commit before closing (default True).

    Returns:
        Whatever *func* returns, or None when connecting or the callable
        fails (the error is logged, not re-raised).
    """
    # Pre-bind so `finally` cannot raise NameError when create_connection()
    # itself fails (previously `conn` was only assigned inside the try body).
    conn = None
    try:
        conn = await create_connection()
        return await func(conn)
    except Exception as e:
        logger.exception(f"Could not create database connection: {e}")
        return None  # explicit: callers receive None on failure
    finally:
        if conn is not None:
            if commit:
                await conn.commit()
            await conn.close()
def divergent_prompt_transformer(prompt: str, language: str = "en") -> str:
    """Strip the topic-anchoring lines from a secondary-question prompt.

    Diverging chats should not stay tied to the main questionnaire topic, so
    every line mentioning it (in the selected language) is removed.

    Parameters:
        prompt: the raw human-message template text.
        language: ISO language code selecting the exclusion phrases;
            unknown codes fall back to the English phrases.

    Returns:
        The prompt with every line containing an exclusion phrase removed.
    """
    # Renamed from camelCase `defaultExclusions` to follow PEP 8.
    default_exclusions = [
        "Main questionnaire topic:",
        "The questions should explore topics related to the main topic",
    ]
    exclusions_by_language = {
        "en": default_exclusions,
        "de": [
            "Hauptfragebogen-Thema:",
            "Die Fragen sollten Themen im Zusammenhang mit dem Hauptthema",
        ],
    }

    # Get the exclusions for the selected language (English fallback).
    exclusions = exclusions_by_language.get(language, default_exclusions)

    # Keep only the lines that mention none of the exclusion phrases.
    kept = [
        line
        for line in prompt.splitlines()
        if not any(phrase in line for phrase in exclusions)
    ]

    return "\n".join(kept)
def prepare_secondary_question(
    questionnaire: Questionnaire,
    knowledge_base: str,
    questions_per_batch: int = cfg.questions_per_batch,
    is_recreate: bool = False,
) -> dict:
    """Assemble the template variables for the secondary-question prompt.

    Parameters:
        questionnaire: the question/answer history so far.
        knowledge_base: retrieved context text to ground the questions.
        questions_per_batch: how many questions the LLM should produce.
        is_recreate: when True, also pass the question being regenerated.

    Returns:
        A dict keyed by the prompt's input variable names.
    """
    questions = questionnaire.questions
    # The first answer doubles as the questionnaire's overall topic.
    topic = questions[0].answer if questions else ""
    params = {
        "knowledge_base": knowledge_base,
        "questions_answers": str(questionnaire),
        "answers": questionnaire.answers_str(),
        "questions_per_batch": questions_per_batch,
        "questionnaire_topic": topic,
    }
    if is_recreate:
        # The regeneration prompt needs the question it is replacing.
        params["previous_question"] = questions[-1].question
    return params
def create_bullet_list(items: List[str]) -> str:
    """Render *items* as a markdown bullet list, one "- item" per line."""
    bullets = [f"- {item}" for item in items]
    return "\n".join(bullets)
def create_summarization_call(language: str = "en") -> RunnableSequence:
    """Build the prompt → structured-LLM runnable that summarizes one questionnaire."""
    structured_llm = cfg.llm.with_structured_output(ReportDocumentSummarization)
    return prompt_factory_summarization_prompt(language) | structured_llm
# Matches non-greedy **bold** spans; the text between the markers is captured
# in the named group "content".
PATTERN_MARKDOWN_BOLD = re.compile(r"\*\*(?P<content>.+?)\*\*")


def replace_bold_markdown(
    html: str,
    format_str="{content}",
) -> str:
    """Rewrite every markdown **bold** span of *html* using *format_str*.

    The format string may reference {content} (the raw bold text) and
    {quoted_content} (the same text, URL-quoted).
    """

    def render(match: re.Match) -> str:
        content = match.group("content")
        return format_str.format(quoted_content=quote(content), content=content)

    # re.sub with a callable splices the rendered replacements between the
    # untouched stretches, exactly like a manual finditer scan would.
    return PATTERN_MARKDOWN_BOLD.sub(render, html)
async def process_secondary_questions(
    questionnaire: Questionnaire,
    question_per_batch: int,
    session_properties: SessionProperties,
    session_id: str,
) -> List[QuestionAnswer]:
    """Generate the next batch of follow-up questions for a session.

    Fetches supporting context for the questionnaire, asks the LLM for new
    questions and retries (up to 3 outer attempts) when any generated
    question already exists for the session; on the final attempt the batch
    is returned even if it repeats an earlier question.
    """
    knowledge_base = await fetch_context(questionnaire)
    secondary_question_input = prepare_secondary_question(
        questionnaire, knowledge_base, question_per_batch
    )
    retries = 3
    while retries > 0:
        retries -= 1
        # cfg.retry_args drives tenacity's policy for transient LLM failures;
        # each `attempt` context re-raises only when the policy is exhausted.
        async for attempt in AsyncRetrying(**cfg.retry_args):
            with attempt:
                response_questions: ResponseQuestions = (
                    await create_structured_question_call(session_properties).ainvoke(
                        secondary_question_input
                    )
                )
        # Check every candidate question against the session history in parallel.
        tasks = [
            check_question_exists(response_question, session_id)
            for response_question in response_questions.questions
        ]
        results = await asyncio.gather(*tasks)
        has_repeated = any(results)
        if not has_repeated or retries == 0:
            # Accept the batch when it is fresh, or when retries are exhausted
            # (a repeated question is then preferred over returning nothing).
            return convert_to_question_answers(response_questions)
def similarity_search(
    docsearch: FAISS, input: str, how_many=cfg.search_results_how_many
) -> str:
    """
    Performs repeated searches, widening the result count each round, and
    returns the largest joined text whose token count stays below the
    configured threshold.

    Parameters:
        docsearch FAISS: The object used to access the vector database.
        input str: The input of the search.
        how_many int: The initial number of results to be retrieved.

    Returns:
        str: The maximum amount of text with the number of tokens below the
        threshold specified in the configuration.
    """
    max_attempts = 4
    best_below_limit = ""
    for attempt in range(max_attempts):
        # Widen the search by one extra result each round.
        doc_list = docsearch.similarity_search(input, k=how_many + attempt)
        logger.info("Similarity search results: %s", len(doc_list))
        joined = join_pages(doc_list)
        token_count = num_tokens_from_string(joined)
        logger.info("Token count: %d", token_count)
        if token_count > cfg.token_limit:
            # Over budget: fall back to the last result set that fit.
            return best_below_limit
        best_below_limit = joined
    return best_below_limit
def prompt_factory_sentiment() -> ChatPromptTemplate:
    """Build the tagging prompt: one system message plus two human messages
    that both expose the single template variable "answer"."""
    section = prompts["tagging"]

    def human(template: str) -> HumanMessagePromptTemplate:
        return HumanMessagePromptTemplate(
            prompt=PromptTemplate(template=template, input_variables=["answer"])
        )

    system = SystemMessagePromptTemplate(
        prompt=PromptTemplate(template=section["system_message"], input_variables=[])
    )
    return ChatPromptTemplate(
        messages=[
            system,
            human(section["human_message"]),
            human(section["human_message_extraction"]),
        ]
    )
def prepare_sentiment_input(question: str) -> dict:
    """Wrap the user's answer in the dict shape the tagging chain expects."""
    return dict(answer=question)
def test_has_advice_questionnaire():
    """End-to-end: the advice chain must produce advice for dummy fixtures."""
    advice_input = prepare_conditional_advice(
        provide_knowledge_base(), provide_dummy_questionnaire()
    )
    result: ConditionalAdvice = create_chain().invoke(advice_input)
    assert result.has_advice is True, f"{result}"
    print("************ ADVICE ***************")
    print(result.to_html())
def test_create_markdown():
    """Round-trip a simple advice object through markdown and JSON."""
    advice = create_simple_advice()
    markdown = advice.to_markdown()
    assert "- Monitor data quality metrics" in markdown
    print(markdown)
    serialized = advice.json()
    assert serialized is not None
    round_tripped = ConditionalAdvice.parse_raw(serialized)
    assert round_tripped is not None
    assert len(round_tripped.advices) == len(
        advice.advices
    ), "Advices lengths do not match"
def eval_questionnaire(
    questionnaire: Questionnaire, expected: List[str], language: str = "en"
):
    """Run the confidence rating on *questionnaire* and assert the outcome.

    Parameters:
        questionnaire: the questionnaire under evaluation.
        expected: the acceptable rating values.
        language: the session language forwarded to the rating service.
    """
    confidence_rating: ConfidenceRating = asyncio.run(
        # Bug fix: the language argument was previously hard-coded to "en",
        # so the German test case silently evaluated in English.
        calculate_confidence_rating(questionnaire, language)
    )
    assert confidence_rating is not None
    assert confidence_rating.rating is not None
    assert confidence_rating.reasoning is not None
    assert confidence_rating.rating in expected
def provide_dummy_data():
    """Build consultant-call prompt data from fixture questionnaire and advice."""
    prompt_data = asyncio.run(
        prepare_consultant_call(
            create_questionnaire_7_questions(), create_full_advice1()
        )
    )
    assert prompt_data is not None, "There is no prompt data."
    questions_answers = prompt_data["questions_answers"]
    assert questions_answers is not None, "There are no question and answers"
    return prompt_data
def test_initial_question():
    """Drive the initial-question chain end to end with a 'Data Quality' answer."""
    language = "en"
    initial_question = get_prompts_object(language).questionnaire["initial"]["question"]
    assert initial_question is not None

    answer = "Data Quality"
    knowledge_base = asyncio.run(fetch_context(answer))
    chain_input = prepare_initial_question(
        question=initial_question,
        answer=answer,
        questions_per_batch=1,
        knowledge_base=knowledge_base,
    )
    res: dict = chain_factory_initial_question("en").invoke(chain_input)
    assert res is not None
    response_questions: ResponseQuestions = res["function"]

    logger.info("Results: ")
    logger.info(response_questions)
/data_questionnaire_agent/test/jwt_token_service_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from data_questionnaire_agent.service.jwt_token_service import ( 4 | decode_token, 5 | generate_token_batch, 6 | generate_token_batch_file, 7 | ) 8 | from data_questionnaire_agent.test.provider.jwt_token_data_provider import ( 9 | generate_token_data, 10 | ) 11 | 12 | 13 | def test_decode_token(): 14 | token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIwMUo3RzRHMlEyRUVTWTY2Q1BHUjZFR042SCIsIm5hbWUiOiJHaWwiLCJpYXQiOjE3MjYwNDQ3MDMsImV4cCI6MTcyNjA0NDc2M30.2IJ_DoSQ8hyU4DU3lgXZduYPvoaAgxP1WKqTHbphI8Y" 15 | decoded = decode_token(token) 16 | assert decoded is not None, "Decoded should not be none." 17 | 18 | 19 | def test_generate_token_batch(): 20 | jwt_token_data = generate_token_data() 21 | amount = 5 22 | jwt_tokens = asyncio.run(generate_token_batch(jwt_token_data, amount)) 23 | assert len(jwt_tokens) == amount 24 | for jwt_token in jwt_tokens: 25 | decoded = asyncio.run(decode_token(jwt_token.token)) 26 | assert decoded is not None, "Decoded should not be none." 
27 | 28 | 29 | def test_generate_token_batch_file(): 30 | jwt_token_data = generate_token_data() 31 | amount = 5 32 | file = asyncio.run(generate_token_batch_file(jwt_token_data, amount)) 33 | assert file.exists(), f"Cannot find file {file}" 34 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/mail_sender_test.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.service.mail_sender import ( 2 | validate_address, 3 | ) 4 | from data_questionnaire_agent.test.provider.multipart_provider import ( 5 | create_dummy_multipart, 6 | ) 7 | 8 | 9 | def test_validate_address_ok(): 10 | assert validate_address("john.doe@gmail.com") 11 | assert validate_address("mary.do@protonmail.com") 12 | 13 | 14 | def test_validate_address_not_ok(): 15 | assert not validate_address("john.doegmail.com") 16 | assert not validate_address("mary.doprotonmail.com") 17 | 18 | 19 | def test_create_attachment_email(): 20 | multipart = create_dummy_multipart() 21 | assert multipart is not None 22 | assert len(multipart.as_string()) > 0 23 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/ontology_service_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from data_questionnaire_agent.model.ontology_schema import Ontology 4 | from data_questionnaire_agent.service.ontology_service import ( 5 | create_structured_question_call, 6 | prepare_ontology_chain_call, 7 | ) 8 | from data_questionnaire_agent.test.provider.advice_provider import ( 9 | create_advice_with_questionnaire, 10 | ) 11 | 12 | 13 | def test_chain_factory_ontology(): 14 | chain = create_structured_question_call("en") 15 | assert chain is not None 16 | conditional_advice, questionnaire = create_advice_with_questionnaire() 17 | assert conditional_advice is not None 18 | assert 
questionnaire is not None 19 | call_params = prepare_ontology_chain_call(questionnaire, conditional_advice) 20 | res = asyncio.run(chain.ainvoke(call_params)) 21 | assert res is not None 22 | assert isinstance(res, Ontology) 23 | assert len(res.relationships) > 0 24 | with open("relationships.json", "w") as f: 25 | f.write(res.json()) 26 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/persistence_service_consultants_async_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from data_questionnaire_agent.service.persistence_service_consultants_async import ( 4 | delete_consultant, 5 | delete_session_consultant_ratings, 6 | delete_skill, 7 | read_consultants, 8 | read_session_consultant_ratings, 9 | save_consultant, 10 | save_session_consultant_ratings, 11 | upsert_skill, 12 | ) 13 | from data_questionnaire_agent.test.provider.consultant_provider import ( 14 | create_consultant_rating, 15 | create_simple_consultant, 16 | ) 17 | 18 | if __name__ == "__main__": 19 | 20 | def test_upsert_skill(): 21 | skill = "testing123" 22 | count = asyncio.run(upsert_skill(skill)) 23 | assert count == 1, "Count is expected to be 1" 24 | count = asyncio.run(delete_skill(skill)) 25 | assert count == 1, "Delete count is expected to be 1" 26 | 27 | async def test_save_consultant(): 28 | consultant = create_simple_consultant() 29 | await save_consultant(consultant) 30 | consultants = await read_consultants() 31 | assert len(consultants) > 0, "There should be at least one consultant" 32 | await delete_consultant(consultant) 33 | 34 | async def test_save_session_consultants(): 35 | dummy_session_id = "dummy1" 36 | consultant_ratings = create_consultant_rating() 37 | counter = await save_session_consultant_ratings( 38 | dummy_session_id, consultant_ratings 39 | ) 40 | assert counter == 1 41 | read_crs = await read_session_consultant_ratings(dummy_session_id) 42 | 
assert len(read_crs.consultant_ratings) == len( 43 | consultant_ratings.consultant_ratings 44 | ) 45 | await delete_session_consultant_ratings(dummy_session_id) 46 | 47 | # test_upsert_skill() 48 | # asyncio.run(test_save_consultant()) 49 | asyncio.run(test_save_session_consultants()) 50 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/persistence_service_questions_async_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from data_questionnaire_agent.service.persistence_service_questions_async import ( 4 | delete_question, 5 | insert_question, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | 10 | def test_insert_question(): 11 | question = "Which is the meaning of life?" 12 | id = asyncio.run( 13 | insert_question( 14 | question, 15 | "en", 16 | [ 17 | { 18 | "img_src": "", 19 | "img_alt": "", 20 | "title": "Test Suggestion Title", 21 | "main_text": "This is the main text of the suggestion", 22 | "svg_image": "", 23 | } 24 | ], 25 | ) 26 | ) 27 | print("Inserted question id", id) 28 | assert id > 0, "Id should be bigger than 0" 29 | return id 30 | 31 | # I created seperate functions, so i can test them separately if needed. 
32 | 33 | def test_delete_question(id: int): 34 | count = asyncio.run(delete_question(id)) 35 | assert count == 1, "Delete count is expected to be 1" 36 | print("Delete count", count) 37 | 38 | def test_insert_delete(): 39 | question_id = test_insert_question() 40 | assert question_id is not None, "The question identifier is none" 41 | # You can also pass id manually to test_delete_question function 42 | test_delete_question(question_id) 43 | 44 | test_insert_delete() 45 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/data_questionnaire_agent/test/provider/__init__.py -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/confidence_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.confidence_schema import ( 2 | ConfidenceDegree, 3 | ConfidenceRating, 4 | ) 5 | 6 | 7 | def create_confidence_rating() -> ConfidenceRating: 8 | return ConfidenceRating( 9 | rating=ConfidenceDegree.low, 10 | reasoning="""Based on the provided information, I only know the main problem of the customer, which is poor data quality. However, I do not have detailed information about the causes of the problem, the technological landscape, or the data governance strategies of the customer. 
Therefore, my confidence in giving advice is low.""", 11 | ) 12 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/consultant_provider.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from consultant_info_generator.model import Company, Consultant, Experience, Skill 4 | 5 | from data_questionnaire_agent.model.consultant_rating import ( 6 | AnalystRating, 7 | ConsultantRating, 8 | ConsultantRatings, 9 | ) 10 | 11 | 12 | def create_simple_consultant() -> Consultant: 13 | company = Company(name="Onepoint Consulting Ltd") 14 | experience1 = Experience( 15 | location="London, UK", 16 | title="Enterprise Architect", 17 | company=company, 18 | start=datetime.datetime(2023, 1, 1), 19 | end=None, 20 | ) 21 | consultant = Consultant( 22 | given_name="John", 23 | surname="Doe", 24 | email="john.doe@gmail.com", 25 | cv="General blabla", 26 | industry_name="IT", 27 | geo_location="London", 28 | linkedin_profile_url="john-doe", 29 | experiences=[experience1], 30 | skills=[Skill(name="Data Science"), Skill(name="Enterprise Architecture")], 31 | ) 32 | return consultant 33 | 34 | 35 | def create_consultant_rating() -> ConsultantRatings: 36 | consultant_rating = ConsultantRating( 37 | analyst_name="Alexander Polev", 38 | analyst_linkedin_url="https://www.linkedin.com/in/alexander-polev-cto", 39 | reasoning="Alexander Polev is an excellent choice, because of this and that.", 40 | rating=AnalystRating.suitable, 41 | ) 42 | return ConsultantRatings(consultant_ratings=[consultant_rating]) 43 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/jwt_token_data_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.jwt_token import JWTTokenData 2 | 3 | 4 | def generate_token_data() -> JWTTokenData: 5 | return 
JWTTokenData( 6 | name="anonymous", email="anonymous@test.com", time_delta_minutes=None 7 | ) 8 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/knowledge_base_provider.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from data_questionnaire_agent.config import cfg 4 | 5 | 6 | def read_file(file_name: str) -> str: 7 | with open(cfg.raw_text_folder / file_name, "r", encoding="utf-8") as f: 8 | return f.read() 9 | 10 | 11 | # provide_data_quality throw an error if the file does not exist 12 | def provide_data_quality() -> str: 13 | # Check if "data quality.txt" exists in the raw_text_folder, if not, check if "AboutRefugees.txt" exists 14 | # return read_file("data quality.txt" if not "refugee" in str(cfg.raw_text_folder) else "AboutRefugees.txt") 15 | 16 | raw_text_folder = Path(cfg.raw_text_folder) 17 | assert raw_text_folder.exists() 18 | if (raw_text_folder / "data quality.txt").exists(): 19 | return read_file("data quality.txt") 20 | else: 21 | return read_file("AboutRefugees.txt") 22 | 23 | 24 | def provide_data_ops() -> str: 25 | # return read_file("dataops.txt" if not "refugee" in str(cfg.raw_text_folder) else "AsylumInAustria.txt") 26 | raw_text_folder = Path(cfg.raw_text_folder) 27 | assert raw_text_folder.exists() 28 | if (cfg.raw_text_folder / "dataops.txt").exists(): 29 | return read_file("dataops.txt") 30 | else: 31 | return read_file("AsylumInAustria.txt") 32 | 33 | 34 | def provide_knowledge_base() -> str: 35 | return f"{provide_data_quality()}\n\n{provide_data_ops()}" 36 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/multipart_provider.py: -------------------------------------------------------------------------------- 1 | from email.mime.multipart import MIMEMultipart 2 | 3 | from data_questionnaire_agent.config import cfg 4 | from 
data_questionnaire_agent.model.mail_data import Email 5 | from data_questionnaire_agent.service.mail_sender import create_attachment_email 6 | 7 | 8 | def create_dummy_email() -> Email: 9 | return Email( 10 | recipient="john.smith@gmail.com", 11 | subject="Test", 12 | html_body="

Testing

", 13 | files=[cfg.project_root / "README.md"], 14 | ) 15 | 16 | 17 | def create_dummy_email_2() -> Email: 18 | return Email( 19 | recipient="gil.fernandes@gmail.com", 20 | subject="Test", 21 | html_body="

Testing

", 22 | files=[cfg.project_root / "README.md"], 23 | ) 24 | 25 | 26 | def create_dummy_multipart() -> MIMEMultipart: 27 | return create_attachment_email(create_dummy_email()) 28 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/ontology_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.ontology_schema import Ontology 2 | 3 | 4 | def create_ontology(): 5 | ontology_json = { 6 | "relationships": [ 7 | { 8 | "source": "Poor data quality", 9 | "relationship": "leads to", 10 | "target": "incorrect insights", 11 | }, 12 | { 13 | "source": "Poor data quality", 14 | "relationship": "leads to", 15 | "target": "poor decision-making", 16 | }, 17 | { 18 | "source": "Customer duplicates", 19 | "relationship": "is a challenge in", 20 | "target": "data quality", 21 | }, 22 | { 23 | "source": "Manual checks", 24 | "relationship": "used for", 25 | "target": "identifying duplicates", 26 | }, 27 | { 28 | "source": "Manual checks", 29 | "relationship": "leads to", 30 | "target": "time-consuming process", 31 | }, 32 | {"source": "Manual checks", "relationship": "leads to", "target": "errors"}, 33 | { 34 | "source": "CRM systems", 35 | "relationship": "contributes to", 36 | "target": "duplicate customer records", 37 | }, 38 | { 39 | "source": "E-commerce platforms", 40 | "relationship": "contributes to", 41 | "target": "duplicate customer records", 42 | }, 43 | { 44 | "source": "Customer service databases", 45 | "relationship": "contributes to", 46 | "target": "duplicate customer records", 47 | }, 48 | { 49 | "source": "Different systems", 50 | "relationship": "have", 51 | "target": "varying data formats and standards", 52 | }, 53 | { 54 | "source": "Automated Deduplication Tools", 55 | "relationship": "improves", 56 | "target": "data quality", 57 | }, 58 | { 59 | "source": "Automated Deduplication Tools", 60 | "relationship": 
"improves", 61 | "target": "operational efficiency", 62 | }, 63 | { 64 | "source": "Standardised Data Formats and Naming Conventions", 65 | "relationship": "reduces", 66 | "target": "duplicates", 67 | }, 68 | { 69 | "source": "Data Governance Policies", 70 | "relationship": "manages", 71 | "target": "data quality", 72 | }, 73 | { 74 | "source": "Data Profiling and Cleansing Tools", 75 | "relationship": "identifies and corrects", 76 | "target": "inconsistencies, inaccuracies, and duplicates", 77 | }, 78 | { 79 | "source": "Data Quality Metrics", 80 | "relationship": "detects and addresses", 81 | "target": "data quality issues", 82 | }, 83 | { 84 | "source": "Manual Processes", 85 | "relationship": "leads to", 86 | "target": "data quality problems", 87 | }, 88 | { 89 | "source": "Ignoring Data Governance", 90 | "relationship": "results in", 91 | "target": "poor data quality", 92 | }, 93 | { 94 | "source": "Overlooking Data Standardisation", 95 | "relationship": "leads to", 96 | "target": "data quality issues", 97 | }, 98 | { 99 | "source": "Automating deduplication and standardising data formats", 100 | "relationship": "leads to", 101 | "target": "improved operational efficiency", 102 | }, 103 | { 104 | "source": "High-quality, consistent data", 105 | "relationship": "enables", 106 | "target": "better-informed decision-making", 107 | }, 108 | { 109 | "source": "Accurate and up-to-date customer records", 110 | "relationship": "leads to", 111 | "target": "increased customer satisfaction", 112 | }, 113 | ] 114 | } 115 | ontology = Ontology.parse_obj(ontology_json) 116 | return ontology 117 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/question_answer_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.application_schema import QuestionAnswer 2 | 3 | 4 | def create_question_answer_with_possible_answers() -> 
QuestionAnswer: 5 | return QuestionAnswer( 6 | id=None, 7 | question="What is the meaning of life?", 8 | answer="The meaning of life is 42", 9 | clarification=[], 10 | possible_answers=["The meaning of life is 43", "The meaning of life is 44"], 11 | ) 12 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/question_answers_provider.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | from data_questionnaire_agent.config import cfg 5 | 6 | 7 | def questionnaire_str_adapter(questions: List[str], answers: List[str]) -> str: 8 | return "\n\n".join([f"{q}\n{a}" for q, a in zip(questions, answers)]) 9 | 10 | 11 | def provide_data_silo_questionnaire() -> str: 12 | questions = [ 13 | "Which area of your data ecosystem are you most concerned about?", 14 | "What steps have you taken so far to address the issue of data silos in your organization?", 15 | "How are you currently sharing data between different departments, lines of business and subsidiaries in your organization?", 16 | "Considering the challenges you've faced with data silos and the partial success of your service-driven architecture, have you considered implementing a Data Mesh architecture to decentralize data ownership and accountability, thereby reducing data silos?", 17 | "You mentioned that not all departments have bought into the idea of using REST services for data sharing, particularly the billing department. What are the specific concerns or challenges they have raised, and how might these be addressed to improve data integration across your organization?", 18 | ] 19 | answers = [ 20 | "Data silos", 21 | "We have been trying to create a service driven architecture to share data across systems using Mulesoft.", 22 | "We have a series of well documented REST services that allow sharing data between departments. 
However not all departments have bought into this, specially the billing department.", 23 | "Yes, but we are facing political issues with some departments not wanting to share their data.", 24 | "Billing data is related to personal data and money flows and should therefore only be accessed by the department itself or selected controllers. This is the main concern.", 25 | ] 26 | return questionnaire_str_adapter(questions, answers) 27 | 28 | 29 | def provide_missing_documents_questionnaire() -> str: 30 | questions = [ 31 | "What challenges are you currently facing as a refugee?", 32 | "Have you reported the loss of your documents to the local authorities?", 33 | "Have you obtained the confirmation of the theft report from the local police, which is required to apply for the reissue of your ID card at the passport authority center?", 34 | "Do you need assistance in obtaining a replacement for your lost identity card?", 35 | ] 36 | answers = [ 37 | "I lost my documents", 38 | "Yes, I did.", 39 | "Yes, I have obtained it.", 40 | "Yes, I need help with the replacement process.", 41 | ] 42 | return questionnaire_str_adapter(questions, answers) 43 | 44 | 45 | def provide_dummy_questionnaire() -> str: 46 | raw_text_folder = Path(cfg.raw_text_folder) 47 | if (raw_text_folder / "data quality.txt").exists(): 48 | return provide_data_silo_questionnaire() 49 | else: 50 | return provide_missing_documents_questionnaire() 51 | 52 | 53 | def provide_incomplete_questionnaire() -> str: 54 | questions = ["Which area of your data ecosystem are you most concerned about?"] 55 | answers = ["Data quality"] 56 | return questionnaire_str_adapter(questions, answers) 57 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/questionnaire_status_provider.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from 
data_questionnaire_agent.model.questionnaire_status import QuestionnaireStatus 4 | 5 | 6 | def create_simple() -> QuestionnaireStatus: 7 | return QuestionnaireStatus( 8 | session_id="12312231231", 9 | question="What is the meaning of this?", 10 | answer="42", 11 | final_report=False, 12 | created_at=datetime.now(), 13 | updated_at=datetime.now(), 14 | ) 15 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/session_configuration_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.session_configuration import ( 2 | DEFAULT_SESSION_STEPS, 3 | SESSION_STEPS_CONFIG_KEY, 4 | SessionConfigurationEntry, 5 | ) 6 | 7 | 8 | def create_session_configuration(): 9 | return SessionConfigurationEntry( 10 | session_id="test_id", 11 | config_key=SESSION_STEPS_CONFIG_KEY, 12 | config_value=str(DEFAULT_SESSION_STEPS), 13 | ) 14 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/provider/session_properties_provider.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.session_configuration import ( 2 | ChatType, 3 | SessionProperties, 4 | ) 5 | 6 | 7 | def create_session_properties() -> SessionProperties: 8 | return SessionProperties( 9 | session_steps=6, session_language="en", chat_type=ChatType.DIVERGING 10 | ) 11 | 12 | 13 | def create_session_properties_to_the_point() -> SessionProperties: 14 | return SessionProperties( 15 | session_steps=6, session_language="en", chat_type=ChatType.TO_THE_POINT 16 | ) 17 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/question_answer_test.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.application_schema import 
QuestionAnswer 2 | 3 | 4 | def test_validate_simple_question_answer(): 5 | question_answer = QuestionAnswer( 6 | id=None, question="What is the meaning of life?", answer="42", clarification="" 7 | ) 8 | assert question_answer.possible_answers is not None 9 | assert len(question_answer.possible_answers) == 0 10 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/questionnaire_server_test.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from aiohttp import web 5 | 6 | from data_questionnaire_agent.model.jwt_token import JWTTokenData 7 | from data_questionnaire_agent.server.questionnaire_server import routes 8 | 9 | CORS_HEADERS = {"Access-Control-Allow-Origin": "*"} 10 | 11 | 12 | @pytest.fixture 13 | def client(aiohttp_client): 14 | app = web.Application() 15 | app.add_routes(routes) 16 | return aiohttp_client(app) 17 | 18 | 19 | @pytest.mark.asyncio 20 | @patch("data_questionnaire_agent.server.questionnaire_server.generate_jwt_token") 21 | async def test_generate_jwt_token_success(mock_generate_token, client): 22 | # Mock the generate_token function 23 | mock_generate_token.return_value = AsyncMock( 24 | return_value=JWTTokenData( 25 | name="John Doe", email="john@example.com", time_delta_minutes=30 26 | ) 27 | ) 28 | 29 | # Make a POST request with valid JSON 30 | json_payload = { 31 | "name": "John Doe", 32 | "email": "john@example.com", 33 | "time_delta_minutes": 30, 34 | } 35 | c = await client 36 | resp = await c.post("/gen_jwt_token", json=json_payload) 37 | 38 | # Validate response 39 | assert resp.status == 200 40 | body = await resp.json() 41 | assert body["token"] is not None 42 | assert body["email"] == "john@example.com" 43 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/questionnaire_status_test.py: 
-------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.questionnaire_status import QuestionnaireStatus 2 | from data_questionnaire_agent.test.provider.questionnaire_status_provider import ( 3 | create_simple, 4 | ) 5 | 6 | 7 | def test_has_advice_questionnaire(): 8 | qs: QuestionnaireStatus = create_simple() 9 | assert qs is not None 10 | difference = qs.updated_at - qs.created_at 11 | assert difference.seconds < 2 12 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/report_aggregation_main_service_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Dict 3 | 4 | import pandas as pd 5 | 6 | from data_questionnaire_agent.config import cfg 7 | from data_questionnaire_agent.model.report_aggregation_schema import ( 8 | ReportDocumentClassificationContainer, 9 | ReportItemCount, 10 | ) 11 | from data_questionnaire_agent.service.report_aggregation_main_service import ( 12 | convert_to_dataframe, 13 | create_multiple_excel, 14 | group_reports, 15 | prompt_factory_document_classifier_prompt, 16 | prompt_factory_keyword_extraction_prompt, 17 | ) 18 | 19 | 20 | def test_prompt_factory_keyword_extraction_prompt(): 21 | template = prompt_factory_keyword_extraction_prompt(language="en") 22 | assert template is not None, "Template should not be none" 23 | 24 | 25 | def test_prompt_factory_document_classifier_prompt(): 26 | template = prompt_factory_document_classifier_prompt(language="en") 27 | assert template is not None, "Template should not be none" 28 | 29 | 30 | def test_group_reports(): 31 | sample_file = ( 32 | cfg.project_root 33 | / "data/report_doc_classification_01JDYZ52AD3KYM2Q7VF5NHV3ER.json" 34 | ) 35 | assert (sample_file).exists() 36 | document_classification = ReportDocumentClassificationContainer.model_validate_json( 37 | sample_file.read_text() 
38 | ) 39 | report_item_count = group_reports(document_classification) 40 | assert report_item_count is not None 41 | assert report_item_count.problem_count is not None 42 | assert report_item_count.problem_area_count is not None 43 | assert report_item_count.recommendation_count is not None 44 | 45 | 46 | def generate_df_dict() -> Dict[str, pd.DataFrame]: 47 | sample_file = ( 48 | cfg.project_root / "data/report_item_count_01JDZBAA5T6Z0MHZ10KY0HF938.json" 49 | ) 50 | assert (sample_file).exists() 51 | report_item_count = ReportItemCount.model_validate_json(sample_file.read_text()) 52 | return convert_to_dataframe(report_item_count) 53 | 54 | 55 | def test_convert_to_dataframe(): 56 | df_dict = generate_df_dict() 57 | assert df_dict is not None 58 | assert df_dict["problem_df"] is not None 59 | assert len(df_dict["problem_df"]["df"]) > 0 60 | assert df_dict["problem_area_df"] is not None 61 | assert len(df_dict["problem_area_df"]["df"]) > 0 62 | 63 | 64 | def test_create_multiple_excel(): 65 | df_dict = generate_df_dict() 66 | assert df_dict is not None 67 | excel_path = Path("./report_aggregation_counts.xlsx") 68 | create_multiple_excel(df_dict, excel_path) 69 | assert excel_path.exists(), "Cannot find multiple excel report" 70 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/report_aggregation_service_test.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import List 3 | 4 | from data_questionnaire_agent.config import cfg 5 | from data_questionnaire_agent.model.questionnaire_status import QuestionnaireStatus 6 | from data_questionnaire_agent.service.report_aggregation_service import convert_to_str 7 | from data_questionnaire_agent.service.similarity_search import num_tokens_from_string 8 | 9 | 10 | def test_convert_to_str(): 11 | questionnaire_pkl = cfg.project_root / "data/questionnaire.pkl" 12 | assert questionnaire_pkl.exists() 
13 | with open(questionnaire_pkl, "rb") as f: 14 | questionnaire_data = pickle.load(f) 15 | questionnaire_statuses: List[QuestionnaireStatus] = convert_to_str( 16 | questionnaire_data 17 | ) 18 | assert len(questionnaire_statuses) > 0 19 | print(num_tokens_from_string("\n".join(questionnaire_statuses))) 20 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/report_aggregation_summarization_service_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from data_questionnaire_agent.config import cfg 4 | from data_questionnaire_agent.service.report_aggregation_summarization_service import ( 5 | aexecute_summarization_batch, 6 | create_summarization_call, 7 | prompt_factory_summarization_prompt, 8 | ) 9 | 10 | 11 | def test_prompt_factory_summarization_prompt(): 12 | chat_template = prompt_factory_summarization_prompt("en") 13 | assert chat_template is not None, "Chat template cannot be none" 14 | res = chat_template.format(full_questionnaire="Bla") 15 | assert res is not None, "No result from formatting" 16 | 17 | 18 | def test_create_summarization_call(): 19 | runnable = create_summarization_call("en") 20 | assert runnable is not None, "Runnable is none" 21 | 22 | 23 | def test_aexecute_summarization_batch(): 24 | files = [ 25 | "data/sample_questionnaire1.md", 26 | "data/sample_questionnaire2.md", 27 | "data/sample_questionnaire3.md", 28 | ] 29 | texts = [(cfg.project_root / f).read_text(encoding="utf-8") for f in files] 30 | summaries = asyncio.run(aexecute_summarization_batch(texts)) 31 | assert summaries is not None 32 | assert len(summaries) == len(files) 33 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/report_enhancement_service_test.py: -------------------------------------------------------------------------------- 1 | from 
data_questionnaire_agent.service.report_enhancement_service import ( 2 | replace_bold_markdown, 3 | replace_markdown_bold_with_links, 4 | ) 5 | 6 | 7 | def provider_string(): 8 | return "This is some **bold** content with some **important** message as you can imagine." 9 | 10 | 11 | def test_replace_bold_markdown(): 12 | test_str = provider_string() 13 | res = replace_bold_markdown(test_str) 14 | assert ( 15 | res 16 | == "This is some bold content with some important message as you can imagine." 17 | ), f"Unexpected output: {res}" 18 | 19 | 20 | def test_replace_markdown_bold_with_links(): 21 | test_str = provider_string() 22 | res = replace_markdown_bold_with_links(test_str) 23 | assert ( 24 | res 25 | == "This is some **[bold](https://www.google.com/search?q=bold)** content with some **[important](https://www.google.com/search?q=important)** message as you can imagine." 26 | ), f"Unexpected output: {res}" 27 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/server_message_test.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.server_model import ServerMessage 2 | 3 | 4 | def test_simple_server_message(): 5 | question = "test" 6 | session_id = "1231231231231" 7 | server_message = ServerMessage( 8 | question=question, session_id=session_id, clarification=None, question_id=None 9 | ) 10 | server_message_str = server_message.json() 11 | assert question in server_message_str 12 | assert session_id in server_message_str 13 | -------------------------------------------------------------------------------- /data_questionnaire_agent/test/session_configuration_test.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.session_configuration import ( 2 | SESSION_STEPS_CONFIG_KEY, 3 | SESSION_STEPS_LANGUAGE_KEY, 4 | ChatType, 5 | chat_type_factory, 6 | 
from data_questionnaire_agent.model.session_configuration import (
    SESSION_STEPS_CONFIG_KEY,
    SESSION_STEPS_LANGUAGE_KEY,
    ChatType,
    chat_type_factory,
    create_session_configurations,
)
from data_questionnaire_agent.test.provider.session_configuration_provider import (
    create_session_configuration,
)
from data_questionnaire_agent.test.provider.session_properties_provider import (
    create_session_properties,
)


def test_session_configuration():
    # The provider must produce a configuration keyed on the session steps key.
    configuration = create_session_configuration()
    assert configuration.config_key == SESSION_STEPS_CONFIG_KEY


def test_chat_type_factory():
    # Known values round-trip; unknown values fall back to DIVERGING.
    cases = [
        (ChatType.DIVERGING.value, ChatType.DIVERGING),
        (ChatType.TO_THE_POINT.value, ChatType.TO_THE_POINT),
        ("bla", ChatType.DIVERGING),
    ]
    for raw_value, expected in cases:
        assert chat_type_factory(raw_value) == expected


def test_create_session_configurations():
    # Four entries are expected, led by the steps and language keys.
    entries = create_session_configurations(
        "12345", create_session_properties(), "hi there"
    )
    assert len(entries) == 4
    assert entries[0].config_key == SESSION_STEPS_CONFIG_KEY
    assert entries[1].config_key == SESSION_STEPS_LANGUAGE_KEY
from pathlib import Path

import tomli

from data_questionnaire_agent.config import cfg
from data_questionnaire_agent.log_init import logger


def read_toml(file: Path) -> dict:
    """Parse a TOML file and return its contents as a dict."""
    with open(file, "rb") as f:
        return tomli.load(f)


DEFAULT_LANGUAGE = "en"
SUPPORTED_LANGUAGES = ["en", "de"]


def read_prompts_toml(language: str = DEFAULT_LANGUAGE) -> dict:
    """Read ``prompts_<language>.toml`` from the project root.

    Unsupported languages fall back to English (``DEFAULT_LANGUAGE``).
    """
    if language not in SUPPORTED_LANGUAGES:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(
            f"Warning: language {language} not supported. Using default language."
        )
        language = DEFAULT_LANGUAGE

    # Informational message, not a warning.
    logger.info(f"Reading prompts from prompts_{language}.toml")
    return read_toml(cfg.project_root / f"prompts_{language}.toml")


# Eagerly cache the prompts for every supported language at import time,
# so lookups in get_prompts are dictionary reads only.
prompts_language = {}
for lang in SUPPORTED_LANGUAGES:
    prompts_language[lang] = read_prompts_toml(lang)


def get_prompts(language: str = DEFAULT_LANGUAGE) -> dict:
    """Return the cached prompts for *language*, defaulting to English."""
    if language in prompts_language:
        return prompts_language[language]
    return prompts_language[DEFAULT_LANGUAGE]


def get_prompts_object(language: str = DEFAULT_LANGUAGE) -> object:
    """Return the prompts wrapped so top-level sections read as attributes."""
    return objectview(get_prompts(language))


class objectview(object):
    """Thin wrapper exposing a dict's keys as object attributes."""

    def __init__(self, d):
        self.__dict__ = d
68 | def farsi_prompt_test(): 69 | test_prompts("fa") 70 | 71 | english_prompt_test() 72 | farsi_prompt_test() 73 | 74 | prompts_object = get_prompts("fa") 75 | print(type(prompts_object)) 76 | print(prompts_object) 77 | -------------------------------------------------------------------------------- /data_questionnaire_agent/translation.py: -------------------------------------------------------------------------------- 1 | import i18n 2 | from data_questionnaire_agent.config import cfg 3 | 4 | i18n.load_path.append(cfg.translation_path) 5 | 6 | 7 | def t(key: str, **kwargs): 8 | return i18n.t(f"messages.{key}", **kwargs) 9 | 10 | 11 | if __name__ == "__main__": 12 | print( 13 | t( 14 | "A big thank you for completing a session with", 15 | name=cfg.product_title, 16 | locale="de", 17 | ) 18 | ) 19 | print(t("db_insert_failed", locale="en")) 20 | -------------------------------------------------------------------------------- /data_questionnaire_agent/ui/advice_processor.py: -------------------------------------------------------------------------------- 1 | from langchain_core.runnables.base import RunnableSequence 2 | from tenacity import AsyncRetrying 3 | 4 | from data_questionnaire_agent.config import cfg 5 | from data_questionnaire_agent.log_init import logger 6 | from data_questionnaire_agent.model.application_schema import Questionnaire 7 | from data_questionnaire_agent.model.openai_schema import ConditionalAdvice 8 | from data_questionnaire_agent.service.advice_service import ( 9 | chain_factory_advice, 10 | prepare_conditional_advice, 11 | ) 12 | from data_questionnaire_agent.service.knowledge_base_service import fetch_context 13 | 14 | 15 | async def process_advice( 16 | questionnaire: Questionnaire, advice_chain: RunnableSequence 17 | ) -> ConditionalAdvice: 18 | questionnaire_str = str(questionnaire) 19 | 20 | knowledge_base = await fetch_context(questionnaire_str) 21 | 22 | advice_input = prepare_conditional_advice( 23 | knowledge_base=knowledge_base, 
import chainlit as cl
from chainlit.input_widget import Slider, TextInput

# toml_support exposes get_prompts(); it does not export a `prompts` object,
# so the previous `from ... import prompts` failed at import time.
from data_questionnaire_agent.toml_support import get_prompts

MINIMUM_NUMBER_OF_QUESTIONS = "Minimum number of questions"
QUESTION_PER_BATCH = "Questions per batch"
INITIAL_QUESTION = "Initial question"


async def create_chat_settings() -> cl.ChatSettings:
    """Build and send the Chainlit settings widget for the questionnaire.

    Returns the settings object produced by ``cl.ChatSettings(...).send()``.
    Defaults come from the English prompts configuration.
    """
    prompts = get_prompts()
    questions_per_batch = prompts["general_settings"]["questions_per_batch"]
    minimum_number_of_questions = prompts["general_settings"][
        "minimum_number_of_questions"
    ]
    initial_question = prompts["questionnaire"]["initial"]["question"]
    settings = await cl.ChatSettings(
        [
            TextInput(
                id=INITIAL_QUESTION, label=INITIAL_QUESTION, initial=initial_question
            ),
            Slider(
                id=MINIMUM_NUMBER_OF_QUESTIONS,
                label="Minimum number of questions",
                initial=minimum_number_of_questions,
                min=0,
                max=5,
                step=1,
            ),
            Slider(
                id=QUESTION_PER_BATCH,
                label="Number of question per batch",
                initial=questions_per_batch,
                min=0,
                max=5,
                step=1,
            ),
        ]
    ).send()
    return settings
29 | response_tags: ResponseTags = await has_questions_chain.arun( 30 | prepare_sentiment_input(content) 31 | ) 32 | if len(response_tags.extracted_questions) > 0: 33 | if response_tags.questions_related_to_data_analytics: 34 | for clarification_question in response_tags.extracted_questions: 35 | clarification = await clarification_agent.arun( 36 | clarification_question 37 | ) 38 | if use_chainlit: 39 | await cl.Message(content=clarification).send() 40 | else: 41 | logger.info(clarification) 42 | else: 43 | message = f"These questions: {response_tags.extracted_questions} are unrelated to our main topic." 44 | if use_chainlit: 45 | await cl.Message(content=message).send() 46 | logger.warn(message) 47 | 48 | 49 | if __name__ == "__main__": 50 | import asyncio 51 | 52 | from data_questionnaire_agent.service.tagging_service import ( 53 | prepare_sentiment_input, 54 | sentiment_chain_factory, 55 | ) 56 | from data_questionnaire_agent.test.provider.questionnaire_provider import ( 57 | create_questionnaire_initial_question, 58 | ) 59 | 60 | def deal_with_unrelated_questions(): 61 | questionnaire: Questionnaire = create_questionnaire_initial_question( 62 | "Which is the capital of India" 63 | ) 64 | has_questions_chain: LLMChain = sentiment_chain_factory() 65 | clarification_agent: AgentExecutor = create_clarification_agent() 66 | asyncio.run( 67 | process_clarifications_chainlit( 68 | questionnaire, 1, has_questions_chain, clarification_agent, False 69 | ) 70 | ) 71 | 72 | def deal_with_related_questions(): 73 | questionnaire: Questionnaire = create_questionnaire_initial_question( 74 | "What do you mean by data quality?" 
def display_image(image_path: str, alt: str, title: str):
    """Build a markdown image tag for a file under /public/images.

    :param image_path: file name relative to the public images folder.
    :param alt: alternative text shown when the image cannot load.
    :param title: tooltip title for the image.
    :return: the markdown string ``![alt](url "title")``.
    """
    public_url = f"/public/images/{image_path}"
    return f'![{alt}]({public_url} "{title}")'
If so, please enter your email address in the chat.", 19 | timeout=cfg.ui_timeout, 20 | author=AVATAR["CHATBOT"], 21 | ).send() 22 | has_replied = False 23 | if response and "content" in response: 24 | has_replied = True 25 | response_content = response["content"] 26 | if validate_address(response_content): 27 | feedback_email = "feedback@onepointltd.ai" 28 | logger.info("Sending email to %s", response_content) 29 | await asyncify(send_email)( 30 | "Dear customer", 31 | response_content, 32 | mail_config.mail_subject, 33 | create_mail_body(questionnaire, advices, feedback_email), 34 | ) 35 | await cl.Message( 36 | content="Thank you for submitting the query. We really appreciate that you have taken time to do this.", 37 | author=AVATAR["CHATBOT"], 38 | ).send() 39 | else: 40 | logger.warn("%s is not a valid email", response_content) 41 | await cl.ErrorMessage( 42 | content=f"Sorry, '{response_content}' does not seem to be an email address", 43 | author=AVATAR["CHATBOT"], 44 | ).send() 45 | 46 | extra_message = "" if has_replied else "We did not hear from you... " 47 | await cl.Message( 48 | content=f"{extra_message}The session is complete. 
class SessionNumberContainer:
    """Mutable counter used to number user-session runs.

    Starts at zero; the first call to :meth:`increment_and_get` returns 1.
    """

    def __init__(self) -> None:
        # `counter` is read by callers via current()/repr, so keep the name.
        self.counter = 0

    def current(self):
        """Return the counter value without modifying it."""
        return self.counter

    def increment_and_get(self):
        """Advance the counter by one and return the new value."""
        self.counter = self.counter + 1
        return self.counter

    def __repr__(self) -> str:
        # Render as the bare number, e.g. "3".
        return f"{self.counter}"
-------------------------------------------------------------------------------- 1 | import chainlit as cl 2 | from chainlit import context 3 | 4 | from data_questionnaire_agent.config import cfg 5 | from data_questionnaire_agent.log_init import logger 6 | from data_questionnaire_agent.ui.chat_settings_factory import create_chat_settings 7 | from data_questionnaire_agent.ui.model.session_number_container import ( 8 | SessionNumberContainer, 9 | ) 10 | 11 | 12 | @cl.on_chat_start 13 | async def init(): 14 | logger.info("Init") 15 | cl.user_session.set("session_counter", SessionNumberContainer()) 16 | settings = await create_chat_settings() 17 | await run_agent(settings, False) 18 | 19 | 20 | @cl.on_settings_update 21 | async def on_settings_update(settings: cl.ChatSettings): 22 | await run_agent(settings, True) 23 | 24 | 25 | async def run_agent(settings: cl.ChatSettings, from_settings: bool): 26 | session_counter = cl.user_session.get("session_counter") 27 | my_counter = session_counter.increment_and_get() 28 | local_context = context.get_context() 29 | logger.info("start id: %s", local_context.session.id) 30 | await cl.Message(content=f"Start {local_context.session.id}").send() 31 | response = None 32 | while response is None: 33 | latest_counter = cl.user_session.get("session_counter") 34 | response = await cl.AskUserMessage( 35 | content=f"Please reply something {from_settings} {my_counter} {latest_counter} ...", 36 | timeout=cfg.ui_timeout, 37 | ).send() 38 | 39 | 40 | @cl.on_chat_end 41 | async def end(): 42 | local_context = context.get_context() 43 | logger.info("end id: %s", local_context.session.id) 44 | cl.user_session.set("session_started", False) 45 | -------------------------------------------------------------------------------- /data_questionnaire_agent/utils/advice_parser.py: -------------------------------------------------------------------------------- 1 | from data_questionnaire_agent.model.openai_schema import ConditionalAdvice 2 | 3 | 4 | def 
def parse_advice(advice: str) -> ConditionalAdvice:
    """Parse a two-section markdown advice text into a ConditionalAdvice.

    The first ``# `` heading opens the "advice" section; any subsequent
    ``# `` heading opens the "avoid" section. List items start with ``- ``
    and end at a blank line (or at the end of the text).

    Fixes over the previous version:
    - no IndexError when the text ends with ``#`` or ``-`` (lookahead guarded);
    - a trailing item not followed by a blank line is no longer dropped;
    - missing sections yield empty lists instead of a KeyError;
    - removed the unreachable ``return None`` after the return statement.
    """
    sections = {}
    current_section = None
    section_count = 0
    consume_item = False
    current_item = ""
    last_index = len(advice) - 1
    for i, c in enumerate(advice):
        # Guarded one-character lookahead; empty at the end of the text.
        next_char = advice[i + 1] if i < last_index else ""
        if c == "#" and next_char == " ":
            section_count += 1
            current_section = "advice" if section_count == 1 else "avoid"
            sections[current_section] = []
            current_item = ""
        elif c == "-" and next_char == " ":
            consume_item = True
            current_item = ""
        elif c == "\n" and next_char == "\n" and len(current_item) > 0:
            # Blank line terminates the current list item.
            consume_item = False
            sections[current_section].append(current_item.strip())
            current_item = ""
        elif consume_item:
            current_item += c
    # Flush a final item that ran to the end of the text without a blank line.
    if consume_item and current_section is not None and current_item.strip():
        sections[current_section].append(current_item.strip())
    return ConditionalAdvice(
        has_advice=True,
        advices=sections.get("advice", []),
        what_you_should_avoid=sections.get("avoid", []),
    )
This would provide clear guidelines on data management practices within your organisation, promoting uniformity and improving overall data quality. 50 | 51 | - Regular monitoring of data quality metrics can help you identify and address issues promptly. This could involve setting up alerts for key metrics or datasets and implementing a systematic process for resolving identified issues. 52 | 53 | - Lastly, consider investing in data quality training for your employees. This would equip them with the necessary skills to handle data responsibly, thereby contributing to the overall quality of your data. 54 | 55 | # What you should avoid ... 56 | 57 | - Avoid relying solely on manual processes for data quality management. While human oversight is important, manual processes can be time-consuming and prone to errors. Leveraging automated tools, where appropriate, can enhance efficiency and accuracy. 58 | 59 | - Avoid neglecting the importance of data governance. Without clear policies and procedures in place, data management can become chaotic and inconsistent, leading to poor data quality. 60 | 61 | - Avoid ignoring the potential of dark data. While it may seem challenging to integrate dark data into your data ecosystem, it can provide valuable insights when properly managed and analysed. 
def write_log(file: Optional[Path] = None):
    """Dump the tracker activity log as CSV.

    :param file: destination path; when ``None`` the CSV goes to stdout.

    Fixes over the previous version: stdout is no longer closed (it is owned
    by the interpreter), and an opened file is always closed via ``with``,
    even if writing raises. ``newline=""`` follows the csv module's
    requirement for text-mode files so line endings are not doubled.
    """
    logger.info("Database location: %s", ONEPOINT_SQL_LITE_DB)
    logger.info("")

    def _dump(output) -> None:
        # Header row first, then one CSV row per activity-log record.
        logwriter = csv.writer(output, delimiter=",", lineterminator="\n")
        logwriter.writerow([f["name"] for f in TRACKING_FIELD_PROPERTIES])
        for row in list_activity_log():
            logwriter.writerow(row)

    if file is None:
        _dump(sys.stdout)
    else:
        with open(file, "w", newline="") as output:
            _dump(output)
@app.get("/onepoint/logs")
async def get_report(request: Request):
    """Return the tracker activity log as a downloadable CSV file.

    Access requires a ``security_key`` query parameter matching the
    configured ``tracker_db_logs_password``; otherwise a 403 is raised.
    """
    import secrets  # local import: constant-time secret comparison

    params = request.query_params
    security_key = params.get(PARAM_SECURITY_KEY)

    # compare_digest avoids leaking key prefixes through response timing;
    # a missing parameter short-circuits (compare_digest rejects None).
    authorized = security_key is not None and secrets.compare_digest(
        str(security_key), str(cfg.tracker_db_logs_password)
    )
    if not authorized:
        raise HTTPException(
            status_code=403,
            detail=f"Please add the {PARAM_SECURITY_KEY} query parameter with the correct security key",
        )

    # Regenerate the CSV snapshot on every request.
    csv_file = cfg.project_root / "report.csv"
    write_log(csv_file)

    media_type, _ = mimetypes.guess_type(csv_file)

    return FileResponse(csv_file, media_type=media_type, filename=csv_file.name)
/docs/low_level_protocol/clarification.json: -------------------------------------------------------------------------------- 1 | { 2 | "messages": [ 3 | { 4 | "content": "System: I am a helpful assistant. I explain the meaning of questions step by step. I highlight the main topics in bold markdown. I use British English.\\nHuman: Please explain the following question in a way that a layman can understand it:\\n\\nWhat specific aspects of data quality do you find most challenging to address in your organisation's data management processes?\\n", 5 | "role": "user" 6 | } 7 | ], 8 | "model": "gpt-4o-mini", 9 | "stream": true, 10 | "n": 1, 11 | "temperature": 0.5, 12 | "logprobs": false 13 | } -------------------------------------------------------------------------------- /docs/prompt_examples/clarification.txt: -------------------------------------------------------------------------------- 1 | System: I am a helpful assistant. I explain the meaning of questions step by step. I highlight the main topics in bold markdown. I use British English. 2 | Human: Please explain the following question in a way that a layman can understand it: 3 | 4 | What specific technical limitations or gaps in expertise do you believe are hindering your ability to manage and integrate streaming data effectively? 5 | -------------------------------------------------------------------------------- /docs/prompt_examples/confidence_rating.txt: -------------------------------------------------------------------------------- 1 | SYSTEM 2 | You are an expert at giving data governance advice based on a question answer dialogue with a customer 3 | 4 | HUMAN 5 | Determine how confident you are in terms giving advice to a customer based on a sequence of questions and answers that you can find here: 6 | 7 | ``` 8 | Which area of your data ecosystem are you most concerned about? 9 | Data silos - Data trapped in departmental silos can be inaccessible to other parts. 
10 | 11 | What types of data (e.g., structured, unstructured, streaming) are most commonly trapped in your departmental silos? 12 | - Streaming data such as social media feeds and IoT sensor data. 13 | ``` 14 | 15 | In order to be confident you should know about the difficulties of the customer. You should know about the following: 16 | * the customer's main problem 17 | * have some detailed information about his problem. Just having a high-level sentence on the problem of the customer like e.g: "Data Quality" is not enough to be confident. 18 | * you should have also knowledge about what is causing the problem. 19 | * ideally you know more than one cause behind the main problem of the customer. 20 | * you also need some more background information about the technologies used by the customer to be confident. 21 | * you should also know about the data governance strategies of the customer to be able to have a high degree of confidence. 22 | 23 | Please use the following classifications to this question about the degree of confidence with which you can give advice: 24 | 25 | - "outstanding" 26 | - "high" 27 | - "medium" 28 | - "mediocre" 29 | - "low" 30 | 31 | For example, you should report an "outstanding" confidence degree when: 32 | You know the main problem of the customer and the causes well. You also know about data governance aspects related to the customer's organisation. 33 | And you also know the technological landscape of the customer very well. 34 | 35 | For example, you should report a "high" confidence degree when: 36 | You know the main problem of the customer and the causes well. You also know about data governance aspects related to the customer's organisation. 37 | The only thing missing is more background information about the technological landscape of the customer. 38 | 39 | For example, you should report a "medium" confidence degree when: 40 | You know the main problem of the customer and the causes well. 
41 | You miss the background information about the technological landscape of the customer and also about data governance in the customer's company. 42 | 43 | For example, you should report a "mediocre" confidence degree when: 44 | You know the main problem of the customer and the cause not well. The information about the causes is very limited 45 | You miss the background information about the technological landscape of the customer and also about data governance in the customer's company. 46 | 47 | For example, you should report a "low" confidence degree when: 48 | You know the main problem of the customer and nothing else. Or you do not even know about the main problem of the user. 49 | 50 | HUMAN 51 | Tip: Make sure to answer in the correct format 52 | 53 | HUMAN 54 | Tip: Please make sure that you write all your answers in British English. -------------------------------------------------------------------------------- /docs/prompt_examples/ontology_creation.txt: -------------------------------------------------------------------------------- 1 | SYSTEM 2 | You are an expert at creating ontologies 3 | 4 | HUMAN 5 | # Ontology from text: 6 | 7 | Extract an ontology. Do not draw it, but create a markdown table of relations with 3 columns in this order: source, target, relation name. And also create a list with 2 columns: the first contains the term name and the second column contains the term definition. 8 | the relations should only be mapped to source, target, relations 9 | Use this text as a basis: 10 | 11 | ``` 12 | Which area of your data ecosystem are you most concerned about? 13 | Lack of skilled personnel - Missing skills in data science, analytics, AI and ML can impede the effective use of data. 14 | 15 | What specific challenges are you facing due to the lack of skilled personnel in data science, analytics, AI, and ML? 16 | - We struggle to build and deploy machine learning models effectively. 
17 | - Our data integration processes are inefficient and error-prone. 18 | - We have difficulty maintaining data quality and governance standards. 19 | 20 | What are the primary data sources you are currently integrating, and what challenges do you face with these sources? 21 | - We are working with a mix of structured and unstructured data from IoT devices and social media platforms, and we encounter difficulties in data processing and storage. 22 | - Our primary data sources include external APIs and third-party data providers, and we struggle with data quality and reliability issues. 23 | 24 | What specific issues do you encounter with data processing and storage when working with structured and unstructured data from IoT devices and social media platforms? 25 | - Data security and privacy concerns make it difficult to store and process data efficiently. 26 | - We lack the tools to effectively analyse and derive insights from unstructured data. 27 | 28 | What specific data quality and governance standards are you finding difficult to maintain, and how are these impacting your operations? 29 | - We face difficulties in implementing data privacy and security measures, which affects our compliance with regulations. 30 | 31 | What specific tools or technologies are you currently using for data integration, and what limitations or challenges have you encountered with them? 32 | - We are currently using custom scripts for data integration, which are error-prone and difficult to maintain. 33 | - We use Microsoft Azure Data Factory, but we struggle with real-time data processing and integration. 34 | 35 | What are the main pain points you experience with your current data integration processes, and how do they impact your overall data strategy? 36 | - We struggle with real-time data integration, which impacts our ability to make timely and informed decisions. 
37 | - The lack of automation in our data integration processes results in high manual effort and increased operational costs. 38 | 39 | Invest in a comprehensive **data governance framework** that includes data quality, privacy, and security measures. This will help ensure compliance with regulations and improve overall data management. 40 | 41 | Adopt **DataOps** practices to streamline and automate your data integration processes. This will reduce manual effort, minimise errors, and lower operational costs. 42 | 43 | Utilise advanced **metadata management** tools to maintain semantic consistency and improve data discoverability. This will aid in better data governance and integration. 44 | 45 | Implement **real-time data processing** capabilities using tools like Apache Kafka or Azure Stream Analytics. This will enhance your ability to make timely and informed decisions. 46 | 47 | Leverage **AI and machine learning** to analyse unstructured data from IoT devices and social media platforms. This will help derive valuable insights and improve decision-making. 48 | 49 | 50 | Avoid relying solely on custom scripts for data integration, as they are error-prone and difficult to maintain. 51 | 52 | Do not neglect the importance of data security and privacy, especially when dealing with sensitive data from IoT devices and social media platforms. 53 | 54 | Avoid underestimating the need for skilled personnel in data science, analytics, AI, and ML. Investing in training and hiring is crucial for effective data management. 55 | 56 | 57 | Improved compliance with data privacy and security regulations, reducing the risk of legal issues. 58 | 59 | Enhanced decision-making capabilities through real-time data integration and advanced analytics. 60 | 61 | Reduced operational costs and increased efficiency through automation and better data governance. 
62 | 63 | 64 | ``` 65 | 66 | HUMAN 67 | Tip: Make sure to answer in the correct format 68 | 69 | HUMAN 70 | Tip: Please make sure that you write all your answers in British English. -------------------------------------------------------------------------------- /format.ps1: -------------------------------------------------------------------------------- 1 | ruff check --fix 2 | black . -------------------------------------------------------------------------------- /i18n/messages.de.yml: -------------------------------------------------------------------------------- 1 | de: 2 | "A big thank you for completing a session with": "Ein großes Dankeschön für den Abschluss einer Sitzung mit %{name}." 3 | "Advice": "Beratung" 4 | "A personal offer for you": "Ein persönliches Angebot für Sie" 5 | "banner_link": https://d-well.onepointltd.ai/images/D-Well_Report_Banner.png 6 | "banner_link_email": https://cdn.discordapp.com/attachments/932953013683879957/1271015144666824704/D-Well_Report_Banner.png?ex=66b5cc98&is=66b47b18&hm=789480e3fae7c3c0f1893ad08538f9ccc82868f4aed2b058704f212ac5f3710a& 7 | "Confidence Degree": "Konfidenzgrad" 8 | "Data Wellness Aggregation Report": "Data Wellness Bericht" 9 | "for_more_info": "Für weitere Informationen besuchen Sie uns bitte unter Onepoint Data Wellness." 10 | "intro_advice": "Hier finden Sie die von uns gestellten Fragen und die Antworten, die Sie gegeben haben, gefolgt von den Empfehlungen und Handlungsvorschlägen." 11 | "offering_long": "Wir bieten einen kostenlosen Anruf zur Ergebnisinterpretation an, um die Empfehlungen und Handlungsvorschläge des Companion mit einem echten menschlichen Experten zu besprechen.

Wenn Sie dazu bereit sind, senden Sie uns bitte eine E-Mail an datawellness@onepointltd.com von Ihrer geschäftlichen E-Mail-Adresse aus mit Ihrer Anfrage, einen Anruf zu vereinbaren.

Sie können auch das kostenlose KI-gestützte Selbstdiagnose-Berichtstool von Onepoint ausprobieren, um eine kostenlose Selbstdiagnose zu erhalten." 12 | "Please check the attached report": "Bitte überprüfen Sie den beigefügten Bericht" 13 | "Potential positive outcomes": "Mögliche positive Ergebnisse (falls Sie den Rat befolgen)" 14 | "Positive outcomes (if you follow the advices)": "Positive Ergebnisse (wenn Sie den Ratschlägen folgen)" 15 | "Produced on": "Erstellt am" 16 | "Reasoning": "Gründe für den Vertrauensgrad von D-Well" 17 | "Transcript": "Transkript" 18 | "We would love your feedback": "Wir würden uns über Ihr Feedback freuen" 19 | "What to avoid": "Was man vermeiden sollte" 20 | "What you should avoid": "Was Sie vermeiden sollten" 21 | "What you should do": "Empfehlungen" 22 | confidence_degree_high: "Gut" 23 | confidence_degree_low: "Sehr niedrig" 24 | confidence_degree_mediocre: "Niedrig" 25 | confidence_degree_medium: "Mittel" 26 | confidence_degree_outstanding: "Sehr gut" 27 | db_insert_failed: "Fehler beim Hinzufügen der Abfrage" 28 | db_update_failed: "Fehler beim Hinzufügen der Antwort" 29 | failed_insert_question: "Fehler beim Einfügen der Frage in die Datenbank" 30 | no_answer_from_chatgpt: "Konnte keine Antworten von ChatGPT erhalten" 31 | no_report_id: "Berichts-ID ist nicht verfügbar" 32 | regeneration_failed: "Die Frage konnte nicht neu generiert werden." 
33 | -------------------------------------------------------------------------------- /i18n/messages.en.yml: -------------------------------------------------------------------------------- 1 | en: 2 | "A big thank you for completing a session with": "A big thank you for completing a session with the %{name}" 3 | "A personal offer for you": "A personal offer for you" 4 | "Advice": "Advice" 5 | "banner_link": /development/playground/langchain/data_questionnaire_agent/public/images/D-Well_Report_Banner.png 6 | "banner_link_email": https://d-well.onepointltd.ai/images/D-Well_Report_Banner.png 7 | "Confidence Degree": "D-Well confidence degree" 8 | "Data Wellness Aggregation Report": "Data Wellness Aggregation Report" 9 | "for_more_info": "For more information, please visit us at Onepoint Data Wellness." 10 | "intro_advice": "Here you have the questions we asked and the answers you gave, followed by the recommendations and suggested courses of action." 11 | "offering_long": "We are offering a free results interpretation call to talk through the Companion's recommendations and suggested courses of action with a real human expert.

If you are open to that, please email us at datawellness@onepointltd.com from your business email address with your request to schedule a call.

You can also try out Onepoint's free AI-powered Self-Diagnostic Report tool to get a free self-diagnostic." 12 | "Please check the attached report": "Please check the attached Data Wellness report." 13 | "Potential positive outcomes": "Benefits (in case you follow the advice)" 14 | "Positive outcomes (if you follow the advices)": "Benefits (if you follow the advices)" 15 | "Produced on": "Produced on" 16 | "Reasoning": "D-Well reasoning behind confidence degree" 17 | "Transcript": "Transcript" 18 | "We would love your feedback": "We would love your feedback" 19 | "What to avoid": "What to avoid" 20 | "What you should avoid": "What you should avoid" 21 | "What you should do": "Suggested courses of action" 22 | confidence_degree_high: "Good" 23 | confidence_degree_low: "Very low" 24 | confidence_degree_mediocre: "Low" 25 | confidence_degree_medium: "Medium" 26 | confidence_degree_outstanding: "Very good" 27 | db_insert_failed: "Failed to add query to database" 28 | db_update_failed: "Failed to update the answer in database" 29 | failed_insert_question: "Failed to insert question in database" 30 | no_answer_from_chatgpt: "Could not get any answers from ChatGPT" 31 | no_report_id: "Report ID is not available" 32 | regeneration_failed: "The question could not be re-generated." 
33 | 34 | -------------------------------------------------------------------------------- /i18n/messages.fa.yml: -------------------------------------------------------------------------------- 1 | fa: 2 | db_insert_failed: "عدم موفقیت در افزودن سوال به پایگاه داده" 3 | db_update_failed: "پاسخ در پایگاه داده به روز نشد" 4 | no_report_id: "شناسه گزارش در دسترس نیست" 5 | no_answer_from_chatgpt: "هیچ پاسخی از ChatGPT دریافت نشد" 6 | failed_insert_question: "عدم موفقیت در وارد کردن سوال در پایگاه داده" 7 | -------------------------------------------------------------------------------- /notes.md: -------------------------------------------------------------------------------- 1 | # Nots Demo 2 | 3 | - Tick all that apply 4 | - Fine tune the prompts to explain in more detail? 5 | - Give more information about certain topics (tweak knowledge base) 6 | - Show other options for confidence degree 7 | 8 | - What types of information is the company dealing with? 9 | 10 | Topics 11 | ====== 12 | 13 | cyber security 14 | data protection 15 | information compliance 16 | 17 | Try to figure out which information the user is dealing with. 18 | What regulation and standards are important 19 | Do you have a problem with data compliance? 
20 | 21 | Add in context help, tool tips 22 | 23 | 24 | Test generate report with one JWT token 25 | Report not in session when Restart not clicked 26 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "data-questionnaire-agent", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": {} 6 | } 7 | -------------------------------------------------------------------------------- /public/css/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --text_blue: #0066A4; 3 | --experimental-blue: #0087d3 4 | } 5 | 6 | .markdown-body img { 7 | width: 100% 8 | } 9 | 10 | .MuiFormControl-root.MuiFormControl-fullWidth.MuiTextField-root { 11 | border: 1px var(--text_blue) solid; 12 | } 13 | 14 | .generated-message { 15 | color: var(--text_blue) 16 | } 17 | 18 | .MuiButtonBase-root.MuiButtonBase-root.css-1vhtqje { 19 | background-color: #edf3f8; 20 | color: #91b3d6 21 | } 22 | 23 | .MuiButtonBase-root.MuiButton-root.css-1ruedog { 24 | border: 1px solid var(--text_blue); 25 | color: #5285be 26 | } 27 | 28 | .MuiSvgIcon-root.MuiSvgIcon-fontSizeMedium.MuiAvatar-fallback.css-13y7ul3 { 29 | color: #4f83bd 30 | } 31 | 32 | .image-banner { 33 | background: url("/public/images/banner_blank.png") center center / cover no-repeat; 34 | color: white; 35 | min-height: 10em; 36 | } 37 | 38 | .image-banner h1 { 39 | margin: 0.5em 0 0 0; 40 | } 41 | 42 | .image-banner h2 { 43 | margin: 0.2em 0 0.5em 0; 44 | } 45 | 46 | .logo-box img { 47 | position: relative; 48 | right: -1em; 49 | max-width: 68% 50 | } 51 | 52 | .image-banner > .css-dvxtzn { 53 | align-items: start; 54 | } 55 | 56 | /* Hiding chat button */ 57 | .logo-box + .MuiStack-root.css-1mzerio { 58 | display: none; 59 | } 60 | 61 | /* Hiding the settings button */ 62 | svg[data-testid=TuneIcon] { 63 | display: none; 64 | } 65 | 
66 | /* Hiding the message date */ 67 | .message-date { 68 | display: none; 69 | } 70 | 71 | /* Hiding the settings button */ 72 | .MuiInputAdornment-root .settings-button { 73 | display: none; 74 | } 75 | 76 | /* Aavatar related */ 77 | .MuiAvatar-circular { 78 | margin: 0 auto 79 | } 80 | 81 | .css-kt4rn8 { 82 | text-align: center; 83 | } 84 | /* End Avatar */ 85 | 86 | /* Hide Github button */ 87 | a[href*='https://onepointltd.com'].css-1deacqj { 88 | display: none 89 | } 90 | 91 | button:has(> svg[data-testid=KeyboardDoubleArrowUpIcon]) { 92 | display: none; 93 | } 94 | 95 | /** Hide the side menu */ 96 | .experimental { 97 | background-color: var(--experimental-blue); 98 | padding: 0.2em 0.1em 99 | } 100 | 101 | /* Medium devices (tablets, 768px and up) */ 102 | @media (max-width: 1200px) { 103 | 104 | /* Mobile title */ 105 | .css-17yvo7f { 106 | font-size: 74%; 107 | padding: 0 0.5em; 108 | } 109 | } 110 | 111 | 112 | /* Medium devices (tablets, 768px and up) */ 113 | @media (min-width: 1200px) { 114 | 115 | /* Mobile title */ 116 | .css-17yvo7f { 117 | font-size: 84%; 118 | padding: 0 1em; 119 | } 120 | } 121 | 122 | 123 | /* Medium devices (tablets, 768px and up) */ 124 | @media (min-width: 1400px) { 125 | 126 | .image-banner { 127 | min-height: 15em; 128 | } 129 | } 130 | 131 | /* Areas of concern */ 132 | .markdown-body .init-options-row { 133 | font-size: small; 134 | } 135 | 136 | .markdown-body .img-cell > img { 137 | max-width: 4em; 138 | display: block; 139 | margin: 0 auto; 140 | cursor: pointer 141 | } 142 | 143 | .markdown-body .img-cell ~ div.col-9 { 144 | cursor: pointer 145 | } 146 | 147 | /* Micro framework for responsive layout */ 148 | .row { 149 | display: flex; 150 | flex-wrap: wrap; 151 | } 152 | 153 | .col-2 { 154 | width: 16.667% 155 | } 156 | 157 | .col-3 { 158 | width: 25% 159 | } 160 | 161 | .col-9 { 162 | width: 75% 163 | } 164 | 165 | .col-10 { 166 | width: 75% 167 | } 168 | 169 | /* Medium devices (tablets, 768px and up) */ 170 | 
@media (min-width: 768px) { 171 | 172 | /* Tablet */ 173 | 174 | .col-md-1 { 175 | width: 8.33% 176 | } 177 | 178 | .col-md-2 { 179 | width: 16.667% 180 | } 181 | 182 | .col-md-4 { 183 | width: 33.333% 184 | } 185 | 186 | .col-md-5 { 187 | width: 41.667% 188 | } 189 | } 190 | /* End micro framework */ 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /public/images/D-Well_Report_Banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/D-Well_Report_Banner.png -------------------------------------------------------------------------------- /public/images/Hero_Image_with_Logo_and_Titles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/Hero_Image_with_Logo_and_Titles.jpg -------------------------------------------------------------------------------- /public/images/banner_blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/banner_blank.png -------------------------------------------------------------------------------- /public/images/banner_with_titles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/banner_with_titles.png -------------------------------------------------------------------------------- /public/images/companion_icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/companion_icon.png -------------------------------------------------------------------------------- /public/images/data_ecosystem_areas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/data_ecosystem_areas.png -------------------------------------------------------------------------------- /public/images/natural-language-processing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/images/natural-language-processing.png -------------------------------------------------------------------------------- /public/js/onepoint.js: -------------------------------------------------------------------------------- 1 | function setNativeValue(element, value) { 2 | let lastValue = element.value; 3 | element.value = value; 4 | let event = new Event("input", { target: element, bubbles: true }); 5 | // React 15 6 | event.simulated = true; 7 | // React 16 8 | let tracker = element._valueTracker; 9 | if (tracker) { 10 | tracker.setValue(lastValue); 11 | } 12 | element.dispatchEvent(event); 13 | } 14 | 15 | function injectTextListener(e) { 16 | const altText = e.target.getAttribute('alt') 17 | const textarea = document.querySelector("#chat-input") 18 | setNativeValue(textarea, altText) 19 | } 20 | 21 | 22 | function processEvent(e, injectFunc) { 23 | if (typeof (e.onclick) == "undefined" || e.onclick == null) { 24 | const listener = (e) => injectFunc(e) 25 | e.addEventListener("click", listener) 26 | e.onclick = listener 27 | console.info("Added event listener.") 28 | } else { 29 | console.info("Skipped event listener") 30 | } 31 
| } 32 | 33 | function addImgOnepointEventListener() { 34 | [...document.querySelectorAll(".img-cell img")].forEach(e => { 35 | processEvent(e, injectTextListener) 36 | }) 37 | } 38 | 39 | function injectInnerTextListener(e) { 40 | const text = e.target.innerText 41 | const textarea = document.querySelector("#chat-input") 42 | setNativeValue(textarea, text) 43 | } 44 | 45 | function addImgOnepointTextEventListener() { 46 | [...document.querySelectorAll(".img-cell ~ div.col-9")].forEach(e => { 47 | if (typeof (e.onclick) == "undefined" || e.onclick == null) { 48 | processEvent(e, injectInnerTextListener) 49 | } 50 | }) 51 | } 52 | 53 | function activateClickListeners(_records, _observer) { 54 | addImgOnepointEventListener() 55 | addImgOnepointTextEventListener() 56 | } 57 | 58 | const observerOptions = { 59 | childList: true, 60 | subtree: true, 61 | }; 62 | 63 | document.addEventListener('DOMContentLoaded', function () { 64 | const observer = new MutationObserver(activateClickListeners); 65 | observer.observe(document.querySelector("body"), observerOptions); 66 | }, false); 67 | 68 | 69 | -------------------------------------------------------------------------------- /public/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/logo_dark.png -------------------------------------------------------------------------------- /public/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onepointconsulting/data-questionnaire-agent/cc96c26bbd559b15b6e3aec4e081e572713b14d9/public/logo_light.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "data-questionnaire-agent" 3 | 
version = "0.2.0" 4 | description = "Implementation of an agent which asks questions and gives advices about a predefined question." 5 | authors = ["Gil Fernandes "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.13" 10 | openai = "^1.37.0" 11 | langchain = "^0.3.16" 12 | langchain-community = "0.3.16" 13 | prompt-toolkit = "^3.0.39" 14 | tomli = "^2.0.1" 15 | faiss-cpu = "^1.7.4" 16 | tiktoken = "^0.8.0" 17 | python-dotenv = "^1.0.0" 18 | pydantic = "2.10.6" 19 | duckduckgo-search = "^3.8.5" 20 | pdfkit = "^1.0.0" 21 | opencv-python = "^4.8.1.78" 22 | scikit-image = "^0.25.1" 23 | tenacity = "^8.2.3" 24 | psycopg = {extras = ["binary"], version = "^3.1.18"} 25 | aiohttp = "^3.9.3" 26 | langchain-openai = "^0.2.10" 27 | python-socketio = "^5.11.2" 28 | asyncer = "^0.0.5" 29 | jinja2 = "^3.1.3" 30 | python-i18n = {extras = ["yaml"], version = "^0.3.9"} 31 | python-ulid = "^2.7.0" 32 | pyjwt = "^2.9.0" 33 | pandas = "^2.2.3" 34 | xlsxwriter = "^3.2.0" 35 | openpyxl = "^3.1.5" 36 | consultant-info-generator = {git = "https://github.com/OnepointConsultingLtd/consultant_info_generator.git"} 37 | 38 | [tool.poetry.scripts] 39 | build-ui = "data_questionnaire_agent.build:ui" 40 | build-check = "data_questionnaire_agent.build:check" 41 | run = "data_questionnaire_agent.server.questionnaire_server_main:run_server" 42 | generate_tokens = "data_questionnaire_agent.service.jwt_token_service:generate_from_file_cmdline" 43 | 44 | [tool.poetry.group.dev.dependencies] 45 | pytest = "^7.4.2" 46 | black = "^23.11.0" 47 | ruff = "^0.4.8" 48 | pytest-aiohttp = "^1.0.5" 49 | 50 | [build-system] 51 | requires = ["poetry-core"] 52 | build-backend = "poetry.core.masonry.api" 53 | 54 | [tool.ruff] 55 | line-length = 120 56 | indent-width = 4 57 | target-version = "py39" 58 | lint.extend-select = ["I"] 59 | -------------------------------------------------------------------------------- /run_app.sh: 
-------------------------------------------------------------------------------- 1 | 2 | ENV_NAME=data_wellness_agent_staging 3 | source /home/ubuntu/miniconda3/etc/profile.d/conda.sh 4 | conda activate $ENV_NAME 5 | cd /home/ubuntu/projects/data-wellness-companion-staging 6 | 7 | python ./data_questionnaire_agent/server/questionnaire_server_main.py 8 | 9 | -------------------------------------------------------------------------------- /sql/db_creation_default.sql: -------------------------------------------------------------------------------- 1 | -- Hypergility database 2 | CREATE DATABASE data_wellness_companion 3 | WITH 4 | OWNER = postgres 5 | ENCODING = 'UTF8' 6 | LOCALE_PROVIDER = 'libc' 7 | CONNECTION LIMIT = -1 8 | IS_TEMPLATE = False; -------------------------------------------------------------------------------- /sql/db_creation_hypergility.sql: -------------------------------------------------------------------------------- 1 | -- Hypergility database 2 | CREATE DATABASE data_wellness_companion_hypergility 3 | WITH 4 | OWNER = postgres 5 | ENCODING = 'UTF8' 6 | CONNECTION LIMIT = -1 7 | IS_TEMPLATE = False; -------------------------------------------------------------------------------- /sql/initial_data.sql: -------------------------------------------------------------------------------- 1 | -- Add language 2 | INSERT INTO public.tb_language(language_code) VALUES ('en'); 3 | INSERT INTO public.tb_language(language_code) VALUES ('fa'); 4 | 5 | -- Initial question 6 | INSERT INTO TB_QUESTION(QUESTION, PREFERRED_QUESTION_ORDER, LANGUAGE_ID) 7 | VALUES('بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟', 1, (SELECT ID FROM TB_LANGUAGE WHERE language_code = 'en')); 8 | 9 | INSERT INTO TB_QUESTION(QUESTION, PREFERRED_QUESTION_ORDER, LANGUAGE_ID) 10 | VALUES('شما نگران کدام بخش از اکوسیستم داده‌های خود هستید؟', 1, (SELECT ID FROM TB_LANGUAGE WHERE language_code = 'fa')); 11 | 12 | -- Suggestions 13 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, 
TITLE, MAIN_TEXT, QUESTION_ID) 14 | VALUES('poor_data_quality.png', 'کیفیت داده ضعیف', 'کیفیت داده ضعیف', '.داده های با کیفیت پایین می تواند منجر به بینش نادرست و تصمیم گیری ضعیف شود', 15 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 16 | 17 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 18 | VALUES('compliance_risks.png', 'انطباق و خطرات امنیتی', 'انطباق و خطرات امنیتی', '.سوء استفاده از داده ها می تواند منجر به مشکلات قانونی و آسیب به اعتبار شود', 19 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'Which area of your data ecosys, ()tem are you most concerned about?')); 20 | 21 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 22 | VALUES('data_silos.png', 'سیلوهای داده', 'سیلوهای داده', '.داده های به دام افتاده در سیلوهای دپارتمان می تواند برای سایر قسمت ها غیرقابل دسترسی باشد', 23 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 24 | 25 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 26 | VALUES('lack_of_skilled_personnel.png', 'کمبود پرسنل ماهر', '.کمبود پرسنل ماهر', 'فقدان مهارت در علم داده، تجزیه و تحلیل، هوش مصنوعی و ML می تواند مانع استفاده موثر از داده ها شود.', 27 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 28 | 29 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 30 | VALUES('data_overload.png', 'اضافه بار داده ها', 'اضافه بار داده ها', '."افزایش داده ها" می تواند فرآیندها را کند کند و تشخیص اینکه چه داده هایی واقعا مفید هستند را دشوار می کند', 31 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 32 | 33 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 34 | VALUES('cost_and_complexity.png', 'هزینه و پیچیدگی', 'هزینه و پیچیدگی', '.یک زیرساخت قوی 
تجزیه و تحلیل داده ها به سرمایه گذاری قابل توجهی در منابع نیاز دارد', 35 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 36 | 37 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 38 | VALUES('inconsistent_data_strategies.png', 'استراتژی های داده ناسازگار', 'استراتژی های داده ناسازگار', '.این موارد سخت است که با مفاهیم مدرن مانند ساختار داده، شبکه و هوش مصنوعی تولیدی هماهنگ شوند', 39 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 40 | 41 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 42 | VALUES('resistence_to_change.png', 'مقاومت در برابر تغییرات', 'مقاومت در برابر تغییرات', '.کارمندان باید خود را با روش‌های جدید کار تطبیق دهند تا تحول مبتنی بر داده‌ها عملی شود', 43 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 44 | 45 | 46 | INSERT INTO TB_QUESTION_SUGGESTIONS(IMG_SRC, IMG_ALT, TITLE, MAIN_TEXT, QUESTION_ID) 47 | VALUES('document-related-issues.jpg', 'مراحل درخواست پناهندگی', 'مراحل درخواست پناهندگی', 'من علاقه مند هستم در مورد روش درخواست پناهندگی بیشتر بدانم', 48 | (SELECT ID FROM TB_QUESTION WHERE QUESTION = 'بیشتر نگران کدام ناحیه از اکوسیستم داده خود هستید؟')); 49 | -------------------------------------------------------------------------------- /sql/other_queries.sql: -------------------------------------------------------------------------------- 1 | -- Extract the content of specific sessions 2 | select final_report, session_id, question, answer from tb_questionnaire_status where session_id in 3 | (select distinct c.session_id from tb_session_configuration c inner join tb_questionnaire_status s on s.session_id = c.session_id 4 | where c.config_key = 'session-client-id' 5 | and c.config_value in 
('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIwMUpEMkpXTkhBNTZGN1lHRENDU1czRjJaQiIsIm5hbWUiOiJHaWwiLCJpYXQiOjE3MzIwMzI0ODR9.r8LTAiuORLPk2QnrS8YMcX7dHdlYKndHuXc3PEY6Msw') 6 | and s.final_report = true) 7 | order by id asc; 8 | 9 | -- Get some statistics on sessions 10 | select * from 11 | (select t.email, sc.session_id, count(*), min(created_at), max(created_at) from tb_session_configuration sc 12 | inner join tb_jwt_token t on sc.config_value = t.jwt_token 13 | inner join tb_questionnaire_status qs on qs.session_id = sc.session_id 14 | where t.id > 100 15 | group by sc.session_id, t.email 16 | order by count(*)) q where max > '2025-01-15'; -------------------------------------------------------------------------------- /sql/verification_queries.sql: -------------------------------------------------------------------------------- 1 | select min(ordinal), max(ordinal), session_id, question, min(created_at), max(created_at) from 2 | (select ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at) ordinal, session_id, question, created_at 3 | from tb_questionnaire_status 4 | where session_id in (select distinct session_id from tb_questionnaire_status where final_report is true)) q 5 | group by session_id, question having count(*) > 1 order by min(created_at); 6 | 7 | -- Check duplicate questions 8 | select lower(question), count(*) from tb_questionnaire_status where session_id = '01JD748MW7FKMZ2FY5H87WWQ8B' 9 | group by lower(question) having count(*) > 1; 10 | 11 | -- Select all questions in session 12 | select question from tb_questionnaire_status where session_id = '01JD748MW7FKMZ2FY5H87WWQ8B'; -------------------------------------------------------------------------------- /start.ps1: -------------------------------------------------------------------------------- 1 | conda activate data_wellness_agent 2 | $env:OPENAI_MODEL='gpt-4o' 3 | python .\data_questionnaire_agent\server\questionnaire_server_main.py 
-------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Activate environment 4 | . venv/bin/activate; 5 | 6 | # Install all libraries in the right environment 7 | ./venv/bin/poetry install 8 | 9 | # Build the UI 10 | echo Building the User Interface 11 | poetry run build-ui 12 | 13 | # Build the database if requested 14 | echo Building database 15 | python ./data_questionnaire_agent/bootstrap/sql_script.py ./sql/db_setup.sql 16 | 17 | # Run the main application 18 | /app/venv/bin/python ./data_questionnaire_agent/server/questionnaire_server_main.py 19 | # /app/venv/bin/python ./data_questionnaire_agent/toml_support.py -------------------------------------------------------------------------------- /templates/consultants-template.md: -------------------------------------------------------------------------------- 1 | {% for consultant in consultants %} 2 | # Consultant: {{consultant.given_name}} {{consultant.surname}} 3 | 4 | Email: {{consultant.email}}
5 | Location: {{consultant.geo_location}}
6 | Industry: {{consultant.industry_name}}
7 | linkedin_profile: [{{consultant.linkedin_profile_url}}](https://www.linkedin.com/in/{{consultant.linkedin_profile_url}}) 8 | 9 | ## Curriculum Vitae 10 | 11 | {{consultant.cv}} 12 | 13 | ## Experience 14 | 15 | {% for experience in consultant.experiences %} 16 | ### {{experience.title}} 17 | 18 | Company: {{experience.company.name}}
19 | {% if experience.location is not none %}Location: {{experience.location}}
{% endif %} 20 | From: {{experience.start.strftime('%Y-%m-%d')}}
21 | {% if experience.end is not none %}To: {{experience.end.strftime('%Y-%m-%d')}}
{% endif %} 22 | 23 | {% endfor %} 24 | ## Skills 25 | 26 | {% for skill in consultant.skills %} 27 | - {{skill.name}} 28 | {% endfor %} 29 | 30 | {% endfor %} -------------------------------------------------------------------------------- /templates/mail-template.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 25 | 26 | 27 |
6 | 7 | 8 | 9 | 21 | 22 | 23 |
10 | 12 | 13 | 14 | 17 | 18 | 19 |
15 | {text} 16 |
20 |
24 |
-------------------------------------------------------------------------------- /templates/results-template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {{title}} 8 | 9 | 10 | 13 | 14 | 67 | 68 | 69 | 70 | 71 |
72 | 73 |

{{produced_on}}: {{timestamp}}

74 |

{{big_thank_you}}

75 |

{{intro_advice}}

76 |

{{title_transcript}}

77 | 78 | 79 | {{questionnaire}} 80 | 81 |
82 |

{{title_what_to_do}}

83 |
    84 | {{advices}} 85 |
86 |

{{title_what_to_avoid}}

87 |
    88 | {{avoids}} 89 |
90 |

{{title_potential_outcomes}}

91 |
    92 | {{positive_outcomes}} 93 |
94 |

{{title_confidence}}

95 | {{confidence}} 96 |

{{personal_offer}}

97 |

{{offering_long}}

98 |

{{love_feedback}}: feedback@onepointltd.ai.

99 |

{{for_more_info}}

100 |
101 | 102 | 103 | --------------------------------------------------------------------------------