├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── compose.yaml ├── data ├── en_agents.json ├── en_calls_batch.parquet ├── en_clients.json ├── en_conversations.zip ├── en_dataframe.parquet ├── en_ground_truths.parquet ├── en_metadata.parquet ├── es_agents.json ├── es_calls_batch.parquet ├── es_clients.json ├── es_conversations.zip ├── es_dataframe.parquet ├── es_ground_truths.parquet ├── es_metadata.parquet └── sqlite.db ├── dev-requirements.txt ├── example_data ├── agents.zip ├── batch_creation │ └── calls_batch.zip ├── clients.zip ├── conversation_generation │ ├── conversations.zip │ ├── ground_truths.zip │ └── metadata.zip └── text_to_audio │ ├── audio_files.zip │ └── dataframe.zip ├── images ├── call-center-readme.png └── call-center-workflow.png ├── mlrun.env ├── notebook.ipynb ├── project_setup.py ├── pyproject.toml ├── requirements.txt ├── setup.py ├── src ├── __init__.py ├── calls_analysis │ ├── __init__.py │ ├── db_management.py │ └── postprocessing.py ├── calls_generation │ ├── __init__.py │ ├── conversations_generator.py │ └── skip.py ├── common.py ├── vizro.py └── workflows │ ├── __init__.py │ ├── calls_analysis.py │ └── calls_generation.py └── vizro ├── app.py ├── assets └── vizro_dashboard_styles.css ├── custom_charts.py └── custom_components.py /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mlrun/mlrun-gpu:1.7.0 2 | 3 | # Update apt-get to install ffmpeg (support audio file formats): 4 | RUN apt-get update -y 5 | RUN apt-get install ffmpeg -y 6 | 7 | # Install demo requirements: 8 | 9 | RUN pip install transformers==4.44.1 10 | RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 11 | RUN pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2 12 | RUN pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ 13 | RUN pip install 
langchain==0.0.327 openai==0.28.1 14 | RUN pip install git+https://github.com/suno-ai/bark.git 15 | RUN pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0 16 | RUN python -m spacy download en_core_web_lg 17 | RUN pip install -U SQLAlchemy 18 | 19 | # Align onnxruntime to use gpu: 20 | RUN pip uninstall -y onnxruntime-gpu 21 | RUN pip uninstall -y onnxruntime 22 | RUN pip install onnxruntime-gpu 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON_INTERPRETER = python3 3 | SHARED_DIR ?= ~/mlrun-data 4 | MLRUN_TAG ?= 1.4.0 5 | HOST_IP ?=$$(ip route get 1.2.3.4 | awk '{print $$7}') 6 | CONDA_ENV ?= mlrun 7 | SHELL=/bin/bash 8 | CONDA_PY_VER ?= 3.9 9 | CONDA_ACTIVATE = source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate 10 | 11 | ################################################################################# 12 | # COMMANDS # 13 | ################################################################################# 14 | 15 | .PHONY: help 16 | help: ## Display available commands 17 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 18 | 19 | .PHONY: all 20 | all: 21 | $(error please pick a target) 22 | 23 | .PHONY: install-requirements 24 | install-requirements: ## Install all requirements needed for development 25 | $(PYTHON_INTERPRETER) -m pip install -r 
requirements.txt -r dev-requirements.txt 26 | 27 | 28 | .PHONY: package-wheel 29 | package-wheel: clean ## Build python package wheel 30 | python setup.py bdist_wheel 31 | 32 | .PHONY: clean 33 | clean: ## Clean python package build artifacts 34 | rm -rf build 35 | rm -rf dist 36 | find . -type f -name "*.py[co]" -delete 37 | find . -type d -name "__pycache__" -delete 38 | 39 | .PHONY: fmt 40 | fmt: ## Format the code (using black and isort) 41 | @echo "Running black fmt..." 42 | $(PYTHON_INTERPRETER) -m black src 43 | $(PYTHON_INTERPRETER) -m isort src 44 | 45 | .PHONY: lint 46 | lint: fmt-check flake8 ## Run lint on the code 47 | 48 | .PHONY: fmt-check 49 | fmt-check: ## Format and check the code (using black and isort) 50 | @echo "Running black+isort fmt check..." 51 | $(PYTHON_INTERPRETER) -m black --check --diff src 52 | $(PYTHON_INTERPRETER) -m isort --check --diff src 53 | 54 | .PHONY: flake8 55 | flake8: ## Run flake8 lint 56 | @echo "Running flake8 lint..." 57 | $(PYTHON_INTERPRETER) -m flake8 src 58 | 59 | .PHONY: mlrun-docker 60 | mlrun-docker: ## Start MLRun & Nuclio containers (using Docker compose) 61 | mkdir $(SHARED_DIR) -p 62 | @echo "HOST_IP=$(HOST_IP)" > .env 63 | SHARED_DIR=$(SHARED_DIR) TAG=$(MLRUN_TAG) docker-compose -f compose.yaml up -d 64 | @echo "use docker-compose stop / logs commands to stop or view logs" 65 | 66 | .PHONY: mlrun-api 67 | mlrun-api: ## Run MLRun DB locally (as process) 68 | @echo "Installing MLRun API dependencies ..." 69 | $(PYTHON_INTERPRETER) -m pip install uvicorn~=0.17.0 dask-kubernetes~=0.11.0 apscheduler~=3.6 sqlite3-to-mysql~=1.4 70 | @echo "Starting local mlrun..." 71 | MLRUN_ARTIFACT_PATH=$$(realpath ./artifacts) MLRUN_ENV_FILE= mlrun db -b 72 | 73 | .PHONY: conda-env 74 | conda-env: ## Create a conda environment 75 | @echo "Creating new conda environment $(CONDA_ENV)..." 
76 | conda create -n $(CONDA_ENV) -y python=$(CONDA_PY_VER) ipykernel graphviz pip 77 | test -s ./mlrun.env && conda env config vars set -n $(CONDA_ENV) MLRUN_ENV_FILE=$$(realpath ./mlrun.env) 78 | @echo "Installing requirements.txt..." 79 | $(CONDA_ACTIVATE) $(CONDA_ENV); pip install -r requirements.txt 80 | @echo -e "\nTo run mlrun API as a local process type:\n conda activate $(CONDA_ENV) && make mlrun-api" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLRun's Call Center Demo 2 | 3 | huggingface-mlrun 4 | 5 | This demo showcases how to use LLMs to turn audio files from call center conversations between customers and agents into valuable data, all in a single workflow orchestrated by MLRun. 6 | 7 | MLRun automates the entire workflow, auto-scales resources as needed, and automatically logs and parses values between the different workflow steps. 8 | 9 | By the end of this demo you will see the potential power of LLMs for feature extraction, and how easily you can do this with MLRun! 10 | 11 | This demo uses: 12 | * [**OpenAI's Whisper**](https://openai.com/research/whisper) — To transcribe the audio calls into text. 13 | * [**Flair**](https://flairnlp.github.io/) and [**Microsoft's Presidio**](https://microsoft.github.io/presidio/) - To recognize PII so it can be filtered out. 14 | * [**HuggingFace**](https://huggingface.co/) — The main machine-learning framework to get the model and tokenizer for the features extraction. 15 | * and [**MLRun**](https://www.mlrun.org/) — as the orchestrator to operationalize the workflow. 16 | 17 | The demo contains a single [notebook](./notebook.ipynb) that encompasses the entire demo. 
18 | 19 | 20 | Most of the functions are imported from [MLRun's function hub](https://docs.mlrun.org/en/stable/runtimes/load-from-hub.html), which contains a wide range of functions that can be used for a variety of use cases. All functions used in the demo include links to their source in the hub. All of the python source code is under [/src](./src). 21 | Enjoy! 22 | 23 | ___ 24 | 25 | ## Installation 26 | 27 | This project can run in different development environments: 28 | * Local computer (using PyCharm, VSCode, Jupyter, etc.) 29 | * Inside GitHub Codespaces 30 | * Other managed Jupyter environments 31 | 32 | ### Install the code and the mlrun client 33 | 34 | To get started, fork this repo into your GitHub account and clone it into your development environment. 35 | 36 | To install the package dependencies (not required in GitHub codespaces) use: 37 | 38 | make install-requirements 39 | 40 | If you prefer to use Conda, use this instead (to create and configure a conda env): 41 | 42 | make conda-env 43 | 44 | > Make sure you open the notebooks and select the `mlrun` conda environment 45 | 46 | ### Install or connect to the MLRun service/cluster 47 | 48 | The MLRun service and computation can run locally (minimal setup) or over a remote Kubernetes environment. 49 | 50 | If your development environment supports Docker and there are sufficient CPU resources, run: 51 | 52 | make mlrun-docker 53 | 54 | > MLRun UI can be viewed in: http://localhost:8060 55 | 56 | If your environment is minimal, run mlrun as a process (no UI): 57 | 58 | [conda activate mlrun &&] make mlrun-api 59 | 60 | For MLRun to run properly you should set your client environment. This is not required when using **codespaces**, the mlrun **conda** environment, or **iguazio** managed notebooks. 
61 | 62 | Your environment should include `MLRUN_ENV_FILE= ` (point to the mlrun .env file 63 | in this repo); see [mlrun client setup](https://docs.mlrun.org/en/latest/install/remote.html) instructions for details. 64 | 65 | > Note: You can also use a remote MLRun service (over Kubernetes): instead of starting a local mlrun: 66 | > edit the [mlrun.env](./mlrun.env) and specify its address and credentials. 67 | -------------------------------------------------------------------------------- /compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | init_nuclio: 3 | image: alpine:3.16 4 | command: 5 | - "/bin/sh" 6 | - "-c" 7 | - | 8 | mkdir -p /etc/nuclio/config/platform; \ 9 | cat << EOF | tee /etc/nuclio/config/platform/platform.yaml 10 | runtime: 11 | common: 12 | env: 13 | MLRUN_DBPATH: http://${HOST_IP:?err}:8080 14 | local: 15 | defaultFunctionContainerNetworkName: mlrun 16 | defaultFunctionRestartPolicy: 17 | name: always 18 | maxRetryCount: 0 19 | defaultFunctionVolumes: 20 | - volume: 21 | name: mlrun-stuff 22 | hostPath: 23 | path: ${SHARED_DIR:?err} 24 | volumeMount: 25 | name: mlrun-stuff 26 | mountPath: /home/jovyan/data/ 27 | logger: 28 | sinks: 29 | myStdoutLoggerSink: 30 | kind: stdout 31 | system: 32 | - level: debug 33 | sink: myStdoutLoggerSink 34 | functions: 35 | - level: debug 36 | sink: myStdoutLoggerSink 37 | EOF 38 | volumes: 39 | - nuclio-platform-config:/etc/nuclio/config 40 | 41 | mlrun-api: 42 | image: "mlrun/mlrun-api:${TAG:-1.1.2}" 43 | ports: 44 | - "8080:8080" 45 | environment: 46 | MLRUN_ARTIFACT_PATH: "${SHARED_DIR}/{{project}}" 47 | # using local storage, meaning files / artifacts are stored locally, so we want to allow access to them 48 | MLRUN_HTTPDB__REAL_PATH: /data 49 | MLRUN_HTTPDB__DATA_VOLUME: "${SHARED_DIR}" 50 | MLRUN_LOG_LEVEL: DEBUG 51 | MLRUN_NUCLIO_DASHBOARD_URL: http://nuclio:8070 52 | MLRUN_HTTPDB__DSN: "sqlite:////data/mlrun.db?check_same_thread=false" 53 | 
MLRUN_UI__URL: http://localhost:8060 54 | # not running on k8s meaning no need to store secrets 55 | MLRUN_SECRET_STORES__KUBERNETES__AUTO_ADD_PROJECT_SECRETS: "false" 56 | # let mlrun control nuclio resources 57 | MLRUN_HTTPDB__PROJECTS__FOLLOWERS: "nuclio" 58 | volumes: 59 | - "${SHARED_DIR:?err}:/data" 60 | networks: 61 | - mlrun 62 | 63 | mlrun-ui: 64 | image: "mlrun/mlrun-ui:${TAG:-1.1.2}" 65 | ports: 66 | - "8060:8090" 67 | environment: 68 | MLRUN_API_PROXY_URL: http://mlrun-api:8080 69 | MLRUN_NUCLIO_MODE: enable 70 | MLRUN_NUCLIO_API_URL: http://nuclio:8070 71 | MLRUN_NUCLIO_UI_URL: http://localhost:8070 72 | networks: 73 | - mlrun 74 | 75 | nuclio: 76 | image: "quay.io/nuclio/dashboard:${NUCLIO_TAG:-stable-amd64}" 77 | ports: 78 | - "8070:8070" 79 | environment: 80 | NUCLIO_DASHBOARD_EXTERNAL_IP_ADDRESSES: "${HOST_IP:?err}" 81 | volumes: 82 | - /var/run/docker.sock:/var/run/docker.sock 83 | - nuclio-platform-config:/etc/nuclio/config 84 | depends_on: 85 | - init_nuclio 86 | networks: 87 | - mlrun 88 | 89 | volumes: 90 | nuclio-platform-config: {} 91 | 92 | networks: 93 | mlrun: 94 | name: mlrun 95 | -------------------------------------------------------------------------------- /data/en_agents.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "first_name": "Michael", 4 | "last_name": "Johnson", 5 | "agent_id": "A001" 6 | }, 7 | { 8 | "first_name": "Emma", 9 | "last_name": "Williams", 10 | "agent_id": "A002" 11 | }, 12 | { 13 | "first_name": "Daniel", 14 | "last_name": "Miller", 15 | "agent_id": "A003" 16 | }, 17 | { 18 | "first_name": "Sophia", 19 | "last_name": "Brown", 20 | "agent_id": "A004" 21 | }, 22 | { 23 | "first_name": "David", 24 | "last_name": "Davis", 25 | "agent_id": "A005" 26 | }, 27 | { 28 | "first_name": "Olivia", 29 | "last_name": "Garcia", 30 | "agent_id": "A006" 31 | }, 32 | { 33 | "first_name": "James", 34 | "last_name": "Rodriguez", 35 | "agent_id": "A007" 36 | }, 37 | { 38 | 
"first_name": "Mia", 39 | "last_name": "Martinez", 40 | "agent_id": "A008" 41 | }, 42 | { 43 | "first_name": "John", 44 | "last_name": "Hernandez", 45 | "agent_id": "A009" 46 | }, 47 | { 48 | "first_name": "Isabella", 49 | "last_name": "Lopez", 50 | "agent_id": "A010" 51 | } 52 | ] -------------------------------------------------------------------------------- /data/en_calls_batch.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/en_calls_batch.parquet -------------------------------------------------------------------------------- /data/en_clients.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "first_name": "Emily", 4 | "last_name": "Smith", 5 | "phone_number": "123-456-7890", 6 | "email": "emilysmith@example.com", 7 | "client_id": "12345" 8 | }, 9 | { 10 | "first_name": "John", 11 | "last_name": "Doe", 12 | "phone_number": "098-765-4321", 13 | "email": "johndoe@example.com", 14 | "client_id": "67890" 15 | }, 16 | { 17 | "first_name": "Jane", 18 | "last_name": "Doe", 19 | "phone_number": "456-789-0123", 20 | "email": "janedoe@example.com", 21 | "client_id": "23456" 22 | }, 23 | { 24 | "first_name": "Robert", 25 | "last_name": "Johnson", 26 | "phone_number": "789-012-3456", 27 | "email": "robertjohnson@example.com", 28 | "client_id": "78901" 29 | }, 30 | { 31 | "first_name": "Mary", 32 | "last_name": "Davis", 33 | "phone_number": "012-345-6789", 34 | "email": "marydavis@example.com", 35 | "client_id": "34567" 36 | }, 37 | { 38 | "first_name": "James", 39 | "last_name": "Miller", 40 | "phone_number": "987-654-3210", 41 | "email": "jamesmiller@example.com", 42 | "client_id": "89012" 43 | }, 44 | { 45 | "first_name": "Patricia", 46 | "last_name": "Wilson", 47 | "phone_number": "654-321-0987", 48 | "email": "patriciawilson@example.com", 49 | "client_id": "45678" 50 | }, 51 
| { 52 | "first_name": "Michael", 53 | "last_name": "Moore", 54 | "phone_number": "321-098-7654", 55 | "email": "michaelmoore@example.com", 56 | "client_id": "90123" 57 | }, 58 | { 59 | "first_name": "Elizabeth", 60 | "last_name": "Taylor", 61 | "phone_number": "234-567-8901", 62 | "email": "elizabethtaylor@example.com", 63 | "client_id": "56789" 64 | }, 65 | { 66 | "first_name": "David", 67 | "last_name": "Anderson", 68 | "phone_number": "567-890-1234", 69 | "email": "davidanderson@example.com", 70 | "client_id": "23459" 71 | } 72 | ] -------------------------------------------------------------------------------- /data/en_conversations.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/en_conversations.zip -------------------------------------------------------------------------------- /data/en_dataframe.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/en_dataframe.parquet -------------------------------------------------------------------------------- /data/en_ground_truths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/en_ground_truths.parquet -------------------------------------------------------------------------------- /data/en_metadata.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/en_metadata.parquet -------------------------------------------------------------------------------- /data/es_agents.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 
3 | "first_name": "Carlos", 4 | "last_name": "Gomez", 5 | "agent_id": "A5432" 6 | }, 7 | { 8 | "first_name": "Marta", 9 | "last_name": "Rodriguez", 10 | "agent_id": "B7658" 11 | }, 12 | { 13 | "first_name": "Francisco", 14 | "last_name": "Lopez", 15 | "agent_id": "C3421" 16 | }, 17 | { 18 | "first_name": "Ana", 19 | "last_name": "Perez", 20 | "agent_id": "D5463" 21 | }, 22 | { 23 | "first_name": "Luis", 24 | "last_name": "Martinez", 25 | "agent_id": "E7654" 26 | }, 27 | { 28 | "first_name": "Maria", 29 | "last_name": "Hernandez", 30 | "agent_id": "F3214" 31 | }, 32 | { 33 | "first_name": "Pedro", 34 | "last_name": "Gonzalez", 35 | "agent_id": "G9876" 36 | }, 37 | { 38 | "first_name": "Josefa", 39 | "last_name": "Ramirez", 40 | "agent_id": "H6543" 41 | }, 42 | { 43 | "first_name": "Antonio", 44 | "last_name": "Sanchez", 45 | "agent_id": "I4321" 46 | }, 47 | { 48 | "first_name": "Isabel", 49 | "last_name": "Torres", 50 | "agent_id": "J7658" 51 | } 52 | ] -------------------------------------------------------------------------------- /data/es_calls_batch.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/es_calls_batch.parquet -------------------------------------------------------------------------------- /data/es_clients.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "first_name": "Carlos", 4 | "last_name": "Gomez", 5 | "phone_number": "678-234-5678", 6 | "email": "CarlosGomez@email.com", 7 | "client_id": "ID001" 8 | }, 9 | { 10 | "first_name": "Maria", 11 | "last_name": "Hernandez", 12 | "phone_number": "789-345-6789", 13 | "email": "MariaHernandez@email.com", 14 | "client_id": "ID002" 15 | }, 16 | { 17 | "first_name": "Luis", 18 | "last_name": "Rodriguez", 19 | "phone_number": "890-456-7890", 20 | "email": "LuisRodriguez@email.com", 21 | "client_id": "ID003" 22 | }, 23 
| { 24 | "first_name": "Ana", 25 | "last_name": "Sanchez", 26 | "phone_number": "901-567-8901", 27 | "email": "AnaSanchez@email.com", 28 | "client_id": "ID004" 29 | }, 30 | { 31 | "first_name": "Jose", 32 | "last_name": "Martinez", 33 | "phone_number": "012-678-9012", 34 | "email": "JoseMartinez@email.com", 35 | "client_id": "ID005" 36 | }, 37 | { 38 | "first_name": "Isabel", 39 | "last_name": "Lopez", 40 | "phone_number": "123-789-0123", 41 | "email": "IsabelLopez@email.com", 42 | "client_id": "ID006" 43 | }, 44 | { 45 | "first_name": "Miguel", 46 | "last_name": "Gonzalez", 47 | "phone_number": "234-890-1234", 48 | "email": "MiguelGonzalez@email.com", 49 | "client_id": "ID007" 50 | }, 51 | { 52 | "first_name": "Sofia", 53 | "last_name": "Perez", 54 | "phone_number": "345-901-2345", 55 | "email": "SofiaPerez@email.com", 56 | "client_id": "ID008" 57 | }, 58 | { 59 | "first_name": "Antonio", 60 | "last_name": "Ramirez", 61 | "phone_number": "456-012-3456", 62 | "email": "AntonioRamirez@email.com", 63 | "client_id": "ID009" 64 | }, 65 | { 66 | "first_name": "Carmen", 67 | "last_name": "Torres", 68 | "phone_number": "567-123-4567", 69 | "email": "CarmenTorres@email.com", 70 | "client_id": "ID010" 71 | } 72 | ] -------------------------------------------------------------------------------- /data/es_conversations.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/es_conversations.zip -------------------------------------------------------------------------------- /data/es_dataframe.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/es_dataframe.parquet -------------------------------------------------------------------------------- /data/es_ground_truths.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/es_ground_truths.parquet -------------------------------------------------------------------------------- /data/es_metadata.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/es_metadata.parquet -------------------------------------------------------------------------------- /data/sqlite.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/data/sqlite.db -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest~=5.4 2 | black~=24.8 3 | isort~=5.7 4 | flake8~=5.0 5 | -------------------------------------------------------------------------------- /example_data/agents.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/agents.zip -------------------------------------------------------------------------------- /example_data/batch_creation/calls_batch.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/batch_creation/calls_batch.zip -------------------------------------------------------------------------------- /example_data/clients.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/clients.zip -------------------------------------------------------------------------------- /example_data/conversation_generation/conversations.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/conversation_generation/conversations.zip -------------------------------------------------------------------------------- /example_data/conversation_generation/ground_truths.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/conversation_generation/ground_truths.zip -------------------------------------------------------------------------------- /example_data/conversation_generation/metadata.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/conversation_generation/metadata.zip -------------------------------------------------------------------------------- /example_data/text_to_audio/audio_files.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/text_to_audio/audio_files.zip -------------------------------------------------------------------------------- /example_data/text_to_audio/dataframe.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/example_data/text_to_audio/dataframe.zip 
-------------------------------------------------------------------------------- /images/call-center-readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/images/call-center-readme.png -------------------------------------------------------------------------------- /images/call-center-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-call-center/fac6cc4a5661ba469c28638a97af401a666ab031/images/call-center-workflow.png -------------------------------------------------------------------------------- /mlrun.env: -------------------------------------------------------------------------------- 1 | # default env vars, will be loaded once MLRun imports/starts 2 | # write here remote cluster credentials, addresses, etc. 3 | # uncomment the relevant lines and set with proper parameters 4 | 5 | # local/remote MLRun service address 6 | MLRUN_DBPATH=http://localhost:8080 7 | 8 | # if Nuclio not detected simulate it with mock 9 | MLRUN_MOCK_NUCLIO_DEPLOYMENT=auto 10 | 11 | # Iguazio cluster and V3IO credentials (for remote cluster) 12 | # V3IO_USERNAME= 13 | # V3IO_ACCESS_KEY= 14 | 15 | # AWS S3/services credentials 16 | # AWS_ACCESS_KEY_ID= 17 | # AWS_SECRET_ACCESS_KEY= 18 | 19 | # The Azure connection string which points at a storage account. For example: 20 | # DefaultEndpointsProtocol=https;AccountName=myAcct;AccountKey=XXXX;EndpointSuffix=core.windows.net 21 | # AZURE_STORAGE_CONNECTION_STRING= 22 | -------------------------------------------------------------------------------- /project_setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | from pathlib import Path 16 | import boto3 17 | import mlrun 18 | 19 | from src.calls_analysis.db_management import create_tables 20 | from src.common import ProjectSecrets 21 | 22 | CE_MODE = mlrun.mlconf.is_ce_mode() 23 | 24 | def setup( 25 | project: mlrun.projects.MlrunProject, 26 | ) -> mlrun.projects.MlrunProject: 27 | """ 28 | Creating the project for the demo. This function is expected to call automatically when calling the function 29 | `mlrun.get_or_create_project`. 30 | 31 | :param project: The project to set up. 32 | 33 | :returns: A fully prepared project for this demo. 
34 | """ 35 | # Unpack secrets from environment variables: 36 | openai_key = os.getenv(ProjectSecrets.OPENAI_API_KEY) 37 | openai_base = os.getenv(ProjectSecrets.OPENAI_API_BASE) 38 | mysql_url = os.getenv(ProjectSecrets.MYSQL_URL, "") 39 | 40 | # Unpack parameters: 41 | source = project.get_param(key="source") 42 | default_image = project.get_param(key="default_image", default=None) 43 | build_image = project.get_param(key="build_image", default=False) 44 | gpus = project.get_param(key="gpus", default=0) 45 | node_name = project.get_param(key="node_name", default=None) 46 | node_selector = project.get_param(key="node_selector", default=None) 47 | use_sqlite = project.get_param(key="use_sqlite", default=False) 48 | 49 | # Update sqlite data: 50 | if use_sqlite: 51 | # uploading db file to s3: 52 | if CE_MODE: 53 | s3 = boto3.client("s3") if not os.getenv("S3_ENDPOINT_URL") else boto3.client('s3', endpoint_url=os.getenv("S3_ENDPOINT_URL")) 54 | bucket_name = Path(mlrun.mlconf.artifact_path).parts[1] 55 | # Upload the file 56 | s3.upload_file( 57 | Filename="data/sqlite.db", 58 | Bucket=bucket_name, 59 | Key="sqlite.db", 60 | ) 61 | os.environ["S3_BUCKET_NAME"] = bucket_name 62 | else: 63 | os.environ["MYSQL_URL"] = f"sqlite:///{os.path.abspath('.')}/data/sqlite.db" 64 | mysql_url = os.environ["MYSQL_URL"] 65 | 66 | # Set the project git source: 67 | if source: 68 | print(f"Project Source: {source}") 69 | project.set_source(source=source, pull_at_runtime=True) 70 | 71 | # Set default image: 72 | if default_image: 73 | project.set_default_image(default_image) 74 | 75 | # Build the image: 76 | if build_image: 77 | print("Building default image for the demo:") 78 | _build_image(project=project, with_gpu=gpus) 79 | 80 | # Set the secrets: 81 | _set_secrets( 82 | project=project, 83 | openai_key=openai_key, 84 | openai_base=openai_base, 85 | mysql_url=mysql_url, 86 | bucket_name=os.getenv(ProjectSecrets.S3_BUCKET_NAME), 87 | ) 88 | 89 | # Refresh MLRun hub to the most 
up-to-date version: 90 | mlrun.get_run_db().get_hub_catalog(source_name="default", force_refresh=True) 91 | 92 | # Set the functions: 93 | _set_calls_generation_functions(project=project, node_name=node_name) 94 | _set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector) 95 | 96 | # Set the workflows: 97 | _set_workflows(project=project) 98 | 99 | # Set UI application: 100 | app = project.set_function( 101 | name="call-center-ui", 102 | kind="application", 103 | requirements=["vizro==0.1.38", "gunicorn"] 104 | ) 105 | # Set the internal application port to Vizro's default port 106 | app.set_internal_application_port(8050) 107 | 108 | # Set the command to run the Vizro application 109 | app.spec.command = "gunicorn" 110 | app.spec.args = [ 111 | "app:app", 112 | "--bind", 113 | "0.0.0.0:8050", 114 | "--chdir", 115 | f"home/mlrun_code/vizro" 116 | ] 117 | app.save() 118 | 119 | # Create the DB tables: 120 | create_tables() 121 | 122 | # Save and return the project: 123 | project.save() 124 | return project 125 | 126 | def _build_image(project: mlrun.projects.MlrunProject, with_gpu: bool): 127 | config = { 128 | "base_image": "mlrun/mlrun-gpu" if with_gpu else "mlrun/mlrun", 129 | "torch_index": "https://download.pytorch.org/whl/cu118" if with_gpu else "https://download.pytorch.org/whl/cpu", 130 | "onnx_package": "onnxruntime-gpu" if with_gpu else "onnxruntime" 131 | } 132 | # Define commands in logical groups while maintaining order 133 | system_commands = [ 134 | # Update apt-get to install ffmpeg (support audio file formats): 135 | "apt-get update -y && apt-get install ffmpeg -y" 136 | ] 137 | 138 | infrastructure_requirements = [ 139 | "pip install transformers==4.44.1", 140 | f"pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url {config['torch_index']}" 141 | ] 142 | 143 | huggingface_requirements = [ 144 | "pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 
optimum==1.13.2" 145 | ] 146 | 147 | gpu_specific_requirements = [ 148 | "pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/" 149 | ] if with_gpu else [] 150 | 151 | other_requirements = [ 152 | "pip install mlrun langchain==0.2.17 openai==1.58.1 langchain_community==0.2.19 pydub==0.25.1 streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0 htbuilder==0.6.2", 153 | "python -m spacy download en_core_web_lg", 154 | "pip install -U SQLAlchemy", 155 | "pip uninstall -y onnxruntime-gpu onnxruntime", 156 | f"pip install {config['onnx_package']}", 157 | ] 158 | 159 | # Combine commands in the required order 160 | commands = ( 161 | system_commands + 162 | infrastructure_requirements + 163 | huggingface_requirements + 164 | gpu_specific_requirements + 165 | other_requirements 166 | ) 167 | 168 | # Build the image 169 | assert project.build_image( 170 | base_image=config["base_image"], 171 | commands=commands, 172 | set_as_default=True, 173 | ) 174 | 175 | def _set_secrets( 176 | project: mlrun.projects.MlrunProject, 177 | openai_key: str, 178 | openai_base: str, 179 | mysql_url: str, 180 | bucket_name: str = None, 181 | ): 182 | # Must have secrets: 183 | project.set_secrets( 184 | secrets={ 185 | ProjectSecrets.OPENAI_API_KEY: openai_key, 186 | ProjectSecrets.OPENAI_API_BASE: openai_base, 187 | ProjectSecrets.MYSQL_URL: mysql_url, 188 | } 189 | ) 190 | if bucket_name: 191 | project.set_secrets( 192 | secrets={ 193 | ProjectSecrets.S3_BUCKET_NAME: bucket_name, 194 | } 195 | ) 196 | 197 | 198 | def _set_function( 199 | project: mlrun.projects.MlrunProject, 200 | func: str, 201 | name: str, 202 | kind: str, 203 | gpus: int = 0, 204 | node_name: str = None, 205 | with_repo: bool = None, 206 | image: str = None, 207 | node_selector: dict = None, 208 | apply_auto_mount: bool = True, 209 | ): 210 | # Set the given function: 211 | 
if with_repo is None: 212 | with_repo = not func.startswith("hub://") 213 | mlrun_function = project.set_function( 214 | func=func, name=name, kind=kind, with_repo=with_repo, image=image, 215 | ) 216 | 217 | # Configure GPUs according to the given kind: 218 | if gpus >= 1: 219 | if node_selector: 220 | mlrun_function.with_node_selection(node_selector=node_selector) 221 | if kind == "mpijob": 222 | # 1 GPU for each rank: 223 | mlrun_function.with_limits(gpus=1) 224 | mlrun_function.spec.replicas = gpus 225 | else: 226 | # All GPUs for the single job: 227 | mlrun_function.with_limits(gpus=gpus) 228 | # Set the node selection: 229 | elif node_name: 230 | mlrun_function.with_node_selection(node_name=node_name) 231 | 232 | if not CE_MODE and apply_auto_mount: 233 | # Apply auto mount: 234 | mlrun_function.apply(mlrun.auto_mount()) 235 | # Save: 236 | mlrun_function.save() 237 | 238 | 239 | def _set_calls_generation_functions( 240 | project: mlrun.projects.MlrunProject, 241 | node_name: str = None, 242 | ): 243 | # Client and agent data generator 244 | _set_function( 245 | project=project, 246 | func="hub://structured_data_generator", 247 | name="structured-data-generator", 248 | kind="job", 249 | node_name=node_name, 250 | apply_auto_mount=True, 251 | ) 252 | 253 | # Conversation generator: 254 | _set_function( 255 | project=project, 256 | func="./src/calls_generation/conversations_generator.py", 257 | name="conversations-generator", 258 | kind="job", 259 | node_name=node_name, 260 | apply_auto_mount=True, 261 | ) 262 | 263 | # Text to audio generator: 264 | _set_function( 265 | project=project, 266 | func="hub://text_to_audio_generator", 267 | name="text-to-audio-generator", 268 | kind="job", 269 | with_repo=False, 270 | apply_auto_mount=True, 271 | ) 272 | 273 | 274 | def _set_calls_analysis_functions( 275 | project: mlrun.projects.MlrunProject, 276 | gpus: int, 277 | node_name: str = None, 278 | node_selector: dict = None, 279 | ): 280 | # DB management: 281 | 
_set_function( 282 | project=project, 283 | func="./src/calls_analysis/db_management.py", 284 | name="db-management", 285 | kind="job", 286 | node_name=node_name, 287 | apply_auto_mount=True, 288 | ) 289 | 290 | # Speech diarization: 291 | _set_function( 292 | project=project, 293 | func="hub://silero_vad", 294 | name="silero-vad", 295 | kind="job", 296 | node_name=node_name, 297 | ) 298 | 299 | # Transcription: 300 | _set_function( 301 | project=project, 302 | func="hub://transcribe", 303 | name="transcription", 304 | kind="mpijob" if gpus > 1 else "job", 305 | gpus=gpus, 306 | node_name=node_name, 307 | node_selector=node_selector, 308 | ) 309 | 310 | # PII recognition: 311 | _set_function( 312 | project=project, 313 | func="hub://pii_recognizer", 314 | name="pii-recognition", 315 | kind="job", 316 | node_name=node_name, 317 | ) 318 | 319 | # Question answering: 320 | _set_function( 321 | project=project, 322 | func="hub://question_answering", 323 | name="question-answering", 324 | kind="job", 325 | gpus=gpus, 326 | node_name=node_name, 327 | node_selector=node_selector, 328 | ) 329 | 330 | # Postprocessing: 331 | _set_function( 332 | project=project, 333 | func="./src/calls_analysis/postprocessing.py", 334 | name="postprocessing", 335 | with_repo=False, 336 | kind="job", 337 | node_name=node_name, 338 | ) 339 | 340 | 341 | def _set_workflows(project: mlrun.projects.MlrunProject): 342 | project.set_workflow( 343 | name="calls-generation", workflow_path="./src/workflows/calls_generation.py" 344 | ) 345 | project.set_workflow( 346 | name="calls-analysis", workflow_path="./src/workflows/calls_analysis.py" 347 | ) 348 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | multi_line_output = 3 4 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | mlrun 2 | SQLAlchemy -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | project_name = "myproj" 4 | with open("README.md", "r", encoding="utf-8") as fh: 5 | long_description = fh.read() 6 | 7 | setup( 8 | name=project_name, 9 | packages=[project_name], 10 | package_dir={project_name: "src"}, 11 | version="0.1.0", 12 | description="my desc", 13 | author="Yaron", 14 | author_email="author@example.com", 15 | license="MIT", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | python_requires=">=3.9", 19 | ) 20 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /src/calls_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /src/calls_analysis/db_management.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import datetime 15 | import os 16 | import tempfile 17 | from typing import List, Optional, Tuple 18 | 19 | import boto3 20 | import mlrun 21 | import pandas as pd 22 | from sqlalchemy import ( 23 | Boolean, 24 | Date, 25 | Enum, 26 | ForeignKey, 27 | Integer, 28 | String, 29 | Time, 30 | bindparam, 31 | create_engine, 32 | insert, 33 | select, 34 | update, 35 | ) 36 | from sqlalchemy.orm import ( 37 | Mapped, 38 | declarative_base, 39 | mapped_column, 40 | relationship, 41 | sessionmaker, 42 | ) 43 | 44 | from src.common import CallStatus, ProjectSecrets 45 | 46 | ID_LENGTH = 32 47 | FILE_PATH_LENGTH = 500 48 | 49 | Base = declarative_base() 50 | 51 | 52 | class Client(Base): 53 | __tablename__ = "client" 54 | 55 | # Columns: 56 | client_id: Mapped[str] = mapped_column(String(length=ID_LENGTH), primary_key=True) 57 | first_name: Mapped[str] = mapped_column(String(length=30)) 58 | last_name: Mapped[str] = mapped_column(String(length=30)) 59 | phone_number: Mapped[str] = mapped_column(String(length=20)) 60 | email: Mapped[str] = mapped_column(String(length=50)) 61 | client_city: Mapped[str] = mapped_column(String(length=30)) 62 | latitude: Mapped[str] = mapped_column(String(length=20)) 63 | longitude: Mapped[str] = mapped_column(String(length=20)) 64 | 65 | # Many-to-one relationship: 66 | calls: Mapped[List["Call"]] = relationship(back_populates="client", lazy=True) 67 | 68 | 69 | class Agent(Base): 70 | __tablename__ = "agent" 71 | 72 | # Columns: 73 | agent_id: Mapped[str] = mapped_column(String(length=ID_LENGTH), primary_key=True) 74 | first_name: Mapped[str] = mapped_column(String(length=30)) 75 | last_name: Mapped[str] = mapped_column(String(length=30)) 76 | # phone: Mapped[str] = mapped_column(String(length=20)) 77 | # email: Mapped[str] = mapped_column(String(length=50)) 78 | 79 | # Many-to-one relationship: 80 | calls: Mapped[List["Call"]] = relationship(back_populates="agent", lazy=True) 81 | 82 | 83 | class Call(Base): 84 | __tablename__ = "call" 85 
| 86 | # Metadata: 87 | call_id: Mapped[str] = mapped_column(String(length=ID_LENGTH), primary_key=True) 88 | client_id: Mapped[str] = mapped_column( 89 | String(length=ID_LENGTH), ForeignKey("client.client_id") 90 | ) 91 | agent_id: Mapped[str] = mapped_column( 92 | String(length=ID_LENGTH), ForeignKey("agent.agent_id") 93 | ) 94 | date: Mapped[datetime.date] = mapped_column(Date()) 95 | time: Mapped[datetime.time] = mapped_column(Time()) 96 | status: Mapped[CallStatus] = mapped_column(Enum(CallStatus), nullable=True) 97 | # Files: 98 | audio_file: Mapped[str] = mapped_column(String(length=FILE_PATH_LENGTH)) 99 | # TODO: processed_audio_file: Mapped[Optional[str]] = mapped_column(String(length=FILE_PATH_LENGTH)) 100 | transcription_file: Mapped[Optional[str]] = mapped_column( 101 | String(length=FILE_PATH_LENGTH), 102 | nullable=True, 103 | default=None, 104 | ) 105 | anonymized_file: Mapped[Optional[str]] = mapped_column( 106 | String(length=FILE_PATH_LENGTH), 107 | nullable=True, 108 | default=None, 109 | ) 110 | # Analysis: 111 | topic: Mapped[Optional[str]] = mapped_column( 112 | String(length=50), 113 | nullable=True, 114 | default=None, 115 | ) 116 | summary: Mapped[Optional[str]] = mapped_column( 117 | String(length=1000), 118 | nullable=True, 119 | default=None, 120 | ) 121 | concern_addressed: Mapped[Optional[bool]] = mapped_column( 122 | Boolean(), 123 | nullable=True, 124 | default=None, 125 | ) 126 | client_tone: Mapped[Optional[str]] = mapped_column( 127 | String(length=20), 128 | nullable=True, 129 | default=None, 130 | ) 131 | agent_tone: Mapped[Optional[str]] = mapped_column( 132 | String(length=20), 133 | nullable=True, 134 | default=None, 135 | ) 136 | upsale_attempted: Mapped[Optional[bool]] = mapped_column( 137 | Boolean(), 138 | nullable=True, 139 | default=None, 140 | ) 141 | upsale_success: Mapped[Optional[bool]] = mapped_column( 142 | Boolean(), 143 | nullable=True, 144 | default=None, 145 | ) 146 | empathy: Mapped[Optional[int]] = 
mapped_column( 147 | Integer(), 148 | nullable=True, 149 | default=None, 150 | ) 151 | professionalism: Mapped[Optional[int]] = mapped_column( 152 | Integer(), 153 | nullable=True, 154 | default=None, 155 | ) 156 | kindness: Mapped[Optional[int]] = mapped_column( 157 | Integer(), 158 | nullable=True, 159 | default=None, 160 | ) 161 | effective_communication: Mapped[Optional[int]] = mapped_column( 162 | Integer(), 163 | nullable=True, 164 | default=None, 165 | ) 166 | active_listening: Mapped[Optional[int]] = mapped_column( 167 | Integer(), 168 | nullable=True, 169 | default=None, 170 | ) 171 | customization: Mapped[Optional[int]] = mapped_column( 172 | Integer(), 173 | nullable=True, 174 | default=None, 175 | ) 176 | 177 | # One-to-many relationships: 178 | client: Mapped["Client"] = relationship(back_populates="calls", lazy=True) 179 | agent: Mapped["Agent"] = relationship(back_populates="calls", lazy=True) 180 | 181 | 182 | class DBEngine: 183 | def __init__(self, context: mlrun.MLClientCtx): 184 | self.bucket_name = context.get_secret(key=ProjectSecrets.S3_BUCKET_NAME) 185 | self.db_url = context.get_secret(key=ProjectSecrets.MYSQL_URL) 186 | self.temp_file = None 187 | self.engine = self._create_engine() 188 | 189 | def get_session(self): 190 | return sessionmaker(self.engine) 191 | 192 | def update_db(self): 193 | if self.bucket_name: 194 | s3 = boto3.client("s3") 195 | s3.upload_file(self.temp_file.name, self.bucket_name, "sqlite.db") 196 | 197 | def _create_engine(self): 198 | if self.bucket_name: 199 | # Create a temporary file that will persist throughout the object's lifetime 200 | self.temp_file = tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) 201 | self.temp_file.close() # Close the file but keep the name 202 | 203 | s3 = boto3.client("s3") 204 | try: 205 | s3.download_file(self.bucket_name, "sqlite.db", self.temp_file.name) 206 | except Exception as e: 207 | print(f"Warning: Could not download database from S3: {e}") 208 | 209 | return 
create_engine(f"sqlite:///{self.temp_file.name}") 210 | else: 211 | return create_engine(url=self.db_url) 212 | 213 | def __del__(self): 214 | # Clean up the temporary file when the object is destroyed 215 | if self.temp_file: 216 | try: 217 | os.unlink(self.temp_file.name) 218 | except: 219 | pass 220 | 221 | 222 | def create_tables(): 223 | """ 224 | Create the call center schema tables for when creating or loading the MLRun project. 225 | """ 226 | # Create an engine: 227 | engine = DBEngine(mlrun.get_or_create_ctx("create_tables")) 228 | 229 | # Create the schema's tables: 230 | Base.metadata.create_all(engine.engine) 231 | 232 | engine.update_db() 233 | 234 | 235 | def insert_clients(context: mlrun.MLClientCtx, clients: list): 236 | # Create an engine: 237 | engine = DBEngine(context) 238 | 239 | # Initialize a session maker: 240 | session = engine.get_session() 241 | 242 | # Insert the new calls into the table and commit: 243 | with session.begin() as sess: 244 | sess.execute(insert(Client), clients) 245 | 246 | engine.update_db() 247 | 248 | 249 | def insert_agents(context: mlrun.MLClientCtx, agents: list): 250 | # Create an engine: 251 | engine = DBEngine(context) 252 | 253 | # Initialize a session maker: 254 | session = engine.get_session() 255 | 256 | # Insert the new calls into the table and commit: 257 | with session.begin() as sess: 258 | sess.execute(insert(Agent), agents) 259 | 260 | engine.update_db() 261 | 262 | 263 | def insert_calls( 264 | context: mlrun.MLClientCtx, calls: pd.DataFrame 265 | ) -> Tuple[pd.DataFrame, List[str]]: 266 | # Create an engine: 267 | engine = DBEngine(context) 268 | 269 | # Initialize a session maker: 270 | session = engine.get_session() 271 | 272 | # Cast data from dataframe to a list of dictionaries: 273 | records = calls.to_dict(orient="records") 274 | 275 | # Insert the new calls into the table and commit: 276 | with session.begin() as sess: 277 | sess.execute(insert(Call), records) 278 | 279 | engine.update_db() 
280 | 281 | # Return the metadata and audio files: 282 | audio_files = list(calls["audio_file"]) 283 | return calls, audio_files 284 | 285 | 286 | def update_calls( 287 | context: mlrun.MLClientCtx, 288 | status: str, 289 | table_key: str, 290 | data_key: str, 291 | data: pd.DataFrame, 292 | ): 293 | # Create an engine: 294 | engine = DBEngine(context) 295 | 296 | # Initialize a session maker: 297 | session = engine.get_session() 298 | 299 | # Add the status to the dataframe: 300 | data["status"] = [CallStatus(status)] * len(data) 301 | 302 | # Make sure keys are not duplicates (so we can update by the key with `bindparam`): 303 | if data_key == table_key: 304 | data_key += "_2" 305 | data.rename(columns={table_key: data_key}, inplace=True) 306 | 307 | # Cast data from dataframe to a list of dictionaries: 308 | data = data.to_dict(orient="records") 309 | 310 | # Insert the new calls into the table and commit: 311 | with session.begin() as sess: 312 | sess.connection().execute( 313 | update(Call).where(getattr(Call, table_key) == bindparam(data_key)), data 314 | ) 315 | 316 | engine.update_db() 317 | 318 | 319 | def get_calls() -> pd.DataFrame: 320 | context = mlrun.get_or_create_ctx("get_calls") 321 | # Create an engine: 322 | engine = DBEngine(context) 323 | 324 | # Initialize a session maker: 325 | session = engine.get_session() 326 | 327 | # Select all calls: 328 | with session.begin() as sess: 329 | calls = pd.read_sql(select(Call), sess.connection()) 330 | 331 | return calls 332 | 333 | 334 | def get_agents(context: mlrun.MLClientCtx) -> list: 335 | # Create an engine: 336 | engine = DBEngine(context) 337 | 338 | # Initialize a session maker: 339 | session = engine.get_session() 340 | 341 | # Select all calls: 342 | with session.begin() as sess: 343 | agents = pd.read_sql(select(Agent), sess.connection()) 344 | return agents 345 | 346 | 347 | def get_clients(context: mlrun.MLClientCtx) -> list: 348 | # Create an engine: 349 | engine = DBEngine(context) 350 | 
351 | # Initialize a session maker: 352 | session = engine.get_session() 353 | 354 | # Select all calls: 355 | with session.begin() as sess: 356 | clients = pd.read_sql(select(Client), sess.connection()) 357 | return clients 358 | -------------------------------------------------------------------------------- /src/calls_analysis/postprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def postprocess_answers(answers: pd.DataFrame): 5 | for column in ["concern_addressed", "upsale_attempted", "upsale_success"]: 6 | answers[column] = answers[column].apply(lambda x: "yes" in x.casefold()) 7 | for column in ["client_tone", "agent_tone"]: 8 | answers[column] = answers[column].apply( 9 | lambda x: "Positive" if "Positive" in x else x 10 | ) 11 | answers[column] = answers[column].apply( 12 | lambda x: "Negative" if "Negative" in x else x 13 | ) 14 | answers[column] = answers[column].apply( 15 | lambda x: "Neutral" if "Neutral" in x else x 16 | ) 17 | return answers 18 | -------------------------------------------------------------------------------- /src/calls_generation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .skip import skip_and_import_local_data 15 | -------------------------------------------------------------------------------- /src/calls_generation/conversations_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import datetime 15 | import hashlib 16 | import os 17 | import pathlib 18 | import random 19 | import tempfile 20 | from typing import Tuple 21 | 22 | import mlrun 23 | import pandas as pd 24 | import tqdm 25 | from langchain.chat_models import ChatOpenAI 26 | 27 | from src.common import TONES, TOPICS, ProjectSecrets 28 | 29 | #: The approximate amount of words in one minute. 30 | WORDS_IN_1_MINUTE = 240 31 | 32 | 33 | def generate_conversations( 34 | context: mlrun.MLClientCtx, 35 | amount: int, 36 | agent_data: pd.DataFrame, 37 | client_data: pd.DataFrame, 38 | output_directory: str = None, 39 | model_name: str = "gpt-3.5-turbo", 40 | language: str = "en", 41 | min_time: int = 2, 42 | max_time: int = 5, 43 | from_date: str = "01.01.2023", 44 | to_date: str = "01.03.2023", 45 | from_time: str = "09:00", 46 | to_time: str = "17:00", 47 | ) -> Tuple[str, pd.DataFrame, pd.DataFrame]: 48 | """ 49 | Generates a list of conversations between an internet provider call center and a customer. 50 | 51 | :param context: The MLRun context. 
52 | :param amount: The number of conversations to generate. 53 | :param agent_data: The agent data to use for the conversations. 54 | :param client_data: The client data to use for the conversations. 55 | :param output_directory: The directory to save the conversations to. 56 | :param model_name: The name of the model to use for conversation generation. 57 | You should choose one of GPT-4 or GPT-3.5 from the list here: 58 | https://platform.openai.com/docs/models. Default: 'gpt-3.5-turbo'. 59 | :param language: The language to use for the generated conversation text. 60 | :param min_time: Minimum time of conversation in minutes. 61 | Will be used approximately to generate the minimum words with the following assessment: 62 | 240 words are equal to one minute. Default: 2. 63 | :param max_time: Maximum time of conversation in minutes. 64 | Will be used approximately to generate the maximum words with the following assessment: 65 | 240 words are equal to one minute. Default: 5. 66 | :param from_date: The minimum date of the conversation. 67 | :param to_date: The maximum date of the conversation. 68 | :param from_time: The minimum time (HH:MM) of the conversation. 69 | :param to_time: The maximum time (HH:MM) of the conversation. 
70 | """ 71 | # Get the minimum and maximum amount of words: 72 | min_words = WORDS_IN_1_MINUTE * min_time 73 | max_words = WORDS_IN_1_MINUTE * max_time 74 | 75 | # Get the minimum and maximum dates and times: 76 | min_time = datetime.datetime.strptime(from_time, "%H:%M") 77 | max_time = datetime.datetime.strptime(to_time, "%H:%M") 78 | min_date = datetime.datetime.strptime(from_date, "%m.%d.%Y").date() 79 | max_date = datetime.datetime.strptime(to_date, "%m.%d.%Y").date() 80 | 81 | # Create the concern addressed options: 82 | concern_addressed_options = { 83 | True: "", 84 | False: "Don't", 85 | } 86 | 87 | # Create the agent upsales options: 88 | agent_upsales_options = { 89 | "Doesn't try": "Doesn't try to upsale the customer on more services.", 90 | "Tries and doesn't succeed": "Tries to upsale the customer on more services, and doesn't succeed", 91 | "Tries and succeeds": "Tries to upsale the customer on more services, and succeeds", 92 | } 93 | 94 | # Create the upsale mapping: 95 | upsale_mapping = { 96 | "Doesn't try": [False, False], 97 | "Tries and doesn't succeed": [True, False], 98 | "Tries and succeeds": [True, True], 99 | } 100 | 101 | # Create the prompt structure: 102 | prompt_structure = ( 103 | "Generate a conversation between an internet provider call center agent named {agent_name} from (“Iguazio Internet”) and " 104 | "a client named {client_name} with email: {client_email} and phone number: {client_phone} in {language} except 'Agent' and 'Client' prefixes which are constants.\n" 105 | "Format the conversation as follow:\n" 106 | "Agent: \n" 107 | "Client: \n" 108 | "The conversations has to include at least {min_words} words and no more than {max_words} words.\n" 109 | "The call must include the following steps: \n" 110 | "1. Opening (greeting and customer details validation and confirmation)\n" 111 | "2. Presenting the problem by the customer" 112 | "3. The agent {concern_addressed} address the client's concern.\n" 113 | "4. 
The Agent {agent_upsales}" 114 | "5. Summerizing and closing the call" 115 | "It has to be about a client who is calling to discuss about {topic}.\n" 116 | "Do not add any descriptive tag and do not mark the end of the conversation with [End of conversation].\n" 117 | "Use ... for hesitation.\n" 118 | "The client needs to have a {client_tone} tone.\n" 119 | "The agent needs to have a {agent_tone}.\n" 120 | "Remove from the final output any word inside parentheses of all types. \n" 121 | "use the following levels of these attributes while describing the agent's role: \n" 122 | "Empathy {empathy}, Professionalism {professionalism}, Kindness {kindness}, \n" 123 | "Effective Communication {effective_communication}, Active listening {active_listening}, Customization {customization}." 124 | ) 125 | 126 | # Load the OpenAI model using langchain: 127 | os.environ["OPENAI_API_KEY"] = context.get_secret(key=ProjectSecrets.OPENAI_API_KEY) 128 | os.environ["OPENAI_API_BASE"] = context.get_secret( 129 | key=ProjectSecrets.OPENAI_API_BASE 130 | ) 131 | llm = ChatOpenAI(model=model_name) 132 | 133 | # Create the output directory: 134 | if output_directory is None: 135 | output_directory = tempfile.mkdtemp() 136 | output_directory = pathlib.Path(output_directory) 137 | if not output_directory.exists(): 138 | output_directory.mkdir(parents=True, exist_ok=True) 139 | 140 | # Start generating conversations: 141 | conversations = [] 142 | ground_truths = [] 143 | for _ in tqdm.tqdm(range(amount), desc="Generating"): 144 | # Randomize the conversation metadata: 145 | conversation_id = _generate_id() 146 | date = _get_random_date(min_date=min_date, max_date=max_date) 147 | time = _get_random_time(min_time=min_time, max_time=max_time) 148 | 149 | # Randomly select the conversation parameters: 150 | concern_addressed = random.choice(list(concern_addressed_options.keys())) 151 | agent_upsales = random.choice(list(agent_upsales_options.keys())) 152 | client_tone = random.choice(TONES) 153 | 
agent_tone = random.choice(TONES) 154 | topic = random.choice(TOPICS) 155 | agent = agent_data.sample().to_dict(orient="records")[0] 156 | client = client_data.sample().to_dict(orient="records")[0] 157 | 158 | # Generate levels os different agent attributes: 159 | empathy = random.randint(1, 5) 160 | professionalism = random.randint(1, 5) 161 | kindness = random.randint(1, 5) 162 | effective_communication = random.randint(1, 5) 163 | active_listening = random.randint(1, 5) 164 | customization = random.randint(1, 5) 165 | 166 | # Create the prompt: 167 | prompt = prompt_structure.format( 168 | language=language, 169 | min_words=min_words, 170 | max_words=max_words, 171 | topic=topic, 172 | concern_addressed=concern_addressed_options[concern_addressed], 173 | agent_upsales=agent_upsales_options[agent_upsales], 174 | client_tone=client_tone, 175 | agent_tone=agent_tone, 176 | agent_name=f"{agent['first_name']} {agent['last_name']}", 177 | client_name=f"{client['first_name']} {client['last_name']}", 178 | client_email=client["email"], 179 | client_phone=client["phone_number"], 180 | empathy=empathy, 181 | professionalism=professionalism, 182 | kindness=kindness, 183 | effective_communication=effective_communication, 184 | active_listening=active_listening, 185 | customization=customization, 186 | ) 187 | 188 | # Generate the conversation: 189 | conversation = llm.predict(text=prompt) 190 | # Remove redundant newlines and spaces: 191 | conversation = "".join( 192 | [ 193 | line 194 | for line in conversation.strip().splitlines(keepends=True) 195 | if line.strip("\n").strip() 196 | ] 197 | ) 198 | # Save to file: 199 | conversation_text_path = output_directory / f"{conversation_id}.txt" 200 | with open(conversation_text_path, "w") as fp: 201 | fp.write(conversation) 202 | 203 | # Collect to the conversations and ground truths lists: 204 | conversations.append( 205 | [ 206 | conversation_id, 207 | conversation_text_path.name, 208 | client["client_id"], 209 | 
agent["agent_id"], 210 | date, 211 | time, 212 | ] 213 | ) 214 | ground_truths.append( 215 | [ 216 | conversation_id, 217 | language, 218 | topic, 219 | concern_addressed, 220 | upsale_mapping[agent_upsales][0], 221 | upsale_mapping[agent_upsales][1], 222 | client_tone, 223 | agent_tone, 224 | client["client_id"], 225 | agent["agent_id"], 226 | empathy, 227 | professionalism, 228 | kindness, 229 | effective_communication, 230 | active_listening, 231 | customization, 232 | ] 233 | ) 234 | 235 | # Cast the conversations and ground truths into a dataframe: 236 | conversations = pd.DataFrame( 237 | conversations, 238 | columns=["call_id", "text_file", "client_id", "agent_id", "date", "time"], 239 | ) 240 | ground_truths = pd.DataFrame( 241 | ground_truths, 242 | columns=[ 243 | "call_id", 244 | "language", 245 | "topic", 246 | "concern_addressed", 247 | "agent_tries_upsale", 248 | "agent_succeeds_upsale", 249 | "client_tone", 250 | "agent_tone", 251 | "agent_id", 252 | "client_id", 253 | "empathy", 254 | "professionalism", 255 | "kindness", 256 | "effective_communication", 257 | "active_listening", 258 | "customization", 259 | ], 260 | ) 261 | 262 | return str(output_directory), conversations, ground_truths 263 | 264 | 265 | def _get_random_time( 266 | min_time: datetime.datetime, max_time: datetime.datetime 267 | ) -> datetime.time: 268 | if max_time.hour <= min_time.hour: 269 | max_time += datetime.timedelta(days=1) 270 | return ( 271 | min_time 272 | + datetime.timedelta( 273 | seconds=random.randint(0, int((max_time - min_time).total_seconds())), 274 | ) 275 | ).time() 276 | 277 | 278 | def _get_random_date(min_date, max_date) -> datetime.date: 279 | return min_date + datetime.timedelta( 280 | days=random.randint(0, int((max_date - min_date).days)), 281 | ) 282 | 283 | 284 | def create_batch_for_analysis( 285 | conversations_data: pd.DataFrame, audio_files: pd.DataFrame 286 | ) -> pd.DataFrame: 287 | conversations_data = conversations_data.join( 288 | 
other=audio_files.set_index(keys="text_file"), on="text_file" 289 | ) 290 | conversations_data.drop(columns="text_file", inplace=True) 291 | conversations_data.dropna(inplace=True) 292 | return conversations_data 293 | 294 | 295 | def _generate_id() -> str: 296 | return hashlib.md5(str(datetime.datetime.now()).encode("utf-8")).hexdigest() 297 | -------------------------------------------------------------------------------- /src/calls_generation/skip.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from pathlib import Path 15 | 16 | import mlrun 17 | import pandas as pd 18 | import yaml 19 | from mlrun.artifacts import ArtifactSpec, DatasetArtifact 20 | from sqlalchemy import insert 21 | 22 | from src.calls_analysis.db_management import Agent, Call, Client, create_tables, DBEngine 23 | 24 | 25 | def skip_and_import_local_data(language: str): 26 | """ 27 | This function logs example data to the database and to the project. 28 | Call this function from the notebook in order to skip the calls generation workflow. 
29 | """ 30 | # Get the example data directory: 31 | example_data_dir = Path("data") 32 | # Get the project: 33 | project = mlrun.get_current_project() 34 | 35 | # clean and recreate database tables: 36 | engine = DBEngine(mlrun.get_or_create_ctx("skip")) 37 | Call.__table__.drop(engine.engine) 38 | Client.__table__.drop(engine.engine) 39 | Agent.__table__.drop(engine.engine) 40 | create_tables() 41 | print("- Initialized tables") 42 | 43 | # log agents and clients data 44 | json_spec = ArtifactSpec( 45 | unpackaging_instructions={ 46 | "packager_name": "ListPackager", 47 | "object_type": "builtins.list", 48 | "artifact_type": "file", 49 | "instructions": {"file_format": "json"}, 50 | } 51 | ) 52 | zip_spec = ArtifactSpec( 53 | unpackaging_instructions={ 54 | "packager_name": "StrPackager", 55 | "object_type": "builtins.str", 56 | "artifact_type": "path", 57 | "instructions": {"archive_format": "zip", "is_directory": "true"}, 58 | } 59 | ) 60 | parquet_spec = ArtifactSpec( 61 | unpackaging_instructions={ 62 | "packager_name": "PandasDataFramePackager", 63 | "object_type": "pandas.core.frame.DataFrame", 64 | "artifact_type": "dataset", 65 | "instructions": {}, 66 | } 67 | ) 68 | # load agent and client data: 69 | agents = project.log_artifact( 70 | item="agent-data-generator_agents", 71 | spec=json_spec, 72 | local_path=str(example_data_dir / f"{language}_agents.json"), 73 | db_key="agent-data-generator_agents", 74 | ) 75 | agents = agents.to_dataitem() 76 | agents = yaml.load(agents.get(), Loader=yaml.FullLoader) 77 | clients = project.log_artifact( 78 | item="client-data-generator_clients", 79 | spec=json_spec, 80 | local_path=str(example_data_dir / f"{language}_clients.json"), 81 | db_key="client-data-generator_clients", 82 | ) 83 | clients = clients.to_dataitem() 84 | clients = yaml.load(clients.get(), Loader=yaml.FullLoader) 85 | 86 | # insert agent and client data to database: 87 | _insert_agents_and_clients_to_db(agents, clients) 88 | print("- agents and 
clients inserted") 89 | 90 | # log zip files 91 | remote_zip_path = mlrun.get_sample_path(f"call-demo/{language}_audio_files.zip") 92 | conversations_art = project.log_artifact( 93 | item="conversation-generation_conversations", 94 | spec=zip_spec, 95 | local_path=str(example_data_dir / f"{language}_conversations.zip"), 96 | db_key="conversation-generation_conversations", 97 | ) 98 | audio_files_art = project.log_artifact( 99 | item="text-to-audio_audio_files", 100 | spec=zip_spec, 101 | target_path=remote_zip_path, 102 | db_key="text-to-audio_audio_files", 103 | ) 104 | # log parquet files 105 | calls_batch_df = pd.read_parquet( 106 | str(example_data_dir / f"{language}_calls_batch.parquet") 107 | ) 108 | dataframe_df = pd.read_parquet( 109 | str(example_data_dir / f"{language}_dataframe.parquet") 110 | ) 111 | ground_truths_df = pd.read_parquet( 112 | str(example_data_dir / f"{language}_ground_truths.parquet") 113 | ) 114 | metadata_df = pd.read_parquet( 115 | str(example_data_dir / f"{language}_metadata.parquet") 116 | ) 117 | 118 | project.log_artifact( 119 | item=DatasetArtifact(key="batch-creation_calls_batch", df=calls_batch_df), 120 | spec=parquet_spec, 121 | local_path=str(example_data_dir / f"{language}_calls_batch.parquet"), 122 | ) 123 | project.log_artifact( 124 | item=DatasetArtifact(key="text-to-audio_dataframe", df=dataframe_df), 125 | spec=parquet_spec, 126 | ) 127 | project.log_artifact( 128 | item=DatasetArtifact( 129 | key="conversation-generation_ground_truths", df=ground_truths_df 130 | ), 131 | spec=parquet_spec, 132 | ) 133 | project.log_artifact( 134 | item=DatasetArtifact(key="conversation-generation_metadata", df=metadata_df), 135 | spec=parquet_spec, 136 | ) 137 | print("*** first workflow skipped successfully ***") 138 | 139 | 140 | def _insert_agents_and_clients_to_db(agents: list, clients: list): 141 | # Create an engine: 142 | engine = DBEngine(mlrun.get_or_create_ctx("skip")) 143 | 144 | # Initialize a session maker: 145 | session = 
engine.get_session() 146 | 147 | # Insert the new calls into the table and commit: 148 | with session.begin() as sess: 149 | sess.execute(insert(Agent), agents) 150 | sess.execute(insert(Client), clients) 151 | 152 | 153 | # TODO: change to export the actual data and not the artifacts 154 | def save_current_example_data(): 155 | project = mlrun.get_current_project() 156 | export_dir = Path("example_data") 157 | if not export_dir.exists(): 158 | export_dir.mkdir(parents=True, exist_ok=True) 159 | 160 | for artifact_name, target_path in [ 161 | ("client-data-generator_clients", "clients.zip"), 162 | ("agent-data-generator_agents", "agents.zip"), 163 | ( 164 | "conversation-generation_conversations", 165 | "conversation_generation/conversations.zip", 166 | ), 167 | ("conversation-generation_metadata", "conversation_generation/metadata.zip"), 168 | ( 169 | "conversation-generation_ground_truths", 170 | "conversation_generation/ground_truths.zip", 171 | ), 172 | ("text-to-audio_audio_files", "text_to_audio/audio_files.zip"), 173 | ("text-to-audio_dataframe", "text_to_audio/dataframe.zip"), 174 | ("batch-creation_calls_batch", "batch_creation/calls_batch.zip"), 175 | ]: 176 | export_path = export_dir / target_path 177 | if not export_path.exists(): 178 | export_path.parent.mkdir(parents=True, exist_ok=True) 179 | project.get_artifact(artifact_name).export(f"example_data/{target_path}") 180 | print(f"- exported {artifact_name} to {target_path}") 181 | print("*** all artifacts exported successfully ***") 182 | -------------------------------------------------------------------------------- /src/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum


class ProjectSecrets:
    """Names of the MLRun project secrets used across the demo."""

    OPENAI_API_KEY = "OPENAI_API_KEY"
    OPENAI_API_BASE = "OPENAI_API_BASE"
    MYSQL_URL = "MYSQL_URL"
    MYSQL_CONNECT_ARGS = "MYSQL_CONNECT_ARGS"
    S3_BUCKET_NAME = "S3_BUCKET_NAME"


class CallStatus(enum.Enum):
    """Lifecycle states a call row moves through during the analysis workflow."""

    CREATED = "Created"
    AUDIO_PROCESSED = "Audio processed"
    SPEECH_DIARIZED = "Speech diarized"
    TRANSCRIBED = "Transcribed"
    TRANSLATED = "Translated"
    ANONYMIZED = "Anonymized"
    ANALYZED = "Analyzed"


#: Topics used both for generating conversations and for classifying them.
TOPICS = [
    "slow internet speed",
    "billing discrepancies",
    "account login problems",
    "setting up a new device",
    "phishing or malware concerns",
    "scheduled maintenance notifications",
    "service upgrades",
    "negotiating pricing",
    "canceling service",
    "customer service feedback",
]

#: Canonical tone labels — analysis postprocessing matches these case-sensitively.
TONES = [
    "Positive",
    "Neutral",
    "Negative",
]
--------------------------------------------------------------------------------
/src/vizro.py:
--------------------------------------------------------------------------------
import os
import shutil
import tarfile
from pathlib import Path

import boto3
import mlrun
import mlrun.common.schemas
import pandas as pd

from src.calls_analysis.db_management import get_calls, get_clients

# Mapping from database column names to the display names used in the Vizro dashboard.
# NOTE(review): "anonymized_file" maps to "text_file" (a column name, not a display
# name) — presumably the dashboard expects that exact key; confirm before changing.
COLUMNS_MAPPING = {
    "active_listening": "Active Listening",
    "agent_id": "Agent ID",
    "agent_tone": "Agent Tone",
    "date": "Call Date",
"client_id": "Caller ID", 19 | "client_tone": "Client Tone", 20 | "concern_addressed": "Concern Addressed", 21 | "customization": "Customization", 22 | "effective_communication": "Effective Communication", 23 | "empathy": "Empathy", 24 | "kindness": "Kindness", 25 | "professionalism": "Professionalism", 26 | "summary": "Summary", 27 | "time": "Time", 28 | "topic": "Topic", 29 | "upsale_attempted": "Upsale Attempted", 30 | "upsale_success": "Upsale Success", 31 | "client_city": "Caller City", 32 | "anonymized_file": "text_file", 33 | } 34 | 35 | 36 | def deploy_vizro_application(): 37 | dir_name = "vizro" 38 | 39 | # Prepare the dataframe for vizro: 40 | _prepare_vizro_source(dir_name) 41 | print("Application source code ready for deployment.") 42 | 43 | # Archive 44 | bucket_name = os.getenv("S3_BUCKET_NAME") 45 | if bucket_name: 46 | _upload_to_s3(dir_name) 47 | # Add the source code to the application 48 | src_path = f"s3://{bucket_name}/{dir_name}.tar.gz" 49 | print(f"Uploading {src_path} to {bucket_name}") 50 | else: 51 | # Set the source path to V3IO 52 | src_path = f'v3io:///users/{os.environ["V3IO_USERNAME"]}/{os.getcwd().replace("/User/", "")}/{dir_name}.tar.gz' 53 | print(f"Configuring V3IO {src_path} to UI") 54 | project = mlrun.get_current_project() 55 | app = project.get_function("call-center-ui") 56 | app.with_source_archive(src_path, pull_at_runtime=False) 57 | 58 | # Deploy the application 59 | app.deploy(force_build=True, create_default_api_gateway=False, with_mlrun=False) 60 | app.create_api_gateway( 61 | name="call-center-ui", 62 | direct_port_access=True, 63 | set_as_default=True, 64 | authentication_mode=mlrun.common.schemas.api_gateway.APIGatewayAuthenticationMode.none, 65 | ) 66 | print("Application deployed successfully!") 67 | 68 | 69 | def _prepare_vizro_source(dir_name: str): 70 | clients_df = get_clients(mlrun.get_or_create_ctx("mlrun")) 71 | calls_df = get_calls() 72 | vizro_df = pd.merge( 73 | calls_df, 74 | clients_df[["client_id", 
"client_city", "latitude", "longitude"]], 75 | on="client_id", 76 | ) 77 | vizro_df = vizro_df.rename(columns=COLUMNS_MAPPING) 78 | vizro_df.to_csv("vizro/data.csv") 79 | 80 | # add text and audio files to vizro: 81 | shutil.copytree("outputs", "vizro/outputs", dirs_exist_ok=True) 82 | 83 | # Write the application code to a file 84 | app_dir = "vizro" 85 | 86 | # Create an archive of the application code 87 | archive_name = f"{dir_name}.tar.gz" 88 | with tarfile.open(archive_name, "w:gz") as tar: 89 | tar.add(app_dir) 90 | 91 | 92 | def _upload_to_s3(dir_name: str): 93 | # uploading db file to s3: 94 | s3 = boto3.client("s3") 95 | bucket_name = Path(mlrun.mlconf.artifact_path).parts[1] 96 | 97 | # Upload the file 98 | s3.upload_file( 99 | Filename=f"{dir_name}.tar.gz", 100 | Bucket=bucket_name, 101 | Key=f"{dir_name}.tar.gz", 102 | ) 103 | -------------------------------------------------------------------------------- /src/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /src/workflows/calls_analysis.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Iguazio 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List 15 | 16 | import kfp 17 | import mlrun 18 | from kfp import dsl 19 | 20 | from src.common import TONES, TOPICS, CallStatus 21 | 22 | QUESTIONS = [ 23 | [ 24 | f"1. Classify the topic of the text from the following list (choose one): {TOPICS}", 25 | "2. Write a long summary of the text, focus on the topic (max 50 words).", 26 | "3. Was the Client's concern addressed, (choose only one) [Yes, No]?", 27 | f"4. Was the Client tone (choose only one, if not sure choose Neutral) {TONES}? ", 28 | f"5. Was the Call Center Agent tone (choose only one, if not sure choose Neutral) {TONES}?", 29 | ], 30 | [ 31 | "1. Did the agent try to upsale the customer (choose only one) [Yes, No]? (sell any additional product or service)", 32 | "2. If the agent indeed try to upsale the client, did he succeed (choose only one) [Yes, No]? if he didn't try' answer No", 33 | "3. Rate the agent's level of empathy (The ability to understand and share the feelings of others) on a scale of 1-5.", 34 | "4. Rate the agent's level of professionalism (Conducting oneself in a way that is appropriate for the workplace) on a scale of 1-5.", 35 | "5. 
Rate the agent's level of kindness (The quality of being friendly, generous, and considerate) on a scale of 1-5.", 36 | "6. Rate the agent's level of effective communication (The ability to convey information clearly and concisely) on a scale of 1-5.", 37 | "7. Rate the agent's level of active listening (The process of paying attention to and understanding what someone is saying) on a scale of 1-5.", 38 | "8. Rate the agent's level of customization (The process of tailoring something to the specific needs or preferences of an individual) on a scale of 1-5.", 39 | ], 40 | ] 41 | DEMO_CALL = ( 42 | "Agent: Good afternoon, you've reached [Internet Service Provider] customer support. I'm Megan. How can I assist " 43 | "you today?\n" 44 | "Customer: Hello, Megan. This is Lisa. I've noticed some billing discrepancies on my last statement.\n" 45 | "Agent: I'm sorry to hear that, Lisa. I'd be happy to help you with that. Could you please provide me with your " 46 | "account number or phone number associated with your account?\n" 47 | "Customer: Of course, my account number is 123456789.\n" 48 | "Agent: Thank you, Lisa. Let me pull up your account. I see the billing discrepancies you mentioned. It appears " 49 | "there was an error in the charges. I apologize for the inconvenience.\n" 50 | "Customer: Thank you for acknowledging the issue, Megan. Can you please help me get it resolved?\n" 51 | "Agent: Absolutely, Lisa. I've made note of the discrepancies, and I'll escalate this to our billing department " 52 | "for investigation and correction. You should see the adjustments on your next statement.\n" 53 | "Customer: That sounds good, Megan. I appreciate your help.\n" 54 | "Agent: You're welcome, Lisa. If you have any more questions or concerns in the future, please don't hesitate to " 55 | "reach out. Is there anything else I can assist you with today?\n" 56 | "Customer: No, that's all. Thank you for your assistance, Megan.\n" 57 | "Agent: Not a problem, Lisa. 
Have a wonderful day, and we'll get this sorted out for you.\n" 58 | "Customer: You too! Goodbye, Megan.\n" 59 | "Agent: Goodbye, Lisa!" 60 | ) 61 | DEMO_ANSWERS = [ 62 | ( 63 | "1. billing discrepancies\n" 64 | "2. The customer, contacted the call center regarding billing discrepancies on her statement. The agent, " 65 | "acknowledged the issue, assured The customer it would be resolved, and escalated it to the billing department for " 66 | "correction.\n" 67 | "3. Yes.\n" 68 | "4. Natural.\n" 69 | "5. positive.\n" 70 | ), 71 | ("1. No\n" "2. No\n" "3. 4\n" "4. 5\n" "5. 4\n" "6. 5\n" "7. 4\n" "8. 3"), 72 | ] 73 | TEXT_WRAPPER = [ 74 | ( 75 | f"<|im_start|>system: You are an AI assistant that answers questions accurately and shortly<|im_end|>\n" 76 | f"<|im_start|>user: Given the following text:\n" 77 | f"{DEMO_CALL}\n" 78 | f"answer the questions as accurately as you can:\n" 79 | f"{QUESTIONS[i]}<|im_end|>\n" 80 | f"<|im_start|>assistant:\n" 81 | f"{DEMO_ANSWERS[i]}<|im_end|>\n" 82 | f"<|im_start|>user: Given the following text:\n" 83 | "{}" 84 | ) 85 | for i in range(len(QUESTIONS)) 86 | ] 87 | QUESTIONS_WRAPPER = ( 88 | " answer the given questions as accurately as you can, do not write more answers the questions:\n" 89 | "{}<|im_end|>\n" 90 | "<|im_start|>assistant:\n" 91 | ) 92 | 93 | 94 | @kfp.dsl.pipeline() 95 | def pipeline( 96 | batch: str, 97 | calls_audio_files: str, 98 | transcribe_model: str, 99 | translate_to_english: bool, 100 | pii_recognition_model: str, 101 | pii_recognition_entities: List[str], 102 | pii_recognition_entity_operator_map: List[str], 103 | question_answering_model: str, 104 | batch_size: int = 2, 105 | auto_gptq_exllama_max_input_length: int = None, 106 | insert_calls_db: bool = True, 107 | ): 108 | # Get the project: 109 | project = mlrun.get_current_project() 110 | db_management_function = project.get_function("db-management") 111 | with dsl.Condition(insert_calls_db == True) as insert_calls_condition: 112 | # Insert new calls: 
113 | insert_calls_run = project.run_function( 114 | db_management_function, 115 | handler="insert_calls", 116 | name="insert-calls", 117 | inputs={"calls": batch}, 118 | returns=[ 119 | "calls_batch: dataset", 120 | "audio_files: file", 121 | ], 122 | ) 123 | 124 | # Speech diarize: 125 | speech_diarization_function = project.get_function("silero-vad") 126 | diarize_run = project.run_function( 127 | speech_diarization_function, 128 | handler="diarize", 129 | name="diarization", 130 | inputs={"data_path": calls_audio_files}, 131 | params={ 132 | "speaker_labels": ["Agent", "Client"], 133 | "verbose": True, 134 | }, 135 | returns=["speech_diarization: file", "diarize_errors: file"], 136 | ).after(insert_calls_condition) 137 | 138 | # Update diarization state: 139 | update_calls_post_speech_diarization_run = project.run_function( 140 | db_management_function, 141 | handler="update_calls", 142 | name="update-calls", 143 | inputs={"data": batch}, 144 | params={ 145 | "status": CallStatus.SPEECH_DIARIZED.value, 146 | "table_key": "call_id", 147 | "data_key": "call_id", 148 | }, 149 | ).after(diarize_run) 150 | 151 | # Transcribe: 152 | transcription_function = project.get_function("transcription") 153 | transcribe_run = project.run_function( 154 | transcription_function, 155 | handler="transcribe", 156 | name="transcription", 157 | inputs={ 158 | "data_path": calls_audio_files, 159 | "speech_diarization": diarize_run.outputs["speech_diarization"], 160 | }, 161 | params={ 162 | "model_name": transcribe_model, 163 | "device": "cuda", 164 | "use_better_transformers": True, 165 | "batch_size": batch_size, 166 | "translate_to_english": translate_to_english, 167 | }, 168 | returns=[ 169 | "transcriptions: path", 170 | "transcriptions_dataframe: dataset", 171 | "transcriptions_errors: file", 172 | ], 173 | ) 174 | 175 | # Update transcription state: 176 | update_calls_post_transcription_run = project.run_function( 177 | db_management_function, 178 | handler="update_calls", 179 
| name="update-calls-2", 180 | inputs={"data": transcribe_run.outputs["transcriptions_dataframe"]}, 181 | params={ 182 | "status": CallStatus.TRANSCRIBED.value, 183 | "table_key": "audio_file", 184 | "data_key": "audio_file", 185 | }, 186 | ) 187 | 188 | # Recognize PII: 189 | pii_recognition_function = project.get_function("pii-recognition") 190 | recognize_pii_run = project.run_function( 191 | pii_recognition_function, 192 | handler="recognize_pii", 193 | name="pii-recognition", 194 | inputs={"input_path": transcribe_run.outputs["transcriptions"]}, 195 | params={ 196 | "model": pii_recognition_model, 197 | "html_key": "highlighted", 198 | "entities": pii_recognition_entities, 199 | "entity_operator_map": pii_recognition_entity_operator_map, 200 | "score_threshold": 0.8, 201 | "is_full_report": False, 202 | }, 203 | returns=[ 204 | "anonymized_files: path", 205 | "anonymized_files_dataframe: dataset", 206 | "anonymized_files_errors: file", 207 | "anonymized_files_report: file", 208 | ], 209 | ) 210 | 211 | # Update PII state: 212 | update_calls_post_pii_recognition_run = project.run_function( 213 | db_management_function, 214 | handler="update_calls", 215 | name="update-calls-3", 216 | inputs={"data": recognize_pii_run.outputs["anonymized_files_dataframe"]}, 217 | params={ 218 | "status": CallStatus.ANONYMIZED.value, 219 | "table_key": "transcription_file", 220 | "data_key": "original_file", 221 | }, 222 | ) 223 | 224 | # Question-answering: 225 | question_answering_function = project.get_function("question-answering") 226 | question_answering_function.with_requests(mem="20G") 227 | answer_questions_run = project.run_function( 228 | question_answering_function, 229 | handler="answer_questions", 230 | name="analysis", 231 | inputs={"data_path": recognize_pii_run.outputs["anonymized_files"]}, 232 | params={ 233 | "verbose": True, 234 | "model_name": question_answering_model, 235 | # We don't need the auto_gptq_exllama if using CPU, we do need it if using GPU 236 | 
"auto_gptq_exllama_max_input_length": auto_gptq_exllama_max_input_length, 237 | "device_map": "auto", 238 | "text_wrapper": TEXT_WRAPPER, 239 | "questions": QUESTIONS, 240 | "questions_wrapper": QUESTIONS_WRAPPER, 241 | "questions_columns": [ 242 | "topic", 243 | "summary", 244 | "concern_addressed", 245 | "client_tone", 246 | "agent_tone", 247 | "upsale_attempted", 248 | "upsale_success", 249 | "empathy", 250 | "professionalism", 251 | "kindness", 252 | "effective_communication", 253 | "active_listening", 254 | "customization", 255 | ], 256 | "questions_config": [ 257 | {}, 258 | {"type": "poll", "poll_count": 3, "poll_strategy": "most_common"}, 259 | ], 260 | "generation_config": { 261 | "max_new_tokens": 250, 262 | "do_sample": True, 263 | "temperature": 0.7, 264 | "top_p": 0.95, 265 | "top_k": 40, 266 | "repetition_penalty": 1.1, 267 | }, 268 | "batch_size": 1, 269 | "model_kwargs": {}, 270 | }, 271 | returns=[ 272 | "question_answering_dataframe: dataset", 273 | "question_answering_errors: file", 274 | ], 275 | ) 276 | 277 | # Postprocess answers: 278 | postprocessing_function = project.get_function("postprocessing") 279 | postprocess_answers_run = project.run_function( 280 | postprocessing_function, 281 | handler="postprocess_answers", 282 | name="answers-postprocessing", 283 | inputs={ 284 | "answers": answer_questions_run.outputs["question_answering_dataframe"] 285 | }, 286 | returns=["processed_answers: dataset"], 287 | ) 288 | 289 | # Update question answering state: 290 | update_calls_post_question_answering_run = project.run_function( 291 | db_management_function, 292 | handler="update_calls", 293 | name="update-calls-4", 294 | inputs={"data": postprocess_answers_run.outputs["processed_answers"]}, 295 | params={ 296 | "status": CallStatus.ANALYZED.value, 297 | "table_key": "anonymized_file", 298 | "data_key": "text_file", 299 | }, 300 | ) 301 | -------------------------------------------------------------------------------- 
/src/workflows/calls_generation.py:
--------------------------------------------------------------------------------
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

import kfp
import mlrun
from kfp import dsl


@kfp.dsl.pipeline()
def pipeline(
    amount: int,
    generation_model: str,
    tts_model: str,
    language: str,
    available_voices: List[str],
    min_time: int,
    max_time: int,
    from_date: str,
    to_date: str,
    from_time: str,
    to_time: str,
    num_clients: int,
    num_agents: int,
    generate_clients_and_agents: bool = True,
):
    """Workflow that generates a synthetic batch of call-center calls.

    Steps: optionally generate client and agent records and insert them into
    the database, fetch them back, generate conversation texts, synthesize
    multi-speaker audio from those texts, and finally assemble the calls
    batch consumed by the analysis workflow.

    Args:
        amount: Number of conversations to generate.
        generation_model: Model name used by the text-generation steps.
        tts_model: Text-to-speech model name.
        language: Language of the generated data (interpolated into prompts).
        available_voices: Voice names the TTS step may pick from.
        min_time: Minimum call time (semantics defined by the conversations
            generator — TODO confirm units).
        max_time: Maximum call time (see ``min_time``).
        from_date: Earliest call date to sample.
        to_date: Latest call date to sample.
        from_time: Earliest call time-of-day to sample.
        to_time: Latest call time-of-day to sample.
        num_clients: How many clients to generate (only when
            ``generate_clients_and_agents`` is enabled).
        num_agents: How many agents to generate (only when
            ``generate_clients_and_agents`` is enabled).
        generate_clients_and_agents: Whether to run the client/agent
            generation branch at all.
    """
    # Get the project:
    project = mlrun.get_current_project()

    # NOTE: the "== True" comparison is deliberate — kfp's dsl.Condition
    # needs a comparison expression over the pipeline parameter to build the
    # conditional DAG node; a bare truthiness check would not compile.
    with dsl.Condition(generate_clients_and_agents == True) as generate_data_condition:
        # Generate client data:
        client_data_generator_function = project.get_function(
            "structured_data_generator"
        )
        client_data_run = project.run_function(
            client_data_generator_function,
            handler="generate_data",
            name="client-data-generator",
            params={
                "amount": num_clients,
                "model_name": generation_model,
                "language": language,
                # Free-text field specs are prompts for the LLM generator:
                "fields": [
                    f"first_name: in {language}, no special characters",
                    f"last_name: in {language}, no special characters",
                    "phone_number",
                    "email",
                    "client_id: no leading zeros",
                    "client_city: Enter city, state in the US (e.g., Austin, TX), Not only Texas",
                    "latitude: That correspond to the city",
                    "longitude: That correspond to the city",
                ],
            },
            returns=["clients: file"],
        )

        # Insert client data to database
        db_management_function = project.get_function("db-management")
        project.run_function(
            db_management_function,
            handler="insert_clients",
            name="insert-clients",
            inputs={
                "clients": client_data_run.outputs["clients"],
            },
        )

        # Generate agent data:
        agent_data_generator_function = project.get_function(
            "structured_data_generator"
        )
        agent_data_run = project.run_function(
            agent_data_generator_function,
            handler="generate_data",
            name="agent-data-generator",
            params={
                "amount": num_agents,
                "model_name": generation_model,
                "language": language,
                "fields": [
                    f"first_name: in {language}, no special characters",
                    f"last_name: in {language}, no special characters",
                    "agent_id: no leading zeros",
                ],
            },
            returns=["agents: file"],
        )

        # Insert agent data to database
        db_management_function = project.get_function("db-management")
        project.run_function(
            db_management_function,
            handler="insert_agents",
            name="insert-agents",
            inputs={
                "agents": agent_data_run.outputs["agents"],
            },
        )

    # Get agents from database
    # (.after(generate_data_condition) ensures reads happen only once the
    # optional generation branch — if taken — has completed.)
    db_management_function = project.get_function("db-management")
    get_agents_run = project.run_function(
        db_management_function,
        handler="get_agents",
        name="get-agents",
        returns=["agents: file"],
    ).after(generate_data_condition)

    # Get clients from database
    db_management_function = project.get_function("db-management")
    get_clients_run = project.run_function(
        db_management_function,
        handler="get_clients",
        name="get-clients",
        returns=["clients: file"],
    ).after(generate_data_condition)

    # Generate conversations texts:
    conversations_generator_function = project.get_function("conversations-generator")
    generate_conversations_run = project.run_function(
        conversations_generator_function,
        handler="generate_conversations",
        name="conversation-generation",
        params={
            "amount": amount,
            "model_name": generation_model,
            "language": language,
            "min_time": min_time,
            "max_time": max_time,
            "from_date": from_date,
            "to_date": to_date,
            "from_time": from_time,
            "to_time": to_time,
        },
        inputs={
            "agent_data": get_agents_run.outputs["agents"],
            "client_data": get_clients_run.outputs["clients"],
        },
        returns=[
            "conversations: path",
            "metadata: dataset",
            "ground_truths: dataset",
        ],
    )

    # Text to audio:
    text_to_audio_generator_function = project.get_function("text-to-audio-generator")
    generate_multi_speakers_audio_run = project.run_function(
        text_to_audio_generator_function,
        handler="generate_multi_speakers_audio",
        name="text-to-audio",
        inputs={"data_path": generate_conversations_run.outputs["conversations"]},
        params={
            # Speaker name -> channel/speaker index mapping for the TTS step:
            "speakers": {"Agent": 0, "Client": 1},
            "available_voices": available_voices,
            "model": tts_model,
            "speed": 1,
        },
        returns=[
            "audio_files: path",
            "dataframe: dataset",
            "errors: file",
        ],
    )

    # Create the input batch:
    create_batch_for_analysis_run = project.run_function(
        conversations_generator_function,
        handler="create_batch_for_analysis",
        name="batch-creation",
        inputs={
            "conversations_data": generate_conversations_run.outputs["metadata"],
            "audio_files": generate_multi_speakers_audio_run.outputs["dataframe"],
        },
        returns=["calls_batch: dataset"],
    )
-------------------------------------------------------------------------------- /vizro/app.py: -------------------------------------------------------------------------------- 1 | """Main app entry point for Vizro dashboard.""" 2 | 3 | # DEFINE IMPORTS 4 | import pandas as pd 5 | from custom_charts import ( 6 | plot_bar_concerns, 7 | plot_bar_quality, 8 | plot_bar_upsales, 9 | plot_box_communication, 10 | plot_butterfly_upsales_concerns, 11 | plot_donut_concerns, 12 | plot_donut_upsales, 13 | plot_line_calls_over_time, 14 | plot_map_call_locations, 15 | plot_radar_quality, 16 | ) 17 | from custom_components import Audio, make_tabs_with_title, update_from_selected_row 18 | from dash import html 19 | 20 | import vizro.models as vm 21 | from vizro import Vizro 22 | from vizro.figures import kpi_card, kpi_card_reference 23 | from vizro.tables import dash_ag_grid 24 | 25 | # DEFINE CONSTANTS 26 | MIN_ROW_HEIGHT = 420 27 | CONCERN_LABELS = ["Concerns Not Addressed", "Concerns Addressed"] 28 | 29 | 30 | def px(val: int) -> str: 31 | """Convert integer value to pixel string.""" 32 | return f"{int(val)}px" 33 | 34 | 35 | # DEFINE DATA 36 | try: 37 | df = pd.read_csv("/home/mlrun_code/vizro/data.csv") 38 | except FileNotFoundError: 39 | raise RuntimeError("The data file 'fake_data.csv' was not found.") 40 | df["Call Date"] = pd.to_datetime(df["Call Date"]) 41 | df["Upsale Success Reference"] = 0.25 42 | df["Concern Reference"] = 0.50 43 | 44 | # DEFINE DASHBOARD 45 | kpi_container = vm.Container( 46 | layout=vm.Grid(grid=[[0, 1, 2, 3, 4]], row_gap="0px", col_gap="20px"), 47 | components=[ 48 | vm.Figure( 49 | figure=kpi_card_reference( 50 | data_frame=df, 51 | value_column="Upsale Success", 52 | reference_column="Upsale Success Reference", 53 | title="Upsale Success", 54 | value_format="{value:.0%}", 55 | reference_format="{delta_relative:+.1%} vs. 
target", 56 | icon="more_up", 57 | agg_func="mean", 58 | ) 59 | ), 60 | vm.Figure( 61 | figure=kpi_card_reference( 62 | data_frame=df, 63 | value_column="Concern Addressed", 64 | reference_column="Concern Reference", 65 | title="Concerns Addressed", 66 | value_format="{value:.0%}", 67 | reference_format="{delta_relative:+.1%} vs. target", 68 | agg_func="mean", 69 | icon="recommend", 70 | ) 71 | ), 72 | vm.Figure( 73 | figure=kpi_card( 74 | data_frame=df, 75 | agg_func="count", 76 | value_column="Caller ID", 77 | title="Number of Calls", 78 | icon="call", 79 | ) 80 | ), 81 | vm.Figure( 82 | figure=kpi_card( 83 | data_frame=df, 84 | agg_func="nunique", 85 | value_column="Agent ID", 86 | title="Number of Agents", 87 | icon="support_agent", 88 | ) 89 | ), 90 | vm.Figure( 91 | figure=kpi_card( 92 | data_frame=df, 93 | agg_func="nunique", 94 | value_column="Caller ID", 95 | title="Number of Callers", 96 | icon="person", 97 | ) 98 | ), 99 | ], 100 | ) 101 | 102 | call_summary_container = vm.Container( 103 | title="Calls Summary", 104 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT), row_gap="0px"), 105 | components=[ 106 | vm.Container( 107 | title="", 108 | layout=vm.Grid( 109 | grid=[[0], [1]], row_min_height=px(MIN_ROW_HEIGHT // 2), row_gap="0px" 110 | ), 111 | components=[ 112 | vm.Graph( 113 | title="Calls over time", 114 | figure=plot_line_calls_over_time(df), 115 | ), 116 | vm.Graph( 117 | title="Upsales and Concerns Addressed", 118 | figure=plot_butterfly_upsales_concerns(df), 119 | ), 120 | ], 121 | variant="filled", 122 | ), 123 | vm.Container( 124 | title="", 125 | layout=vm.Grid( 126 | grid=[[0]], row_min_height=px(MIN_ROW_HEIGHT), row_gap="0px" 127 | ), 128 | components=[ 129 | vm.Graph( 130 | title="Call Locations", 131 | header="Showing actual number of calls per city", 132 | figure=plot_map_call_locations(df), 133 | ) 134 | ], 135 | variant="filled", 136 | ), 137 | ], 138 | ) 139 | 140 | upsales_container = make_tabs_with_title( 141 | 
title="Upsales", 142 | tabs=[ 143 | vm.Container( 144 | title="Percentage", 145 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 146 | components=[ 147 | vm.Graph( 148 | title="Average Across Agents", 149 | header="Showing percentage of calls", 150 | figure=plot_donut_upsales( 151 | data_frame=df, 152 | group_column="Agent ID", 153 | mode="average", 154 | ), 155 | ), 156 | vm.Graph( 157 | title="Per Agent", 158 | header="Showing percentage of calls", 159 | figure=plot_donut_upsales( 160 | data_frame=df, 161 | group_column="Agent ID", 162 | mode="comparison", 163 | ), 164 | footer="(The Agent ID is shown inside each donut)", 165 | ), 166 | ], 167 | ), 168 | vm.Container( 169 | title="Absolute", 170 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 171 | components=[ 172 | vm.Graph( 173 | title="Average Across Agents", 174 | header="Showing actual number of calls", 175 | figure=plot_bar_upsales( 176 | data_frame=df, 177 | group_column="Agent ID", 178 | mode="average", 179 | ), 180 | ), 181 | vm.Graph( 182 | title="Per Agent", 183 | header="Showing actual number of calls", 184 | figure=plot_bar_upsales( 185 | data_frame=df, 186 | group_column="Agent ID", 187 | mode="comparison", 188 | ), 189 | ), 190 | ], 191 | ), 192 | ], 193 | ) 194 | 195 | concerns_container = make_tabs_with_title( 196 | title="Concerns", 197 | tabs=[ 198 | vm.Container( 199 | title="Percentage", 200 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 201 | components=[ 202 | vm.Graph( 203 | title="Average Across Agents", 204 | header="Showing percentage of calls", 205 | figure=plot_donut_concerns( 206 | data_frame=df, 207 | group_column="Agent ID", 208 | count_column="Concern Addressed", 209 | label_names=CONCERN_LABELS, 210 | mode="average", 211 | ), 212 | ), 213 | vm.Graph( 214 | title="Per Agent", 215 | header="Showing percentage of calls", 216 | figure=plot_donut_concerns( 217 | data_frame=df, 218 | group_column="Agent ID", 219 | 
count_column="Concern Addressed", 220 | label_names=CONCERN_LABELS, 221 | mode="comparison", 222 | ), 223 | footer="(The Agent ID is shown inside each donut)", 224 | ), 225 | ], 226 | ), 227 | vm.Container( 228 | title="Absolute", 229 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 230 | components=[ 231 | vm.Graph( 232 | title="Average Across Agents", 233 | header="Showing actual number of calls", 234 | figure=plot_bar_concerns( 235 | data_frame=df, 236 | group_column="Agent ID", 237 | mode="average", 238 | ), 239 | ), 240 | vm.Graph( 241 | title="Per Agent", 242 | header="Showing actual number of calls", 243 | figure=plot_bar_concerns( 244 | data_frame=df, 245 | group_column="Agent ID", 246 | mode="comparison", 247 | ), 248 | ), 249 | ], 250 | ), 251 | ], 252 | ) 253 | 254 | quality_scores_container = make_tabs_with_title( 255 | title="Quality Scores", 256 | tabs=[ 257 | vm.Container( 258 | title="Absolute", 259 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 260 | components=[ 261 | vm.Graph( 262 | title="Average Across Agents", 263 | header="Showing actual score", 264 | figure=plot_radar_quality(df, "average"), 265 | ), 266 | vm.Graph( 267 | title="Per Agent", 268 | header="Showing actual score", 269 | figure=plot_radar_quality(df, "comparison"), 270 | footer="(View the tooltips to see the Agent ID)", 271 | ), 272 | ], 273 | ), 274 | vm.Container( 275 | title="Comparison", 276 | layout=vm.Grid(grid=[[0, 1]], row_min_height=px(MIN_ROW_HEIGHT)), 277 | components=[ 278 | vm.Graph( 279 | title="Average Across Agents", 280 | header="Showing actual score", 281 | figure=plot_bar_quality(df, "average"), 282 | ), 283 | vm.Graph( 284 | title="Per Agent", 285 | header="Showing actual score", 286 | figure=plot_bar_quality(df, "comparison"), 287 | ), 288 | ], 289 | ), 290 | ], 291 | ) 292 | 293 | effective_communication_container = vm.Container( 294 | title="Effective Communication", 295 | layout=vm.Grid(grid=[[0, 1]], 
row_min_height=px(MIN_ROW_HEIGHT)), 296 | collapsed=False, 297 | components=[ 298 | vm.Graph( 299 | title="Average Across Agents", 300 | header="Showing actual score", 301 | figure=plot_box_communication(data_frame=df, mode="average"), 302 | ), 303 | vm.Graph( 304 | title="Per Agent", 305 | header="Showing actual score", 306 | figure=plot_box_communication(data_frame=df, mode="comparison"), 307 | ), 308 | ], 309 | variant="filled", 310 | ) 311 | 312 | transcripts_and_audio_container = vm.Container( 313 | title="Call transcripts", 314 | layout=vm.Flex(gap="40px"), 315 | components=[ 316 | vm.AgGrid( 317 | id="outer_grid", 318 | figure=dash_ag_grid( 319 | id="inner_grid", 320 | data_frame=df[ 321 | [ 322 | "Agent ID", 323 | "Caller ID", 324 | "Topic", 325 | "Summary", 326 | "audio_file", 327 | "text_file", 328 | ] 329 | ], 330 | dashGridOptions={ 331 | "rowSelection": "single", 332 | "suppressRowDeselection": True, 333 | }, 334 | columnState=[ 335 | {"colId": "audio_file", "hide": True}, 336 | {"colId": "text_file", "hide": True}, 337 | ], 338 | columnSize="responsiveSizeToFit", 339 | ), 340 | actions=[ 341 | vm.Action( 342 | function=update_from_selected_row(), 343 | inputs=["inner_grid.selectedRows"], 344 | outputs=["transcript.children", "audio.src"], 345 | ) 346 | ], 347 | ), 348 | vm.Container( 349 | layout=vm.Grid(grid=[[0, 0, 1]]), 350 | components=[ 351 | vm.Card( 352 | id="transcript", 353 | text="Select a row from the above table to see a transcript", 354 | extra={"style": {"height": "450px"}}, 355 | ), 356 | Audio(id="audio"), 357 | ], 358 | ), 359 | ], 360 | ) 361 | 362 | call_center_summary_page = vm.Page( 363 | title="Call Center Summary", 364 | layout=vm.Flex(gap="20px"), 365 | components=[ 366 | kpi_container, 367 | call_summary_container, 368 | upsales_container, 369 | concerns_container, 370 | quality_scores_container, 371 | effective_communication_container, 372 | ], 373 | controls=[ 374 | vm.Filter(column="Agent ID", 
selector=vm.Dropdown(title="Agent ID")), 375 | vm.Filter(column="Caller ID", selector=vm.Dropdown(title="Caller ID")), 376 | vm.Filter(column="Client Tone"), 377 | vm.Filter( 378 | column="Effective Communication", 379 | selector=vm.RangeSlider(title="Effective Communication Score", step=1), 380 | ), 381 | vm.Filter(column="Caller City", selector=vm.Dropdown(title="Caller City")), 382 | ], 383 | ) 384 | 385 | call_transcripts_page = vm.Page( 386 | title="Call Transcripts", 387 | components=[transcripts_and_audio_container], 388 | controls=[ 389 | vm.Filter(column="Agent ID", selector=vm.Dropdown(title="Agent ID")), 390 | vm.Filter(column="Caller ID", selector=vm.Dropdown(title="Caller ID")), 391 | ], 392 | ) 393 | 394 | dashboard = vm.Dashboard(pages=[call_center_summary_page, call_transcripts_page]) 395 | 396 | app = Vizro().build(dashboard) 397 | 398 | if __name__ == "__main__": 399 | app.run() 400 | -------------------------------------------------------------------------------- /vizro/assets/vizro_dashboard_styles.css: -------------------------------------------------------------------------------- 1 | audio::-webkit-media-controls-panel, audio::-webkit-media-controls-enclosure { 2 | border-radius: 0; 3 | background: var(--surfaces-bg-card); 4 | } 5 | 6 | #outer_grid { 7 | width: unset; 8 | } 9 | 10 | #transcript { 11 | line-height: unset; 12 | } -------------------------------------------------------------------------------- /vizro/custom_charts.py: -------------------------------------------------------------------------------- 1 | """Custom charts for Vizro dashboard. 
2 | """ 3 | 4 | import math 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import plotly.graph_objects as go 9 | from plotly.subplots import make_subplots 10 | 11 | import vizro.plotly.express as px 12 | from vizro.models.types import capture 13 | 14 | CONCERN_LABELS = ["Concerns Not Addressed", "Concerns Addressed"] 15 | UPSALE_LABELS = ["Failed Upsales", "No Upsale Attempted", "Successful Upsales"] 16 | 17 | 18 | @capture("graph") 19 | def plot_donut_concerns( 20 | data_frame: pd.DataFrame, 21 | group_column: str, 22 | count_column: str, 23 | label_names: list[str], 24 | mode: str, 25 | ) -> go.Figure: 26 | """Create a donut chart for concerns addressed, by agent or average. 27 | 28 | Args: 29 | data_frame (pd.DataFrame): Input data containing agent and concern columns. 30 | group_column (str): Column name for grouping (e.g., agent ID). 31 | count_column (str): Column name for concern addressed (boolean). 32 | label_names (list[str]): List of label names for the donut chart. 33 | mode (str): 'comparison' for agent subplots, 'average' for overall. 34 | 35 | Returns: 36 | go.Figure: Plotly Figure object representing the donut chart(s). 
37 | """ 38 | if mode == "comparison": 39 | 40 | agent_count = data_frame[group_column].nunique() 41 | 42 | num_rows = math.ceil(agent_count / 4) 43 | num_cols = 4 44 | 45 | fig = make_subplots( 46 | rows=num_rows, 47 | cols=num_cols, 48 | subplot_titles=None, 49 | horizontal_spacing=0.08, 50 | vertical_spacing=0.02, 51 | specs=[[{"type": "pie"}] * num_cols for _ in range(num_rows)], 52 | ) 53 | 54 | agent_list = data_frame[group_column].unique().tolist() 55 | 56 | for i in range(0, len(agent_list)): 57 | chart_data = data_frame.copy() 58 | chart_data = chart_data[chart_data[group_column] == agent_list[i]] 59 | 60 | counts = chart_data[count_column].value_counts() 61 | labels = label_names 62 | 63 | chart_data = pd.DataFrame( 64 | { 65 | "Labels": labels, 66 | "Counts": [counts.get(False, 0), counts.get(True, 0)], 67 | } 68 | ) 69 | 70 | chart_data.sort_values(by="Labels", ascending=True, inplace=True) 71 | 72 | labels = chart_data["Labels"] 73 | values = chart_data["Counts"] 74 | 75 | color_discrete_map = { 76 | "Concerns Addressed": "#00b4ff", 77 | "Concerns Not Addressed": "#ff9222", 78 | } 79 | colors = [color_discrete_map[label] for label in labels] 80 | 81 | fig.add_trace( 82 | go.Pie( 83 | labels=labels, 84 | values=values, 85 | hole=0.6, 86 | title=str(agent_list[i]), 87 | marker=dict(colors=colors), 88 | sort=False, 89 | hovertemplate="Category: %{label}
Count: %{value}
Percent: %{percent}", 90 | ), 91 | row=i // num_cols + 1, 92 | col=i % num_cols + 1, 93 | ) 94 | 95 | fig.update_traces( 96 | textposition="outside", 97 | textinfo="percent+label", 98 | opacity=0.9, 99 | ) 100 | 101 | fig.update_traces(textinfo="none") 102 | 103 | fig.update_layout( 104 | margin_t=0, margin_b=0, margin_l=0, margin_r=0, showlegend=False 105 | ) 106 | 107 | if mode == "average": 108 | chart_data = data_frame.copy() 109 | 110 | counts = chart_data[count_column].value_counts() 111 | labels = label_names 112 | 113 | chart_data = pd.DataFrame( 114 | {"Labels": labels, "Counts": [counts.get(False, 0), counts.get(True, 0)]} 115 | ) 116 | 117 | chart_data.sort_values(by="Labels", ascending=True, inplace=True) 118 | 119 | labels = chart_data["Labels"] 120 | values = chart_data["Counts"] 121 | 122 | color_discrete_map = { 123 | "Concerns Addressed": "#00b4ff", 124 | "Concerns Not Addressed": "#ff9222", 125 | } 126 | colors = [color_discrete_map[label] for label in labels] 127 | 128 | fig = go.Figure() 129 | 130 | fig.add_trace( 131 | go.Pie( 132 | labels=labels, 133 | values=values, 134 | hole=0.6, 135 | marker=dict(colors=colors), 136 | sort=False, 137 | hovertemplate="Category: %{label}
Count: %{value}
Percent: %{percent}", 138 | ) 139 | ) 140 | 141 | fig.update_layout(margin_t=0, margin_b=0, margin_l=0, margin_r=0) 142 | fig.update_traces(textposition="outside", textinfo="percent", opacity=0.9) 143 | 144 | return fig 145 | 146 | 147 | @capture("graph") 148 | def plot_donut_upsales( 149 | data_frame: pd.DataFrame, 150 | group_column: str, 151 | mode: str, 152 | ) -> go.Figure: 153 | """Create a donut chart for upsales outcomes, by agent or average. 154 | 155 | Args: 156 | data_frame (pd.DataFrame): Input data containing agent and upsale columns. 157 | group_column (str): Column name for grouping (e.g., agent ID). 158 | mode (str): 'comparison' for agent subplots, 'average' for overall. 159 | 160 | Returns: 161 | go.Figure: Plotly Figure object representing the donut chart(s). 162 | """ 163 | color_discrete_map = { 164 | "Failed Upsales": "#FF9222", 165 | "No Upsale Attempted": "#3949AB", 166 | "Successful Upsales": "#00B4FF", 167 | } 168 | 169 | labels = ["Failed Upsales", "No Upsale Attempted", "Successful Upsales"] 170 | 171 | if mode == "comparison": 172 | 173 | agent_count = data_frame[group_column].nunique() 174 | 175 | num_rows = math.ceil(agent_count / 4) 176 | num_cols = 4 177 | 178 | fig = make_subplots( 179 | rows=num_rows, 180 | cols=num_cols, 181 | subplot_titles=None, 182 | horizontal_spacing=0.08, 183 | vertical_spacing=0.02, 184 | specs=[[{"type": "pie"}] * num_cols for _ in range(num_rows)], 185 | ) 186 | 187 | agent_list = data_frame[group_column].unique().tolist() 188 | 189 | for i in range(0, len(agent_list)): 190 | 191 | chart_data = data_frame.copy() 192 | upsale_outcomes = chart_data[chart_data[group_column] == agent_list[i]] 193 | 194 | upsale_outcomes = ( 195 | upsale_outcomes.groupby(["Upsale Attempted", "Upsale Success"]) 196 | .size() 197 | .reset_index(name="counts") 198 | ) 199 | 200 | def categorize(row: pd.Series) -> str: 201 | """Categorize upsale outcome for a row. 
202 | 203 | Args: 204 | row (pd.Series): Row of DataFrame with 'Upsale Attempted' and 'Upsale Success'. 205 | Returns: 206 | str: Category label for the upsale outcome. 207 | """ 208 | if not row["Upsale Attempted"]: 209 | return "No Upsale Attempted" 210 | elif row["Upsale Success"]: 211 | return "Successful Upsales" 212 | else: 213 | return "Failed Upsales" 214 | 215 | upsale_outcomes["category"] = upsale_outcomes.apply(categorize, axis=1) 216 | 217 | counts = upsale_outcomes["category"].value_counts() 218 | 219 | chart_data = pd.DataFrame( 220 | { 221 | "Labels": labels, 222 | "Counts": [ 223 | counts.get("Failed Upsales", 0), 224 | counts.get("No Upsale Attempted", 0), 225 | counts.get("Successful Upsales", 0), 226 | ], 227 | } 228 | ) 229 | 230 | chart_data.sort_values(by="Labels", ascending=True, inplace=True) 231 | 232 | labels = chart_data["Labels"] 233 | values = chart_data["Counts"] 234 | 235 | colors = [color_discrete_map[label] for label in labels] 236 | 237 | fig.add_trace( 238 | go.Pie( 239 | labels=labels, 240 | values=values, 241 | hole=0.6, 242 | title=str(agent_list[i]), 243 | marker=dict(colors=colors), 244 | sort=False, 245 | hovertemplate="Category: %{label}
Count: %{value}
Percent: %{percent}", 246 | ), 247 | row=i // num_cols + 1, 248 | col=i % num_cols + 1, 249 | ) 250 | 251 | fig.update_traces( 252 | textposition="outside", 253 | textinfo="percent+label", 254 | opacity=0.9, 255 | ) 256 | 257 | fig.update_traces(textinfo="none") 258 | 259 | fig.update_layout( 260 | margin_t=0, margin_b=0, margin_l=0, margin_r=0, showlegend=False 261 | ) 262 | 263 | if mode == "average": 264 | 265 | upsale_outcomes = data_frame.copy() 266 | 267 | labels = ["Failed Upsales", "No Upsale Attempted", "Successful Upsales"] 268 | 269 | upsale_outcomes = ( 270 | upsale_outcomes.groupby(["Upsale Attempted", "Upsale Success"]) 271 | .size() 272 | .reset_index(name="counts") 273 | ) 274 | 275 | def categorize(row: pd.Series) -> str: 276 | """Categorize upsale outcome for a row. 277 | 278 | Args: 279 | row (pd.Series): Row of DataFrame with 'Upsale Attempted' and 'Upsale Success'. 280 | Returns: 281 | str: Category label for the upsale outcome. 282 | """ 283 | if not row["Upsale Attempted"]: 284 | return "No Upsale Attempted" 285 | elif row["Upsale Success"]: 286 | return "Successful Upsales" 287 | else: 288 | return "Failed Upsales" 289 | 290 | upsale_outcomes["category"] = upsale_outcomes.apply(categorize, axis=1) 291 | category_counts = ( 292 | upsale_outcomes.groupby("category")["counts"].sum().reset_index() 293 | ) 294 | 295 | counts = dict(zip(category_counts["category"], category_counts["counts"])) 296 | 297 | chart_data = pd.DataFrame( 298 | { 299 | "Labels": labels, 300 | "Counts": [ 301 | counts.get("Failed Upsales", 0), 302 | counts.get("No Upsale Attempted", 0), 303 | counts.get("Successful Upsales", 0), 304 | ], 305 | } 306 | ) 307 | 308 | chart_data.sort_values(by="Labels", ascending=True, inplace=True) 309 | 310 | labels = chart_data["Labels"] 311 | values = chart_data["Counts"] 312 | 313 | colors = [color_discrete_map[label] for label in labels] 314 | 315 | fig = go.Figure() 316 | 317 | fig.add_trace( 318 | go.Pie( 319 | labels=labels, 320 | 
values=values, 321 | hole=0.6, 322 | marker=dict(colors=colors), 323 | sort=False, 324 | hovertemplate="Category: %{label}
Count: %{value}
@capture("graph")
def plot_bar_concerns(
    data_frame: pd.DataFrame,
    group_column: str,
    mode: str,
) -> go.Figure:
    """Create a bar chart for concerns addressed, by agent or average.

    Args:
        data_frame (pd.DataFrame): Input data containing agent and concern columns.
        group_column (str): Column name for grouping (e.g., agent ID).
        mode (str): 'comparison' for per-agent stacked bars, 'average' for overall.

    Returns:
        go.Figure: Plotly Figure object representing the bar chart(s).

    Raises:
        ValueError: If ``mode`` is neither 'comparison' nor 'average'.
    """
    color_discrete_map = {
        "Concerns Addressed": "#00b4ff",
        "Concerns Not Addressed": "#ff9222",
    }
    # The original passed category_orders={"category": [...]}, but the color
    # column here is named "Concern Addressed", so the ordering was silently
    # ignored. Keyed correctly, the stacking order now takes effect.
    category_orders = {
        "Concern Addressed": ["Concerns Not Addressed", "Concerns Addressed"]
    }

    def _count_outcomes(df: pd.DataFrame) -> pd.DataFrame:
        """Count rows per outcome label; returns columns 'Concern Addressed' and 'counts'."""
        chart_data = df.copy()
        chart_data["Concern Addressed"] = chart_data["Concern Addressed"].replace(
            {True: "Concerns Addressed", False: "Concerns Not Addressed"}
        )
        # A single groupby suffices; the original grouped twice on the same key.
        return (
            chart_data.groupby("Concern Addressed").size().reset_index(name="counts")
        )

    if mode == "comparison":
        agent_list = data_frame[group_column].unique().tolist()
        per_agent = []
        for i, agent in enumerate(agent_list):
            counts = _count_outcomes(data_frame[data_frame[group_column] == agent])
            counts["agent_id"] = i  # numeric slot on the x axis; tick text shows the real ID
            per_agent.append(counts)
        data = (
            pd.concat(per_agent)
            if per_agent
            else pd.DataFrame(columns=["Concern Addressed", "counts", "agent_id"])
        )

        fig = px.bar(
            data,
            x="agent_id",
            y="counts",
            color="Concern Addressed",
            title="",
            color_discrete_map=color_discrete_map,
            category_orders=category_orders,
        )
        fig.update_layout(
            showlegend=False,
            xaxis=dict(
                tickmode="array",
                tickvals=list(range(len(agent_list))),
                ticktext=agent_list,
            ),
            xaxis_title="Agent ID",
            yaxis_title=None,
        )
        fig.update_traces(hovertemplate="Category: %{fullData.name}<br>Count: %{y}")
        return fig

    if mode == "average":
        category_counts = _count_outcomes(data_frame)
        category_counts["PLACEHOLDER"] = 1  # single dummy row so the bar has a y position

        fig = px.bar(
            category_counts,
            y="PLACEHOLDER",
            x="counts",
            color="Concern Addressed",
            title="",
            orientation="h",
            text="counts",
            color_discrete_map=color_discrete_map,
            category_orders=category_orders,
        )
        fig.update_layout(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            showlegend=True,
            legend_title=None,
            margin=dict(t=60),
        )
        fig.update_traces(
            textposition="inside",
            insidetextanchor="middle",
            width=0.2,
            hovertemplate="Category: %{fullData.name}<br>Count: %{x}",
        )
        return fig

    # Previously an unknown mode fell through and raised UnboundLocalError.
    raise ValueError(f"Unknown mode: {mode!r}; expected 'comparison' or 'average'.")
@capture("graph")
def plot_bar_upsales(
    data_frame: pd.DataFrame,
    group_column: str,
    mode: str,
) -> go.Figure:
    """Create a bar chart for upsales outcomes, by agent or average.

    Args:
        data_frame (pd.DataFrame): Input data containing agent and upsale columns.
        group_column (str): Column name for grouping (e.g., agent ID).
        mode (str): 'comparison' for per-agent stacked bars, 'average' for overall.

    Returns:
        go.Figure: Plotly Figure object representing the bar chart(s).

    Raises:
        ValueError: If ``mode`` is neither 'comparison' nor 'average'.
    """
    color_discrete_map = {
        "Failed Upsales": "#FF9222",
        "No Upsale Attempted": "#3949AB",
        "Successful Upsales": "#00B4FF",
    }
    category_orders = {
        "category": [
            "Successful Upsales",
            "No Upsale Attempted",
            "Failed Upsales",
        ]
    }

    def categorize(row: pd.Series) -> str:
        """Map an (Upsale Attempted, Upsale Success) pair to its outcome label."""
        if not row["Upsale Attempted"]:
            return "No Upsale Attempted"
        if row["Upsale Success"]:
            return "Successful Upsales"
        return "Failed Upsales"

    def _count_outcomes(df: pd.DataFrame) -> pd.DataFrame:
        """Count rows per outcome; returns columns 'category' and 'counts'."""
        outcomes = (
            df.groupby(["Upsale Attempted", "Upsale Success"])
            .size()
            .reset_index(name="counts")
        )
        outcomes["category"] = outcomes.apply(categorize, axis=1)
        return outcomes.groupby("category")["counts"].sum().reset_index()

    if mode == "comparison":
        agent_list = data_frame[group_column].unique().tolist()
        per_agent = []
        for i, agent in enumerate(agent_list):
            counts = _count_outcomes(data_frame[data_frame[group_column] == agent])
            counts["agent_id"] = i
            per_agent.append(counts)
        data = (
            pd.concat(per_agent)
            if per_agent
            else pd.DataFrame(columns=["category", "counts", "agent_id"])
        )

        fig = px.bar(
            data,
            x="agent_id",
            y="counts",
            color="category",
            title="",
            color_discrete_map=color_discrete_map,
            category_orders=category_orders,
        )
        fig.update_traces(hovertemplate="Category: %{fullData.name}<br>Count: %{y}")
        return fig

    if mode == "average":
        category_counts = _count_outcomes(data_frame)
        category_counts["PLACEHOLDER"] = 1  # single dummy row so the bar has a y position

        fig = px.bar(
            category_counts,
            y="PLACEHOLDER",
            x="counts",
            color="category",
            title="",
            orientation="h",
            text="counts",
            color_discrete_map=color_discrete_map,
            category_orders=category_orders,
        )
        fig.update_traces(
            # BUG FIX: the bar is horizontal, so counts are on x. The original
            # hovered "Count: %{y}", which displayed the PLACEHOLDER value 1.
            hovertemplate="Category: %{fullData.name}<br>Count: %{x}",
            textposition="inside",
            insidetextanchor="middle",
            width=0.2,
        )
        fig.update_layout(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            showlegend=True,
            legend_title=None,
        )
        return fig

    raise ValueError(f"Unknown mode: {mode!r}; expected 'comparison' or 'average'.")
@capture("graph")
def plot_radar_quality(
    data_frame: pd.DataFrame,
    mode: str,
) -> go.Figure:
    """Create a radar (polar) chart for agent communication quality metrics.

    Args:
        data_frame (pd.DataFrame): Input data with agent communication metrics.
        mode (str): 'comparison' for agent subplots, 'average' for overall.

    Returns:
        go.Figure: Plotly Figure object representing the radar chart(s).

    Raises:
        ValueError: If ``mode`` is neither 'comparison' nor 'average'.
    """
    colors = ["#00B4FF", "#FF9222", "#3949AB", "#FF5267", "#08BDBA", "#FDC935"]

    melted_df = pd.melt(
        data_frame.copy(),
        id_vars=["Agent ID"],
        value_vars=[
            "Empathy",
            "Professionalism",
            "Kindness",
            "Effective Communication",
            "Active Listening",
        ],
        var_name="Communication Metric",
        value_name="Value",
    )
    # Mean score per (agent, metric); both modes derive from this table.
    grouped_avg_df = melted_df.groupby(
        ["Agent ID", "Communication Metric"], as_index=False
    )["Value"].mean()

    if mode == "comparison":
        num_cols = 4
        num_rows = math.ceil(data_frame["Agent ID"].nunique() / num_cols)
        fig = make_subplots(
            rows=num_rows,
            cols=num_cols,
            subplot_titles=None,
            horizontal_spacing=0.02,
            vertical_spacing=0.02,
            specs=[[{"type": "polar"}] * num_cols for _ in range(num_rows)],
        )

        for i, agent in enumerate(grouped_avg_df["Agent ID"].unique().tolist()):
            chart_data = grouped_avg_df[grouped_avg_df["Agent ID"] == agent]
            fig.add_trace(
                go.Barpolar(
                    r=chart_data["Value"],
                    theta=chart_data["Communication Metric"],
                    marker_color=colors,
                    hovertemplate=f"Agent ID: {agent}<br>Metric: %{{theta}}<br>Score: %{{r}}",
                ),
                row=i // num_cols + 1,
                col=i % num_cols + 1,
            )

        # Hide axes/grids on every polar subplot. NOTE(review): relies on
        # plotly accepting "polar1" as an alias for the first "polar" layout
        # key — confirm against the plotly version in use.
        for i in range(num_rows * num_cols):
            fig.update_layout(
                **{
                    f"polar{i + 1}": dict(
                        radialaxis=dict(visible=False, showgrid=False),
                        angularaxis=dict(visible=False, showgrid=False),
                        bgcolor="rgba(0, 0, 0, 0)",
                    )
                }
            )
        fig.update_layout(
            showlegend=False,
            paper_bgcolor="rgba(0, 0, 0, 0)",
            plot_bgcolor="rgba(0, 0, 0, 0)",
        )
        return fig

    if mode == "average":
        # Average the per-agent means across agents. The original rebuilt
        # grouped_avg_df from melted_df here, duplicating the work done above.
        overall_avg = grouped_avg_df.groupby(
            ["Communication Metric"], as_index=False
        )["Value"].mean()

        fig = go.Figure()
        fig.add_trace(
            go.Barpolar(
                r=overall_avg["Value"],
                theta=overall_avg["Communication Metric"],
                marker_color=colors,
                hovertemplate="Metric: %{theta}<br>Score: %{r}",
            )
        )
        fig.update_layout(
            polar=dict(
                angularaxis=dict(),
                radialaxis=dict(
                    dtick=1,
                    showgrid=False,
                ),
                bgcolor="rgba(0, 0, 0, 0)",
            ),
            showlegend=False,
        )
        return fig

    raise ValueError(f"Unknown mode: {mode!r}; expected 'comparison' or 'average'.")
@capture("graph")
def plot_bar_quality(
    data_frame: pd.DataFrame,
    mode: str,
) -> go.Figure:
    """Create a bar (lollipop) chart for agent communication quality metrics.

    Args:
        data_frame (pd.DataFrame): Input data with agent communication metrics.
        mode (str): 'comparison' for agent subplots, 'average' for overall.

    Returns:
        go.Figure: Plotly Figure object representing the chart(s).

    Raises:
        ValueError: If ``mode`` is neither 'comparison' nor 'average'.
    """
    colors = ["#00B4FF", "#FF9222", "#3949AB", "#FF5267", "#08BDBA", "#FDC935"]

    melted_df = pd.melt(
        data_frame.copy(),
        id_vars=["Agent ID"],
        value_vars=[
            "Empathy",
            "Professionalism",
            "Kindness",
            "Effective Communication",
            "Active Listening",
        ],
        var_name="Communication Metric",
        value_name="Value",
    )
    grouped_avg_df = melted_df.groupby(
        ["Agent ID", "Communication Metric"], as_index=False
    )["Value"].mean()

    # Fixed metric -> color mapping (alphabetical, matching groupby's sort, so
    # the 'average' mode keeps its original colors). BUG FIX: the original
    # indexed `colors` with the DataFrame row index from iterrows(), which
    # keeps counting across agents — the same metric got a different color in
    # every subplot, and the legend only matched the first agent.
    metric_order = sorted(melted_df["Communication Metric"].unique())
    metric_colors = {m: colors[j % len(colors)] for j, m in enumerate(metric_order)}

    if mode == "comparison":
        num_cols = 4
        num_rows = math.ceil(data_frame["Agent ID"].nunique() / num_cols)
        fig = make_subplots(
            rows=num_rows,
            cols=num_cols,
            subplot_titles=None,
            horizontal_spacing=0.04,
            vertical_spacing=0.02,
            specs=[[{"type": "xy"}] * num_cols for _ in range(num_rows)],
        )

        for i, agent in enumerate(grouped_avg_df["Agent ID"].unique().tolist()):
            row_pos = i // num_cols + 1
            col_pos = i % num_cols + 1
            chart_data = grouped_avg_df[grouped_avg_df["Agent ID"] == agent]
            for _, row in chart_data.iterrows():
                metric = row["Communication Metric"]
                color = metric_colors[metric]
                # Stem of the lollipop.
                fig.add_trace(
                    go.Scatter(
                        x=[metric, metric],
                        y=[0, row["Value"]],
                        mode="lines",
                        line=dict(color=color, width=3),
                        showlegend=False,
                    ),
                    row=row_pos,
                    col=col_pos,
                )
                # Head of the lollipop; legend entries come from the first agent only.
                fig.add_trace(
                    go.Scatter(
                        x=[metric],
                        y=[row["Value"]],
                        mode="markers",
                        marker=dict(color=color, size=8),
                        name=metric if i == 0 else None,
                        showlegend=(i == 0),
                        hovertemplate=f"Agent ID: {agent}<br>Metric: %{{x}}<br>Score: %{{y}}",
                    ),
                    row=row_pos,
                    col=col_pos,
                )
            fig.update_xaxes(
                showgrid=False,
                visible=True,
                showticklabels=False,
                ticks="",
                title=dict(text=str(agent), font=dict(size=10), standoff=2),
                row=row_pos,
                col=col_pos,
                zeroline=True,
            )
            fig.update_yaxes(
                showgrid=False,
                visible=False,
                zeroline=False,
                row=row_pos,
                col=col_pos,
            )

        fig.update_layout(
            showlegend=False,
            paper_bgcolor="rgba(0, 0, 0, 0)",
            plot_bgcolor="rgba(0, 0, 0, 0)",
            margin=dict(t=10),
        )
        return fig

    if mode == "average":
        # Average the per-agent means across agents (the original rebuilt the
        # per-agent table from melted_df here; that recompute was redundant).
        overall_avg = grouped_avg_df.groupby(
            ["Communication Metric"], as_index=False
        )["Value"].mean()

        fig = go.Figure()
        for _, row in overall_avg.iterrows():
            metric = row["Communication Metric"]
            fig.add_trace(
                go.Bar(
                    y=[row["Value"]],
                    x=[metric],
                    name=metric,
                    marker=dict(color=metric_colors[metric]),
                    text=[round(row["Value"], 1)],
                    textposition="inside",
                    hovertemplate="Metric: %{x}<br>Score: %{y}",
                    width=0.6,
                )
            )
        fig.update_layout(
            showlegend=True,
            paper_bgcolor="rgba(0, 0, 0, 0)",
            plot_bgcolor="rgba(0, 0, 0, 0)",
            xaxis=dict(
                showgrid=False,
                visible=True,
                zeroline=True,
                zerolinecolor="rgba(150,150,150,0.7)",
                zerolinewidth=2,
                showticklabels=False,
                ticks="",
            ),
            yaxis=dict(
                showgrid=False,
                visible=False,
            ),
            barmode="group",
        )
        return fig

    raise ValueError(f"Unknown mode: {mode!r}; expected 'comparison' or 'average'.")
@capture("graph")
def plot_box_communication(
    data_frame: pd.DataFrame,
    mode: str,
) -> go.Figure:
    """Box plot of 'Effective Communication' scores, per agent or overall.

    Args:
        data_frame (pd.DataFrame): Input data with agent and communication scores.
        mode (str): 'comparison' for one box per agent, 'average' for a single box.

    Returns:
        go.Figure: Plotly Figure object representing the box plot(s).
    """
    scores = data_frame.loc[:, ["Agent ID", "Effective Communication"]].copy()
    # Dummy constant column: gives the single 'average' box a y position.
    scores["PLACEHOLDER"] = 1

    if mode == "comparison":
        fig = px.box(scores, x="Agent ID", y="Effective Communication")
        fig.update_layout(xaxis=dict(tickvals=scores["Agent ID"], tickangle=90))
    if mode == "average":
        fig = px.box(
            scores, y="PLACEHOLDER", x="Effective Communication", orientation="h"
        )
        fig.update_layout(
            yaxis=dict(range=[0, 2], visible=False), boxmode="group", bargap=0.5
        )
    return fig
@capture("graph")
def plot_map_call_locations(
    data_frame: pd.DataFrame,
) -> go.Figure:
    """Create a map of call locations with bubble size by call count.

    Args:
        data_frame (pd.DataFrame): Input data with city, latitude, longitude, and call info.

    Returns:
        go.Figure: Plotly Figure object representing the map.
    """
    aggregated_df = (
        data_frame.groupby(["Caller City", "latitude", "longitude"])
        .agg(
            Call_Count=("Caller ID", "count"),
            # BUG FIX: count *distinct* agents per city. The original used
            # "count", which is the row count and merely duplicated
            # Call_Count in the "Agents" hover label.
            Agent_IDs=("Agent ID", "nunique"),
            Caller_Count=("Caller ID", "nunique"),
        )
        .reset_index()
    )

    # Scale bubble sizes linearly between min_size and max_size.
    min_size, max_size = 10, 50
    populations = aggregated_df["Call_Count"]
    if populations.min() == populations.max():
        # np.interp requires increasing sample points; with a single distinct
        # call count, give every city the same mid-sized bubble.
        sizes = np.full(len(populations), (min_size + max_size) / 2)
    else:
        sizes = np.interp(
            populations,
            (populations.min(), populations.max()),
            (min_size, max_size),
        )

    fig = go.Figure(
        go.Scattergeo(
            lat=aggregated_df["latitude"],
            lon=aggregated_df["longitude"],
            mode="markers",
            marker=dict(
                size=sizes,
                color="#00B4FF",
                opacity=0.6,
                line=dict(width=0),
            ),
            hovertemplate=(
                "City: %{text}<br>Calls: %{customdata[0]:,}"
                "<br>Agents: %{customdata[1]:,}<br>Callers: %{customdata[2]:,}"
            ),
            customdata=aggregated_df[["Call_Count", "Agent_IDs", "Caller_Count"]],
            text=aggregated_df["Caller City"],
        )
    )
    fig.update_geos(
        visible=False,
        resolution=110,
        scope="usa",
        showcountries=True,
        countrycolor="rgb(150, 150, 150)",
        showsubunits=True,
        subunitcolor="rgb(150, 150, 150)",
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}, showlegend=False)
    return fig
@capture("graph")
def plot_line_calls_over_time(
    data_frame: pd.DataFrame,
) -> go.Figure:
    """Create a line chart of the number of calls per month.

    Args:
        data_frame (pd.DataFrame): Input data with call dates.

    Returns:
        go.Figure: Plotly Figure object representing the line chart.
    """
    # Bucket calls into monthly periods and count rows per bucket.
    monthly = (
        data_frame.groupby(data_frame["Call Date"].dt.to_period("M"))
        .size()
        .reset_index(name="Count")
    )
    monthly["TickLabel"] = monthly["Call Date"].dt.strftime("%b %y")
    monthly["Call Date"] = monthly["Call Date"].dt.strftime("%Y-%m")

    trace = go.Scatter(
        x=monthly["Call Date"],
        y=monthly["Count"],
        mode="lines+markers+text",
        text=monthly["Count"],
        textposition="top center",
        hovertemplate="Month: %{x}<br>Count: %{y}",
        marker=dict(size=6, color="#00B4FF"),
        line=dict(color="#00B4FF", width=2),
        showlegend=False,
        cliponaxis=False,
    )
    fig = go.Figure(trace)
    fig.update_layout(
        showlegend=False,
        title=None,
        yaxis=dict(visible=False),
        xaxis=dict(
            title=None,
            tickangle=90,
            tickmode="array",
            tickvals=monthly["Call Date"],
            ticktext=monthly["TickLabel"],
            tickfont=dict(size=12),
            showgrid=False,
        ),
        margin=dict(t=10, b=60),
    )
    return fig
@capture("graph")
def plot_butterfly_upsales_concerns(
    data_frame: pd.DataFrame,
) -> go.Figure:
    """Create a butterfly chart comparing upsales and concerns addressed percentages per month.

    Upsales success rates point up; concerns-addressed rates are negated so
    they hang below the shared zero line.

    Args:
        data_frame (pd.DataFrame): Input data with call dates, upsale, and concern columns.

    Returns:
        go.Figure: Plotly Figure object representing the butterfly chart.
    """
    df = data_frame.copy()
    df["Month"] = df["Call Date"].dt.to_period("M")

    # % of attempted upsales that succeeded, per month.
    upsales = (
        df[df["Upsale Attempted"]]
        .groupby("Month")["Upsale Success"]
        .mean()
        .reset_index()
    )
    upsales["Metric"] = "Upsales Success"
    upsales["Value"] = upsales["Upsale Success"] * 100

    # % of concerns addressed per month, negated to draw below the axis.
    concerns = df.groupby("Month")["Concern Addressed"].mean().reset_index()
    concerns["Metric"] = "Concerns Addressed"
    concerns["Value"] = -concerns["Concern Addressed"] * 100

    plot_df = pd.concat(
        [upsales[["Month", "Metric", "Value"]], concerns[["Month", "Metric", "Value"]]]
    )
    plot_df["MonthLabel"] = plot_df["Month"].dt.strftime("%b %y")

    # One row per month. pivot() sorts its index, so the pre-pivot sorts the
    # original performed were redundant; one explicit sort keeps intent clear.
    pivot_df = plot_df.pivot(
        index=["Month", "MonthLabel"], columns="Metric", values="Value"
    ).reset_index()
    pivot_df = pivot_df.sort_values("Month")
    month_labels = pivot_df["MonthLabel"]

    # A metric can be entirely absent (e.g. no upsale ever attempted) or have
    # month gaps. BUG FIX: plain column assignment replaces the original
    # chained `pivot_df[col].fillna(value=0, inplace=True)`, which is
    # deprecated and a silent no-op under pandas copy-on-write.
    for col in ("Upsales Success", "Concerns Addressed"):
        if col not in pivot_df.columns:
            pivot_df[col] = 0.0
        else:
            pivot_df[col] = pivot_df[col].fillna(0)
    upsales_y = pivot_df["Upsales Success"]
    concerns_y = pivot_df["Concerns Addressed"]

    fig = go.Figure()
    fig.add_traces(
        [
            go.Bar(
                x=month_labels,
                y=upsales_y,
                name="% Upsales Success",
                marker_color="#00B4FF",
                text=[f"{int(round(v))}%" if v != 0 else "" for v in upsales_y],
                textposition="inside",
                insidetextanchor="start",
                # NOTE(review): 2pt text is nearly unreadable — confirm intended.
                textfont=dict(size=2, color="white"),
                textangle=90,
                offsetgroup=1,
                cliponaxis=False,
                width=0.6,
                hovertemplate="Month: %{x}<br>Upsales Success: %{y:.0f}%",
            ),
            go.Bar(
                x=month_labels,
                y=concerns_y,
                name="% Concerns Addressed",
                marker_color="#FF9222",
                text=[f"{int(round(abs(v)))}%" if v != 0 else "" for v in concerns_y],
                textposition="inside",
                insidetextanchor="end",
                textfont=dict(size=2, color="white"),
                textangle=90,
                offsetgroup=1,
                cliponaxis=False,
                width=0.6,
                # Hover shows the positive magnitude, not the negated y value.
                hovertemplate="Month: %{x}<br>Concerns Addressed: %{customdata:.0f}%",
                customdata=[abs(v) for v in concerns_y],
            ),
        ]
    )
    fig.update_layout(
        barmode="relative",
        bargap=0,
        showlegend=False,
        xaxis=dict(
            visible=True,
            showline=False,
            showticklabels=True,
            ticks="",
            showgrid=False,
            zeroline=False,
            tickangle=90,
            tickfont=dict(size=12),
        ),
        yaxis=dict(visible=False),
        margin=dict(t=0, b=0),
    )
    fig.add_hline(y=0, line_width=1, line_color="rgba(150,150,150,0.7)")
    return fig
@capture("action")
def update_from_selected_row(
    selected_rows: Sequence[dict[str, Any]]
) -> tuple[str, str]:
    """Update transcript and audio from the selected row in the grid.

    Args:
        selected_rows (Sequence[dict[str, Any]]):
            List of selected row dictionaries from the grid, each containing
            'text_file' and 'audio_file' keys.

    Returns:
        tuple[str, str]:
            A tuple containing:
            - The transcript as markdown-formatted string.
            - The audio source as a base64-encoded data URI suitable for HTML audio playback.

    Raises:
        PreventUpdate: If no row is selected, the required files are not found,
            or the files cannot be read.
    """
    # Grids emit an empty selection when a row is deselected; the original
    # raised IndexError here instead of skipping the update.
    if not selected_rows:
        raise PreventUpdate
    selected_row = selected_rows[0]

    text_dir = Path("outputs/anonymized_files")
    audio_dir = Path("outputs/audio_files")
    text_file_path = text_dir / selected_row["text_file"]
    audio_file_path = audio_dir / selected_row["audio_file"]
    # is_file() replaces the original membership test against iterdir(), which
    # scanned both directories on every selection and raised if a directory
    # was missing. The resolve().parent check keeps the traversal protection
    # that the iterdir() comparison provided (row values are grid input, so
    # "../..." names must not escape the output directories).
    if (
        not text_file_path.is_file()
        or not audio_file_path.is_file()
        or text_file_path.resolve().parent != text_dir.resolve()
        or audio_file_path.resolve().parent != audio_dir.resolve()
    ):
        raise PreventUpdate

    # Best effort: any read failure just skips the update.
    try:
        call_transcript = text_file_path.read_text()
    except Exception as e:
        raise PreventUpdate from e
    # NOTE(review): markdown hard line breaks need two trailing spaces; this
    # inserts one, preserved from the original — confirm intended.
    call_transcript = call_transcript.replace("\n", " \n")
    # Bold the speaker name at the start of each line.
    call_transcript = re.sub(r"^(\w+)", r"**\1**", call_transcript, flags=re.MULTILINE)

    try:
        call_audio_src = base64.b64encode(audio_file_path.read_bytes())
    except Exception as e:
        raise PreventUpdate from e
    call_audio_src = f"data:audio/wav;base64,{call_audio_src.decode('utf-8')}"
    return call_transcript, call_audio_src
class Audio(vm.VizroBaseModel):
    """Vizro dashboard component that renders an HTML audio player.

    Used for playback of call recordings or other audio content.
    """

    # Discriminator so Vizro can dispatch on this component type.
    type: Literal["audio"] = "audio"

    def build(self) -> html.Audio:
        """Build the Dash audio element for playback.

        Returns:
            html.Audio: Dash HTML audio component with controls enabled.
        """
        return html.Audio(controls=True, id=self.id)


# Register the custom component so it is accepted in Container.components.
vm.Container.add_type("components", Audio)
def make_tabs_with_title(title: str, tabs: list[vm.Container]) -> vm.Container:
    """Wrap tabbed content in a titled, filled container for the Vizro dashboard.

    Args:
        title (str):
            The title to display above the tabbed content.
        tabs (list[vm.Container]):
            List of vm.Container objects, each representing a tab.

    Returns:
        vm.Container: A filled, expanded container holding the tabs under the title.
    """
    tab_group = vm.Tabs(tabs=tabs)
    return vm.Container(
        title=title,
        components=[tab_group],
        variant="filled",
        collapsed=False,
    )