├── .github └── workflows │ └── test-build-publish.yml ├── .gitignore ├── Dockerfile ├── Dockerfile.cuda ├── LICENSE ├── README.md ├── datadm-header.png ├── datadm.ipynb ├── datadm ├── __init__.py ├── agent.py ├── agents │ ├── __init__.py │ ├── baseline.py │ └── cotmultistep.py ├── app.py ├── backend.py ├── conversation.py └── repl.py ├── demos ├── .gitignore ├── data │ ├── country-regions.csv │ └── world_happiness_report.csv ├── datadm_happiness_qa_and_plots.mp4 ├── demo.ipynb └── mouse_helper.js ├── dev-requirements.txt ├── pyproject.toml └── tests ├── __init__.py └── test_repl.py /.github/workflows/test-build-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI and ghcr 2 | 3 | env: 4 | REGISTRY: ghcr.io 5 | IMAGE_NAME: ${{ github.repository }} 6 | 7 | on: push 8 | jobs: 9 | tests: 10 | name: Test package 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | # python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] 15 | python-version: ['3.8', '3.9', '3.10', '3.11'] 16 | steps: 17 | - uses: actions/checkout@master 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install tox tox-gh-actions 26 | - name: Test with tox 27 | run: tox 28 | build-n-publish-to-pypi: 29 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 30 | runs-on: ubuntu-latest 31 | needs: [tests] 32 | steps: 33 | - uses: actions/checkout@master 34 | - name: Set up Python 3.11 35 | uses: actions/setup-python@v3 36 | with: 37 | python-version: "3.11" 38 | - name: Install pypa/build 39 | run: >- 40 | python -m 41 | pip install 42 | build 43 | --user 44 | - name: Build a binary wheel and a source tarball 45 | run: >- 46 | python -m 47 | build 48 | --sdist 49 | --wheel 50 | --outdir dist/ 51 | - name: Publish 
distribution 📦 to PyPI 52 | if: startsWith(github.ref, 'refs/tags') 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | with: 55 | password: ${{ secrets.PYPI_API_TOKEN }} 56 | build-n-push-image: 57 | runs-on: ubuntu-latest 58 | needs: [build-n-publish-to-pypi] 59 | permissions: 60 | contents: read 61 | packages: write 62 | steps: 63 | - name: Checkout repository 64 | uses: actions/checkout@v3 65 | - name: Set up QEMU 66 | uses: docker/setup-qemu-action@v2 67 | - name: Set up Docker Buildx 68 | uses: docker/setup-buildx-action@v2 69 | - name: Log in to the Container registry 70 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 71 | with: 72 | registry: ${{ env.REGISTRY }} 73 | username: ${{ github.actor }} 74 | password: ${{ secrets.GITHUB_TOKEN }} 75 | - name: Extract metadata (tags, labels) for Docker 76 | id: meta 77 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 78 | with: 79 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 80 | - name: Build and push Docker image 81 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 82 | with: 83 | context: . 
84 | push: ${{ startsWith(github.ref, 'refs/tags') }} 85 | platforms: linux/amd64,linux/arm64/v8 86 | tags: ${{ steps.meta.outputs.tags }} 87 | labels: ${{ steps.meta.outputs.labels }} 88 | build-n-push-cuda-image: 89 | runs-on: ubuntu-latest 90 | needs: [build-n-publish-to-pypi] 91 | permissions: 92 | contents: read 93 | packages: write 94 | steps: 95 | - name: Checkout repository 96 | uses: actions/checkout@v3 97 | - name: Log in to the Container registry 98 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 99 | with: 100 | registry: ${{ env.REGISTRY }} 101 | username: ${{ github.actor }} 102 | password: ${{ secrets.GITHUB_TOKEN }} 103 | - name: Extract metadata (tags, labels) for Docker 104 | id: meta 105 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 106 | with: 107 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 108 | flavor: | 109 | suffix=-cuda,onlatest=true 110 | - name: Build and push Docker image 111 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 112 | with: 113 | context: . 
114 | file: ./Dockerfile.cuda 115 | push: ${{ startsWith(github.ref, 'refs/tags') }} 116 | tags: ${{ steps.meta.outputs.tags }} 117 | labels: ${{ steps.meta.outputs.labels }} 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According 
to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .python-version 163 | 164 | datadm/_version.py 165 | 166 | .DS_Store 167 | 168 | *.csv 169 | datadm/scripts/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.11-bullseye 2 | 3 | RUN mkdir /datadm 4 | WORKDIR /datadm 5 | 6 | COPY README.md /datadm 7 | COPY pyproject.toml /datadm 8 | COPY datadm/ /datadm/datadm 9 | 10 | RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0 pip install -e . 
11 | 12 | CMD ["datadm"] -------------------------------------------------------------------------------- /Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime 2 | 3 | RUN mkdir /datadm 4 | WORKDIR /datadm 5 | 6 | COPY README.md /datadm 7 | COPY pyproject.toml /datadm 8 | COPY datadm/ /datadm/datadm 9 | 10 | RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0 pip install -e ".[cuda]" 11 | 12 | CMD ["datadm"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Approximate Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [dataDM](https://github.com/approximatelabs/datadm) 💬📊 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/datadm)](https://pypi.org/project/datadm/) 4 | [![tests](https://github.com/approximatelabs/datadm/actions/workflows/test-build-publish.yml/badge.svg)](https://github.com/approximatelabs/datadm/actions/workflows/test-build-publish.yml) 5 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/approximatelabs/datadm/blob/main/datadm.ipynb) 6 | [![](https://dcbadge.vercel.app/api/server/kW9nBQErGe?compact=true&style=flat)](https://discord.gg/kW9nBQErGe) 7 | 8 | ![dataDM](datadm-header.png?raw=true) 9 | 10 | DataDM is your private data assistant. A conversational interface for your data where you can load, clean, transform, and visualize without a single line of code. DataDM is open source and can be run entirely locally, keeping your juicy data secrets fully private. 11 | 12 | ## Demo 13 | 14 | https://github.com/approximatelabs/datadm/assets/916073/f15e6ab5-8108-40ea-a6de-c69a1389af84 15 | 16 | Note: Demo above is `GPT-4`, which sends the conversation to OpenAI's API. To use in full local mode, be sure to select `starchat-alpha-cuda` or `starchat-beta-cuda` as the model. This will use the StarChat model, which is a bit less capable but runs entirely locally. 17 | 18 | ⚠️ LLMs are known to hallucinate and generate fake results. So, double-check before trusting their results blindly! 19 | 20 | ### Join our [discord](https://discord.gg/kW9nBQErGe) to join the community and share your thoughts! 
21 | 22 | ## Features 23 | - [x] Persistent Jupyter kernel backend for data manipulation during conversation 24 | - [x] Run entirely locally, keeping your data private 25 | - [x] Natural language chat, visualizations/plots, and direct download of data assets 26 | - [x] Easy to use docker-images for one-line deployment 27 | - [x] Load multiple tables directly into the chat 28 | - [x] Search for data and load CSVs directly from github 29 | - [x] Option to use OpenAI's GPT-3.5 or GPT-4 (requires API key) 30 | - [ ] WIP: GGML based mode (CPU only, no GPU required) 31 | - [ ] WIP: Rollback kernel state when undo ~using `criu`~ (re-execute all cells) 32 | - [ ] TODO: Support for more data sources (e.g. SQL, S3, PySpark etc.) 33 | - [ ] TODO: Export a conversation as a notebook or html 34 | 35 | ## Things you can ask DataDM 36 | - [x] Load data from a URL 37 | - [x] Clean data by removing duplicates, nulls, outliers, etc. 38 | - [x] Join data from multiple tables into a single output table 39 | - [x] Visualize data with plots and charts 40 | - [x] Ask whatever you want to your very own private code-interpreter 41 | 42 | ## Quickstart 43 | 44 | You can use docker, colab, or install locally. 45 | 46 | ### 1. Docker to run locally 47 | ```bash 48 | docker run -e OPENAI_API_KEY={{YOUR_API_KEY_HERE}} -p 7860:7860 -it ghcr.io/approximatelabs/datadm:latest 49 | ``` 50 | 51 | For local-mode using StarChat model (requiring a CUDA device with at least 24GB of RAM) 52 | ```bash 53 | docker run --gpus all -p 7860:7860 -it ghcr.io/approximatelabs/datadm:latest-cuda 54 | ``` 55 | 56 | ### 2. Colab to run in the cloud 57 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/approximatelabs/datadm/blob/main/datadm.ipynb) 58 | 59 | 60 | ### 3. 
Use as a python package 61 | 62 | > ⚠️ datadm used this way runs LLM generated code in your userspace 63 | 64 | For local-data, cloud-model mode (no GPU required) - requires an OpenAI API key 65 | ```bash 66 | $ pip install datadm 67 | $ datadm 68 | ``` 69 | 70 | For local-mode using StarChat model (requiring a CUDA device with at least 24GB of RAM) 71 | ```bash 72 | $ pip install "datadm[cuda]" 73 | $ datadm 74 | ``` 75 | 76 | ## Special Thanks 77 | 78 | * [starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta) ([starcoder](https://github.com/bigcode-project/starcoder) with [databricks-dolly](https://huggingface.co/datasets/databricks/databricks-dolly-15k) and [OpenAssistant/oasst1](https://huggingface.co/datasets/OpenAssistant/oasst1)) 79 | * [Guidance](https://github.com/microsoft/guidance) 80 | * [HuggingFace](https://huggingface.co/) 81 | * [OpenAI](https://openai.com/) 82 | 83 | ## Contributions 84 | 85 | Contributions are welcome! Feel free to submit a PR or open an issue. 86 | 87 | ## Community 88 | 89 | Join the [Discord](https://discord.gg/kW9nBQErGe) to chat with the team 90 | 91 | Check out our other projects: [sketch](https://github.com/approximatelabs/sketch) and [approximatelabs](https://approximatelabs.com) 92 | -------------------------------------------------------------------------------- /datadm-header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/approximatelabs/datadm/c6e1484398ecd29bf669cf9da9bc01cf9770a2bf/datadm-header.png -------------------------------------------------------------------------------- /datadm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "! 
class Agent:
    """Base class for chat agents that answer by running code in a REPL.

    Subclasses provide `_bot`; this class supplies the shared plumbing for
    turning user input and uploaded data into conversation entries.
    """

    # When True, `valid_models` excludes models whose mode is 'api'.
    is_local = False

    def __init__(self):
        pass

    # NOTE(review): `bot` is a generator function, so calling it only creates
    # the generator object; the retry decorator presumably never observes
    # exceptions raised while the generator is iterated -- confirm whether the
    # retry is actually effective before relying on it.
    @tenacity.retry(wait=tenacity.wait_fixed(1), stop=tenacity.stop_after_attempt(3))
    def bot(self, repl, conversation, model_selection):
        """Yield `(history, conversation)` pairs as the agent responds.

        If no loaded LLM is registered under `model_selection`, a single
        pair is yielded whose history asks the user to load a model (the
        conversation itself is returned unchanged). Otherwise every
        conversation produced by `_bot` is yielded together with its
        history rendering.
        """
        llm = llm_manager.llms.get(model_selection, {}).get('llm')
        if llm is None:
            notice = {'role': 'assistant', 'content': 'Please select and load a model'}
            yield conversation_list_to_history(conversation + [notice]), conversation
            return

        # A distinct loop name avoids shadowing the `conversation` parameter.
        for updated in self._bot(repl, conversation, llm):
            yield conversation_list_to_history(updated), updated

    def _bot(self, repl, conversation, llm):
        """Generate successive conversation lists; must be overridden."""
        raise NotImplementedError(f"Please Implement _bot method on {self.__class__.__name__}")

    def user(self, message, history, conversation):
        """Record a user message.

        Returns a tuple of: an empty string, `history` extended with
        `[message, None]`, and `conversation` extended with the user entry.
        Neither input list is mutated.
        """
        return "", history + [[message, None]], conversation + [{'role': 'user', 'content': message}]

    @staticmethod
    def _variable_name(stem):
        """Turn a file stem into a name usable as a Python variable."""
        import keyword  # stdlib; imported locally because only this helper needs it

        # Raw string: '\W' and '\d' are regex classes, not string escapes.
        name = re.sub(r'\W|^(?=\d)', '_', stem)
        if not name:
            # e.g. a file called ".csv" has an empty stem
            return 'data'
        if keyword.iskeyword(name):
            # e.g. "class.csv" would otherwise generate invalid code
            return f'{name}_'
        return name

    def add_data(self, file, repl, conversation):
        """Load a CSV into the REPL and log the exchange in `conversation`.

        `file` is either a string, which is handed to `pd.read_csv` as-is,
        or an object with a `.name` path, which is first uploaded through
        `repl.upload_file`. `conversation` is appended to in place.

        Returns `(history, conversation)`.
        """
        if isinstance(file, str):
            basename = file
            stem = basename.split('/')[-1].split('.')[0]
        else:
            repl.upload_file(file.name)
            basename = file.name.split('/')[-1]
            stem = basename.split('.')[0]
        varname = self._variable_name(stem)
        # `!r` emits a correctly quoted/escaped literal, so names containing
        # quotes or backslashes still produce valid code.
        code_to_execute = f"{varname} = pd.read_csv({basename!r})\nprint({varname}.head())"
        result = repl.exec(code_to_execute)
        conversation.append({'role': 'user', 'content': f"Added {basename}"})
        conversation.append({'role': 'assistant', 'content': f"Loading the data...\n```python\n{code_to_execute}\n```"})
        conversation.append({'role': 'assistant', 'content': result})
        return conversation_list_to_history(conversation), conversation

    @property
    def valid_models(self):
        """Set of model names this agent may use.

        Local-only agents get every registered model whose 'mode' is not
        'api'; other agents get all registered models.
        """
        if self.is_local:
            return {k for k, v in llm_manager.llms.items() if v['mode'] != 'api'}
        return set(llm_manager.llms.keys())
@property 76 | def names(self): 77 | names = [] 78 | for agent in self.agents.values(): 79 | names.append(agent.__class__.__name__ + (" (local-only)" if agent.is_local else "")) 80 | return names 81 | 82 | agent_manager = AgentManager() -------------------------------------------------------------------------------- /datadm/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/approximatelabs/datadm/c6e1484398ecd29bf669cf9da9bc01cf9770a2bf/datadm/agents/__init__.py -------------------------------------------------------------------------------- /datadm/agents/baseline.py: -------------------------------------------------------------------------------- 1 | import guidance 2 | import re 3 | 4 | from datadm.agent import Agent 5 | from datadm.conversation import clean_conversation_list 6 | 7 | 8 | base_prompt = ''' 9 | {{#user~}} 10 | You are a helpful AI code-writing assistant, the perfect data analyst who is jovial, fun and writes great code to solve data problems! 11 | 12 | Answer my questions with both text describing your plan (but not an answer), and then the code in markdown that will be executed! 13 | 14 | * Use `print` to show results. 15 | * Don't answer the question directly, instead suggest how you will solve the problem, then write in a ```python markdown block, the code you will use to solve the problem. 16 | * For plotting, please use `matplotlib`. use `plt.show()` to display the plot to the user. 
def extract_all_code_blocks(text):
    """Concatenate the contents of every ``` fenced block in *text*.

    A leading ``python`` language tag is stripped from each block. Blocks
    are joined with no separator. If the last fence is never closed (for
    example when generation was cut off), everything after it is treated
    as the final block instead of raising IndexError.

    Returns an empty string when *text* contains no fence.
    """
    # Splitting on the fence leaves prose at even indices and fenced content
    # at odd indices; an unclosed trailing fence simply produces one last odd
    # segment that runs to the end of the text.
    fenced = text.split('```')[1::2]
    return "".join(
        block[len('python'):] if block.startswith('python') else block
        for block in fenced
    )
10 | 11 | Answer my questions with both text describing your plan (but not an answer), and then the code in markdown that will be executed! 12 | 13 | * Use `print` to show results. 14 | * Don't answer the question directly, instead suggest how you will solve the problem, then write in a ```python markdown block, the code you will use to solve the problem. 15 | * For plotting, please use `matplotlib`. use `plt.show()` to display the plot to the user. 16 | {{~/user}} 17 | {{#each conversation}} 18 | {{#if (equal this.role 'user')}} 19 | {{#user~}} 20 | {{this.content}} 21 | {{~/user}} 22 | {{/if}} 23 | {{#if (equal this.role 'assistant')}} 24 | {{#assistant~}} 25 | {{this.content}} 26 | {{~/assistant}} 27 | {{/if}} 28 | {{/each}} 29 | ''' 30 | 31 | precode_prompt = ''' 32 | {{#assistant~}} 33 | {{gen "thoughts" temperature=0.1 max_tokens=120 stop=["```", "<|end|>"]}} 34 | ```python 35 | {{gen "code" temperature=0.0 max_tokens=800 stop=["```", "<|end|>"]}} 36 | {{~/assistant}} 37 | ''' 38 | 39 | postcode_prompt = ''' 40 | {{#assistant~}} 41 | Looking at the executed results above, we can see {{gen "summary" temperature=0.0 max_tokens=120 stop=["```", "<|end|>"]}} 42 | {{~/assistant}} 43 | ''' 44 | 45 | class CoTMultiStep(Agent): 46 | is_local = True 47 | 48 | def _bot(self, repl, conversation, llm): 49 | starting_convo = conversation 50 | 51 | tries = 0 52 | while tries < 2: 53 | precode = guidance(base_prompt + precode_prompt, llm=llm) 54 | 55 | for result in precode(conversation=clean_conversation_list(starting_convo), silent=True, stream=True): 56 | resolved_content = result.get('thoughts') or '' 57 | resolved_content += '\n```python\n'+(result.get('code') or '')+'\n```' 58 | resolved_convo = starting_convo + [{'role': 'assistant', 'content': resolved_content}] 59 | yield resolved_convo 60 | starting_convo += [{'role': 'assistant', 'content': resolved_content}] 61 | 62 | exec_result = repl.exec(result['code']) 63 | starting_convo += [{'role': 'assistant', 
def remove_to_last_talker(conversation, model_selection):
    """Drop the trailing run of messages written by the last speaker.

    Also clears the selected model's cache when that model is loaded.
    `conversation` is modified in place.

    Returns `(history, conversation)`.
    """
    # assume you want to clear cache as well
    # The model may not be loaded yet, in which case there is no llm (and no
    # cache) to clear; previously this raised AttributeError on None.
    llm = llm_manager.llms.get(model_selection, {}).get('llm')
    cache = getattr(llm, 'cache', None)
    if cache is not None:
        cache.clear()

    if conversation:
        last_talker = conversation[-1]['role']
        while conversation and conversation[-1]['role'] == last_talker:
            conversation.pop()
    return conversation_list_to_history(conversation), conversation
def setup_repl():
    """Create a REPL and run the standard bootstrap statements in it.

    The statements import pandas, numpy and matplotlib.pyplot under their
    usual aliases and set two pandas display options. Returns the REPL.
    """
    bootstrap_statements = (
        'import pandas as pd',
        'import numpy as np',
        'import matplotlib.pyplot as plt',
        "pd.set_option('display.max_columns', 500)",
        "pd.set_option('display.width', 1000)",
    )
    session = REPL()
    for statement in bootstrap_statements:
        session.exec(statement)
    return session
e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e},u.people.toString=function(){return u.toString(1)+".people (stub)"},o="capture identify alias people.set people.set_once set_config register register_once unregister opt_out_capturing has_opted_out_capturing opt_in_capturing reset isFeatureEnabled onFeatureFlags".split(" "),n=0;n'}}, 99 | 'url': {'class': gr.Text, 'kwargs': {'value': 'unknown', "visible": False}}, 100 | 'download': {'class': gr.Button, 'kwargs': {'value': 'Add To Chat'}}, 101 | } 102 | self.ref_order = [] 103 | 104 | def component(self, name, **extra_kwargs): 105 | self.ref_order.append(name) 106 | kwargs = self.gradios.get(name, {}).get('kwargs', {}) 107 | return self.gradios.get(name, {}).get('class', gr.Text)(**kwargs, **extra_kwargs) 108 | 109 | def gradio_gen(self, upload_magic_thens): 110 | objs = [] 111 | with gr.Column(scale=10): 112 | objs.append(self.component('html')) 113 | objs.append(self.component('url')) 114 | with gr.Column(scale=1, elem_id="justify_center", min_width=180): 115 | objs.append(self.component('download', container=False)) 116 | events = objs[-1].click(lambda url: print(f"Downloading CSV: {url}"), objs[-2], None) 117 | for then_args in upload_magic_thens(objs[-2]): 118 | events.then(*then_args) 119 | return objs 120 | 121 | def gradio_update(self): 122 | res = [] 123 | for k in self.ref_order: 124 | res.append(self.gradios[k]['class'].update(**self.gradios[k]['kwargs'])) 125 | return res 126 | 127 | def update_from_dict(self, data): 128 | self.data = data 129 | if data is None: 130 | self.gradios['html']['kwargs']['value'] = '
' 131 | self.gradios['url']['kwargs']['value'] = 'unknown' 132 | return 133 | self.gradios['html']['kwargs']['value'] = f""" 134 |
135 |
136 | {data['repo']} 137 | {data['subpath']} 138 |
139 |
{data['text']}
140 |
def search_code(query, timeout=30):
    """Search GitHub code for CSV paths matching *query*.

    Sends `"{query} .csv in:path"` to the GitHub code-search endpoint and
    asks for 5 results per page. The `GITHUB_ACCESS_TOKEN` environment
    variable, when set, is sent as a bearer token.

    Args:
        query: Free-text search terms.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list under the response's "items" key, or an empty list when
        that key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    base_url = "https://api.github.com"
    endpoint = "/search/code"
    params = {
        "q": f"{query} .csv in:path",
        "per_page": 5,
    }
    headers = {"Accept": "application/vnd.github.v3+json"}
    # Concatenating "Bearer " with a missing token raised TypeError; only add
    # the header when a token is configured.
    # NOTE(review): the code-search endpoint presumably rejects anonymous
    # requests, in which case raise_for_status() below reports it -- confirm.
    token = os.environ.get("GITHUB_ACCESS_TOKEN")
    if token:
        headers["Authorization"] = "Bearer " + token
    response = requests.get(
        base_url + endpoint,
        params=params,
        headers=headers,
        timeout=timeout,  # without a timeout a stalled connection blocks forever
    )
    response.raise_for_status()
    data = response.json()
    return data.get("items", [])
line.decode('utf-8', errors="ignore") + "\n" 212 | lines_count += 1 213 | else: 214 | content = "" 215 | 216 | result = { 217 | "repo": repo, 218 | "subpath": subpath, 219 | "fullurl": fullurl, 220 | "text": content, 221 | } 222 | formatted_results.append(result) 223 | 224 | return formatted_results 225 | 226 | 227 | def searchupdate(query, container): 228 | results = search_code(query) 229 | formatted_results = format_items(results) 230 | container.update_values(formatted_results) 231 | return container.updater(0) 232 | 233 | 234 | with gr.Blocks( 235 | theme=gr.themes.Soft(), 236 | css=css, 237 | analytics_enabled=False, 238 | title="DataDM" 239 | ) as demo: 240 | repl = gr.State(None) 241 | files = [] 242 | conversation = gr.State([]) 243 | gr.Markdown("# Welcome to DataDM!") 244 | with gr.Tabs() as tabs: 245 | with gr.Tab("Chat", id=0): 246 | with gr.Row(): 247 | with gr.Column(scale=5, elem_id="fullheight"): 248 | chatbot = gr.Chatbot(elem_id="chatbox", show_label=False) 249 | with gr.Row(): 250 | with gr.Column(): 251 | msg = gr.Textbox( 252 | label="Chat Message Box", 253 | placeholder="Chat Message Box", 254 | show_label=False, 255 | elem_id="chat_message_box", 256 | container=False 257 | ) 258 | with gr.Column(): 259 | with gr.Row(): 260 | submit = gr.Button("Submit", elem_id="submit_button") 261 | cancel = gr.Button("Cancel", variant="stop", visible=False) 262 | undo = gr.Button("Undo") 263 | retry = gr.Button("Retry") 264 | with gr.Column(scale=1): 265 | with gr.Row(): 266 | agent_selection = gr.Dropdown( 267 | choices=agent_manager.names, 268 | value="Baseline", 269 | label="agent", 270 | multiselect=False, 271 | show_label=True, 272 | interactive=True, 273 | container=False) 274 | with gr.Row(): 275 | model_selection = gr.Dropdown( 276 | choices=list(llm_manager.llms.keys()), 277 | value=list(llm_manager.llms.keys())[0], 278 | label="model", 279 | multiselect=False, 280 | show_label=True, 281 | interactive=True, 282 | 
elem_id='model_selection_dropdown', 283 | container=False) 284 | model_state = gr.HighlightedText(label=False, container=False) 285 | load_model = gr.Button("Load Model", visible=False, elem_id="load_model_button") 286 | files.append(gr.Text("No Data Files", label="Data Files")) 287 | for _ in range(10): 288 | f = gr.File(__file__, visible=False) 289 | files.append(f) 290 | upload = gr.UploadButton(label="Upload CSV", elem_id="upload_button") 291 | upload_magic_thens = lambda filepath_object: [ 292 | (add_data, [agent_selection, filepath_object, repl, conversation], [chatbot, conversation]), 293 | (get_downloads, repl, files), 294 | (lambda: gr.Tabs.update(selected=0), None, tabs) 295 | ] 296 | 297 | with gr.Tab("Search", id=1): 298 | container = gr.State(Container(5)) 299 | results = [] 300 | with gr.Row(): 301 | query = gr.Textbox( 302 | label="Search", 303 | placeholder="What data are you looking for?", 304 | show_label=False, 305 | elem_id="search_textbox", 306 | container=False 307 | ) 308 | search = gr.Button("Search") 309 | with gr.Column(): 310 | results.extend(container.value.gradio_gen(upload_magic_thens)) 311 | 312 | # Run analytics tracking javascript only if in analytics tracking mode (default is off) 313 | if os.environ.get("ANALYTICS_TRACKING", "0") == "1": 314 | demo.load(None, None, None, _js=posthog_default_off_analytics_script) 315 | 316 | # Search Blocks 317 | query.submit(searchupdate, [query, container], results) 318 | search.click(searchupdate, [query, container], results) 319 | 320 | # Setup Blocks 321 | demo.load(lambda: gr.Button.update(visible=False), None, load_model 322 | ).then(llm_manager.model_status, model_selection, model_state 323 | ).then(lambda llm_name: gr.Button.update(visible=(llm_manager.llms[llm_name]['state'] != 'ready')), model_selection, load_model) 324 | demo.load(setup_repl, None, repl) 325 | 326 | # Configuration Blocks 327 | model_selection.change(lambda x: (x, llm_manager.model_status(x)), model_selection, 
[model_selection, model_state] 328 | ).then(lambda llm_name: gr.Button.update(visible=(llm_manager.llms[llm_name]['state'] != 'ready')), model_selection, load_model) 329 | agent_selection.change( 330 | lambda x: gr.Dropdown.update( 331 | choices=agent_manager.get(x).valid_models & set(llm_manager.llms.keys()), 332 | value=list(agent_manager.get(x).valid_models)[0] 333 | ), 334 | agent_selection, 335 | [model_selection] 336 | ).then(lambda llm_name: gr.Button.update(visible=(llm_manager.llms[llm_name]['state'] != 'ready')), model_selection, load_model) 337 | 338 | load_model.click(llm_manager.load, model_selection, model_state 339 | ).then(lambda llm_name: gr.Button.update(visible=(llm_manager.llms[llm_name]['state'] != 'ready')), model_selection, load_model) 340 | 341 | # Agent Blocks 342 | upload_event = upload.upload(add_data, [agent_selection, upload, repl, conversation],[chatbot, conversation] 343 | ).then(get_downloads, repl, files) 344 | 345 | buttonset = [submit, cancel, undo, retry] 346 | running_buttons = [gr.Button.update(**k) for k in [{'visible': False}, {'visible': True}, {'interactive': False}, {'interactive': False}]] 347 | idle_buttons = [gr.Button.update(**k) for k in [{'visible': True}, {'visible': False}, {'interactive': True}, {'interactive': True}]] 348 | 349 | msg_enter_event = msg.submit(user, [agent_selection, msg, chatbot, conversation], [msg, chatbot, conversation], queue=False 350 | ).then(lambda: running_buttons, None, buttonset, queue=False 351 | ).then(bot, [agent_selection, repl, conversation, model_selection], [chatbot, conversation], queue=True) 352 | msg_enter_finalize = msg_enter_event.then(get_downloads, repl, files 353 | ).then(lambda: idle_buttons, None, buttonset, queue=False) 354 | 355 | submit_click_event = submit.click(user, [agent_selection, msg, chatbot, conversation], [msg, chatbot, conversation], queue=False 356 | ).then(lambda: running_buttons, None, buttonset, queue=False 357 | ).then(bot, [agent_selection, repl, 
conversation, model_selection], [chatbot, conversation], queue=True) 358 | submit_click_finalize = submit_click_event.then(get_downloads, repl, files 359 | ).then(lambda: idle_buttons, None, buttonset, queue=False) 360 | 361 | # Control Blocks 362 | undo.click(remove_to_last_talker, [conversation, model_selection], outputs=[chatbot, conversation], queue=False) 363 | cancel.click(None, cancels=[msg_enter_event, submit_click_event], queue=False 364 | ).then(lambda: idle_buttons, None, buttonset, queue=False) 365 | retry.click(remove_to_last_talker, [conversation, model_selection], outputs=[chatbot, conversation], queue=False 366 | ).then(bot, [agent_selection, repl, conversation, model_selection], [chatbot, conversation], queue=True 367 | ).then(get_downloads, repl, files) 368 | 369 | demo.queue(max_size=128, concurrency_count=1) 370 | 371 | def main(share=False): 372 | demo.launch(share=share, server_name="0.0.0.0") 373 | 374 | if __name__ == "__main__": 375 | main() -------------------------------------------------------------------------------- /datadm/backend.py: -------------------------------------------------------------------------------- 1 | import guidance 2 | from transformers import AutoModelForCausalLM, AutoTokenizer 3 | import os 4 | 5 | 6 | # TODO: fix this to check devices and packages to dynamically adjust available LLMs and models 7 | try: 8 | import accelerate 9 | local_available = True 10 | except ImportError: 11 | local_available = False 12 | 13 | class StarChat(guidance.llms.Transformers): 14 | def __init__(self, model_path=None, revision=None, **kwargs): 15 | import torch 16 | tokenizer = AutoTokenizer.from_pretrained(model_path, device_map='auto', revision=revision) 17 | model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto', torch_dtype=torch.bfloat16, revision=revision) 18 | model.eval() 19 | super().__init__(model, tokenizer=tokenizer, device_map='auto', **kwargs) 20 | 21 | @staticmethod 22 | def role_start(role): 23 | 
class BackendLLMManager():
    """Registry of the LLM backends the app can use, with their load state.

    ``llms`` maps a model name to a dict holding at least ``state``
    ('unloaded', 'loading', 'ready' or 'error'), ``llm`` (the loaded object
    or None) and ``mode`` ('cuda' or 'api').
    """
    OPENAI_MODELS = ['gpt-3.5-turbo', 'gpt-4', 'gpt-3.5-turbo-16k', 'gpt-4-32k']

    def __init__(self):
        self.llms = {}
        # Local StarChat models are only offered when `accelerate` imported.
        if local_available:
            self.llms['starchat-alpha-cuda'] = {'state': 'unloaded', 'llm': None, 'mode': 'cuda', 'model_path': 'HuggingFaceH4/starchat-alpha', 'revision': '5058bd8557100137ade3c459bfc8100e90f71ec7'}
            self.llms['starchat-beta-cuda'] = {'state': 'unloaded', 'llm': None, 'mode': 'cuda', 'model_path': 'HuggingFaceH4/starchat-beta', 'revision': 'b1bcda690655777373f57ea6614eb095ec2c886f'}

        for model_name in self.OPENAI_MODELS:
            self.llms[model_name] = {'state': 'unloaded', 'llm': None, 'mode': 'api'}

    def _build(self, llm_name):
        """Construct and return the backend object for *llm_name*.

        Raises RuntimeError when the model is unsupported or the OpenAI key
        is missing; constructor errors propagate unchanged.
        """
        if llm_name in ['starchat-alpha-cuda', 'starchat-beta-cuda']:
            return StarChat(**self.llms[llm_name])
        if llm_name in self.OPENAI_MODELS:
            if 'OPENAI_API_KEY' not in os.environ:
                raise RuntimeError("OPENAI_API_KEY not found in environment")
            return guidance.llms.OpenAI(llm_name)
        raise RuntimeError(f"LLM {llm_name} not supported")

    def load(self, llm_name):
        """Load *llm_name* if it is currently 'unloaded' and return its status.

        Any failure while building the backend marks the entry 'error' and
        re-raises, so the entry is never left stuck in 'loading'.
        """
        entry = self.llms[llm_name]
        if entry['state'] == 'unloaded':
            entry['state'] = 'loading'
            try:
                entry['llm'] = self._build(llm_name)
            except Exception:
                entry['state'] = 'error'
                raise
            entry['state'] = 'ready'
        return self.model_status(llm_name)

    def unload(self, llm_name):
        """Drop the loaded backend for *llm_name*; unknown names are ignored."""
        if llm_name in self.llms:
            self.llms[llm_name]['state'] = 'unloaded'
            self.llms[llm_name]['llm'] = None

    def model_status(self, llm_name):
        """Return ``[(llm_name, state)]``, the shape the status widget is fed."""
        state = self.llms[llm_name]['state']
        return [(llm_name, state)]
# Compiled once at import time instead of on every strip_ansi() call.
_ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def strip_ansi(text):
    """Return *text* with ANSI escape sequences removed."""
    return _ANSI_ESCAPE.sub('', text)


def _render_exec_output(val, images_to_append):
    """Render one exec-result dict (keys `stdout`, `tracebacks`, `data`) as
    chat markdown, appending the path of every image written to
    *images_to_append*."""
    new_text = ""
    new_html_text = ""
    if val['stdout']:
        new_text += val['stdout']
    if val['tracebacks']:
        new_text += strip_ansi(val['tracebacks'])
    for dataentry in val['data'] or []:
        for k, v in dataentry.items():
            if 'text' in k:
                if 'html' in k:
                    new_html_text += v
                else:
                    new_text += v
            elif isinstance(v, str):
                # Assume this is a file written in base64: decode it, save it
                # to a deterministic temporary path and show it as an image.
                try:
                    payload = base64.b64decode(v)
                except ValueError:
                    # Not base64 after all (binascii.Error is a ValueError);
                    # skip the entry rather than break the whole chat.
                    continue
                filename = hashlib.sha256(v.encode('utf-8')).hexdigest() + '.png'
                file_path = os.path.join(temp_image_dir.name, filename)
                with open(file_path, 'wb') as f:
                    f.write(payload)
                images_to_append.append(file_path)
            # Non-string payloads (e.g. JSON dicts) cannot be rendered here
            # and are ignored.
    if new_text:
        new_text = f'```bash\n{new_text}\n```'
    return new_text + new_html_text


def conversation_list_to_history(convo_list):
    """Convert a role/content message list into chatbot history rows.

    Assuming a conversation between user and assistant, returns a list of
    [user, assistant] pairs. If someone speaks out of turn the missing side
    is None:

    [{'role': 'user', 'content': 'hello'}, {'role': 'assistant', 'content': 'hi'}]
        -> [['hello', 'hi']]
    [{'role': 'user', 'content': 'hi'}, {'role': 'user', 'content': ' HEY! '},
     {'role': 'assistant', 'content': ' what do yo'}]
        -> [['hi', None], [' HEY! ', ' what do yo']]

    Messages with any other role are skipped. Non-string contents are treated
    as exec results and rendered to markdown; images they contain are emitted
    as extra [None, (path,)] rows after the row they belong to.
    """
    history = []
    for item in convo_list:
        if item['role'] == 'user':
            # a user message always starts a new row
            history.append([item['content'], None])
        elif item['role'] == 'assistant':
            # fill the open row, or start a new one when there is none
            if not history or history[-1][1] is not None:
                history.append([None, item['content']])
            else:
                history[-1][1] = item['content']

    # convert every non-string entry into a valid history object
    new_history = []
    for row in history:
        images_to_append = []
        new_row = []
        for val in row:
            if val is None or isinstance(val, str):
                new_text = val
            else:
                new_text = _render_exec_output(val, images_to_append)
            new_row.append(new_text if new_text else None)
        new_history.append(new_row)
        new_history.extend([None, (image_file,)] for image_file in images_to_append)
    return new_history


def clean_conversation_list(convo_list):
    """Return *convo_list* with every non-string content replaced by plain text.

    Non-string contents are assumed to be the output of `exec`, i.e. dicts
    with the three keys `stdout`, `tracebacks` and `data`. Tracebacks are
    stripped of ANSI codes and then truncated to 400 characters so escape
    sequences do not eat into the budget. String and None contents are passed
    through unchanged (the same dict object is reused).
    """
    cleaned = []
    for convo in convo_list:
        content = convo['content']
        if content is None or isinstance(content, str):
            cleaned.append(convo)
            continue
        new_html_text = ""
        new_text = ""
        if content['stdout']:
            new_text += content['stdout']
        if content['tracebacks']:
            new_text += strip_ansi(content['tracebacks'])[:400]
        if content['data']:
            entries = [(k, v) for dataentry in content['data'] for k, v in dataentry.items()]
            new_text += "\n".join(v for k, v in entries if 'text' in k and 'html' not in k)
            new_html_text += "\n".join(v for k, v in entries if 'html' in k)
        cleaned.append({
            'role': convo['role'],
            'content': 'EXECUTION OF LAST CODE BLOCK RESULT: ' + (f'```\n{new_text}\n```' if new_text else '') + new_html_text,
        })
    return cleaned
class REPL:
    """A Jupyter kernel running in its own scratch directory, driven through
    a blocking kernel client.

    `history` records every `exec` call (code, collected output and the raw
    iopub messages).
    """
    # TODO: add a "save as ipynb file" as serialization option
    # (allow for "added readme" comment operations, so `bot` can write the conversation into it)
    # Use this to offer "jupyter file" or even better: "in colab"
    # -> For any uploaded files, call them out in the header that the notebook expects those files ("system was run with {x} {y}")
    # -> "Add secret" -> doesn't show up in the conversation, shows up in notebooks as an env-var
    # "download conversation as webpage" -> {ipynb} -> {html}
    def __init__(self):
        self.history = []
        self.uid = str(uuid.uuid4())
        self.conn_file = tempfile.NamedTemporaryFile(suffix='.json')
        self.runtime_dir = tempfile.TemporaryDirectory()
        self.work_dir = self.runtime_dir.name
        # The kernel runs detached, with a minimal environment rooted at the
        # scratch directory.
        kernel_process = subprocess.Popen(
            ['jupyter-kernel', '--KernelManager.connection_file', self.conn_file.name],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
            cwd=self.work_dir,
            start_new_session=True,
            env={
                'HOME': self.work_dir,
                'SHELL': '/bin/bash',
                'PWD': self.work_dir,
                'PATH': os.environ.get('PATH', ''),
            }
        )
        self.kernel_pid = kernel_process.pid
        atexit.register(lambda: os.kill(self.kernel_pid, signal.SIGKILL))
        atexit.register(self.runtime_dir.cleanup)
        self.kc = self.connect()

    def read_all(self):
        """Yield pending iopub messages until none arrives within 10 ms."""
        while True:
            try:
                yield self.kc.iopub_channel.get_msg(timeout=0.01)
            except Empty:
                break

    def snapshot(self):
        """Dump the kernel process with `criu` into /tmp/<uid>, then restore it."""
        self.kc.stop_channels()
        os.makedirs(f"/tmp/{self.uid}", exist_ok=True)
        subprocess.run(
            [
                'sudo', 'criu', 'dump',
                '-t', str(self.kernel_pid),
                '-D', f'/tmp/{self.uid}',
                '--tcp-established',
                '--ext-unix-sk',
                '--shell-job',
                '--track-mem',
                # '--leave-running'
            ],
            check=True
        )
        self.restore()
        # okay, should be back up, now reconnect the kernel client
        # self.connect()

    def restore(self):
        """Terminate the kernel and restore it from the `criu` dump in
        /tmp/<uid>, then reconnect the client."""
        self.kc.stop_channels()
        # TODO: fix this?
        os.kill(self.kernel_pid, signal.SIGTERM)
        time.sleep(10)
        # NOTE(review): --pidfile is given the pid itself, not a file path;
        # confirm against the criu CLI before relying on this.
        subprocess.run(
            ['sudo', 'criu', 'restore',
             '-d',
             '-D', f'/tmp/{self.uid}',
             '--pidfile', str(self.kernel_pid),
             '-x',
             '--tcp-established'],
            check=True
        )
        self.connect()

    def connect(self, n_retries=100):
        """Create a kernel client from the connection file and wait for the
        kernel to be ready.

        Retries every 0.2 s while the connection file is not yet valid JSON;
        raises RuntimeError after *n_retries* failed attempts.
        """
        tries = 0
        while tries < n_retries:
            try:
                kc = BlockingKernelClient(connection_file=self.conn_file.name)
                kc.load_connection_file()
                kc.start_channels()
                break
            except json.decoder.JSONDecodeError:
                # connection file not (fully) written yet
                time.sleep(0.2)
                tries += 1
        else:
            raise RuntimeError('Kernel did not start')
        self.kc = kc
        self.kc.wait_for_ready(timeout=5)
        return kc

    def exec(self, code, timeout=10):
        """Execute *code* in the kernel and return its collected output.

        Returns a dict with `stdout` (stream text), `tracebacks` (joined
        error tracebacks) and `data` (list of display/result mime bundles).
        Raises RuntimeError on an unknown message type or execution state.
        """
        list(self.read_all())  # flush
        self.kc.execute(code)
        self.kc.get_shell_msg(timeout=timeout)
        output = {
            'stdout': '',
            'tracebacks': '',
            'data': [],
        }
        results = []
        for result in self.read_all():
            results.append(result)
            msg_type = result['msg_type']
            if msg_type == 'status':
                state = result['content']['execution_state']
                if state not in ('idle', 'busy', 'starting', 'restarting'):
                    raise RuntimeError(f'Unknown execution state: {state}')
                continue
            if msg_type == 'execute_input':
                continue  # ignore
            content = result['content']
            if msg_type == 'stream':
                output['stdout'] += content['text']
            elif msg_type == 'error':
                output['tracebacks'] += "\n".join(content['traceback'])
            elif msg_type in ('display_data', 'execute_result'):
                output['data'].append(content['data'])
            else:
                raise RuntimeError(f'Unknown message type {msg_type}')
        self.history.append({
            'code': code,
            'output': output,
            'results': results,
        })
        return output

    def whos(self, type=None):
        """Return the stdout of `%whos`, optionally restricted to *type*."""
        if type:
            return self.exec(f'%whos {type}')['stdout']
        # assume it always responds w/ no error
        return self.exec('%whos')['stdout']

    def upload_file(self, filepath):
        """Copy the file at *filepath* into the kernel's work dir under its
        base name and return that name."""
        filename = os.path.basename(filepath)
        with open(filepath, 'rb') as f:
            filebytes = f.read()
        return self.upload_bytes(filebytes, filename=filename)

    def upload_bytes(self, filebytes, filename=None):
        """Write *filebytes* into the work dir.

        With *filename* the file is written under that name and the name is
        returned as given; without it a fresh temporary file is created and
        its full path is returned.
        """
        if filename is None:
            with tempfile.NamedTemporaryFile(dir=self.work_dir, delete=False) as f:
                f.write(filebytes)
                filename = f.name
        else:
            with open(os.path.join(self.work_dir, filename), 'wb') as f:
                f.write(filebytes)
        return filename

    def dataframes_as_csvs(self):
        """Export every global DataFrame/Series in the kernel to `<name>.csv`.

        Returns a list of dicts (`name`, `columns`, `rows`, `type`, `csv`);
        an empty list when nothing could be parsed from the kernel output.
        The snippet expects `pd` to be defined in the kernel already.
        """
        # A Series has no `.columns`; it reports its name (if any) instead,
        # so it is no longer dropped by the snippet's blanket except.
        code_to_extract = f"""
import json
output = []
for name, x in list(globals().items()):
    if isinstance(x, pd.DataFrame) or isinstance(x, pd.Series):
        try:
            x.to_csv(name + '.csv')
            output.append({{
                'name': name,
                'columns': list(x.columns) if isinstance(x, pd.DataFrame) else ([] if x.name is None else [str(x.name)]),
                'rows': len(x),
                'type': 'DataFrame' if isinstance(x, pd.DataFrame) else 'Series',
                'csv': '{self.work_dir}' + '/' + name + '.csv',
            }})
        except Exception as e:
            pass
print("FROMHERE:"+json.dumps(output)+":TOHERE")
"""
        result = self.exec(code_to_extract)
        frames = []
        if result['stdout']:
            try:
                # the payload is fenced by markers so other stdout is ignored
                jsonstring = result['stdout'].split('FROMHERE:')[1].split(':TOHERE')[0]
                frames = json.loads(jsonstring)
            except Exception as e:
                print(e, result['stdout'])
        return frames
Austria,AT,AUT,040,ISO 3166-2:AT,Europe,Western Europe,"",150,155,"" 17 | Azerbaijan,AZ,AZE,031,ISO 3166-2:AZ,Asia,Western Asia,"",142,145,"" 18 | Bahamas,BS,BHS,044,ISO 3166-2:BS,Americas,Latin America and the Caribbean,Caribbean,019,419,029 19 | Bahrain,BH,BHR,048,ISO 3166-2:BH,Asia,Western Asia,"",142,145,"" 20 | Bangladesh,BD,BGD,050,ISO 3166-2:BD,Asia,Southern Asia,"",142,034,"" 21 | Barbados,BB,BRB,052,ISO 3166-2:BB,Americas,Latin America and the Caribbean,Caribbean,019,419,029 22 | Belarus,BY,BLR,112,ISO 3166-2:BY,Europe,Eastern Europe,"",150,151,"" 23 | Belgium,BE,BEL,056,ISO 3166-2:BE,Europe,Western Europe,"",150,155,"" 24 | Belize,BZ,BLZ,084,ISO 3166-2:BZ,Americas,Latin America and the Caribbean,Central America,019,419,013 25 | Benin,BJ,BEN,204,ISO 3166-2:BJ,Africa,Sub-Saharan Africa,Western Africa,002,202,011 26 | Bermuda,BM,BMU,060,ISO 3166-2:BM,Americas,Northern America,"",019,021,"" 27 | Bhutan,BT,BTN,064,ISO 3166-2:BT,Asia,Southern Asia,"",142,034,"" 28 | Bolivia (Plurinational State of),BO,BOL,068,ISO 3166-2:BO,Americas,Latin America and the Caribbean,South America,019,419,005 29 | "Bonaire, Sint Eustatius and Saba",BQ,BES,535,ISO 3166-2:BQ,Americas,Latin America and the Caribbean,Caribbean,019,419,029 30 | Bosnia and Herzegovina,BA,BIH,070,ISO 3166-2:BA,Europe,Southern Europe,"",150,039,"" 31 | Botswana,BW,BWA,072,ISO 3166-2:BW,Africa,Sub-Saharan Africa,Southern Africa,002,202,018 32 | Bouvet Island,BV,BVT,074,ISO 3166-2:BV,Americas,Latin America and the Caribbean,South America,019,419,005 33 | Brazil,BR,BRA,076,ISO 3166-2:BR,Americas,Latin America and the Caribbean,South America,019,419,005 34 | British Indian Ocean Territory,IO,IOT,086,ISO 3166-2:IO,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 35 | Brunei Darussalam,BN,BRN,096,ISO 3166-2:BN,Asia,South-eastern Asia,"",142,035,"" 36 | Bulgaria,BG,BGR,100,ISO 3166-2:BG,Europe,Eastern Europe,"",150,151,"" 37 | Burkina Faso,BF,BFA,854,ISO 3166-2:BF,Africa,Sub-Saharan Africa,Western 
Africa,002,202,011 38 | Burundi,BI,BDI,108,ISO 3166-2:BI,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 39 | Cabo Verde,CV,CPV,132,ISO 3166-2:CV,Africa,Sub-Saharan Africa,Western Africa,002,202,011 40 | Cambodia,KH,KHM,116,ISO 3166-2:KH,Asia,South-eastern Asia,"",142,035,"" 41 | Cameroon,CM,CMR,120,ISO 3166-2:CM,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 42 | Canada,CA,CAN,124,ISO 3166-2:CA,Americas,Northern America,"",019,021,"" 43 | Cayman Islands,KY,CYM,136,ISO 3166-2:KY,Americas,Latin America and the Caribbean,Caribbean,019,419,029 44 | Central African Republic,CF,CAF,140,ISO 3166-2:CF,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 45 | Chad,TD,TCD,148,ISO 3166-2:TD,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 46 | Chile,CL,CHL,152,ISO 3166-2:CL,Americas,Latin America and the Caribbean,South America,019,419,005 47 | China,CN,CHN,156,ISO 3166-2:CN,Asia,Eastern Asia,"",142,030,"" 48 | Christmas Island,CX,CXR,162,ISO 3166-2:CX,Oceania,Australia and New Zealand,"",009,053,"" 49 | Cocos (Keeling) Islands,CC,CCK,166,ISO 3166-2:CC,Oceania,Australia and New Zealand,"",009,053,"" 50 | Colombia,CO,COL,170,ISO 3166-2:CO,Americas,Latin America and the Caribbean,South America,019,419,005 51 | Comoros,KM,COM,174,ISO 3166-2:KM,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 52 | Congo,CG,COG,178,ISO 3166-2:CG,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 53 | "Congo, Democratic Republic of the",CD,COD,180,ISO 3166-2:CD,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 54 | Cook Islands,CK,COK,184,ISO 3166-2:CK,Oceania,Polynesia,"",009,061,"" 55 | Costa Rica,CR,CRI,188,ISO 3166-2:CR,Americas,Latin America and the Caribbean,Central America,019,419,013 56 | Côte d'Ivoire,CI,CIV,384,ISO 3166-2:CI,Africa,Sub-Saharan Africa,Western Africa,002,202,011 57 | Croatia,HR,HRV,191,ISO 3166-2:HR,Europe,Southern Europe,"",150,039,"" 58 | Cuba,CU,CUB,192,ISO 3166-2:CU,Americas,Latin America and the Caribbean,Caribbean,019,419,029 59 | 
Curaçao,CW,CUW,531,ISO 3166-2:CW,Americas,Latin America and the Caribbean,Caribbean,019,419,029 60 | Cyprus,CY,CYP,196,ISO 3166-2:CY,Asia,Western Asia,"",142,145,"" 61 | Czechia,CZ,CZE,203,ISO 3166-2:CZ,Europe,Eastern Europe,"",150,151,"" 62 | Denmark,DK,DNK,208,ISO 3166-2:DK,Europe,Northern Europe,"",150,154,"" 63 | Djibouti,DJ,DJI,262,ISO 3166-2:DJ,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 64 | Dominica,DM,DMA,212,ISO 3166-2:DM,Americas,Latin America and the Caribbean,Caribbean,019,419,029 65 | Dominican Republic,DO,DOM,214,ISO 3166-2:DO,Americas,Latin America and the Caribbean,Caribbean,019,419,029 66 | Ecuador,EC,ECU,218,ISO 3166-2:EC,Americas,Latin America and the Caribbean,South America,019,419,005 67 | Egypt,EG,EGY,818,ISO 3166-2:EG,Africa,Northern Africa,"",002,015,"" 68 | El Salvador,SV,SLV,222,ISO 3166-2:SV,Americas,Latin America and the Caribbean,Central America,019,419,013 69 | Equatorial Guinea,GQ,GNQ,226,ISO 3166-2:GQ,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 70 | Eritrea,ER,ERI,232,ISO 3166-2:ER,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 71 | Estonia,EE,EST,233,ISO 3166-2:EE,Europe,Northern Europe,"",150,154,"" 72 | Eswatini,SZ,SWZ,748,ISO 3166-2:SZ,Africa,Sub-Saharan Africa,Southern Africa,002,202,018 73 | Ethiopia,ET,ETH,231,ISO 3166-2:ET,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 74 | Falkland Islands (Malvinas),FK,FLK,238,ISO 3166-2:FK,Americas,Latin America and the Caribbean,South America,019,419,005 75 | Faroe Islands,FO,FRO,234,ISO 3166-2:FO,Europe,Northern Europe,"",150,154,"" 76 | Fiji,FJ,FJI,242,ISO 3166-2:FJ,Oceania,Melanesia,"",009,054,"" 77 | Finland,FI,FIN,246,ISO 3166-2:FI,Europe,Northern Europe,"",150,154,"" 78 | France,FR,FRA,250,ISO 3166-2:FR,Europe,Western Europe,"",150,155,"" 79 | French Guiana,GF,GUF,254,ISO 3166-2:GF,Americas,Latin America and the Caribbean,South America,019,419,005 80 | French Polynesia,PF,PYF,258,ISO 3166-2:PF,Oceania,Polynesia,"",009,061,"" 81 | French Southern 
Territories,TF,ATF,260,ISO 3166-2:TF,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 82 | Gabon,GA,GAB,266,ISO 3166-2:GA,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 83 | Gambia,GM,GMB,270,ISO 3166-2:GM,Africa,Sub-Saharan Africa,Western Africa,002,202,011 84 | Georgia,GE,GEO,268,ISO 3166-2:GE,Asia,Western Asia,"",142,145,"" 85 | Germany,DE,DEU,276,ISO 3166-2:DE,Europe,Western Europe,"",150,155,"" 86 | Ghana,GH,GHA,288,ISO 3166-2:GH,Africa,Sub-Saharan Africa,Western Africa,002,202,011 87 | Gibraltar,GI,GIB,292,ISO 3166-2:GI,Europe,Southern Europe,"",150,039,"" 88 | Greece,GR,GRC,300,ISO 3166-2:GR,Europe,Southern Europe,"",150,039,"" 89 | Greenland,GL,GRL,304,ISO 3166-2:GL,Americas,Northern America,"",019,021,"" 90 | Grenada,GD,GRD,308,ISO 3166-2:GD,Americas,Latin America and the Caribbean,Caribbean,019,419,029 91 | Guadeloupe,GP,GLP,312,ISO 3166-2:GP,Americas,Latin America and the Caribbean,Caribbean,019,419,029 92 | Guam,GU,GUM,316,ISO 3166-2:GU,Oceania,Micronesia,"",009,057,"" 93 | Guatemala,GT,GTM,320,ISO 3166-2:GT,Americas,Latin America and the Caribbean,Central America,019,419,013 94 | Guernsey,GG,GGY,831,ISO 3166-2:GG,Europe,Northern Europe,Channel Islands,150,154,830 95 | Guinea,GN,GIN,324,ISO 3166-2:GN,Africa,Sub-Saharan Africa,Western Africa,002,202,011 96 | Guinea-Bissau,GW,GNB,624,ISO 3166-2:GW,Africa,Sub-Saharan Africa,Western Africa,002,202,011 97 | Guyana,GY,GUY,328,ISO 3166-2:GY,Americas,Latin America and the Caribbean,South America,019,419,005 98 | Haiti,HT,HTI,332,ISO 3166-2:HT,Americas,Latin America and the Caribbean,Caribbean,019,419,029 99 | Heard Island and McDonald Islands,HM,HMD,334,ISO 3166-2:HM,Oceania,Australia and New Zealand,"",009,053,"" 100 | Holy See,VA,VAT,336,ISO 3166-2:VA,Europe,Southern Europe,"",150,039,"" 101 | Honduras,HN,HND,340,ISO 3166-2:HN,Americas,Latin America and the Caribbean,Central America,019,419,013 102 | Hong Kong,HK,HKG,344,ISO 3166-2:HK,Asia,Eastern Asia,"",142,030,"" 103 | Hungary,HU,HUN,348,ISO 
3166-2:HU,Europe,Eastern Europe,"",150,151,"" 104 | Iceland,IS,ISL,352,ISO 3166-2:IS,Europe,Northern Europe,"",150,154,"" 105 | India,IN,IND,356,ISO 3166-2:IN,Asia,Southern Asia,"",142,034,"" 106 | Indonesia,ID,IDN,360,ISO 3166-2:ID,Asia,South-eastern Asia,"",142,035,"" 107 | Iran (Islamic Republic of),IR,IRN,364,ISO 3166-2:IR,Asia,Southern Asia,"",142,034,"" 108 | Iraq,IQ,IRQ,368,ISO 3166-2:IQ,Asia,Western Asia,"",142,145,"" 109 | Ireland,IE,IRL,372,ISO 3166-2:IE,Europe,Northern Europe,"",150,154,"" 110 | Isle of Man,IM,IMN,833,ISO 3166-2:IM,Europe,Northern Europe,"",150,154,"" 111 | Israel,IL,ISR,376,ISO 3166-2:IL,Asia,Western Asia,"",142,145,"" 112 | Italy,IT,ITA,380,ISO 3166-2:IT,Europe,Southern Europe,"",150,039,"" 113 | Jamaica,JM,JAM,388,ISO 3166-2:JM,Americas,Latin America and the Caribbean,Caribbean,019,419,029 114 | Japan,JP,JPN,392,ISO 3166-2:JP,Asia,Eastern Asia,"",142,030,"" 115 | Jersey,JE,JEY,832,ISO 3166-2:JE,Europe,Northern Europe,Channel Islands,150,154,830 116 | Jordan,JO,JOR,400,ISO 3166-2:JO,Asia,Western Asia,"",142,145,"" 117 | Kazakhstan,KZ,KAZ,398,ISO 3166-2:KZ,Asia,Central Asia,"",142,143,"" 118 | Kenya,KE,KEN,404,ISO 3166-2:KE,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 119 | Kiribati,KI,KIR,296,ISO 3166-2:KI,Oceania,Micronesia,"",009,057,"" 120 | Korea (Democratic People's Republic of),KP,PRK,408,ISO 3166-2:KP,Asia,Eastern Asia,"",142,030,"" 121 | "Korea, Republic of",KR,KOR,410,ISO 3166-2:KR,Asia,Eastern Asia,"",142,030,"" 122 | Kuwait,KW,KWT,414,ISO 3166-2:KW,Asia,Western Asia,"",142,145,"" 123 | Kyrgyzstan,KG,KGZ,417,ISO 3166-2:KG,Asia,Central Asia,"",142,143,"" 124 | Lao People's Democratic Republic,LA,LAO,418,ISO 3166-2:LA,Asia,South-eastern Asia,"",142,035,"" 125 | Latvia,LV,LVA,428,ISO 3166-2:LV,Europe,Northern Europe,"",150,154,"" 126 | Lebanon,LB,LBN,422,ISO 3166-2:LB,Asia,Western Asia,"",142,145,"" 127 | Lesotho,LS,LSO,426,ISO 3166-2:LS,Africa,Sub-Saharan Africa,Southern Africa,002,202,018 128 | Liberia,LR,LBR,430,ISO 
3166-2:LR,Africa,Sub-Saharan Africa,Western Africa,002,202,011 129 | Libya,LY,LBY,434,ISO 3166-2:LY,Africa,Northern Africa,"",002,015,"" 130 | Liechtenstein,LI,LIE,438,ISO 3166-2:LI,Europe,Western Europe,"",150,155,"" 131 | Lithuania,LT,LTU,440,ISO 3166-2:LT,Europe,Northern Europe,"",150,154,"" 132 | Luxembourg,LU,LUX,442,ISO 3166-2:LU,Europe,Western Europe,"",150,155,"" 133 | Macao,MO,MAC,446,ISO 3166-2:MO,Asia,Eastern Asia,"",142,030,"" 134 | Madagascar,MG,MDG,450,ISO 3166-2:MG,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 135 | Malawi,MW,MWI,454,ISO 3166-2:MW,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 136 | Malaysia,MY,MYS,458,ISO 3166-2:MY,Asia,South-eastern Asia,"",142,035,"" 137 | Maldives,MV,MDV,462,ISO 3166-2:MV,Asia,Southern Asia,"",142,034,"" 138 | Mali,ML,MLI,466,ISO 3166-2:ML,Africa,Sub-Saharan Africa,Western Africa,002,202,011 139 | Malta,MT,MLT,470,ISO 3166-2:MT,Europe,Southern Europe,"",150,039,"" 140 | Marshall Islands,MH,MHL,584,ISO 3166-2:MH,Oceania,Micronesia,"",009,057,"" 141 | Martinique,MQ,MTQ,474,ISO 3166-2:MQ,Americas,Latin America and the Caribbean,Caribbean,019,419,029 142 | Mauritania,MR,MRT,478,ISO 3166-2:MR,Africa,Sub-Saharan Africa,Western Africa,002,202,011 143 | Mauritius,MU,MUS,480,ISO 3166-2:MU,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 144 | Mayotte,YT,MYT,175,ISO 3166-2:YT,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 145 | Mexico,MX,MEX,484,ISO 3166-2:MX,Americas,Latin America and the Caribbean,Central America,019,419,013 146 | Micronesia (Federated States of),FM,FSM,583,ISO 3166-2:FM,Oceania,Micronesia,"",009,057,"" 147 | "Moldova, Republic of",MD,MDA,498,ISO 3166-2:MD,Europe,Eastern Europe,"",150,151,"" 148 | Monaco,MC,MCO,492,ISO 3166-2:MC,Europe,Western Europe,"",150,155,"" 149 | Mongolia,MN,MNG,496,ISO 3166-2:MN,Asia,Eastern Asia,"",142,030,"" 150 | Montenegro,ME,MNE,499,ISO 3166-2:ME,Europe,Southern Europe,"",150,039,"" 151 | Montserrat,MS,MSR,500,ISO 3166-2:MS,Americas,Latin America and 
the Caribbean,Caribbean,019,419,029 152 | Morocco,MA,MAR,504,ISO 3166-2:MA,Africa,Northern Africa,"",002,015,"" 153 | Mozambique,MZ,MOZ,508,ISO 3166-2:MZ,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 154 | Myanmar,MM,MMR,104,ISO 3166-2:MM,Asia,South-eastern Asia,"",142,035,"" 155 | Namibia,NA,NAM,516,ISO 3166-2:NA,Africa,Sub-Saharan Africa,Southern Africa,002,202,018 156 | Nauru,NR,NRU,520,ISO 3166-2:NR,Oceania,Micronesia,"",009,057,"" 157 | Nepal,NP,NPL,524,ISO 3166-2:NP,Asia,Southern Asia,"",142,034,"" 158 | Netherlands,NL,NLD,528,ISO 3166-2:NL,Europe,Western Europe,"",150,155,"" 159 | New Caledonia,NC,NCL,540,ISO 3166-2:NC,Oceania,Melanesia,"",009,054,"" 160 | New Zealand,NZ,NZL,554,ISO 3166-2:NZ,Oceania,Australia and New Zealand,"",009,053,"" 161 | Nicaragua,NI,NIC,558,ISO 3166-2:NI,Americas,Latin America and the Caribbean,Central America,019,419,013 162 | Niger,NE,NER,562,ISO 3166-2:NE,Africa,Sub-Saharan Africa,Western Africa,002,202,011 163 | Nigeria,NG,NGA,566,ISO 3166-2:NG,Africa,Sub-Saharan Africa,Western Africa,002,202,011 164 | Niue,NU,NIU,570,ISO 3166-2:NU,Oceania,Polynesia,"",009,061,"" 165 | Norfolk Island,NF,NFK,574,ISO 3166-2:NF,Oceania,Australia and New Zealand,"",009,053,"" 166 | North Macedonia,MK,MKD,807,ISO 3166-2:MK,Europe,Southern Europe,"",150,039,"" 167 | Northern Mariana Islands,MP,MNP,580,ISO 3166-2:MP,Oceania,Micronesia,"",009,057,"" 168 | Norway,NO,NOR,578,ISO 3166-2:NO,Europe,Northern Europe,"",150,154,"" 169 | Oman,OM,OMN,512,ISO 3166-2:OM,Asia,Western Asia,"",142,145,"" 170 | Pakistan,PK,PAK,586,ISO 3166-2:PK,Asia,Southern Asia,"",142,034,"" 171 | Palau,PW,PLW,585,ISO 3166-2:PW,Oceania,Micronesia,"",009,057,"" 172 | "Palestine, State of",PS,PSE,275,ISO 3166-2:PS,Asia,Western Asia,"",142,145,"" 173 | Panama,PA,PAN,591,ISO 3166-2:PA,Americas,Latin America and the Caribbean,Central America,019,419,013 174 | Papua New Guinea,PG,PNG,598,ISO 3166-2:PG,Oceania,Melanesia,"",009,054,"" 175 | Paraguay,PY,PRY,600,ISO 
3166-2:PY,Americas,Latin America and the Caribbean,South America,019,419,005 176 | Peru,PE,PER,604,ISO 3166-2:PE,Americas,Latin America and the Caribbean,South America,019,419,005 177 | Philippines,PH,PHL,608,ISO 3166-2:PH,Asia,South-eastern Asia,"",142,035,"" 178 | Pitcairn,PN,PCN,612,ISO 3166-2:PN,Oceania,Polynesia,"",009,061,"" 179 | Poland,PL,POL,616,ISO 3166-2:PL,Europe,Eastern Europe,"",150,151,"" 180 | Portugal,PT,PRT,620,ISO 3166-2:PT,Europe,Southern Europe,"",150,039,"" 181 | Puerto Rico,PR,PRI,630,ISO 3166-2:PR,Americas,Latin America and the Caribbean,Caribbean,019,419,029 182 | Qatar,QA,QAT,634,ISO 3166-2:QA,Asia,Western Asia,"",142,145,"" 183 | Réunion,RE,REU,638,ISO 3166-2:RE,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 184 | Romania,RO,ROU,642,ISO 3166-2:RO,Europe,Eastern Europe,"",150,151,"" 185 | Russian Federation,RU,RUS,643,ISO 3166-2:RU,Europe,Eastern Europe,"",150,151,"" 186 | Rwanda,RW,RWA,646,ISO 3166-2:RW,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 187 | Saint Barthélemy,BL,BLM,652,ISO 3166-2:BL,Americas,Latin America and the Caribbean,Caribbean,019,419,029 188 | "Saint Helena, Ascension and Tristan da Cunha",SH,SHN,654,ISO 3166-2:SH,Africa,Sub-Saharan Africa,Western Africa,002,202,011 189 | Saint Kitts and Nevis,KN,KNA,659,ISO 3166-2:KN,Americas,Latin America and the Caribbean,Caribbean,019,419,029 190 | Saint Lucia,LC,LCA,662,ISO 3166-2:LC,Americas,Latin America and the Caribbean,Caribbean,019,419,029 191 | Saint Martin (French part),MF,MAF,663,ISO 3166-2:MF,Americas,Latin America and the Caribbean,Caribbean,019,419,029 192 | Saint Pierre and Miquelon,PM,SPM,666,ISO 3166-2:PM,Americas,Northern America,"",019,021,"" 193 | Saint Vincent and the Grenadines,VC,VCT,670,ISO 3166-2:VC,Americas,Latin America and the Caribbean,Caribbean,019,419,029 194 | Samoa,WS,WSM,882,ISO 3166-2:WS,Oceania,Polynesia,"",009,061,"" 195 | San Marino,SM,SMR,674,ISO 3166-2:SM,Europe,Southern Europe,"",150,039,"" 196 | Sao Tome and 
Principe,ST,STP,678,ISO 3166-2:ST,Africa,Sub-Saharan Africa,Middle Africa,002,202,017 197 | Saudi Arabia,SA,SAU,682,ISO 3166-2:SA,Asia,Western Asia,"",142,145,"" 198 | Senegal,SN,SEN,686,ISO 3166-2:SN,Africa,Sub-Saharan Africa,Western Africa,002,202,011 199 | Serbia,RS,SRB,688,ISO 3166-2:RS,Europe,Southern Europe,"",150,039,"" 200 | Seychelles,SC,SYC,690,ISO 3166-2:SC,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 201 | Sierra Leone,SL,SLE,694,ISO 3166-2:SL,Africa,Sub-Saharan Africa,Western Africa,002,202,011 202 | Singapore,SG,SGP,702,ISO 3166-2:SG,Asia,South-eastern Asia,"",142,035,"" 203 | Sint Maarten (Dutch part),SX,SXM,534,ISO 3166-2:SX,Americas,Latin America and the Caribbean,Caribbean,019,419,029 204 | Slovakia,SK,SVK,703,ISO 3166-2:SK,Europe,Eastern Europe,"",150,151,"" 205 | Slovenia,SI,SVN,705,ISO 3166-2:SI,Europe,Southern Europe,"",150,039,"" 206 | Solomon Islands,SB,SLB,090,ISO 3166-2:SB,Oceania,Melanesia,"",009,054,"" 207 | Somalia,SO,SOM,706,ISO 3166-2:SO,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 208 | South Africa,ZA,ZAF,710,ISO 3166-2:ZA,Africa,Sub-Saharan Africa,Southern Africa,002,202,018 209 | South Georgia and the South Sandwich Islands,GS,SGS,239,ISO 3166-2:GS,Americas,Latin America and the Caribbean,South America,019,419,005 210 | South Sudan,SS,SSD,728,ISO 3166-2:SS,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 211 | Spain,ES,ESP,724,ISO 3166-2:ES,Europe,Southern Europe,"",150,039,"" 212 | Sri Lanka,LK,LKA,144,ISO 3166-2:LK,Asia,Southern Asia,"",142,034,"" 213 | Sudan,SD,SDN,729,ISO 3166-2:SD,Africa,Northern Africa,"",002,015,"" 214 | Suriname,SR,SUR,740,ISO 3166-2:SR,Americas,Latin America and the Caribbean,South America,019,419,005 215 | Svalbard and Jan Mayen,SJ,SJM,744,ISO 3166-2:SJ,Europe,Northern Europe,"",150,154,"" 216 | Sweden,SE,SWE,752,ISO 3166-2:SE,Europe,Northern Europe,"",150,154,"" 217 | Switzerland,CH,CHE,756,ISO 3166-2:CH,Europe,Western Europe,"",150,155,"" 218 | Syrian Arab Republic,SY,SYR,760,ISO 
3166-2:SY,Asia,Western Asia,"",142,145,"" 219 | "Taiwan, Province of China",TW,TWN,158,ISO 3166-2:TW,Asia,Eastern Asia,"",142,030,"" 220 | Tajikistan,TJ,TJK,762,ISO 3166-2:TJ,Asia,Central Asia,"",142,143,"" 221 | "Tanzania, United Republic of",TZ,TZA,834,ISO 3166-2:TZ,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 222 | Thailand,TH,THA,764,ISO 3166-2:TH,Asia,South-eastern Asia,"",142,035,"" 223 | Timor-Leste,TL,TLS,626,ISO 3166-2:TL,Asia,South-eastern Asia,"",142,035,"" 224 | Togo,TG,TGO,768,ISO 3166-2:TG,Africa,Sub-Saharan Africa,Western Africa,002,202,011 225 | Tokelau,TK,TKL,772,ISO 3166-2:TK,Oceania,Polynesia,"",009,061,"" 226 | Tonga,TO,TON,776,ISO 3166-2:TO,Oceania,Polynesia,"",009,061,"" 227 | Trinidad and Tobago,TT,TTO,780,ISO 3166-2:TT,Americas,Latin America and the Caribbean,Caribbean,019,419,029 228 | Tunisia,TN,TUN,788,ISO 3166-2:TN,Africa,Northern Africa,"",002,015,"" 229 | Turkey,TR,TUR,792,ISO 3166-2:TR,Asia,Western Asia,"",142,145,"" 230 | Turkmenistan,TM,TKM,795,ISO 3166-2:TM,Asia,Central Asia,"",142,143,"" 231 | Turks and Caicos Islands,TC,TCA,796,ISO 3166-2:TC,Americas,Latin America and the Caribbean,Caribbean,019,419,029 232 | Tuvalu,TV,TUV,798,ISO 3166-2:TV,Oceania,Polynesia,"",009,061,"" 233 | Uganda,UG,UGA,800,ISO 3166-2:UG,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 234 | Ukraine,UA,UKR,804,ISO 3166-2:UA,Europe,Eastern Europe,"",150,151,"" 235 | United Arab Emirates,AE,ARE,784,ISO 3166-2:AE,Asia,Western Asia,"",142,145,"" 236 | United Kingdom of Great Britain and Northern Ireland,GB,GBR,826,ISO 3166-2:GB,Europe,Northern Europe,"",150,154,"" 237 | United States of America,US,USA,840,ISO 3166-2:US,Americas,Northern America,"",019,021,"" 238 | United States Minor Outlying Islands,UM,UMI,581,ISO 3166-2:UM,Oceania,Micronesia,"",009,057,"" 239 | Uruguay,UY,URY,858,ISO 3166-2:UY,Americas,Latin America and the Caribbean,South America,019,419,005 240 | Uzbekistan,UZ,UZB,860,ISO 3166-2:UZ,Asia,Central Asia,"",142,143,"" 241 | 
Vanuatu,VU,VUT,548,ISO 3166-2:VU,Oceania,Melanesia,"",009,054,"" 242 | Venezuela (Bolivarian Republic of),VE,VEN,862,ISO 3166-2:VE,Americas,Latin America and the Caribbean,South America,019,419,005 243 | Viet Nam,VN,VNM,704,ISO 3166-2:VN,Asia,South-eastern Asia,"",142,035,"" 244 | Virgin Islands (British),VG,VGB,092,ISO 3166-2:VG,Americas,Latin America and the Caribbean,Caribbean,019,419,029 245 | Virgin Islands (U.S.),VI,VIR,850,ISO 3166-2:VI,Americas,Latin America and the Caribbean,Caribbean,019,419,029 246 | Wallis and Futuna,WF,WLF,876,ISO 3166-2:WF,Oceania,Polynesia,"",009,061,"" 247 | Western Sahara,EH,ESH,732,ISO 3166-2:EH,Africa,Northern Africa,"",002,015,"" 248 | Yemen,YE,YEM,887,ISO 3166-2:YE,Asia,Western Asia,"",142,145,"" 249 | Zambia,ZM,ZMB,894,ISO 3166-2:ZM,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 250 | Zimbabwe,ZW,ZWE,716,ISO 3166-2:ZW,Africa,Sub-Saharan Africa,Eastern Africa,002,202,014 251 | -------------------------------------------------------------------------------- /demos/data/world_happiness_report.csv: -------------------------------------------------------------------------------- 1 | Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption 2 | 1,Finland,7.769,1.340,1.587,0.986,0.596,0.153,0.393 3 | 2,Denmark,7.600,1.383,1.573,0.996,0.592,0.252,0.410 4 | 3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341 5 | 4,Iceland,7.494,1.380,1.624,1.026,0.591,0.354,0.118 6 | 5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298 7 | 6,Switzerland,7.480,1.452,1.526,1.052,0.572,0.263,0.343 8 | 7,Sweden,7.343,1.387,1.487,1.009,0.574,0.267,0.373 9 | 8,New Zealand,7.307,1.303,1.557,1.026,0.585,0.330,0.380 10 | 9,Canada,7.278,1.365,1.505,1.039,0.584,0.285,0.308 11 | 10,Austria,7.246,1.376,1.475,1.016,0.532,0.244,0.226 12 | 11,Australia,7.228,1.372,1.548,1.036,0.557,0.332,0.290 13 | 12,Costa Rica,7.167,1.034,1.441,0.963,0.558,0.144,0.093 14 | 
13,Israel,7.139,1.276,1.455,1.029,0.371,0.261,0.082 15 | 14,Luxembourg,7.090,1.609,1.479,1.012,0.526,0.194,0.316 16 | 15,United Kingdom,7.054,1.333,1.538,0.996,0.450,0.348,0.278 17 | 16,Ireland,7.021,1.499,1.553,0.999,0.516,0.298,0.310 18 | 17,Germany,6.985,1.373,1.454,0.987,0.495,0.261,0.265 19 | 18,Belgium,6.923,1.356,1.504,0.986,0.473,0.160,0.210 20 | 19,United States,6.892,1.433,1.457,0.874,0.454,0.280,0.128 21 | 20,Czech Republic,6.852,1.269,1.487,0.920,0.457,0.046,0.036 22 | 21,United Arab Emirates,6.825,1.503,1.310,0.825,0.598,0.262,0.182 23 | 22,Malta,6.726,1.300,1.520,0.999,0.564,0.375,0.151 24 | 23,Mexico,6.595,1.070,1.323,0.861,0.433,0.074,0.073 25 | 24,France,6.592,1.324,1.472,1.045,0.436,0.111,0.183 26 | 25,Taiwan,6.446,1.368,1.430,0.914,0.351,0.242,0.097 27 | 26,Chile,6.444,1.159,1.369,0.920,0.357,0.187,0.056 28 | 27,Guatemala,6.436,0.800,1.269,0.746,0.535,0.175,0.078 29 | 28,Saudi Arabia,6.375,1.403,1.357,0.795,0.439,0.080,0.132 30 | 29,Qatar,6.374,1.684,1.313,0.871,0.555,0.220,0.167 31 | 30,Spain,6.354,1.286,1.484,1.062,0.362,0.153,0.079 32 | 31,Panama,6.321,1.149,1.442,0.910,0.516,0.109,0.054 33 | 32,Brazil,6.300,1.004,1.439,0.802,0.390,0.099,0.086 34 | 33,Uruguay,6.293,1.124,1.465,0.891,0.523,0.127,0.150 35 | 34,Singapore,6.262,1.572,1.463,1.141,0.556,0.271,0.453 36 | 35,El Salvador,6.253,0.794,1.242,0.789,0.430,0.093,0.074 37 | 36,Italy,6.223,1.294,1.488,1.039,0.231,0.158,0.030 38 | 37,Bahrain,6.199,1.362,1.368,0.871,0.536,0.255,0.110 39 | 38,Slovakia,6.198,1.246,1.504,0.881,0.334,0.121,0.014 40 | 39,Trinidad & Tobago,6.192,1.231,1.477,0.713,0.489,0.185,0.016 41 | 40,Poland,6.182,1.206,1.438,0.884,0.483,0.117,0.050 42 | 41,Uzbekistan,6.174,0.745,1.529,0.756,0.631,0.322,0.240 43 | 42,Lithuania,6.149,1.238,1.515,0.818,0.291,0.043,0.042 44 | 43,Colombia,6.125,0.985,1.410,0.841,0.470,0.099,0.034 45 | 44,Slovenia,6.118,1.258,1.523,0.953,0.564,0.144,0.057 46 | 45,Nicaragua,6.105,0.694,1.325,0.835,0.435,0.200,0.127 47 | 
46,Kosovo,6.100,0.882,1.232,0.758,0.489,0.262,0.006 48 | 47,Argentina,6.086,1.092,1.432,0.881,0.471,0.066,0.050 49 | 48,Romania,6.070,1.162,1.232,0.825,0.462,0.083,0.005 50 | 49,Cyprus,6.046,1.263,1.223,1.042,0.406,0.190,0.041 51 | 50,Ecuador,6.028,0.912,1.312,0.868,0.498,0.126,0.087 52 | 51,Kuwait,6.021,1.500,1.319,0.808,0.493,0.142,0.097 53 | 52,Thailand,6.008,1.050,1.409,0.828,0.557,0.359,0.028 54 | 53,Latvia,5.940,1.187,1.465,0.812,0.264,0.075,0.064 55 | 54,South Korea,5.895,1.301,1.219,1.036,0.159,0.175,0.056 56 | 55,Estonia,5.893,1.237,1.528,0.874,0.495,0.103,0.161 57 | 56,Jamaica,5.890,0.831,1.478,0.831,0.490,0.107,0.028 58 | 57,Mauritius,5.888,1.120,1.402,0.798,0.498,0.215,0.060 59 | 58,Japan,5.886,1.327,1.419,1.088,0.445,0.069,0.140 60 | 59,Honduras,5.860,0.642,1.236,0.828,0.507,0.246,0.078 61 | 60,Kazakhstan,5.809,1.173,1.508,0.729,0.410,0.146,0.096 62 | 61,Bolivia,5.779,0.776,1.209,0.706,0.511,0.137,0.064 63 | 62,Hungary,5.758,1.201,1.410,0.828,0.199,0.081,0.020 64 | 63,Paraguay,5.743,0.855,1.475,0.777,0.514,0.184,0.080 65 | 64,Northern Cyprus,5.718,1.263,1.252,1.042,0.417,0.191,0.162 66 | 65,Peru,5.697,0.960,1.274,0.854,0.455,0.083,0.027 67 | 66,Portugal,5.693,1.221,1.431,0.999,0.508,0.047,0.025 68 | 67,Pakistan,5.653,0.677,0.886,0.535,0.313,0.220,0.098 69 | 68,Russia,5.648,1.183,1.452,0.726,0.334,0.082,0.031 70 | 69,Philippines,5.631,0.807,1.293,0.657,0.558,0.117,0.107 71 | 70,Serbia,5.603,1.004,1.383,0.854,0.282,0.137,0.039 72 | 71,Moldova,5.529,0.685,1.328,0.739,0.245,0.181,0.000 73 | 72,Libya,5.525,1.044,1.303,0.673,0.416,0.133,0.152 74 | 73,Montenegro,5.523,1.051,1.361,0.871,0.197,0.142,0.080 75 | 74,Tajikistan,5.467,0.493,1.098,0.718,0.389,0.230,0.144 76 | 75,Croatia,5.432,1.155,1.266,0.914,0.296,0.119,0.022 77 | 76,Hong Kong,5.430,1.438,1.277,1.122,0.440,0.258,0.287 78 | 77,Dominican Republic,5.425,1.015,1.401,0.779,0.497,0.113,0.101 79 | 78,Bosnia and Herzegovina,5.386,0.945,1.212,0.845,0.212,0.263,0.006 80 | 
79,Turkey,5.373,1.183,1.360,0.808,0.195,0.083,0.106 81 | 80,Malaysia,5.339,1.221,1.171,0.828,0.508,0.260,0.024 82 | 81,Belarus,5.323,1.067,1.465,0.789,0.235,0.094,0.142 83 | 82,Greece,5.287,1.181,1.156,0.999,0.067,0.000,0.034 84 | 83,Mongolia,5.285,0.948,1.531,0.667,0.317,0.235,0.038 85 | 84,North Macedonia,5.274,0.983,1.294,0.838,0.345,0.185,0.034 86 | 85,Nigeria,5.265,0.696,1.111,0.245,0.426,0.215,0.041 87 | 86,Kyrgyzstan,5.261,0.551,1.438,0.723,0.508,0.300,0.023 88 | 87,Turkmenistan,5.247,1.052,1.538,0.657,0.394,0.244,0.028 89 | 88,Algeria,5.211,1.002,1.160,0.785,0.086,0.073,0.114 90 | 89,Morocco,5.208,0.801,0.782,0.782,0.418,0.036,0.076 91 | 90,Azerbaijan,5.208,1.043,1.147,0.769,0.351,0.035,0.182 92 | 91,Lebanon,5.197,0.987,1.224,0.815,0.216,0.166,0.027 93 | 92,Indonesia,5.192,0.931,1.203,0.660,0.491,0.498,0.028 94 | 93,China,5.191,1.029,1.125,0.893,0.521,0.058,0.100 95 | 94,Vietnam,5.175,0.741,1.346,0.851,0.543,0.147,0.073 96 | 95,Bhutan,5.082,0.813,1.321,0.604,0.457,0.370,0.167 97 | 96,Cameroon,5.044,0.549,0.910,0.331,0.381,0.187,0.037 98 | 97,Bulgaria,5.011,1.092,1.513,0.815,0.311,0.081,0.004 99 | 98,Ghana,4.996,0.611,0.868,0.486,0.381,0.245,0.040 100 | 99,Ivory Coast,4.944,0.569,0.808,0.232,0.352,0.154,0.090 101 | 100,Nepal,4.913,0.446,1.226,0.677,0.439,0.285,0.089 102 | 101,Jordan,4.906,0.837,1.225,0.815,0.383,0.110,0.130 103 | 102,Benin,4.883,0.393,0.437,0.397,0.349,0.175,0.082 104 | 103,Congo (Brazzaville),4.812,0.673,0.799,0.508,0.372,0.105,0.093 105 | 104,Gabon,4.799,1.057,1.183,0.571,0.295,0.043,0.055 106 | 105,Laos,4.796,0.764,1.030,0.551,0.547,0.266,0.164 107 | 106,South Africa,4.722,0.960,1.351,0.469,0.389,0.130,0.055 108 | 107,Albania,4.719,0.947,0.848,0.874,0.383,0.178,0.027 109 | 108,Venezuela,4.707,0.960,1.427,0.805,0.154,0.064,0.047 110 | 109,Cambodia,4.700,0.574,1.122,0.637,0.609,0.232,0.062 111 | 110,Palestinian Territories,4.696,0.657,1.247,0.672,0.225,0.103,0.066 112 | 111,Senegal,4.681,0.450,1.134,0.571,0.292,0.153,0.072 113 | 
112,Somalia,4.668,0.000,0.698,0.268,0.559,0.243,0.270 114 | 113,Namibia,4.639,0.879,1.313,0.477,0.401,0.070,0.056 115 | 114,Niger,4.628,0.138,0.774,0.366,0.318,0.188,0.102 116 | 115,Burkina Faso,4.587,0.331,1.056,0.380,0.255,0.177,0.113 117 | 116,Armenia,4.559,0.850,1.055,0.815,0.283,0.095,0.064 118 | 117,Iran,4.548,1.100,0.842,0.785,0.305,0.270,0.125 119 | 118,Guinea,4.534,0.380,0.829,0.375,0.332,0.207,0.086 120 | 119,Georgia,4.519,0.886,0.666,0.752,0.346,0.043,0.164 121 | 120,Gambia,4.516,0.308,0.939,0.428,0.382,0.269,0.167 122 | 121,Kenya,4.509,0.512,0.983,0.581,0.431,0.372,0.053 123 | 122,Mauritania,4.490,0.570,1.167,0.489,0.066,0.106,0.088 124 | 123,Mozambique,4.466,0.204,0.986,0.390,0.494,0.197,0.138 125 | 124,Tunisia,4.461,0.921,1.000,0.815,0.167,0.059,0.055 126 | 125,Bangladesh,4.456,0.562,0.928,0.723,0.527,0.166,0.143 127 | 126,Iraq,4.437,1.043,0.980,0.574,0.241,0.148,0.089 128 | 127,Congo (Kinshasa),4.418,0.094,1.125,0.357,0.269,0.212,0.053 129 | 128,Mali,4.390,0.385,1.105,0.308,0.327,0.153,0.052 130 | 129,Sierra Leone,4.374,0.268,0.841,0.242,0.309,0.252,0.045 131 | 130,Sri Lanka,4.366,0.949,1.265,0.831,0.470,0.244,0.047 132 | 131,Myanmar,4.360,0.710,1.181,0.555,0.525,0.566,0.172 133 | 132,Chad,4.350,0.350,0.766,0.192,0.174,0.198,0.078 134 | 133,Ukraine,4.332,0.820,1.390,0.739,0.178,0.187,0.010 135 | 134,Ethiopia,4.286,0.336,1.033,0.532,0.344,0.209,0.100 136 | 135,Swaziland,4.212,0.811,1.149,0.000,0.313,0.074,0.135 137 | 136,Uganda,4.189,0.332,1.069,0.443,0.356,0.252,0.060 138 | 137,Egypt,4.166,0.913,1.039,0.644,0.241,0.076,0.067 139 | 138,Zambia,4.107,0.578,1.058,0.426,0.431,0.247,0.087 140 | 139,Togo,4.085,0.275,0.572,0.410,0.293,0.177,0.085 141 | 140,India,4.015,0.755,0.765,0.588,0.498,0.200,0.085 142 | 141,Liberia,3.975,0.073,0.922,0.443,0.370,0.233,0.033 143 | 142,Comoros,3.973,0.274,0.757,0.505,0.142,0.275,0.078 144 | 143,Madagascar,3.933,0.274,0.916,0.555,0.148,0.169,0.041 145 | 144,Lesotho,3.802,0.489,1.169,0.168,0.359,0.107,0.093 146 | 
145,Burundi,3.775,0.046,0.447,0.380,0.220,0.176,0.180 147 | 146,Zimbabwe,3.663,0.366,1.114,0.433,0.361,0.151,0.089 148 | 147,Haiti,3.597,0.323,0.688,0.449,0.026,0.419,0.110 149 | 148,Botswana,3.488,1.041,1.145,0.538,0.455,0.025,0.100 150 | 149,Syria,3.462,0.619,0.378,0.440,0.013,0.331,0.141 151 | 150,Malawi,3.410,0.191,0.560,0.495,0.443,0.218,0.089 152 | 151,Yemen,3.380,0.287,1.163,0.463,0.143,0.108,0.077 153 | 152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411 154 | 153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147 155 | 154,Afghanistan,3.203,0.350,0.517,0.361,0.000,0.158,0.025 156 | 155,Central African Republic,3.083,0.026,0.000,0.105,0.225,0.235,0.035 157 | 156,South Sudan,2.853,0.306,0.575,0.295,0.010,0.202,0.091 158 | -------------------------------------------------------------------------------- /demos/datadm_happiness_qa_and_plots.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/approximatelabs/datadm/c6e1484398ecd29bf669cf9da9bc01cf9770a2bf/demos/datadm_happiness_qa_and_plots.mp4 -------------------------------------------------------------------------------- /demos/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip install playwright" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from playwright.async_api import async_playwright, TimeoutError\n", 19 | "import asyncio\n", 20 | "import random\n", 21 | "import shutil\n", 22 | "\n", 23 | "async def xy_for_element(page, locator_arg):\n", 24 | " locator = page.locator(locator_arg)\n", 25 | " # wait for it?\n", 26 | " try:\n", 27 | " await locator.wait_for(state='visible', timeout=1000)\n", 28 | " except TimeoutError:\n", 29 | " raise TimeoutError(f\"Could 
not find element {locator_arg}\")\n", 30 | " bounding_box = await locator.bounding_box()\n", 31 | " # sample a 2D normal distribution point centered at the center of bounding box\n", 32 | " # with a standard deviation of 1/4 the width in x, and 1/4 the height in y\n", 33 | " # and resample if the point is outside the bounding box\n", 34 | "\n", 35 | " res = -1, -1\n", 36 | " while not (0 <= res[0] <= bounding_box['width'] and 0 <= res[1] <= bounding_box['height']):\n", 37 | " res = (\n", 38 | " random.gauss(bounding_box['width'] / 2, bounding_box['width'] / 4),\n", 39 | " random.gauss(bounding_box['height'] / 2, bounding_box['height'] / 4)\n", 40 | " )\n", 41 | " return bounding_box['x'] + res[0], bounding_box['y'] + res[1]\n", 42 | "\n", 43 | "async def moveto_and_click(page, selector, steps=20, sleep=0.5):\n", 44 | " target = await xy_for_element(page, selector)\n", 45 | " await page.mouse.move(*target, steps=steps)\n", 46 | " # re-check the bounding box in case the element moved while the pointer was travelling\n", 47 | " bb = await page.locator(selector).bounding_box()\n", 48 | " if not (bb['x'] <= target[0] <= bb['x'] + bb['width'] and bb['y'] <= target[1] <= bb['y'] + bb['height']):\n", 49 | " target = await xy_for_element(page, selector)\n", 50 | " await page.mouse.move(*target, steps=2)\n", 51 | " await page.mouse.click(*target)\n", 52 | " await asyncio.sleep(sleep)\n", 53 | "\n", 54 | "async def prompt(page, prompt):\n", 55 | " await moveto_and_click(page, \"#chat_message_box\")\n", 56 | " await page.keyboard.type(f\"{prompt}\\n\", delay=50)\n", 57 | " while True:\n", 58 | " await asyncio.sleep(1)\n", 59 | " if not await page.locator('#submit_button').is_hidden():\n", 60 | " break\n", 61 | "\n", 62 | "async def upload_file(page, file, sleep=0.5):\n", 63 | " async with page.expect_file_chooser() as fc_info:\n", 64 | " await moveto_and_click(page, \"#upload_button\")\n", 65 | "\n", 66 | " file_chooser = await fc_info.value\n", 67 | " await file_chooser.set_files(file)\n", 68 | " await 
asyncio.sleep(sleep)\n", 69 | "\n", 70 | "async def run_demo(procedure, save_video_as=None):\n", 71 | " async with async_playwright() as p:\n", 72 | " browser = await p.chromium.launch(\n", 73 | " headless=False,\n", 74 | " slow_mo=50\n", 75 | " )\n", 76 | " context = await browser.new_context(\n", 77 | " color_scheme=\"dark\",\n", 78 | " geolocation={\"longitude\": 12.492507, \"latitude\": 41.889938},\n", 79 | " record_video_dir=\"videos/\",\n", 80 | " record_video_size={\"width\": 1920, \"height\": 1080},\n", 81 | " viewport={\"width\": 1280, \"height\": 720},\n", 82 | " )\n", 83 | " page = await context.new_page()\n", 84 | " await page.add_init_script(\n", 85 | " path=\"mouse_helper.js\"\n", 86 | " )\n", 87 | " await procedure(page)\n", 88 | " video_path = await page.video.path()\n", 89 | " await context.close()\n", 90 | " await browser.close()\n", 91 | " if save_video_as is not None:\n", 92 | " shutil.move(video_path, save_video_as)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "async def demo1(page):\n", 102 | " await page.goto(\"localhost:7860\")\n", 103 | "\n", 104 | " try:\n", 105 | " # a way to wait for page to be ready, is waiting for the load_model button to appear\n", 106 | " await page.wait_for_selector(\"#load_model_button\", timeout=5000)\n", 107 | " except TimeoutError:\n", 108 | " pass\n", 109 | "\n", 110 | " await moveto_and_click(page, \"#model_selection_dropdown\")\n", 111 | " await moveto_and_click(page, \"text=gpt-4\")\n", 112 | " try:\n", 113 | " await moveto_and_click(page, \"#load_model_button\")\n", 114 | " except TimeoutError:\n", 115 | " pass\n", 116 | "\n", 117 | " await upload_file(page, \"data/world_happiness_report.csv\")\n", 118 | "\n", 119 | " await prompt(page, \"Who is happier, US or Canada?\")\n", 120 | " await prompt(page, \"Can you plot a scatter plot of happiness vs. 
gdp, and change the size of the dots based on life expectancy?\")\n", 121 | " await asyncio.sleep(3) # let the graphic sink in\n", 122 | "\n", 123 | " await upload_file(page, \"data/country-regions.csv\")\n", 124 | " \n", 125 | " await prompt(page, \"Can we color each dot in the previous plot based on the region?\")\n", 126 | " await asyncio.sleep(3) # let the graphic sink in\n", 127 | "\n", 128 | " await prompt(page, \"Can we add a line of best fit and show the score for correlation?\")\n", 129 | " await asyncio.sleep(6)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "await run_demo(demo1, save_video_as=\"demo1.webm\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# # Use FFMPEG to clean up the video\n", 148 | "# ffmpeg -i demos/demo1.webm demo1.mp4\n", 149 | "# ffmpeg -i demo1.mp4 -ss 0.5 -c:v libx264 -c:a copy -crf 23 -preset fast demo_cleaned.mp4\n", 150 | "# ffmpeg -sseof -1 -i demo_cleaned.mp4 -update 1 -q:v 1 last_frame.jpg\n", 151 | "# ffmpeg -loop 1 -i last_frame.jpg -c:v libx264 -t 1 -pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" 1_frame.mp4\n", 152 | "# echo -e \"file '1_frame.mp4'\\nfile 'demo_cleaned.mp4'\" > concat_list.txt\n", 153 | "# ffmpeg -f concat -safe 0 -i concat_list.txt -c copy final_demo.mp4\n", 154 | "# rm last_frame.jpg 1_frame.mp4 concat_list.txt\n", 155 | "# mv final_demo.mp4 datadm_happiness_qa_and_plots.mp4" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "datadm", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": 
"ipython3", 175 | "version": "3.10.10" 176 | }, 177 | "orig_nbformat": 4 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 2 181 | } 182 | -------------------------------------------------------------------------------- /demos/mouse_helper.js: -------------------------------------------------------------------------------- 1 | const leftptr = ` 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | `; 27 | 28 | window.addEventListener( 29 | "DOMContentLoaded", 30 | () => { 31 | const box = document.createElement("p-mouse-pointer"); 32 | box.innerHTML = leftptr; 33 | const styleElement = document.createElement("style"); 34 | styleElement.innerHTML = ` 35 | p-mouse-pointer { 36 | pointer-events: none; 37 | position: absolute; 38 | top: 0; 39 | z-index: 10000; 40 | left: 0; 41 | width: 20px; 42 | height: 20px; 43 | } 44 | p-mouse-pointer.button-1 { 45 | transition: none; 46 | border-radius: 50%; 47 | border: 4px solid rgba(0,0,255,0.9); 48 | } 49 | p-mouse-pointer.button-2 { 50 | transition: none; 51 | border-color: rgba(0,0,255,0.9); 52 | } 53 | p-mouse-pointer.button-3 { 54 | transition: none; 55 | border-radius: 4px; 56 | } 57 | p-mouse-pointer.button-4 { 58 | transition: none; 59 | border-color: rgba(255,0,0,0.9); 60 | } 61 | p-mouse-pointer.button-5 { 62 | transition: none; 63 | border-color: rgba(0,255,0,0.9); 64 | } 65 | p-mouse-pointer-hide { 66 | display: none 67 | } 68 | `; 69 | document.head.appendChild(styleElement); 70 | document.body.appendChild(box); 71 | document.addEventListener( 72 | "mousemove", 73 | (event) => { 74 | box.style.left = String(event.pageX) + "px"; 75 | box.style.top = String(event.pageY) + "px"; 76 | box.classList.remove("p-mouse-pointer-hide"); 77 | updateButtons(event.buttons); 78 | }, 79 | true 80 | ); 81 | document.addEventListener( 82 | "mousedown", 83 | (event) => { 84 | updateButtons(event.buttons); 85 | box.classList.add("button-" + String(event.which)); 86 | 
box.classList.remove("p-mouse-pointer-hide"); 87 | }, 88 | true 89 | ); 90 | document.addEventListener( 91 | "mouseup", 92 | (event) => { 93 | updateButtons(event.buttons); 94 | box.classList.remove("button-" + String(event.which)); 95 | box.classList.remove("p-mouse-pointer-hide"); 96 | }, 97 | true 98 | ); 99 | document.addEventListener( 100 | "mouseleave", 101 | (event) => { 102 | updateButtons(event.buttons); 103 | box.classList.add("p-mouse-pointer-hide"); 104 | }, 105 | true 106 | ); 107 | document.addEventListener( 108 | "mouseenter", 109 | (event) => { 110 | updateButtons(event.buttons); 111 | box.classList.remove("p-mouse-pointer-hide"); 112 | }, 113 | true 114 | ); 115 | /* eslint-disable */ 116 | function updateButtons(buttons) { 117 | for (let i = 0; i < 5; i++) { 118 | // @ts-ignore 119 | box.classList.toggle("button-" + String(i), buttons & (1 << i)); 120 | } 121 | } 122 | }, 123 | false 124 | ); 125 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | tox 3 | isort 4 | black 5 | flake8 6 | pytest-asyncio 7 | pytest-mock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "datadm" 7 | description = "DataDM is your private data assistant. 
Slide into your data's DMs" 8 | readme = "README.md" 9 | requires-python = ">=3.8" 10 | keywords = ["nlp", "ai", "data", "chatbot", "database", "csv", "analytics", "datachat", "datadm"] 11 | license = {file = "LICENSE"} 12 | classifiers = [ 13 | "Programming Language :: Python :: 3", 14 | ] 15 | dependencies = [ 16 | "gradio", 17 | "guidance==0.0.64", 18 | "jupyter", 19 | "matplotlib", 20 | "pandas", 21 | "sketch", 22 | "transformers", 23 | "scikit-learn", 24 | "safetensors==0.3.2", 25 | "seaborn", 26 | "lxml", 27 | "scipy", 28 | "xgboost", 29 | ] 30 | urls = {homepage = "https://github.com/approximatelabs/datadm"} 31 | dynamic = ["version"] 32 | 33 | 34 | [project.optional-dependencies] 35 | cuda = ["accelerate"] 36 | all = ["datadm[cuda]"] 37 | 38 | [project.scripts] 39 | datadm = "datadm.app:main" 40 | 41 | [tool.setuptools] 42 | packages = ["datadm"] 43 | 44 | [tool.setuptools_scm] 45 | 46 | [tool.tox] 47 | legacy_tox_ini = """ 48 | [tox] 49 | envlist = py38, py39, py310, py311 50 | 51 | [gh-actions] 52 | python = 53 | 3.8: py38 54 | 3.9: py39 55 | 3.10: py310 56 | 3.11: py311 57 | 58 | [testenv] 59 | deps= -rdev-requirements.txt 60 | commands = python -m pytest tests -s 61 | """ -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/approximatelabs/datadm/c6e1484398ecd29bf669cf9da9bc01cf9770a2bf/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_repl.py: -------------------------------------------------------------------------------- 1 | from datadm.repl import REPL 2 | 3 | 4 | def test_exec(): 5 | repl = REPL() 6 | out = repl.exec("print('hi')") 7 | assert 'hi' in out['stdout'] 8 | --------------------------------------------------------------------------------