├── .env.sample ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── Alzkb_Q_A_Embedding ├── .env.sample ├── .gitignore ├── readme.md ├── requirements.txt └── test_data.json ├── Benchmark_Comparison_and_Setup.md ├── KRAGEN_Dashboard ├── Backend │ ├── .env.sample.notused │ ├── Dockerfile │ ├── ExecGPTServer │ │ ├── __init__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── chatapi.py │ │ │ ├── execapi.py │ │ │ └── openai.py │ │ ├── db │ │ │ ├── __init__.py │ │ │ └── db.py │ │ ├── notused │ │ │ ├── __init__old.py │ │ │ ├── aiconf.py │ │ │ ├── aiconn.py │ │ │ └── routes_old.py │ │ ├── requirements.txt │ │ ├── schema.sql │ │ ├── server.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── chat_util.py │ │ │ ├── config_util.py │ │ │ ├── exec_util.py │ │ │ └── util.py │ ├── LICENSE │ ├── README.md │ ├── code_run │ │ ├── 1 │ │ │ └── test.txt │ │ └── 2 │ │ │ └── test.txt │ ├── config.json.sample │ ├── execgpt.py │ ├── got.py │ ├── graph_of_thoughts │ │ ├── LICENSE │ │ ├── README.md │ │ ├── graph_of_thoughts │ │ │ ├── __init__.py │ │ │ ├── controller │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ └── controller.py │ │ │ ├── language_models │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── abstract_language_model.py │ │ │ │ ├── azuregpt.py │ │ │ │ ├── chatgpt.py │ │ │ │ ├── config_template.json │ │ │ │ └── llamachat_hf.py │ │ │ ├── operations │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── graph_of_operations.py │ │ │ │ ├── operations.py │ │ │ │ └── thought.py │ │ │ ├── parser │ │ │ │ ├── __init__.py │ │ │ │ └── parser.py │ │ │ ├── prompter │ │ │ │ ├── __init__.py │ │ │ │ └── prompter.py │ │ │ └── vector_db │ │ │ │ ├── __init__.py │ │ │ │ ├── azure_embedding.py │ │ │ │ └── weaviate.py │ │ └── pyproject.toml │ └── routes.rest ├── Frontend │ ├── .env.sample │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── package.json │ ├── public │ │ ├── favicon.ico │ │ ├── gotdata │ │ │ └── dataset.json.sample │ │ ├── images │ │ │ ├── answer.svg │ │ │ ├── charttype.svg │ │ │ ├── company.svg │ │ │ ├── concept.svg │ │ │ ├── field.svg │ │ │ ├── generator.svg │ │ │ ├── list.svg │ │ │ ├── method.svg │ │ │ ├── organization.svg │ │ │ ├── person.svg │ │ │ ├── question.svg │ │ │ ├── selector.svg │ │ │ ├── technology.svg │ │ │ ├── tool.svg │ │ │ └── unknown.svg │ │ └── index.html │ ├── src │ │ ├── components │ │ │ ├── ChatGPTForKRAGENLoc │ │ │ │ ├── ChatBox.js │ │ │ │ ├── ResizableDiv.js │ │ │ │ ├── SideMenu.js │ │ │ │ ├── context │ │ │ │ │ ├── AllContext.js │ │ │ │ │ ├── ThemeContext.js │ │ │ │ │ └── chatLogContext.js │ │ │ │ ├── index.js │ │ │ │ ├── logo.svg │ │ │ │ ├── reportWebVitals.js │ │ │ │ └── useChatData.js │ │ │ ├── DisplayGraph │ │ │ │ ├── ClustersPanel.tsx │ │ │ │ ├── DescriptionPanel.tsx │ │ │ │ ├── GraphDataController.tsx │ │ │ │ ├── GraphEventsController.tsx │ │ │ │ ├── GraphSettingsController.tsx │ │ │ │ ├── GraphTitle.tsx │ │ │ │ ├── Panel.tsx │ │ │ │ ├── SearchField.tsx │ │ │ │ ├── TagsPanel.tsx │ │ │ │ ├── dist │ │ │ │ │ └── index.js │ │ │ │ ├── index.tsx │ │ │ │ └── others │ │ │ │ │ ├── canvas-utils.ts │ │ │ │ │ ├── types.ts │ │ │ │ │ └── use-debounce.ts │ │ │ ├── ErrorBoundary.tsx │ │ │ ├── apiService.js │ │ │ └── codeUtils.js │ │ ├── index.tsx │ │ ├── react-app-env.d.ts │ │ └── styles.css │ └── tsconfig.json └── readme.md ├── LICENSE ├── config ├── class.json └── kragen.env ├── conversion.md ├── dev_guide.md ├── docker-compose-flask.yml ├── docker-compose-gui.yml ├── docker-compose-kragen.yml ├── 
docker-compose-weaviate.yml ├── docker-compose.yml ├── docker ├── Dockerfile ├── prod.Dockerfile └── requirements.txt ├── images ├── KG2Diagram.png ├── KG2VectorDB_Process.png ├── OIG4.jpeg └── radar_chart_kragen.png ├── kragen-gui.sh ├── readme.md ├── release ├── deploy_production_release.sh ├── generate_production_release.sh └── readme.md ├── src ├── __init__.py ├── addTokenInfo.py ├── config.py ├── convert.py ├── extract_data.ipynb ├── k_setup.py ├── kragen.py ├── make_vector.py ├── parse.py └── upload.py ├── test.csv └── test_data ├── MCQ_1hop.json ├── MCQ_2hop.json ├── MCQ_genes.json ├── OpenEnded_1hop.json ├── OpenEnded_2hop.json ├── OpenEnded_genes.json ├── True_or_False_1hop.json ├── True_or_False_2hop.json └── True_or_False_genes.json /.env.sample: -------------------------------------------------------------------------------- 1 | # This is a sample file identical to the one in the 'Alzkb_Q_A_Embedding' folder. Please copy this file into '.env' and modify it accordingly. 2 | # For each variable, replace the current value with your specific details. 3 | 4 | # Project version 5 | TAG=0.1.0a0 6 | 7 | # OpenAI API 8 | OPENAI_API_KEY=YOUR_API_KEY 9 | OPENAI_EMBEDDING_MODEL=text-embedding-ada-002 10 | 11 | # Weaviate Server Settings 12 | WEAVIATE_URL=http://192.168.1.49:8080 13 | WEAVIATE_API_KEY=hashkey1 14 | 15 | # Flask Server Config 16 | IMAGE_NAME="kragen-flask-server" 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | # .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | .env 131 | config.json 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | .idea/ 163 | 164 | alzkb_test.csv 165 | 166 | !**/__init__.py 167 | !**/requirements.txt 168 | 169 | 170 | 171 | # exclude config files 172 | config.json 173 | 174 | 175 | # dependencies 176 | /node_modules 177 | /.pnp 178 | .pnp.js 179 | 180 | weaviate_data/ 181 | 182 | # folders to ignore 183 | 184 | Alzkb_Q_A_Embedding/old/ 185 | Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes/* 186 | !Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes/ 187 | Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded/* 188 | !Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded/ 189 | Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded_addtokens/* 190 | !Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded_addtokens/ 191 | 192 | Alzkb_Q_A_Embedding/.env.sample 193 | Alzkb_Q_A_Embedding/.gitignore 194 | Alzkb_Q_A_Embedding/readme.md 195 | Alzkb_Q_A_Embedding/requirements.txt 196 | Alzkb_Q_A_Embedding/test_data.json 197 | Alzkb_Q_A_Embedding/test.py 198 | 199 | .env_my -------------------------------------------------------------------------------- /Alzkb_Q_A_Embedding/.env.sample: -------------------------------------------------------------------------------- 1 | # This is a sample file. Please copy this file to .env and edit it. 2 | # In each variable, please replace each value with your own. 
3 | 4 | # Project version 5 | TAG=0.1.0a0 6 | 7 | # Azure OpenAI API 8 | OPENAI_API_TYPE=azure 9 | OPENAI_API_BASE=https://caire-azure-openai.openai.azure.com/ 10 | OPENAI_API_VERSION=2023-05-15 11 | OPENAI_API_KEY=YOUR_API_KEY 12 | OPENAI_EMBEDDING_MODEL=text-embedding-ada-002 13 | 14 | # parse.py settings 15 | INPUT_CSV_FILE_LOCATION = './Alzkb_Q_A_Embedding/test_data.csv' 16 | OUTPUT_DIR = './Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes' 17 | 18 | # make_vector_for_in_context_learning_Azure_parallelization_real_dask_7.py 19 | INPUT_DIR_FOR_EMBEDDING = './Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes' 20 | OUTPUT_DIR_FOR_EMBEDDING = './Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded' 21 | 22 | # addTokenInfo.py 23 | INPUT_DIR_FOR_ADDING_TOKEN_INFO = './Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded' 24 | OUTPUT_DIR_FOR_ADDING_TOKEN_INFO = './Alzkb_Q_A_Embedding/divided_data_v3_dataset_disconnectedgenes_embedded_addtokens' 25 | 26 | -------------------------------------------------------------------------------- /Alzkb_Q_A_Embedding/.gitignore: -------------------------------------------------------------------------------- 1 | # Python Files 2 | parse-for-.py 3 | checkUniqueAnswer.py 4 | csvToJson.py 5 | embedding_testQ_A.py 6 | test.py 7 | make_vector_for_in_context_learning_Azure_parallelization_real_dask_7_origin.py 8 | 9 | # Data Files 10 | *.zip 11 | divided_data 12 | divided_data_embedded 13 | divided_data_embedded_addtokens 14 | 15 | 16 | /divided_data_v3_dataset_disconnectedgenes/* 17 | !/divided_data_v3_dataset_disconnectedgenes/ 18 | 19 | /divided_data_v3_dataset_disconnectedgenes_embedded/* 20 | !/divided_data_v3_dataset_disconnectedgenes_embedded/ 21 | 22 | /divided_data_v3_dataset_disconnectedgenes_embedded_addtokens/* 23 | !/divided_data_v3_dataset_disconnectedgenes_embedded_addtokens/ 24 | 25 | 26 | test_divided_data_v3_dataset_disconnectedgenes_embedded/ 27 | 28 | test_divided_data_v3_dataset_disconnectedgenes_embedded_addtokens 29 | 30 | test_folder/ 31 | testQ_A_embedded_addtokens/ 32 | 33 | 34 | # memo 35 | readme-memo 36 | 37 | # ENV 38 | .env 39 | 40 | !requirements.txt 41 | 42 | # data files 43 | data_alzkb_Mythreye_version_3_dataset_disconnectedgenes.json 44 | 45 | -------------------------------------------------------------------------------- /Alzkb_Q_A_Embedding/readme.md: -------------------------------------------------------------------------------- 1 | # Project Documentation 2 | 3 | ## 0. Environment Setup 4 | 5 | Create an environment by installing the required packages from `requirements.txt`. Python version 3.10 is recommended. 6 | 7 | In addition, please set the following environment variables in the `.env` file: 8 | 9 | ``` 10 | # parse.py settings 11 | INPUT_JSON_FILE_LOCATION = 12 | OUTPUT_DIR = 13 | 14 | # make_vector_for_in_context_learning_Azure_parallelization_real_dask_7.py 15 | # 16 | INPUT_DIR_FOR_EMBEDDING = 17 | OUTPUT_DIR_FOR_EMBEDDING = 18 | 19 | 20 | # Azure OpenAI API 21 | OPENAI_API_TYPE= 22 | OPENAI_API_BASE= 23 | OPENAI_API_VERSION= 24 | OPENAI_API_KEY= 25 | OPENAI_EMBEDDING_MODEL= 26 | ``` 27 | 28 | ### Please run the following scripts in order. 29 | 30 | ## 1. Run parse.py 31 | 32 | This script parses the input data and writes the divided files to `OUTPUT_DIR`. 33 | 34 | ## 2. Run make_vector_for_in_context_learning_Azure_parallelization_real_dask_7.py 35 | 36 | This script creates embedding vectors for the divided data for in-context learning. 37 | 38 | ## 3.
Run addTokenInfo.py 39 | 40 | This script adds token information to the divided data that has been embedded. 41 | -------------------------------------------------------------------------------- /Alzkb_Q_A_Embedding/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==2.0.2 2 | weaviate-client==3.25.3 3 | python-dotenv==1.0.0 4 | openai==0.28.0 5 | matplotlib==3.8.1 6 | plotly==5.16.1 7 | scipy==1.10.1 8 | scikit-learn==1.3.2 9 | dask==2023.10.1 10 | distributed==2023.10.1 11 | nltk==3.8.1 -------------------------------------------------------------------------------- /Alzkb_Q_A_Embedding/test_data.json: -------------------------------------------------------------------------------- 1 | [{"question": "What does the gene NATP do?", "answer": "Gene NATP is a pseudo gene for N-acetyltransferase pseudogene"}, {"question": "What does the gene PARP1P2 do?", "answer": "Gene PARP1P2 is a pseudo gene for poly(ADP-ribose) polymerase 1 pseudogene 2"}, {"question": "What does the gene PARP1P1 do?", "answer": "Gene PARP1P1 is a pseudo gene for poly(ADP-ribose) polymerase 1 pseudogene 1"}, {"question": "What does the gene AAVS1 do?", "answer": "Gene AAVS1 is a other gene for adeno-associated virus integration site 1"}, {"question": "What does the gene A2MP1 do?", "answer": "Gene A2MP1 is a pseudo gene for alpha-2-macroglobulin pseudogene 1"}, {"question": "What does the gene AFG3L1P do?", "answer": "Gene AFG3L1P is a pseudo gene for AFG3 like matrix AAA peptidase subunit 1, pseudogene"}, {"question": "What does the gene ACTG1P1 do?", "answer": "Gene ACTG1P1 is a pseudo gene for actin gamma 1 pseudogene 1"}, {"question": "What does the gene ACTG1P2 do?", "answer": "Gene ACTG1P2 is a pseudo gene for actin gamma 1 pseudogene 2"}, {"question": "What does the gene ACTG1P3 do?", "answer": "Gene ACTG1P3 is a pseudo gene for actin gamma 1 pseudogene 3"}, {"question": "What does the gene ACTG1P6 do?", "answer": "Gene ACTG1P6 is a pseudo gene for actin gamma 1 pseudogene 6"}] -------------------------------------------------------------------------------- /Benchmark_Comparison_and_Setup.md: -------------------------------------------------------------------------------- 1 | # Performance Comparison on Various Question Types 2 | 3 | | Methods | Question Type 1 (T/F-1hop) #560 | Question Type 2 (T/F-2hop) #540 | Question Type 3 (MCQ-1hop) #498 | Question Type 4 (MCQ-2hop) #419 | 4 | | -------------------- | ------------------------------- | ------------------------------- | ------------------------------- | ------------------------------- | 5 | | ChatGPT3.5 | 45.2% | 55.4% | 47.6% | 58.5% | 6 | | OpenChat3.5 | 59.1% | 58.7% | 48.8% | 62.8% | 7 | | ChatGPT4 | 68.6% | 62.4% | 56.6% | 53.1% | 8 | | KRAGEN with ChatGPT4 | **80.3%** | **62.9%** | **70.4%** | **71.8%** | 9 | 10 | # Experiment Setup 11 | 12 | This document outlines the configurations and hyperparameters used for the performance comparison of various baseline models and KRAGEN. 
13 | 14 | ## Baseline Models Configuration 15 | 16 | ### Azure AI ChatGPT3.5 17 | 18 | - **Model:** gpt-3.5-turbo-16k 19 | - **Temperature:** 0 20 | - **Top P:** 1 21 | - **Reference:** [Azure AI OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) 22 | 23 | ### Azure AI ChatGPT4 24 | 25 | - **Model:** gpt-4 26 | - **Temperature:** 0 27 | - **Top P:** 1 28 | - **Reference:** [Azure AI OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) 29 | 30 | ### OpenChat3.5 31 | 32 | - **Model:** OpenChat 3.5 33 | - **Temperature:** 0 34 | - **Top P:** 1 35 | - **Reference:** [Hugging Face OpenChat 3.5](https://huggingface.co/openchat/openchat_3.5) 36 | 37 | ### KRAGEN Configuration 38 | 39 | - **Model:** gpt-4 40 | - **Temperature:** 0 41 | - **Top P:** 1 42 | - **Embedding model:** text-embedding-ada-002 43 | - **Reference:** [Azure AI OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) 44 | 45 | **Note:** BioGPT, available on Hugging Face at https://huggingface.co/microsoft/biogpt, has been tested using the True/False-1hop, True/False-2hop, MCQ-1hop, and MCQ-2hop datasets; however, it does not perform adequately on these tasks. Other BioGPT models are being evaluated, and we will publish their results as testing continues. 46 | 47 | ## Data Configuration 48 | 49 | The evaluation encompasses four distinct question types, each representing a different level of complexity. The dataset for these evaluations is located within the [`test_data`](https://github.com/EpistasisLab/KRAGEN/tree/main/test_data) directory at the root of the GitHub repository: 50 | 51 | 52 | - **Question Type 1 (T/F-1hop)**: Consists of 560 True/False questions, each answerable with a single inference step. 53 | - **Question Type 2 (T/F-2hop)**: Comprises 540 True/False questions, each necessitating two logical inference steps for resolution. 54 | - **Question Type 3 (MCQ-1hop)**: Includes 498 multiple-choice questions (MCQs), each of which requires a single inference step to determine the correct option. 55 | - **Question Type 4 (MCQ-2hop)**: Contains 419 multiple-choice questions that each require two inference steps to answer correctly. 56 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/.env.sample.notused: -------------------------------------------------------------------------------- 1 | AI_SERVICE=openai 2 | AI_API_KEY=your_api_key 3 | CODE_RUN_PATH=code_run 4 | CLIENT_ORIGINS='http://localhost:3000' -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an existing Python image as the base 2 | FROM python:3.10-slim 3 | 4 | # Set the working directory inside the container 5 | WORKDIR /app 6 | 7 | # Copy the code into the container 8 | COPY . 
/app 9 | 10 | RUN apt update --fix-missing \ 11 | && apt upgrade -y \ 12 | && apt install -y gcc python3-dev \ 13 | && pip install --upgrade pip 14 | 15 | # Install dependencies 16 | RUN pip install -r ./ExecGPTServer/requirements.txt 17 | RUN pip install -e graph_of_thoughts 18 | RUN flask --app ExecGPTServer init-db 19 | 20 | # Expose the port on which your Flask server will run (if needed) 21 | EXPOSE 5050 22 | 23 | # Command to run the Flask server 24 | CMD ["python", "execgpt.py"] 25 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_cors import CORS 3 | from .utils import util 4 | import os 5 | 6 | 7 | def create_app(): 8 | app = Flask(__name__, instance_relative_config=True) 9 | app.config.from_mapping( 10 | SECRET_KEY='dev', 11 | DATABASE=os.path.join(app.instance_path, 'ExecGPTServer.sqlite'), 12 | ) 13 | 14 | try: 15 | os.makedirs(app.instance_path) 16 | except OSError: 17 | pass 18 | 19 | # ai_service = "AI_SERVICE" 20 | # api_key = "AI_API_KEY" 21 | 22 | # if ai_service not in os.environ: 23 | # os.environ[ai_service] = "openai" 24 | 25 | # if api_key not in os.environ: 26 | # print(f"The '{api_key}' environment variable is not set.") 27 | # print('Please set it and try again.') 28 | # exit(1) 29 | 30 | cors = CORS(app, origins=util.client_origins) 31 | 32 | from .db import db 33 | db.init_app(app) 34 | 35 | from . import server 36 | app.register_blueprint(server.bp) 37 | 38 | from .api import api 39 | app.register_blueprint(api.bp) 40 | 41 | from .api import openai 42 | app.register_blueprint(openai.bp) 43 | 44 | from .api import execapi 45 | app.register_blueprint(execapi.bp) 46 | 47 | from .api import chatapi 48 | app.register_blueprint(chatapi.bp) 49 | 50 | return app 51 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/KRAGEN_Dashboard/Backend/ExecGPTServer/api/__init__.py -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/api/api.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from flask_cors import CORS, cross_origin 3 | from ..utils import config_util 4 | 5 | bp = Blueprint('api', __name__, url_prefix='/api/v1') 6 | 7 | # Enable CORS on the blueprint 8 | CORS(bp, origins='http://localhost:3000') 9 | 10 | @bp.route('/configs', methods=['POST']) 11 | @cross_origin() 12 | def create_config(): 13 | req = request.get_json() 14 | if 'service' not in req: 15 | return jsonify({'error': 'Missing service parameter'}), 400 16 | 17 | if 'key' not in req: 18 | return jsonify({'error': 'Missing key parameter'}), 400 19 | 20 | result = config_util.create_config(req['service'], req['key']) 21 | 22 | # if 'error' in result and result['error'] is not None: 23 | if 'error' in result: 24 | return jsonify({'error': result['error']}), 500 25 | 26 | return jsonify(result), 200 27 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/api/execapi.py: 
-------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from flask_cors import CORS, cross_origin 3 | from ..utils import exec_util 4 | 5 | bp = Blueprint('execapi', __name__, url_prefix='/execapi/v1') 6 | 7 | # Enable CORS on the blueprint 8 | CORS(bp, origins='http://localhost:3000') 9 | 10 | 11 | @bp.route('/executions', methods=['POST']) 12 | @bp.route('/executions/<execution_id>', methods=['GET']) 13 | @cross_origin() 14 | def executions(execution_id=None): 15 | execution = {} 16 | if request.method == 'POST': 17 | req = request.get_json() 18 | 19 | if 'src_code' not in req: 20 | return jsonify({'error': 'Missing src_code parameter'}), 400 21 | 22 | execution = exec_util.create_execution(req['src_code']) 23 | 24 | # if 'error' in execution and execution['error'] is not None: 25 | if 'error' in execution: 26 | return jsonify({'error': execution['error']}), 500 27 | 28 | execution['src_code'] = req['src_code'] 29 | 30 | run_dir = exec_util.create_code_run_dir(execution['id']) 31 | 32 | execution = exec_util.run_code(execution, run_dir) 33 | 34 | execution['files'] = exec_util.add_generated_files(run_dir) 35 | 36 | exec_util.update_execution(execution) 37 | 38 | elif request.method == 'GET': 39 | execution = exec_util.get_execution(execution_id) 40 | # if 'error' in execution and execution['error'] is not None: 41 | if 'error' in execution: 42 | return jsonify({'error': execution['error']}), 500 43 | 44 | return jsonify(execution), 200 45 | 46 | 47 | @bp.route('/packages', methods=('GET', 'POST')) 48 | @cross_origin() 49 | def packages(): 50 | if request.method == 'GET': 51 | result = exec_util.get_packages() 52 | # if 'error' in result and result['error'] is not None: 53 | if 'error' in result: 54 | return result, 500 55 | 56 | return jsonify(result), 200 57 | else: 58 | req = request.get_json() 59 | if 'packages' not in req: 60 | return jsonify({'error': 'Missing packages parameter'}), 400 61 | 62 | result = exec_util.install_packages(req['packages']) 63 | # if 'error' in result and result['error'] is not None: 64 | if 'error' in result: 65 | return jsonify({'error': result['error']}), 500 66 | else: 67 | return jsonify(result), 200 68 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/api/openai.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from flask_cors import CORS, cross_origin 3 | from ..server import get_openai_connection 4 | 5 | bp = Blueprint('openai', __name__, url_prefix='/openai/v1') 6 | 7 | 8 | @bp.route('/chat/completions', methods=('POST',)) 9 | @cross_origin() 10 | def chat_completions(): 11 | req = request.get_json() 12 | try: 13 | openai = get_openai_connection() 14 | returned_data = openai.ChatCompletion.create(**req) 15 | print ("returned_data: ", returned_data) 16 | return returned_data 17 | except Exception as e: 18 | return jsonify({'error': str(e)}), 500 19 | 20 | 21 | @bp.route('/models', methods=['GET']) 22 | @cross_origin() 23 | def models(): 24 | try: 25 | openai = get_openai_connection() 26 | print("jsonify(openai.Model.list())",jsonify(openai.Model.list())) 27 | 28 | return jsonify(openai.Model.list()) 29 | except Exception as e: 30 | return jsonify({'error': str(e)}), 500 31 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/db/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/KRAGEN_Dashboard/Backend/ExecGPTServer/db/__init__.py -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/db/db.py: -------------------------------------------------------------------------------- 1 | from flask import current_app, g 2 | import sqlite3 3 | import click 4 | import os 5 | 6 | 7 | def get_db(): 8 | if 'db' not in g: 9 | g.db = sqlite3.connect( 10 | current_app.config['DATABASE'], 11 | detect_types=sqlite3.PARSE_DECLTYPES 12 | ) 13 | g.db.row_factory = sqlite3.Row 14 | 15 | return g.db 16 | 17 | 18 | def close_db(e=None): 19 | db = g.pop('db', None) 20 | 21 | if db is not None: 22 | db.close() 23 | 24 | 25 | def init_db(): 26 | db = get_db() 27 | 28 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 29 | 30 | file_path = os.path.join(parent_dir, 'schema.sql') 31 | 32 | with current_app.open_resource(file_path) as f: 33 | db.executescript(f.read().decode('utf8')) 34 | 35 | 36 | @click.command('init-db') 37 | def init_db_command(): 38 | init_db() 39 | click.echo('Initialized the database.') 40 | 41 | 42 | def init_app(app): 43 | app.teardown_appcontext(close_db) 44 | app.cli.add_command(init_db_command) 45 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/notused/__init__old.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from flask import Flask 4 | 5 | def create_app(test_config=None): 6 | # create and configure the app 7 | app = Flask(__name__, instance_relative_config=True) 8 | app.config.from_mapping( 9 | SECRET_KEY='dev', 10 | DATABASE=os.path.join(app.instance_path, 'ExecGPT.sqlite'), 11 | ) 12 | 13 | if test_config is None: 14 | # load the instance config, if it exists, when not testing 15 | app.config.from_pyfile('config.py', silent=True) 16 | else: 17 | # load the test config if passed in 18 | app.config.from_mapping(test_config) 19 | 20 | # ensure the instance folder exists 21 | try: 22 | os.makedirs(app.instance_path) 23 | except OSError: 24 | pass 25 | 26 | # initialize the database 27 | from . import db 28 | db.init_app(app) 29 | 30 | from . import routes 31 | app.register_blueprint(routes.bp) 32 | 33 | return app 34 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/notused/aiconf.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .db import get_db 3 | 4 | 5 | def save_config(org_id=None, api_key=None): 6 | result = { 'error': None } 7 | print('save_config') 8 | print('org_id: ', org_id) 9 | print('api_key: ', api_key) 10 | 11 | 12 | db = get_db() 13 | try: 14 | cursor = db.execute( 15 | 'INSERT INTO config (org_id, api_key) VALUES (?, ?) ON CONFLICT DO NOTHING', 16 | (org_id, api_key) 17 | ) 18 | db.commit() 19 | if cursor.rowcount == 0: 20 | cursor = db.execute( 21 | 'SELECT id FROM config WHERE org_id = ?', 22 | (org_id,) 23 | ) 24 | existing_config = cursor.fetchone() 25 | if existing_config: 26 | config_id = existing_config['id'] 27 | result['message'] = 'Config already exists' 28 | result['config_id'] = config_id 29 | else: 30 | result['error'] = 'Error while inserting config.' 
31 | else: 32 | result['message'] = 'Config created successfully' 33 | result['config_id'] = cursor.lastrowid 34 | 35 | except db.IntegrityError: 36 | result['error'] = 'Error while inserting config.' 37 | 38 | return result 39 | 40 | 41 | def update_config(config_id=None, org_id=None, api_key=None): 42 | result = {} 43 | if config_id is None: 44 | result['error'] = 'config_id is required.' 45 | elif not org_id and not api_key: 46 | result['error'] = 'Nothing to update. Please provide either org_id or api_key.' 47 | 48 | # if result['error'] is not None: 49 | if 'error' in result: 50 | return result 51 | 52 | db = get_db() 53 | try: 54 | placeholders = [] 55 | values = [] 56 | 57 | if org_id: 58 | placeholders.append('org_id = ?') 59 | values.append(org_id) 60 | if api_key: 61 | placeholders.append('api_key = ?') 62 | values.append(api_key) 63 | 64 | if placeholders: 65 | query = 'UPDATE config SET {} WHERE id = ?'.format(', '.join(placeholders)) 66 | values.append(config_id) 67 | 68 | db.execute(query, values) 69 | db.commit() 70 | 71 | result['message'] = 'Config updated successfully' 72 | result['config_id'] = config_id 73 | except db.IntegrityError: 74 | result['error'] = 'Error while updating config.' 75 | 76 | return result 77 | 78 | 79 | def get_config(): 80 | db = get_db() 81 | config = db.execute( 82 | 'SELECT * FROM config' 83 | ).fetchone() 84 | return config 85 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/notused/aiconn.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from . import aiconf 3 | 4 | 5 | class AIConn: 6 | _instance = None 7 | 8 | def __new__(cls, *args, **kwargs): 9 | if not cls._instance: 10 | cls._instance = super(AIConn, cls).__new__(cls, *args, **kwargs) 11 | return cls._instance 12 | 13 | # def __init__(self): 14 | # # if not self.org_id or not self.api_key: 15 | # # if not self.api_key: 16 | # self.set_config() 17 | 18 | def set_config(self): 19 | config = aiconf.get_config() 20 | # self.org_id = config['org_id'] 21 | self.api_key = config['api_key'] 22 | openai.api_key = self.api_key 23 | 24 | def connect(self): 25 | self.set_config() 26 | # setting the organization does not work 27 | # openai.organization = self.org_id 28 | # openai.api_key = self.api_key 29 | result = {} 30 | result = self.get_models() 31 | # if result['error'] is not None: 32 | if 'error' in result: 33 | result['message'] = 'Connection failed' 34 | else: 35 | result['message'] = 'Connected successfully' 36 | return result 37 | 38 | def disconnect(self): 39 | # openai.organization = None 40 | openai.api_key = None 41 | return {'message': 'Disconnected successfully', 'error': None} 42 | 43 | def check_connection(self): 44 | # result = { 'error': None } 45 | # if openai.api_key == self.api_key and openai.organization == self.org_id: 46 | # print('openai.api_key: ', openai.api_key) 47 | # print('self.api_key: ', self.api_key) 48 | # if openai.api_key == self.api_key: 49 | result = self.get_models() 50 | if 'error' in result: 51 | result['message'] = 'Not connected' 52 | else: 53 | result['message'] = 'Connected' 54 | # else: 55 | # result['message'] = 'Not connected' 56 | 57 | return result 58 | 59 | def get_models(self): 60 | result = {} 61 | try: 62 | models = openai.Model.list() 63 | result['models'] = models.data 64 | except Exception as e: 65 | result['error'] = str(e) 66 | 67 | return result 68 | 69 | def chat_completions(self, req): 70 | # 
req['api_key'] = self.api_key 71 | return openai.ChatCompletion.create(**req) 72 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/notused/routes_old.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request, g 2 | from .aiconn import AIConn 3 | 4 | bp = Blueprint('routes', __name__, url_prefix='/openai/v1') 5 | 6 | @bp.route('/connections', methods=('GET', 'DELETE', 'POST')) 7 | def connections(): 8 | g.ai_conn = AIConn() 9 | result = { 'error': None } 10 | status = 405 11 | if request.method == 'GET': 12 | result = g.ai_conn.check_connection() 13 | elif request.method == 'DELETE': 14 | result = g.ai_conn.disconnect() 15 | elif request.method == 'POST': 16 | result = g.ai_conn.connect() 17 | else: 18 | result['error'] = 'Invalid request method' 19 | 20 | status = 200 if result.get('error') is None else 405 21 | 22 | return jsonify(result), status 23 | 24 | 25 | @bp.route('/models', methods=['GET']) 26 | def models(): 27 | g.ai_conn = AIConn() 28 | return g.ai_conn.get_models() 29 | 30 | 31 | @bp.route('/chat/completions', methods=('POST',)) 32 | def chat_completions(): 33 | req = request.get_json() 34 | g.ai_conn = AIConn() 35 | return g.ai_conn.chat_completions(req) 36 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/requirements.txt: -------------------------------------------------------------------------------- 1 | openai==1.3.1 2 | Flask==2.3.2 3 | python-dotenv==1.0.0 4 | flask_cors==4.0.1 5 | weaviate-client==4.4.4 6 | watchdog==4.0.0 -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/schema.sql: -------------------------------------------------------------------------------- 1 | -- DROP TABLE IF EXISTS config; 2 | 3 | -- CREATE TABLE IF NOT EXISTS config ( 4 | -- id INTEGER PRIMARY KEY AUTOINCREMENT, 5 | -- key TEXT NOT NULL, 6 | -- value TEXT NOT NULL, 7 | -- timestamp DATETIME DEFAULT CURRENT_TIMESTAMP 8 | -- ); 9 | 10 | DROP TABLE IF EXISTS execution; 11 | 12 | CREATE TABLE execution ( 13 | id INTEGER PRIMARY KEY AUTOINCREMENT, 14 | src_code TEXT, 15 | status TEXT, 16 | result TEXT, 17 | files TEXT, 18 | timestamp DATETIME DEFAULT CURRENT_TIMESTAMP 19 | ); 20 | 21 | -- DROP TABLE IF EXISTS files; 22 | 23 | -- CREATE TABLE files ( 24 | -- id INTEGER PRIMARY KEY AUTOINCREMENT, 25 | -- name TEXT NOT NULL, 26 | -- path TEXT NOT NULL, 27 | -- execution_id INTEGER NOT NULL, 28 | -- timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, 29 | -- FOREIGN KEY (execution_id) REFERENCES execution(id) 30 | -- ); 31 | 32 | DROP TABLE IF EXISTS chat; 33 | 34 | CREATE TABLE chat ( 35 | id INTEGER PRIMARY KEY AUTOINCREMENT, 36 | title TEXT NOT NULL, 37 | timestamp DATETIME DEFAULT CURRENT_TIMESTAMP 38 | ); 39 | 40 | DROP TABLE IF EXISTS chatlog; 41 | 42 | CREATE TABLE chatlog ( 43 | id INTEGER PRIMARY KEY AUTOINCREMENT, 44 | chat_id INTEGER NOT NULL, 45 | execution_id INTEGER, 46 | message TEXT NOT NULL, 47 | message_type TEXT NOT NULL, 48 | src_code TEXT, 49 | who TEXT NOT NULL, 50 | timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, 51 | FOREIGN KEY (chat_id) REFERENCES chat(id), 52 | FOREIGN KEY (execution_id) REFERENCES execution(id) 53 | ); -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/server.py: 
-------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request, g 2 | from flask_cors import CORS, cross_origin 3 | import os 4 | from ExecGPTServer.utils import chat_util 5 | 6 | bp = Blueprint('routes', __name__) 7 | 8 | # Enable CORS on the blueprint 9 | CORS(bp, origins='http://localhost:3000') 10 | 11 | 12 | # bp = Blueprint('routes', __name__, url_prefix='/openai/v1') 13 | 14 | 15 | 16 | import openai 17 | openai.api_key = "" 18 | 19 | 20 | def get_openai_connection(): 21 | if 'openai_conn' not in g: 22 | g.openai_conn = openai 23 | 24 | return g.openai_conn 25 | 26 | 27 | @bp.route('/', methods=['GET']) 28 | @cross_origin() 29 | def index(): 30 | # replace this with the actual index page in React 31 | # for example: 32 | # return current_app.send_static_file('build/index.html') 33 | return jsonify({'message': 'Hello, world!'}), 200 34 | 35 | 36 | @bp.route('/chatlogs', methods=['GET']) 37 | @cross_origin() 38 | def getchatlogs(): 39 | req = request.get_json() 40 | if 'chatname' not in req: 41 | return jsonify({'error': 'Missing chatname parameter'}), 400 42 | 43 | chatname = req['chatname'] 44 | if not isinstance(chatname, str): 45 | return jsonify({'error': 'Chatname parameter must be a string'}), 400 46 | 47 | # return jsonify(chat.get_chat_entries(chatname)), 200 48 | result = chat_util.get_chat_entries(chatname) 49 | result['chatname'] = chatname 50 | return result, 200 51 | 52 | 53 | @bp.route('/chatlogs', methods=['POST']) 54 | @cross_origin() 55 | def addchatlog(): 56 | req = request.get_json() 57 | if 'chatname' not in req: 58 | return jsonify({'error': 'Missing chatname parameter'}), 400 59 | 60 | chatname = req['chatname'] 61 | if not isinstance(chatname, str): 62 | return jsonify({'error': 'Chatname parameter must be a string'}), 400 63 | 64 | if 'user' not in req: 65 | return jsonify({'error': 'Missing user parameter'}), 400 66 | 67 | user = req['user'] 68 | if not isinstance(user, str): 69 | return jsonify({'error': 'User parameter must be a string'}), 400 70 | 71 | if 'message' not in req: 72 | return jsonify({'error': 'Missing message parameter'}), 400 73 | 74 | message = req['message'] 75 | if not isinstance(message, str): 76 | return jsonify({'error': 'Message parameter must be a string'}), 400 77 | 78 | return jsonify(chat_util.add_chat_entry(chatname, req)), 200 79 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/KRAGEN_Dashboard/Backend/ExecGPTServer/utils/__init__.py -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/utils/config_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def create_config(service, key): 5 | # result = { 'error': None } 6 | result = {} 7 | 8 | os.environ['AI_SERVICE'] = service 9 | os.environ['AI_API_KEY'] = key 10 | 11 | set_key(service, key) 12 | 13 | result['message'] = 'Config created successfully' 14 | 15 | return result 16 | 17 | 18 | def set_key(service, key): 19 | if service == 'openai': 20 | import openai 21 | openai.api_key = key 22 | --------------------------------------------------------------------------------
/KRAGEN_Dashboard/Backend/ExecGPTServer/utils/exec_util.py: -------------------------------------------------------------------------------- 1 | from ..db.db import get_db 2 | from io import StringIO 3 | import subprocess 4 | import os 5 | import sys 6 | import json 7 | 8 | 9 | def create_execution(src_code): 10 | # result = { 'error': None } 11 | result = {} 12 | db = get_db() 13 | try: 14 | cursor = db.execute( 15 | 'INSERT INTO execution (status, src_code)' 16 | ' VALUES (?, ?)', 17 | ('submitted', src_code) 18 | ) 19 | db.commit() 20 | result['id'] = cursor.lastrowid 21 | result['message'] = 'Execution saved successfully' 22 | except Exception as e: 23 | print(e) 24 | result['error'] = str(e) 25 | 26 | return result 27 | 28 | 29 | def get_execution(execution_id): 30 | # result = { 'error': None } 31 | result = {} 32 | db = get_db() 33 | try: 34 | row = db.execute( 35 | 'SELECT * FROM execution WHERE id = ?', 36 | (execution_id,) 37 | ).fetchone() 38 | 39 | if row is None: 40 | result['error'] = 'Execution id {} does not exist'.format(execution_id) 41 | else: 42 | result['execution'] = dict(row) 43 | result['message'] = 'Execution retrieved successfully' 44 | except Exception as e: 45 | result['error'] = str(e) 46 | 47 | return result 48 | 49 | 50 | # this contradicts the flexibility of saving executed files anywhere on the FS. 51 | def update_execution(execution): 52 | # result = { 'error': None } 53 | result = {} 54 | db = get_db() 55 | query = 'UPDATE execution SET' 56 | parameters = [] 57 | if 'status' in execution: 58 | query += ' status = ?,' 59 | parameters.append(execution['status']) 60 | if 'result' in execution: 61 | query += ' result = ?,' 62 | parameters.append(execution['result']) 63 | if 'files' in execution: 64 | query += ' files = ?,' 65 | parameters.append(execution['files']) 66 | query = query[:-1] + ' WHERE id = ?' 
67 | parameters.append(execution['id']) 68 | 69 | try: 70 | db.execute(query, parameters) 71 | db.commit() 72 | result['message'] = 'Execution updated successfully' 73 | except Exception as e: 74 | result['error'] = str(e) 75 | 76 | return result 77 | 78 | 79 | def run_code(execution, run_dir): 80 | try: 81 | current_dir = os.getcwd() 82 | os.chdir(run_dir) 83 | 84 | stdout_backup = sys.stdout 85 | sys.stdout = StringIO() 86 | 87 | exec(execution['src_code']) 88 | 89 | execution['result'] = sys.stdout.getvalue() 90 | sys.stdout = stdout_backup 91 | 92 | os.chdir(current_dir) 93 | 94 | if execution['result'] is None: 95 | execution['result'] = '' 96 | 97 | execution['status'] = 'completed' 98 | 99 | except Exception as e: 100 | execution['result'] = str(e) 101 | execution['status'] = 'failed' 102 | 103 | return execution 104 | 105 | 106 | def create_code_run_dir(execution_id): 107 | # check if run_dir exists 108 | # if not, create it 109 | # return run_dir 110 | current_dir = os.getcwd() 111 | run_dir = os.path.join(current_dir, os.environ['CODE_RUN_PATH'], str(execution_id)) 112 | 113 | if not os.path.exists(run_dir): 114 | os.makedirs(run_dir) 115 | 116 | return run_dir 117 | 118 | 119 | def add_generated_files(run_dir): 120 | files = [] 121 | for file in os.listdir(run_dir): 122 | file = os.path.join(run_dir, file) 123 | # print the file path relative to the current working directory 124 | # print(os.path.relpath(file, os.getcwd())) 125 | if os.path.isfile(file): 126 | # uncomment this line to save relative path instead of full path: 127 | file = os.path.relpath(file, os.getcwd()) 128 | files.append(file) 129 | 130 | return ','.join(files) 131 | 132 | 133 | def get_packages(): 134 | # result = { 'error': None } 135 | result = {} 136 | try: 137 | result['packages'] = subprocess.check_output(['pip', 'list', '--format', 'json'], stderr=subprocess.STDOUT) 138 | result['packages'] = [package['name'] for package in json.loads(result['packages'])] 139 | except Exception as e: 140 | result['error'] = str(e) 141 | 142 | return result 143 | 144 | 145 | def install_packages(packages): 146 | # result = { 'error': None } 147 | result = {} 148 | try: 149 | result['message'] = subprocess.check_output(['pip', 'install', *packages], stderr=subprocess.STDOUT) 150 | # parse the output to get the message and return it as proper json 151 | result['message'] = result['message'].decode('utf-8').split('\n') 152 | result['message'] = [line for line in result['message'] if line != ''] 153 | print('before pop') 154 | print(result['message']) 155 | result['message'] = result['message'][-1] 156 | print('after pop') 157 | print(result['message']) 158 | except Exception as e: 159 | result['error'] = str(e) 160 | 161 | return result 162 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/ExecGPTServer/utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | client_origins = os.environ.get('CLIENT_ORIGINS', 'http://localhost:3000') -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/README.md: -------------------------------------------------------------------------------- 1 | # ExecGPT 2 | 3 | ## Installation 4 | It is recommended that you use a python [virtual environment](https://docs.python.org/3/library/venv.html) 5 | 6 | To install dependencies, from within the ExecGPT directory run: 7 | ``` 8 | $ pip install -r ExecGPTServer/requirements.txt 9 
| ``` 10 | 11 | Copy the `.env.sample` file to `.env`: 12 | 13 | ``` 14 | $ cp .env.sample .env 15 | ``` 16 | 17 | In the `.env` file, the **CODE_RUN_PATH** variable is the directory path where code executions will run. 18 | 19 | ## Setup 20 | ### Database 21 | The database needs to be initialized the first time you run the app. To initialize it, run the following command within the ExecGPT directory: 22 | ``` 23 | $ flask --app ExecGPTServer init-db 24 | ``` 25 | 26 | ### API KEY 27 | An active API key is required to run this application. 28 | 29 | The API key can be set either via the GUI or the command line. 30 | 31 | #### GUI 32 | [coming soon] 33 | 34 | #### Command Line 35 | 36 | Your API key can be set via an environment variable: 37 | ``` 38 | $ export AI_API_KEY=[your api key] 39 | ``` 40 | 41 | OpenAI is the default service used. If you prefer to use a different service, you may set it in the AI_SERVICE environment variable (see the list of supported services below): 42 | 43 | ``` 44 | $ export AI_SERVICE=[service] 45 | ``` 46 | 47 | **Note:** the **AI_SERVICE** and **AI_API_KEY** environment variables can also be set in the `.env` file. 48 | 49 | ### Supported services 50 | - OpenAI 51 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/code_run/1/test.txt: -------------------------------------------------------------------------------- 1 | hello there -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/code_run/2/test.txt: -------------------------------------------------------------------------------- 1 | hello there -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/config.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "azuregpt": { 3 | "model_id": "gpt-35-turbo-16k", 4 | "prompt_token_cost": 0.001, 5 | "response_token_cost": 0.002, 6 | "temperature": 0, 7 | "max_tokens": 1200, 8 | "stop": null, 9 | "api_version": "2023-07-01-preview", 10 | "api_base": "", 11 | "api_key": "", 12 | "embedding_id": "text-embedding-ada-002" 13 | }, 14 | "weaviate": { 15 | "api_key": "hashkey1", 16 | "url": "http://0.0.0.0:8080", 17 | "db": "AlzKB", 18 | "limit": 200 19 | }, 20 | "chatgpt": { 21 | "model_id": "gpt-3.5-turbo", 22 | "prompt_token_cost": 0.001, 23 | "response_token_cost": 0.002, 24 | "temperature": 1.0, 25 | "max_tokens": 1536, 26 | "stop": null, 27 | "organization": "", 28 | "api_key": "", 29 | "embedding_id": "text-embedding-ada-002" 30 | }, 31 | "chatgpt4": { 32 | "model_id": "gpt-4", 33 | "prompt_token_cost": 0.03, 34 | "response_token_cost": 0.06, 35 | "temperature": 1.0, 36 | "max_tokens": 4096, 37 | "stop": null, 38 | "organization": "", 39 | "api_key": "" 40 | }, 41 | "llama7b-hf": { 42 | "model_id": "Llama-2-7b-chat-hf", 43 | "cache_dir": "/llama", 44 | "prompt_token_cost": 0.0, 45 | "response_token_cost": 0.0, 46 | "temperature": 0.6, 47 | "top_k": 10, 48 | "max_tokens": 4096 49 | }, 50 | "llama13b-hf": { 51 | "model_id": "Llama-2-13b-chat-hf", 52 | "cache_dir": "/llama", 53 | "prompt_token_cost": 0.0, 54 | "response_token_cost": 0.0, 55 | "temperature": 0.6, 56 | "top_k": 10, 57 | "max_tokens": 4096 58 | }, 59 | "llama70b-hf": { 60 | "model_id": "Llama-2-70b-chat-hf", 61 | "cache_dir": "/llama", 62 | "prompt_token_cost": 0.0, 63 | "response_token_cost": 0.0, 64 | "temperature": 0.6, 65 | "top_k": 10, 66 | "max_tokens": 4096 67 | } 68 | } 69 | 
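For orientation, each top-level key in this sample (`azuregpt`, `chatgpt`, `chatgpt4`, and the `llama*-hf` entries) is one language model configuration that the bundled `graph_of_thoughts` package selects by name, while the `weaviate` section points KRAGEN at its vector database. A minimal sketch of loading one section, assuming this sample has been copied to a filled-in `config.json`:

```python
from graph_of_thoughts import language_models

# model_name picks the matching top-level section of config.json;
# "chatgpt" is the gpt-3.5-turbo entry in the sample above.
lm = language_models.ChatGPT("config.json", model_name="chatgpt")
```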
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/execgpt.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def main(): 6 | args = ["flask", "--app", "ExecGPTServer", "run", "--host=0.0.0.0", "--port", "5050"] 7 | arguments = sys.argv[1:] 8 | 9 | # if len(arguments) > 0 and arguments[0] == "debug": 10 | # args.append("--debug") 11 | # # temporarily default to debug 12 | # args.append("--debug") 13 | 14 | subprocess.call(args) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 ETH Zurich. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer listed 13 | in this license in the documentation and/or other materials 14 | provided with the distribution. 15 | 16 | - Neither the name of the copyright holders nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | The copyright holders provide no reassurances that the source code 21 | provided does not infringe any patent, copyright, or any other 22 | intellectual property rights of third parties. The copyright holders 23 | disclaim any liability to any recipient for claims brought against 24 | recipient by any third party for infringement of that parties 25 | intellectual property rights. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | 39 | 40 | Citation 41 | ======== 42 | 43 | Any published work which uses this software should include the 44 | following citation: 45 | 46 | ---------------------------------------------------------------------- 47 | Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Lukas 48 | Gianinazzi, Joanna Gajda, Tomasz Lehmann, Michał Podstawski, Hubert 49 | Niewiadomski, Piotr Nyczyk, Torsten Hoefler: Graph of Thoughts: Solving 50 | Elaborate Problems with Large Language Models. 
In: arXiv preprint 51 | arXiv:2308.09687 52 | ---------------------------------------------------------------------- 53 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/README.md: -------------------------------------------------------------------------------- 1 | # Graph of Thoughts (GoT) 2 | 3 |
6 | 7 | This is the official implementation of [Graph of Thoughts: Solving Elaborate Problems with Large Language Models](https://arxiv.org/pdf/2308.09687.pdf). 8 | This framework gives you the ability to solve complex problems by modeling them as a Graph of Operations (GoO), which is automatically executed with a Large Language Model (LLM) as the engine. 9 | This framework is designed to be flexible and extensible, allowing you to not only solve problems using the new GoT approach, but also to implement GoOs resembling previous approaches like CoT or ToT. 10 | 11 | ## Setup Guide 12 | 13 | In order to use this framework, you need to have a working installation of Python 3.8 or newer. 14 | 15 | ### Installing GoT 16 | 17 | Before running either of the following two installation methods, make sure to activate your Python environment (if any) beforehand. 18 | If you are a user and you just want to use `graph_of_thoughts`, you can install it directly from PyPI: 19 | ```bash 20 | pip install graph_of_thoughts 21 | ``` 22 | If you are a developer and you want to modify the code, you can install it in editable mode from source: 23 | ```bash 24 | git clone https://github.com/spcl/graph-of-thoughts.git 25 | cd graph-of-thoughts 26 | pip install -e . 27 | ``` 28 | 29 | ### Configuring the LLM 30 | 31 | In order to use the framework, you need to have access to an LLM. 32 | Please follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice. 33 | 34 | ## Quick Start 35 | 36 | The following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach. 37 | Make sure you have followed the [Setup Guide](#setup-guide) before running the code. 38 | 39 | ```python 40 | from examples.sorting.sorting_032 import SortingPrompter, SortingParser, utils 41 | from graph_of_thoughts import controller, language_models, operations 42 | 43 | # Problem input 44 | 45 | to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]" 46 | 47 | # Create the Graph of Operations 48 | gop = operations.GraphOfOperations() 49 | gop.append_operation(operations.Generate()) 50 | gop.append_operation(operations.Score(scoring_function=utils.num_errors)) 51 | gop.append_operation(operations.GroundTruth(utils.test_sorting)) 52 | 53 | # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key) 54 | lm = language_models.ChatGPT("config.json", model_name="chatgpt") 55 | 56 | # Create the Controller 57 | ctrl = controller.Controller( 58 | lm, 59 | gop, 60 | SortingPrompter(), 61 | SortingParser(), 62 | # The following dictionary is used to configure the initial thought state 63 | { 64 | "original": to_be_sorted, 65 | "current": "", 66 | "method": "cot" 67 | } 68 | ) 69 | 70 | # Run the Controller and generate the output graph 71 | ctrl.run() 72 | ctrl.output_graph("output_cot.json") 73 | ``` 74 | 75 | To run the more sophisticated GoT approach, you can use the following code snippet. 
76 | 77 | ```python 78 | from examples.sorting.sorting_032 import SortingPrompter, SortingParser, got, utils 79 | from graph_of_thoughts import controller, language_models, operations 80 | 81 | # Problem input 82 | 83 | to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]" 84 | 85 | # Retrieve the Graph of Operations 86 | gop = got() 87 | 88 | # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key) 89 | lm = language_models.ChatGPT("config.json", model_name="chatgpt") 90 | 91 | # Create the Controller 92 | ctrl = controller.Controller( 93 | lm, 94 | gop, 95 | SortingPrompter(), 96 | SortingParser(), 97 | # The following dictionary is used to configure the initial thought state 98 | { 99 | "original": to_be_sorted, 100 | "current": "", 101 | "phase": 0, 102 | "method": "got" 103 | } 104 | ) 105 | 106 | # Run the Controller and generate the output graph 107 | ctrl.run() 108 | ctrl.output_graph("output_got.json") 109 | ``` 110 | You can compare the two results by inspecting the output graphs `output_cot.json` and `output_got.json`. 111 | The final thought states' scores indicate the number of errors in the sorted list. 112 | 113 | ## Documentation 114 | The paper gives a high-level overview of the framework and its components. 115 | In order to understand the framework in more detail, you can read the documentation of the individual modules. 116 | Especially the [Controller](graph_of_thoughts/controller/README.md) and [Operations](graph_of_thoughts/operations/README.md) modules are important for understanding how to make the most out of the framework. 117 | We took extra care to fully document the code, so that you can easily understand how it works and how to extend it. 118 | 119 | ## Examples 120 | 121 | The [examples](examples) directory contains several examples of problems that can be solved using the framework, including the ones presented in the paper. 122 | It is a great starting point for learning how to use the framework to solve real problems. 123 | Each example contains a `README.md` file with instructions on how to run it and play with it. The code is fully documented and should be easy to follow. 124 | You can also run the examples straight from the main directory. Note that the results will be stored in the respective examples sub-directory. 125 | 126 | Try for instance: 127 | ```bash 128 | python -m examples.sorting.sorting_032 129 | python -m examples.keyword_counting.keyword_counting 130 | ``` 131 | ## Paper Results 132 | 133 | You can run the experiments from the paper by following the instructions in the [examples](examples) directory. 134 | However, if you just want to inspect and replot the results, you can use the [paper](paper) directory. 135 | 136 | ## Citations 137 | 138 | If you find this repository valuable, please give it a star! 139 | Got any questions or feedback? Feel free to reach out to [nils.blach@inf.ethz.ch](mailto:nils.blach@inf.ethz.ch) or open an issue. 140 | Using this in your work? 
Please reference us using the provided citation: 141 | 142 | ```bibtex 143 | @misc{besta2023got, 144 | title = {{Graph of Thoughts: Solving Elaborate Problems with Large Language Models}}, 145 | author = {Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Micha{\l} and Niewiadomski, Hubert and Nyczyk, Piotr and Hoefler, Torsten}, 146 | year = 2023, 147 | eprinttype = {arXiv}, 148 | eprint = {2308.09687} 149 | } 150 | ``` 151 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/__init__.py -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/controller/README.md: -------------------------------------------------------------------------------- 1 | # Controller 2 | 3 | The Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts. 4 | GoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). 5 | 6 | In order for a GoO to be executed, an instance of Large Language Model (LLM) must be supplied to the controller (along with other required objects). 7 | Please refer to the [Language Models](../language_models/README.md) section for more information about LLMs. 8 | 9 | The following section describes how to instantiate the Controller to run a defined GoO. 10 | 11 | ## Controller Instantiation 12 | - Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately. 13 | - Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations. 14 | ``` 15 | lm = ...create 16 | graph_of_operations = ...create 17 | 18 | executor = controller.Controller( 19 | lm, 20 | graph_of_operations, 21 | , 22 | , 23 | , 24 | ) 25 | executor.run() 26 | executor.output_graph("path/to/output.json") 27 | ``` 28 | - After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost. 
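As a minimal, hypothetical sketch of the pieces above (the names `EchoPrompter`, `EchoParser`, and the `question` state key are illustrative, not part of the package), the following wires a single `Generate` operation to ChatGPT. It assumes a filled-in `config.json` as described in the [Language Models](../language_models/README.md) section, and it stubs out the prompt types this one-operation graph never uses.

```python
from typing import Dict, List

from graph_of_thoughts import controller, language_models, operations
from graph_of_thoughts.parser import Parser
from graph_of_thoughts.prompter import Prompter


class EchoPrompter(Prompter):
    def generate_prompt(self, num_branches: int, **kwargs) -> str:
        # The thought state is unpacked into kwargs, so the initial state
        # passed to the Controller below arrives here as `question`.
        return f"Answer concisely: {kwargs['question']}"

    # Unused by this graph: it never aggregates, improves, validates, or scores.
    def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        raise NotImplementedError

    def improve_prompt(self, **kwargs) -> str:
        raise NotImplementedError

    def validation_prompt(self, **kwargs) -> str:
        raise NotImplementedError

    def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        raise NotImplementedError


class EchoParser(Parser):
    def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:
        # One new thought state per LLM response text.
        return [{**state, "answer": text} for text in texts]

    # Unused by this graph, as above.
    def parse_aggregation_answer(self, states, texts):
        raise NotImplementedError

    def parse_improve_answer(self, state, texts):
        raise NotImplementedError

    def parse_validation_answer(self, state, texts):
        raise NotImplementedError

    def parse_score_answer(self, states, texts):
        raise NotImplementedError


gop = operations.GraphOfOperations()
gop.append_operation(operations.Generate())

lm = language_models.ChatGPT("config.json", model_name="chatgpt")

executor = controller.Controller(
    lm, gop, EchoPrompter(), EchoParser(), {"question": "What is a graph?"}
)
executor.run()
executor.output_graph("output.json")
```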
29 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/language_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .abstract_language_model import AbstractLanguageModel 2 | from .chatgpt import ChatGPT 3 | from .llamachat_hf import Llama2HF 4 | from .azuregpt import AzureGPT -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/language_models/abstract_language_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main author: Nils Blach 8 | 9 | from abc import ABC, abstractmethod 10 | from typing import List, Dict, Union, Any 11 | import json 12 | import os 13 | import logging 14 | 15 | 16 | class AbstractLanguageModel(ABC): 17 | """ 18 | Abstract base class that defines the interface for all language models. 19 | """ 20 | 21 | def __init__( 22 | self, config_path: str = "", model_name: str = "", cache: bool = False 23 | ) -> None: 24 | """ 25 | Initialize the AbstractLanguageModel instance with configuration, model details, and caching options. 26 | 27 | :param config_path: Path to the config file. Defaults to "". 28 | :type config_path: str 29 | :param model_name: Name of the language model. Defaults to "". 30 | :type model_name: str 31 | :param cache: Flag to determine whether to cache responses. Defaults to False. 32 | :type cache: bool 33 | """ 34 | self.logger = logging.getLogger(self.__class__.__name__) 35 | self.config: Dict = None 36 | self.model_name: str = model_name 37 | self.cache = cache 38 | if self.cache: 39 | self.respone_cache: Dict[str, List[Any]] = {} 40 | self.load_config(config_path) 41 | self.prompt_tokens: int = 0 42 | self.completion_tokens: int = 0 43 | self.cost: float = 0.0 44 | 45 | def load_config(self, path: str) -> None: 46 | """ 47 | Load configuration from a specified path. 48 | 49 | :param path: Path to the config file. If an empty path provided, 50 | default is `config.json` in the current directory. 51 | :type path: str 52 | """ 53 | if path == "": 54 | current_dir = os.path.dirname(os.path.abspath(__file__)) 55 | path = os.path.join(current_dir, "config.json") 56 | 57 | with open(path, "r") as f: 58 | self.config = json.load(f) 59 | 60 | self.logger.debug(f"Loaded config from {path} for {self.model_name}") 61 | 62 | def clear_cache(self) -> None: 63 | """ 64 | Clear the response cache. 65 | """ 66 | self.respone_cache.clear() 67 | 68 | @abstractmethod 69 | def query(self, query: str, num_responses: int = 1) -> Any: 70 | """ 71 | Abstract method to query the language model. 72 | 73 | :param query: The query to be posed to the language model. 74 | :type query: str 75 | :param num_responses: The number of desired responses. 76 | :type num_responses: int 77 | :return: The language model's response(s). 
78 | :rtype: Any 79 | """ 80 | pass 81 | 82 | @abstractmethod 83 | def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]: 84 | """ 85 | Abstract method to extract response texts from the language model's response(s). 86 | 87 | :param query_responses: The responses returned from the language model. 88 | :type query_responses: Union[List[Any], Any] 89 | :return: List of textual responses. 90 | :rtype: List[str] 91 | """ 92 | pass 93 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/language_models/config_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "chatgpt" : { 3 | "model_id": "gpt-3.5-turbo", 4 | "prompt_token_cost": 0.0015, 5 | "response_token_cost": 0.002, 6 | "temperature": 1.0, 7 | "max_tokens": 1536, 8 | "stop": null, 9 | "organization": "", 10 | "api_key": "" 11 | }, 12 | "chatgpt4" : { 13 | "model_id": "gpt-4", 14 | "prompt_token_cost": 0.03, 15 | "response_token_cost": 0.06, 16 | "temperature": 1.0, 17 | "max_tokens": 4096, 18 | "stop": null, 19 | "organization": "", 20 | "api_key": "" 21 | }, 22 | "llama7b-hf" : { 23 | "model_id": "Llama-2-7b-chat-hf", 24 | "cache_dir": "/llama", 25 | "prompt_token_cost": 0.0, 26 | "response_token_cost": 0.0, 27 | "temperature": 0.6, 28 | "top_k": 10, 29 | "max_tokens": 4096 30 | }, 31 | "llama13b-hf" : { 32 | "model_id": "Llama-2-13b-chat-hf", 33 | "cache_dir": "/llama", 34 | "prompt_token_cost": 0.0, 35 | "response_token_cost": 0.0, 36 | "temperature": 0.6, 37 | "top_k": 10, 38 | "max_tokens": 4096 39 | }, 40 | "llama70b-hf" : { 41 | "model_id": "Llama-2-70b-chat-hf", 42 | "cache_dir": "/llama", 43 | "prompt_token_cost": 0.0, 44 | "response_token_cost": 0.0, 45 | "temperature": 0.6, 46 | "top_k": 10, 47 | "max_tokens": 4096 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/language_models/llamachat_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main author: Ales Kubicek 8 | 9 | import os 10 | import torch 11 | from typing import List, Dict, Union 12 | from .abstract_language_model import AbstractLanguageModel 13 | 14 | 15 | class Llama2HF(AbstractLanguageModel): 16 | """ 17 | An interface to use LLaMA 2 models through the HuggingFace library. 18 | """ 19 | 20 | def __init__( 21 | self, config_path: str = "", model_name: str = "llama7b-hf", cache: bool = False 22 | ) -> None: 23 | """ 24 | Initialize an instance of the Llama2HF class with configuration, model details, and caching options. 25 | 26 | :param config_path: Path to the configuration file. Defaults to an empty string. 27 | :type config_path: str 28 | :param model_name: Specifies the name of the LLaMA model variant. Defaults to "llama7b-hf". 29 | Used to select the correct configuration. 30 | :type model_name: str 31 | :param cache: Flag to determine whether to cache responses. Defaults to False. 32 | :type cache: bool 33 | """ 34 | super().__init__(config_path, model_name, cache) 35 | self.config: Dict = self.config[model_name] 36 | # Detailed id of the used model. 37 | self.model_id: str = self.config["model_id"] 38 | # Costs for 1000 tokens. 
39 | self.prompt_token_cost: float = self.config["prompt_token_cost"] 40 | self.response_token_cost: float = self.config["response_token_cost"] 41 | # The temperature is defined as the randomness of the model's output. 42 | self.temperature: float = self.config["temperature"] 43 | # Top K sampling. 44 | self.top_k: int = self.config["top_k"] 45 | # The maximum number of tokens to generate in the chat completion. 46 | self.max_tokens: int = self.config["max_tokens"] 47 | 48 | # Important: must be done before importing transformers 49 | os.environ["TRANSFORMERS_CACHE"] = self.config["cache_dir"] 50 | import transformers 51 | 52 | hf_model_id = f"meta-llama/{self.model_id}" 53 | model_config = transformers.AutoConfig.from_pretrained(hf_model_id) 54 | bnb_config = transformers.BitsAndBytesConfig( 55 | load_in_4bit=True, 56 | bnb_4bit_quant_type="nf4", 57 | bnb_4bit_use_double_quant=True, 58 | bnb_4bit_compute_dtype=torch.bfloat16, 59 | ) 60 | 61 | self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id) 62 | self.model = transformers.AutoModelForCausalLM.from_pretrained( 63 | hf_model_id, 64 | trust_remote_code=True, 65 | config=model_config, 66 | quantization_config=bnb_config, 67 | device_map="auto", 68 | ) 69 | self.model.eval() 70 | torch.no_grad() 71 | 72 | self.generate_text = transformers.pipeline( 73 | model=self.model, tokenizer=self.tokenizer, task="text-generation" 74 | ) 75 | 76 | def query(self, query: str, num_responses: int = 1) -> List[Dict]: 77 | """ 78 | Query the LLaMA 2 model for responses. 79 | 80 | :param query: The query to be posed to the language model. 81 | :type query: str 82 | :param num_responses: Number of desired responses, default is 1. 83 | :type num_responses: int 84 | :return: Response(s) from the LLaMA 2 model. 85 | :rtype: List[Dict] 86 | """ 87 | if self.cache and query in self.respone_cache: 88 | return self.respone_cache[query] 89 | sequences = [] 90 | query = f"<>You are a helpful assistant. Always follow the intstructions precisely and output the response exactly in the requested format.<>\n\n[INST] {query} [/INST]" 91 | for _ in range(num_responses): 92 | sequences.extend( 93 | self.generate_text( 94 | query, 95 | do_sample=True, 96 | top_k=self.top_k, 97 | num_return_sequences=1, 98 | eos_token_id=self.tokenizer.eos_token_id, 99 | max_length=self.max_tokens, 100 | ) 101 | ) 102 | response = [ 103 | {"generated_text": sequence["generated_text"][len(query) :].strip()} 104 | for sequence in sequences 105 | ] 106 | if self.cache: 107 | self.respone_cache[query] = response 108 | return response 109 | 110 | def get_response_texts(self, query_responses: List[Dict]) -> List[str]: 111 | """ 112 | Extract the response texts from the query response. 113 | 114 | :param query_responses: The response list of dictionaries generated from the `query` method. 115 | :type query_responses: List[Dict] 116 | :return: List of response strings. 117 | :rtype: List[str] 118 | """ 119 | return [query_response["generated_text"] for query_response in query_responses] 120 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/operations/README.md: -------------------------------------------------------------------------------- 1 | # Operations 2 | 3 | The Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class. 
4 | Operations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model. 5 | The [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State. 6 | 7 | ## Graph of Operations 8 | The [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here’s how you can construct and work with the Graph of Operations: 9 | 10 | ### Initialization 11 | Creating a new instance of GraphOfOperations: 12 | 13 | ```python 14 | from graph_of_thoughts.operations import GraphOfOperations 15 | 16 | graph = GraphOfOperations() 17 | ``` 18 | 19 | Upon initialization, the graph will be empty with no operations, roots, or leaves. 20 | 21 | ### Adding Operations 22 | **Append Operation:** You can append operations to the end of the graph using the append_operation method. This ensures that the operation becomes a successor to all current leaf operations in the graph. 23 | ```python 24 | from graph_of_thoughts.operations import Generate 25 | 26 | operationA = Generate() 27 | graph.append_operation(operationA) 28 | ``` 29 | **Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method. 30 | ```python 31 | operationB = Generate() 32 | operationB.predecessors.append(operationA) 33 | graph.add_operation(operationB) 34 | ``` 35 | Remember to set up the predecessors (and optionally successors) for your operation before adding it to the graph. 36 | 37 | ## Available Operations 38 | The following operations are available in the module: 39 | 40 | **Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function. 41 | - num_samples (Optional): The number of samples to use for scoring, defaults to 1. 42 | - combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False. 43 | - scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought. 44 | 45 | **ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it. 46 | - num_samples (Optional): The number of samples to use for validation, defaults to 1. 47 | - improve (Optional): Whether to improve the thought if it is invalid, defaults to True. 48 | - num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3. 49 | - validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid. 50 | 51 | **Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py). 52 | - num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). Defaults to 1. 53 | - num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1. 54 | 55 | **Improve:** Improve the current thoughts. 
This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them. 56 | 57 | **Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought. 58 | - num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). Defaults to 1. 59 | 60 | **KeepBestN:** Keep the best N thoughts from the preceding thoughts. Assumes that the thoughts are already scored and throws an error if they are not. 61 | - n: The number of thoughts to keep in order of score. 62 | - higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True. 63 | 64 | **KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated, if not, it will be considered valid. 65 | 66 | **Selector:** Select a number of thoughts from the preceding thoughts using a selection function. This is useful if subsequent operations should only be applied to a subset of the preceding thoughts. 67 | - selector: A function that takes in a list of thoughts and returns a list of thoughts to select. 68 | 69 | **GroundTruth**: Evaluates if the preceding/current thoughts solve the problem and equal the ground truth. This operation is useful for terminating the graph and checking if the final thoughts solve the problem, but is only useful if the ground truth is known. 70 | - ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem. 71 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/operations/__init__.py: -------------------------------------------------------------------------------- 1 | from .thought import Thought 2 | from .graph_of_operations import GraphOfOperations 3 | from .operations import ( 4 | Operation, 5 | Score, 6 | ValidateAndImprove, 7 | Generate, 8 | Aggregate, 9 | KeepBestN, 10 | KeepValid, 11 | Selector, 12 | GroundTruth, 13 | Improve, 14 | ) 15 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/operations/graph_of_operations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main author: Nils Blach 8 | 9 | from __future__ import annotations 10 | from typing import List 11 | 12 | from graph_of_thoughts.operations.operations import Operation 13 | 14 | 15 | class GraphOfOperations: 16 | """ 17 | Represents the Graph of Operations, which prescribes the execution plan of thought operations. 18 | """ 19 | 20 | def __init__(self) -> None: 21 | """ 22 | Initializes a new Graph of Operations instance with empty operations, roots, and leaves. 23 | The roots are the entry points in the graph with no predecessors. 24 | The leaves are the exit points in the graph with no successors. 
25 | """ 26 | self.operations: List[Operation] = [] 27 | self.roots: List[Operation] = [] 28 | self.leaves: List[Operation] = [] 29 | 30 | def append_operation(self, operation: Operation) -> None: 31 | """ 32 | Appends an operation to all leaves in the graph and updates the relationships. 33 | 34 | :param operation: The operation to append. 35 | :type operation: Operation 36 | """ 37 | self.operations.append(operation) 38 | 39 | if len(self.roots) == 0: 40 | self.roots = [operation] 41 | else: 42 | for leave in self.leaves: 43 | leave.add_successor(operation) 44 | 45 | self.leaves = [operation] 46 | 47 | def add_operation(self, operation: Operation) -> None: 48 | """ 49 | Add an operation to the graph considering its predecessors and successors. 50 | Adjust roots and leaves based on the added operation's position within the graph. 51 | 52 | :param operation: The operation to add. 53 | :type operation: Operation 54 | """ 55 | self.operations.append(operation) 56 | if len(self.roots) == 0: 57 | self.roots = [operation] 58 | self.leaves = [operation] 59 | assert ( 60 | len(operation.predecessors) == 0 61 | ), "First operation should have no predecessors" 62 | else: 63 | if len(operation.predecessors) == 0: 64 | self.roots.append(operation) 65 | for predecessor in operation.predecessors: 66 | if predecessor in self.leaves: 67 | self.leaves.remove(predecessor) 68 | if len(operation.successors) == 0: 69 | self.leaves.append(operation) 70 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/operations/thought.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main author: Nils Blach 8 | 9 | from __future__ import annotations 10 | import logging 11 | from typing import Iterator, Dict, Optional 12 | import itertools 13 | 14 | 15 | class Thought: 16 | """ 17 | Represents an LLM thought with its state, constructed by the parser, and various flags. 18 | """ 19 | 20 | _ids: Iterator[int] = itertools.count(0) 21 | 22 | def __init__(self, state: Optional[Dict] = None) -> None: 23 | """ 24 | Initializes a new Thought instance with a state and various default flags. 25 | 26 | :param state: The state of the thought. Defaults to None. 27 | :type state: Optional[Dict] 28 | """ 29 | self.logger: logging.Logger = logging.getLogger(self.__class__.__name__) 30 | self.id: int = next(Thought._ids) 31 | self.state: Dict = state 32 | self._score: float = 0.0 33 | self._valid: bool = False 34 | self._solved: bool = False 35 | self.scored: bool = False 36 | self.validated: bool = False 37 | self.compared_to_ground_truth: bool = False 38 | 39 | @staticmethod 40 | def from_thought(thought: Thought) -> Thought: 41 | """ 42 | Creates a new thought from an existing one. 43 | 44 | :param thought: An instance of a Thought to clone. 45 | :return: A new Thought instance with properties copied from the input thought. 
46 | """ 47 | new_thought = Thought(thought.state) 48 | new_thought.score = thought.score 49 | new_thought.valid = thought.valid 50 | new_thought.solved = thought.solved 51 | new_thought.scored = thought.scored 52 | new_thought.validated = thought.validated 53 | new_thought.compared_to_ground_truth = thought.compared_to_ground_truth 54 | return new_thought 55 | 56 | @property 57 | def valid(self) -> bool: 58 | """ 59 | Returns the validity of the thought. 60 | 61 | :return: The validity of the thought. 62 | :rtype: bool 63 | """ 64 | return self._valid 65 | 66 | @valid.setter 67 | def valid(self, valid: bool) -> None: 68 | """ 69 | Sets the validity of the thought and the validated flag. 70 | 71 | :param valid: The validity of the thought. 72 | :type valid: bool 73 | """ 74 | self.validated = True 75 | self._valid = valid 76 | 77 | @property 78 | def score(self) -> float: 79 | """ 80 | Returns the score of the thought. 81 | 82 | :return: The score of the thought. 83 | :rtype: float 84 | """ 85 | return self._score 86 | 87 | @score.setter 88 | def score(self, new_score: float) -> None: 89 | """ 90 | Sets the score of the thought and the scored flag. 91 | 92 | :param new_score: The score of the thought. 93 | :type new_score: float 94 | """ 95 | self.scored = True 96 | self._score = new_score 97 | 98 | @property 99 | def solved(self) -> bool: 100 | """ 101 | Returns the solved flag of the thought. 102 | 103 | :return: The solved flag of the thought. 104 | :rtype: bool 105 | """ 106 | return self._solved 107 | 108 | @solved.setter 109 | def solved(self, solved: bool) -> None: 110 | """ 111 | Sets the solved flag of the thought and the compared_to_ground_truth flag. 112 | 113 | :param solved: Whether the thought contains a solution to the problem. 114 | :type solved: bool 115 | """ 116 | self.compared_to_ground_truth = True 117 | self._solved = solved 118 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import Parser 2 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/parser/parser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main authors: Robert Gerstenberger, Nils Blach 8 | 9 | from __future__ import annotations 10 | from abc import ABC, abstractmethod 11 | from typing import Dict, List, Union 12 | 13 | 14 | class Parser(ABC): 15 | """ 16 | Abstract base class that defines the interface for all parsers. 17 | Parsers are used to parse the responses from the language models. 18 | """ 19 | 20 | @abstractmethod 21 | def parse_aggregation_answer( 22 | self, states: List[Dict], texts: List[str] 23 | ) -> Union[Dict, List[Dict]]: 24 | """ 25 | Parse the response from the language model for a aggregation prompt. 26 | 27 | :param states: The thought states used to generate the prompt. 28 | :type states: List[Dict] 29 | :param texts: The responses to the prompt from the language model. 30 | :type texts: List[str] 31 | :return: The new thought states after parsing the response from the language model. 
32 | :rtype: Union[Dict, List[Dict]] 33 | """ 34 | pass 35 | 36 | @abstractmethod 37 | def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict: 38 | """ 39 | Parse the response from the language model for an improve prompt. 40 | 41 | :param state: The thought state used to generate the prompt. 42 | :type state: Dict 43 | :param texts: The responses to the prompt from the language model. 44 | :type texts: List[str] 45 | :return: The new thought state after parsing the response from the language model. 46 | :rtype: Dict 47 | """ 48 | pass 49 | 50 | @abstractmethod 51 | def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]: 52 | """ 53 | Parse the response from the language model for a generate prompt. 54 | 55 | :param state: The thought state used to generate the prompt. 56 | :type state: Dict 57 | :param texts: The responses to the prompt from the language model. 58 | :type texts: List[str] 59 | :return: The new thought states after parsing the response from the language model. 60 | :rtype: List[Dict] 61 | """ 62 | pass 63 | 64 | @abstractmethod 65 | def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool: 66 | """ 67 | Parse the response from the language model for a validation prompt. 68 | 69 | :param state: The thought state used to generate the prompt. 70 | :type state: Dict 71 | :param texts: The responses to the prompt from the language model. 72 | :type texts: List[str] 73 | :return: Whether the thought state is valid or not. 74 | :rtype: bool 75 | """ 76 | pass 77 | 78 | @abstractmethod 79 | def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]: 80 | """ 81 | Parse the response from the language model for a score prompt. 82 | 83 | :param states: The thought states used to generate the prompt. 84 | :type states: List[Dict] 85 | :param texts: The responses to the prompt from the language model. 86 | :type texts: List[str] 87 | :return: The scores for the thought states. 88 | :rtype: List[float] 89 | """ 90 | pass 91 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/prompter/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompter import Prompter 2 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/prompter/prompter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 ETH Zurich. 2 | # All rights reserved. 3 | # 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | # 7 | # main authors: Robert Gerstenberger, Nils Blach 8 | 9 | from __future__ import annotations 10 | from abc import ABC, abstractmethod 11 | from typing import Dict, List 12 | 13 | 14 | class Prompter(ABC): 15 | """ 16 | Abstract base class that defines the interface for all prompters. 17 | Prompters are used to generate the prompts for the language models. 18 | """ 19 | 20 | @abstractmethod 21 | def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str: 22 | """ 23 | Generate a aggregation prompt for the language model. 24 | 25 | :param state_dicts: The thought states that should be aggregated. 26 | :type state_dicts: List[Dict] 27 | :param kwargs: Additional keyword arguments. 28 | :return: The aggregation prompt. 
29 | :rtype: str 30 | """ 31 | pass 32 | 33 | @abstractmethod 34 | def improve_prompt(self, **kwargs) -> str: 35 | """ 36 | Generate an improve prompt for the language model. 37 | The thought state is unpacked to allow for additional keyword arguments 38 | and concrete implementations to specify required arguments explicitly. 39 | 40 | :param kwargs: Additional keyword arguments. 41 | :return: The improve prompt. 42 | :rtype: str 43 | """ 44 | pass 45 | 46 | @abstractmethod 47 | def generate_prompt(self, num_branches: int, **kwargs) -> str: 48 | """ 49 | Generate a generate prompt for the language model. 50 | The thought state is unpacked to allow for additional keyword arguments 51 | and concrete implementations to specify required arguments explicitly. 52 | 53 | :param num_branches: The number of responses the prompt should ask the LM to generate. 54 | :type num_branches: int 55 | :param kwargs: Additional keyword arguments. 56 | :return: The generate prompt. 57 | :rtype: str 58 | """ 59 | pass 60 | 61 | @abstractmethod 62 | def validation_prompt(self, **kwargs) -> str: 63 | """ 64 | Generate a validation prompt for the language model. 65 | The thought state is unpacked to allow for additional keyword arguments 66 | and concrete implementations to specify required arguments explicitly. 67 | 68 | :param kwargs: Additional keyword arguments. 69 | :return: The validation prompt. 70 | :rtype: str 71 | """ 72 | pass 73 | 74 | @abstractmethod 75 | def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str: 76 | """ 77 | Generate a score prompt for the language model. 78 | 79 | :param state_dicts: The thought states that should be scored, 80 | if more than one, they should be scored together. 81 | :type state_dicts: List[Dict] 82 | :param kwargs: Additional keyword arguments. 83 | :return: The score prompt. 
84 | :rtype: str 85 | """ 86 | pass 87 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/vector_db/__init__.py: -------------------------------------------------------------------------------- 1 | from .azure_embedding import * 2 | from .weaviate import * 3 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/graph_of_thoughts/vector_db/azure_embedding.py: -------------------------------------------------------------------------------- 1 | from openai import AzureOpenAI 2 | 3 | def get_embedding(api_key,api_base,api_version,deployment_name, text_to_embed): 4 | client = AzureOpenAI( 5 | api_key=api_key, 6 | api_version=api_version, 7 | azure_endpoint = api_base 8 | ) 9 | response = client.embeddings.create( 10 | model=deployment_name, 11 | input=text_to_embed 12 | ) 13 | 14 | return response.data[0].embedding -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/graph_of_thoughts/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "graph_of_thoughts" 7 | version = "0.0.3" 8 | authors = [ 9 | { name="Maciej Besta", email="maciej.besta@inf.ethz.ch" }, 10 | { name="Nils Blach", email="nils.blach@inf.ethz.ch" }, 11 | { name="Ales Kubicek", email="akubicek@student.ethz.ch" }, 12 | { name="Robert Gerstenberger", email="gerstenberger.robert@gmail.com" }, 13 | ] 14 | description = "Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models" 15 | readme = "README.md" 16 | license = {file = "LICENSE"} 17 | requires-python = ">=3.8" 18 | classifiers = [ 19 | "Programming Language :: Python :: 3", 20 | "Operating System :: OS Independent", 21 | ] 22 | dependencies = [ 23 | "backoff>=2.2.1,<3.0.0", 24 | "openai>=1.0.0,<2.0.0", 25 | "matplotlib>=3.7.1,<4.0.0", 26 | "numpy>=1.24.3,<2.0.0", 27 | "pandas>=2.0.3,<3.0.0", 28 | "sympy>=1.12,<2.0", 29 | "torch>=2.0.1,<3.0.0", 30 | "transformers>=4.31.0,<5.0.0", 31 | "accelerate>=0.21.0,<1.0.0", 32 | "bitsandbytes>=0.41.0,<1.0.0", 33 | "scipy>=1.10.1,<2.0.0", 34 | "weaviate-client>=4.4.4" 35 | ] 36 | 37 | [project.urls] 38 | Homepage = "https://github.com/spcl/graph-of-thoughts" 39 | 40 | [project.scripts] 41 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Backend/routes.rest: -------------------------------------------------------------------------------- 1 | @host = localhost 2 | @port = 5050 3 | @base_url = http://{{host}}:{{port}} 4 | 5 | POST {{base_url}}/api/v1/configs 6 | Content-Type: application/json 7 | 8 | { 9 | "service": "openai", 10 | "key": "apikey" 11 | } 12 | 13 | ### 14 | GET {{base_url}} 15 | 16 | ### 17 | GET {{base_url}}/openai/v1/models 18 | 19 | ### 20 | POST {{base_url}}/openai/v1/chat/completions 21 | Content-Type: application/json 22 | 23 | { 24 | "model" : "gpt-3.5-turbo", 25 | "messages" : [{"role": "user", "content": "hello there"}] 26 | } 27 | 28 | ### 29 | POST {{base_url}}/execapi/v1/executions 30 | Content-Type: application/json 31 | 32 | { 33 | "src_code": "with open('test.txt', 'w') as f:\n f.write('hello there')\n f.close()" 34 | } 35 | 36 | ### 37 | GET {{base_url}}/execapi/v1/executions/13 38 | 39 | ### 40 | GET {{base_url}}/execapi/v1/packages 41 | 42 | 
### 43 | 44 | POST {{base_url}}/execapi/v1/packages 45 | Content-Type: application/json 46 | 47 | { 48 | "packages": ["streamline"] 49 | } 50 | 51 | ### 52 | 53 | POST {{base_url}}/chatapi/v1/chats 54 | Content-Type: application/json 55 | 56 | { 57 | "title": "mytestchat" 58 | } 59 | 60 | ### 61 | 62 | GET {{base_url}}/chatapi/v1/chats/1 63 | 64 | ### 65 | DELETE {{base_url}}/chatapi/v1/chats/1 66 | 67 | ### 68 | 69 | PATCH {{base_url}}/chatapi/v1/chats/1 70 | Content-Type: application/json 71 | 72 | { 73 | "title": "mynewtitle" 74 | } 75 | 76 | ### 77 | 78 | POST {{base_url}}/chatapi/v1/chats/1/chatlogs 79 | Content-Type: application/json 80 | 81 | { 82 | "message": "hello there chat #4", 83 | "message_type": "message_type", 84 | "who": "user" 85 | } 86 | 87 | ### 88 | 89 | PATCH {{base_url}}/chatapi/v1/chats/4/chatlogs/7 90 | Content-Type: application/json 91 | 92 | { 93 | "message": "hello there 4 updated", 94 | "message_type": "message_type", 95 | "who": "user", 96 | "src_code": "print('hello world')" 97 | } 98 | 99 | ### 100 | 101 | GET {{base_url}}/chatapi/v1/chats/1/chatlogs 102 | 103 | 104 | # return all chat ids 105 | ### 106 | GET {{base_url}}/chatapi/v1/chats/chatids 107 | 108 | 109 | 110 | 111 | 112 | ### 113 | # /chattitle/ 114 | GET {{base_url}}/chatapi/v1/chattitle/1 115 | 116 | ### 117 | PATCH {{base_url}}/chatapi/v1/chattitle/1 118 | Content-Type: application/json 119 | 120 | { 121 | "title": "testtitle" 122 | } -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/.env.sample: -------------------------------------------------------------------------------- 1 | PORT=3000 2 | SKIP_PREFLIGHT_CHECK=true 3 | REACT_APP_API_URL=http://127.0.0.1 4 | REACT_APP_API_PORT=5050 -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | 25 | public/gotdata/* 26 | !public/gotdata/dataset.json.sample 27 | 28 | # Ignore specific files 29 | AISVGLogo.js 30 | ChatMessage.js 31 | FileUploadMultiple.js 32 | sample.js 33 | TestContent.js 34 | TestPage.js 35 | sample.json 36 | setupTests.js 37 | 38 | # Ignore entire directory 39 | hooks/ 40 | 41 | # not to ignore /dist 42 | !dist/ 43 | 44 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:lts-bookworm-slim 2 | 3 | WORKDIR /app/KRAGEN_Dashboard/Frontend 4 | 5 | COPY . 
/app 6 | 7 | RUN npm install 8 | 9 | EXPOSE 3000 10 | 11 | CMD ["npm", "start"] 12 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/README.md: -------------------------------------------------------------------------------- 1 | # KRAGEN Dashboard: Integrating Sigma.js with React for Enhanced Graph Visualization 2 | 3 | The KRAGEN Dashboard project is designed as a comprehensive application intended for practical use, leveraging the capabilities of sigma.js for graph visualization. It was initiated using Create React App, a widely recognized setup for creating React applications. To integrate sigma.js, a powerful library for graph drawing, with the React ecosystem, the project employs react-sigma-v2. This approach allows for the development of an interactive and visually appealing interface for representing complex data in a graph format, making it accessible and manageable within a React-based application framework. 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm install` 10 | 11 | Installs the dependencies needed to run the app. 12 | 13 | ### `npm start` 14 | 15 | Runs the app in the development mode.\ 16 | Open [http://localhost:5000](http://localhost:5000) to view it in the browser. 17 | 18 | The page will reload if you make edits.\ 19 | You will also see any lint errors in the console. 20 | 21 | ### `npm test` 22 | 23 | Launches the test runner in the interactive watch mode.\ 24 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 25 | 26 | ### `npm run build` 27 | 28 | Builds the app for production to the `build` folder.\ 29 | It correctly bundles React in production mode and optimizes the build for the best performance. 30 | 31 | The build is minified and the filenames include the hashes.\ 32 | Your app is ready to be deployed! 33 | 34 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 
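As a rough, illustrative sketch of the integration pattern described above (not a file from this repo, and assuming react-sigma-v2's `SigmaContainer`/`useLoadGraph` API and stylesheet path), a container component owns the sigma.js instance and exposes it through React context, while a child loads a graphology graph into it:

```tsx
import React, { FC, useEffect } from "react";
import Graph from "graphology";
import { SigmaContainer, useLoadGraph } from "react-sigma-v2";
import "react-sigma-v2/lib/react-sigma-v2.css";

// Loads a two-node demo graph into the sigma instance provided by the container.
const DemoGraph: FC = () => {
  const loadGraph = useLoadGraph();

  useEffect(() => {
    const graph = new Graph();
    graph.addNode("a", { x: 0, y: 0, size: 10, label: "Node A", color: "#c0392b" });
    graph.addNode("b", { x: 1, y: 1, size: 10, label: "Node B", color: "#2980b9" });
    graph.addEdge("a", "b");
    loadGraph(graph);
  }, [loadGraph]);

  return null;
};

// The container renders the canvas and shares its sigma instance via context.
export const App: FC = () => (
  <SigmaContainer style={{ height: "500px" }}>
    <DemoGraph />
  </SigmaContainer>
);
```

The components under `src/components/DisplayGraph` follow the same approach, using `useSigma` and `useRegisterEvents` to control an existing container's instance rather than owning it themselves.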
35 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sigma-demo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "homepage": "/demo", 6 | "dependencies": { 7 | "@emotion/react": "^11.11.3", 8 | "@emotion/styled": "^11.11.0", 9 | "@monaco-editor/react": "^4.6.0", 10 | "@mui/icons-material": "^5.15.6", 11 | "@mui/material": "^5.15.6", 12 | "@types/lodash": "^4.14.178", 13 | "@types/node": "^17.0.2", 14 | "@types/react": "^17.0.37", 15 | "@types/react-dom": "^17.0.11", 16 | "@xenova/transformers": "^2.14.0", 17 | "babel-loader": "8.2.3", 18 | "graphology": "^0.25.4", 19 | "graphology-layout-forceatlas2": "^0.8.1", 20 | "graphology-types": "^0.24.5", 21 | "lodash": "^4.17.21", 22 | "react": "^17.0.2", 23 | "react-animate-height": "^2.0.23", 24 | "react-dom": "^17.0.2", 25 | "react-icons": "^4.3.1", 26 | "react-resizable": "^3.0.5", 27 | "react-scripts": "^5.0.1", 28 | "react-sigma-v2": "^1.3.0", 29 | "sigma": "latest", 30 | "typescript": "^4.5.4" 31 | }, 32 | "scripts": { 33 | "start": "react-scripts start", 34 | "build": "react-scripts build", 35 | "test": "react-scripts test", 36 | "eject": "react-scripts eject" 37 | }, 38 | "eslintConfig": { 39 | "extends": [ 40 | "react-app", 41 | "react-app/jest" 42 | ], 43 | "rules": { 44 | "react-hooks/exhaustive-deps": "off" 45 | } 46 | }, 47 | "browserslist": { 48 | "production": [ 49 | ">0.2%", 50 | "not dead", 51 | "not op_mini all" 52 | ], 53 | "development": [ 54 | "last 1 chrome version", 55 | "last 1 firefox version", 56 | "last 1 safari version" 57 | ] 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/KRAGEN_Dashboard/Frontend/public/favicon.ico -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/answer.svg: -------------------------------------------------------------------------------- 1 | 2 | A 3 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/charttype.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/company.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 64 | 65 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/concept.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/field.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | 
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/generator.svg: -------------------------------------------------------------------------------- 1 | 2 | G 3 | 4 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/list.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/method.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/organization.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/person.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/question.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/selector.svg: -------------------------------------------------------------------------------- 1 | 2 | S 3 | 4 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/technology.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 60 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/tool.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 58 | 63 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/images/unknown.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 58 | 59 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 18 | kragen 19 | 20 | 21 | 22 |
23 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/ResizableDiv.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import { useContext } from "react"; 3 | import { AllContext } from "./context/AllContext"; 4 | 5 | const ResizableDiv = () => { 6 | const { gotQuestion, gotAnswer } = useContext(AllContext); 7 | const [size, setSize] = useState({ width: 500, height: 130 }); 8 | 9 | const handleDrag = (e) => { 10 | e.preventDefault(); 11 | const startX = e.clientX; 12 | const startY = e.clientY; 13 | const startWidth = size.width; 14 | const startHeight = size.height; 15 | 16 | const doDrag = (dragEvent) => { 17 | setSize({ 18 | width: startWidth + dragEvent.clientX - startX, 19 | height: startHeight + dragEvent.clientY - startY, 20 | }); 21 | }; 22 | 23 | const stopDrag = () => { 24 | document.removeEventListener("mousemove", doDrag); 25 | document.removeEventListener("mouseup", stopDrag); 26 | }; 27 | 28 | document.addEventListener("mousemove", doDrag); 29 | document.addEventListener("mouseup", stopDrag); 30 | }; 31 | 32 | return ( 33 |
{/* [lines 33–68 lost in extraction: an outer <div> whose inline style takes its width and height from the `size` state (lines 33–50), a "Question: {gotQuestion}" block and an "Answer: {gotAnswer}" block (lines 51–55), and a resize handle <div> with onMouseDown={handleDrag} (lines 56–68)] */}
69 | ); 70 | }; 71 | 72 | export default ResizableDiv; 73 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/context/AllContext.js: -------------------------------------------------------------------------------- 1 | import {createContext} from 'react'; 2 | 3 | export const AllContext = createContext(null); -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/context/ThemeContext.js: -------------------------------------------------------------------------------- 1 | import {createContext} from 'react'; 2 | 3 | export const ThemeContext = createContext(null); -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/context/chatLogContext.js: -------------------------------------------------------------------------------- 1 | import { createContext } from 'react'; 2 | 3 | export const chatLogContext = createContext(); 4 | 5 | export function chatLogProvider({children}){ 6 | 7 | 8 | {children} 9 | 10 | 11 | 12 | } -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/reportWebVitals.js: -------------------------------------------------------------------------------- 1 | const reportWebVitals = onPerfEntry => { 2 | if (onPerfEntry && onPerfEntry instanceof Function) { 3 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 4 | getCLS(onPerfEntry); 5 | getFID(onPerfEntry); 6 | getFCP(onPerfEntry); 7 | getLCP(onPerfEntry); 8 | getTTFB(onPerfEntry); 9 | }); 10 | } 11 | }; 12 | 13 | export default reportWebVitals; 14 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ChatGPTForKRAGENLoc/useChatData.js: -------------------------------------------------------------------------------- 1 | // useChatData.js 2 | import { useState, useEffect } from "react"; 3 | import { getChatMessageByExperimentId } from "../apiService"; 4 | function useChatData(experimentId) { 5 | const [data, setData] = useState(null); 6 | const [isLoading, setIsLoading] = useState(false); 7 | const [error, setError] = useState(null); 8 | 9 | useEffect(() => { 10 | const fetchData = async () => { 11 | setIsLoading(true); 12 | try { 13 | const result = await getChatMessageByExperimentId(experimentId); 14 | setData(result); 15 | } catch (error) { 16 | setError(error); 17 | } finally { 18 | setIsLoading(false); 19 | } 20 | }; 21 | 22 | if (experimentId) { 23 | fetchData(); 24 | } 25 | }, [experimentId]); 26 | 27 | return { data, isLoading, error }; 28 | } 29 | 30 | export default useChatData; 31 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/ClustersPanel.tsx: -------------------------------------------------------------------------------- 1 | import React, { FC, useEffect, useMemo, useState } from "react"; 2 | import { useSigma } from "react-sigma-v2"; 3 | import { sortBy, values, keyBy, mapValues } from "lodash"; 4 | import { MdGroupWork } from "react-icons/md"; 
import { AiOutlineCheckCircle, AiOutlineCloseCircle } from "react-icons/ai";

import { Cluster, FiltersState } from "./others/types";
import Panel from "./Panel";

const ClustersPanel: FC<{
  clusters: Cluster[];
  filters: FiltersState;
  toggleCluster: (cluster: string) => void;
  setClusters: (clusters: Record<string, boolean>) => void;
}> = ({ clusters, filters, toggleCluster, setClusters }) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();

  const nodesPerCluster = useMemo(() => {
    const index: Record<string, number> = {};
    graph.forEachNode((_, { cluster }) => (index[cluster] = (index[cluster] || 0) + 1));
    return index;
  }, []);

  const maxNodesPerCluster = useMemo(() => Math.max(...values(nodesPerCluster)), [nodesPerCluster]);
  const visibleClustersCount = useMemo(() => Object.keys(filters.clusters).length, [filters]);

  const [visibleNodesPerCluster, setVisibleNodesPerCluster] = useState<Record<string, number>>(nodesPerCluster);
  useEffect(() => {
    // To ensure the graphology instance has up-to-date "hidden" values for
    // nodes, we wait for next frame before reindexing. This won't matter in the
    // UX, because of the visible nodes bar width transition.
    requestAnimationFrame(() => {
      const index: Record<string, number> = {};
      graph.forEachNode((_, { cluster, hidden }) => !hidden && (index[cluster] = (index[cluster] || 0) + 1));
      setVisibleNodesPerCluster(index);
    });
  }, [filters]);

  const sortedClusters = useMemo(
    () => sortBy(clusters, (cluster) => -nodesPerCluster[cluster.key]),
    [clusters, nodesPerCluster],
  );

  return (
    <Panel
      initiallyDeployed
      title={
        <>
          <MdGroupWork className="text-muted" /> Clusters
          {visibleClustersCount < clusters.length ? (
            <span className="text-muted text-small">
              {" "}
              ({visibleClustersCount} / {clusters.length})
            </span>
          ) : (
            ""
          )}
        </>
      }
    >
      <p>
        <i className="text-muted">Click a cluster to show/hide related pages from the network.</i>
      </p>
      <p className="buttons">
        <button className="btn" onClick={() => setClusters(mapValues(keyBy(clusters, "key"), () => true))}>
          <AiOutlineCheckCircle /> Check all
        </button>{" "}
        <button className="btn" onClick={() => setClusters({})}>
          <AiOutlineCloseCircle /> Uncheck all
        </button>
      </p>
      <ul>
        {sortedClusters.map((cluster) => {
          const nodesCount = nodesPerCluster[cluster.key];
          const visibleNodesCount = visibleNodesPerCluster[cluster.key] || 0;
          return (
            <li
              className="caption-row"
              key={cluster.key}
              title={`${nodesCount} page${nodesCount > 1 ? "s" : ""}${
                visibleNodesCount !== nodesCount ? ` (only ${visibleNodesCount} visible)` : ""
              }`}
            >
              <input
                type="checkbox"
                checked={filters.clusters[cluster.key] || false}
                onChange={() => toggleCluster(cluster.key)}
                id={`cluster-${cluster.key}`}
              />
              <label htmlFor={`cluster-${cluster.key}`}>
                <span
                  className="circle"
                  style={{ background: cluster.color, borderColor: cluster.color }}
                />{" "}
                <div className="node-label">
                  <span>{cluster.clusterLabel}</span>
                  <div className="bar" style={{ width: (100 * nodesCount) / maxNodesPerCluster + "%" }}>
                    <div
                      className="inside-bar"
                      style={{
                        width: (100 * visibleNodesCount) / nodesCount + "%",
                      }}
                    />
                  </div>
                </div>
              </label>
            </li>
          );
        })}
      </ul>
    </Panel>
  );
};

export default ClustersPanel;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/GraphDataController.tsx: --------------------------------------------------------------------------------
import { useSigma } from "react-sigma-v2";
import { FC, useEffect } from "react";
import { keyBy, omit } from "lodash";

import { Dataset, FiltersState } from "./others/types";

const GraphDataController: FC<{ dataset: Dataset; filters: FiltersState }> = ({
  dataset,
  filters,
  children,
}) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();

  /**
   * Feed graphology with the new dataset:
   */
  useEffect(() => {
    if (!graph || !dataset) return;

    const clusters = keyBy(dataset.clusters, "key");
    const tags = keyBy(dataset.tags, "key");

    dataset.nodes.forEach((node) =>
      graph.addNode(node.key, {
        ...node,
        ...omit(clusters[node.cluster], "key"),
        image: `${process.env.PUBLIC_URL}/images/${tags[node.tag].image}`,
      })
    );
    dataset.edges.forEach(([source, target, edgename]) =>
      graph.addEdge(source, target, {
        size: 5,
        label: "Edge:" + " " + edgename,
      })
    );

    // Use degrees as node sizes:
    const sizenodes = graph
      .nodes()
      .map((node) => graph.getNodeAttribute(node, "sizenode"));
    const minDegree = Math.min(...sizenodes);
    const maxDegree = Math.max(...sizenodes);
    const MIN_NODE_SIZE = 3;
    const MAX_NODE_SIZE = 30;
    graph.forEachNode((node) =>
      graph.setNodeAttribute(
        node,
        "size",
        // ((graph.getNodeAttribute(node, "score") - minDegree) /
        //   (maxDegree - minDegree)) *
        //   (MAX_NODE_SIZE - MIN_NODE_SIZE) +
        //   MIN_NODE_SIZE
        graph.getNodeAttribute(node, "sizenode") / 2
      )
    );

    return () => graph.clear();
  }, [graph, dataset]);

  /**
   * Apply filters to graphology:
   */
  useEffect(() => {
    const { clusters, tags } = filters;
    graph.forEachNode((node, { cluster, tag }) =>
      graph.setNodeAttribute(node, "hidden", !clusters[cluster] || !tags[tag])
    );
  }, [graph, filters]);

  return <>{children}</>;
};

export default GraphDataController;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/GraphEventsController.tsx: --------------------------------------------------------------------------------
import { useRegisterEvents, useSigma } from "react-sigma-v2";
import { FC, useEffect } from "react";

function getMouseLayer() {
  return document.querySelector(".sigma-mouse");
}

const GraphEventsController: FC<{
  setHoveredNode: (node: string | null) => void;
  setHoveredEdge: (edge: string | null) => void;
  setHoveredEdgeLabel: (edgeLabel: string | null) => void;
  setDescriptionForClickedNode: (description: string) => void;
}> = ({
  setHoveredNode,
  setHoveredEdge,
  setHoveredEdgeLabel,
  setDescriptionForClickedNode,
  children,
}) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();
  const registerEvents = useRegisterEvents();

  /**
   * Initialize here settings that require to know the graph and/or the sigma
   * instance:
   */
  useEffect(() => {
    registerEvents({
      clickNode({ node }) {
        // Check if the 'hidden' attribute of the node is false
        if (!graph.getNodeAttribute(node, "hidden")) {
          // if node is node_-1, then do not open the node's URL in a new tab
          if (node === "node_-1") {
            // get the entire node information
            let nodeName = graph.getNodeAttributes(node);
            // serialize this JSON for the description panel
            let nodeThoughts = JSON.stringify(nodeName, null, 2);

            setDescriptionForClickedNode(nodeThoughts);
          } else {
            // Get the node's label (name) and display it
            // let nodeName = graph.getNodeAttribute(node, "thoughts");
            let nodeName = graph.getNodeAttributes(node);
            // serialize this JSON for the description panel
            let nodeThoughts = JSON.stringify(nodeName, null, 2);
            // alert("Node thoughts: " + nodeName);

            setDescriptionForClickedNode(nodeThoughts);
          }
        }
      },
      enterNode({ node }) {
        setHoveredNode(node);
        // TODO: Find a better way to get the DOM mouse layer:
        const mouseLayer = getMouseLayer();
        if (mouseLayer) mouseLayer.classList.add("mouse-pointer");
      },
      leaveNode() {
        setHoveredNode(null);
        // TODO: Find a better way to get the DOM mouse layer:
        const mouseLayer = getMouseLayer();
        if (mouseLayer) mouseLayer.classList.remove("mouse-pointer");
      },
      enterEdge({ edge }) {
        // setHoveredEdge(edge);
        setHoveredEdge(null);
        // Handle edge hover enter event
        // Example: Highlight the edge, show edge information, etc.
        // graph.setEdgeAttribute(edge, "color", "#ff0000"); // Change color to red
        graph.setEdgeAttribute(edge, "size", 7); // Increase size for visibility

        // Retrieve the hovered edge's attributes
        const edgeData = graph.getEdgeAttributes(edge);

        // If you want to display this label in the UI, you can use a state variable.
        // For example:
        // setHoveredEdgeLabel(edgeData.label);
        setHoveredEdgeLabel("");
      },
      leaveEdge({ edge }) {
        setHoveredEdge(null);
        setHoveredEdgeLabel("");
        // Handle edge hover leave event
        graph.setEdgeAttribute(edge, "color", "#ffffff"); // Change color back to default
        graph.setEdgeAttribute(edge, "size", 5); // Revert size to original
      },
      clickEdge({ edge }) {
        // Handle edge click event
        // Example: Show detailed information about the edge, etc.
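        // Hedged sketch (hypothetical, mirroring the commented-out call
        // below): surfacing the clicked edge in the UI would only need a
        // piece of React state in a parent component, e.g.
        //   const [clickedEdge, setClickedEdge] = useState<Attributes | null>(null);
        // passed down here and invoked as setClickedEdge(graph.getEdgeAttributes(edge)).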
        const edgeData = graph.getEdgeAttributes(edge);
        // Set the clicked edge data
        // setClickedEdge(edgeData);
      },
    });
  }, []);

  return <>{children}</>;
};

export default GraphEventsController;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/GraphSettingsController.tsx: --------------------------------------------------------------------------------
import { useSigma } from "react-sigma-v2";
import { FC, useEffect } from "react";

// import { drawHover } from "../../canvas-utils";
import drawHover from "./others/canvas-utils";
// import useDebounce from "../../use-debounce";
import useDebounce from "./others/use-debounce";

const NODE_FADE_COLOR = "#bbb";
const EDGE_FADE_COLOR = "#eee";

const GraphSettingsController: FC<{ hoveredNode: string | null }> = ({ children, hoveredNode }) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();

  // Here we debounce the value to avoid refreshing the highlights too often
  // while moving the mouse over the graph:
  const debouncedHoveredNode = useDebounce(hoveredNode, 40);

  /**
   * Initialize here settings that require to know the graph and/or the sigma
   * instance:
   */
  useEffect(() => {
    sigma.setSetting("hoverRenderer", (context, data, settings) =>
      drawHover(context, { ...sigma.getNodeDisplayData(data.key), ...data }, settings),
    );
  }, [sigma, graph]);

  /**
   * Update node and edge reducers when a node is hovered, to highlight its
   * neighborhood:
   */
  useEffect(() => {
    const hoveredColor: string = debouncedHoveredNode ? sigma.getNodeDisplayData(debouncedHoveredNode)!.color : "";

    sigma.setSetting(
      "nodeReducer",
      debouncedHoveredNode
        ? (node, data) =>
            node === debouncedHoveredNode ||
            graph.hasEdge(node, debouncedHoveredNode) ||
            graph.hasEdge(debouncedHoveredNode, node)
              ? { ...data, zIndex: 1 }
              : { ...data, zIndex: 0, label: "", color: NODE_FADE_COLOR, image: null, highlighted: false }
        : null,
    );
    sigma.setSetting(
      "edgeReducer",
      debouncedHoveredNode
        ? (edge, data) =>
            graph.hasExtremity(edge, debouncedHoveredNode)
              ? { ...data, color: hoveredColor, size: 4 }
              : { ...data, color: EDGE_FADE_COLOR, hidden: true }
        : null,
    );
  }, [debouncedHoveredNode]);

  return <>{children}</>;
};

export default GraphSettingsController;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/GraphTitle.tsx: --------------------------------------------------------------------------------
import React, { FC, useEffect, useState } from "react";
import { useSigma } from "react-sigma-v2";

import { FiltersState } from "./others/types";

function prettyPercentage(val: number): string {
  return (val * 100).toFixed(1) + "%";
}

const GraphTitle: FC<{ filters: FiltersState }> = ({ filters }) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();

  const [visibleItems, setVisibleItems] = useState<{
    nodes: number;
    edges: number;
  }>({ nodes: 0, edges: 0 });
  useEffect(() => {
    // To ensure the graphology instance has up-to-date "hidden" values for
    // nodes, we wait for next frame before reindexing.
This won't matter in the 21 | // UX, because of the visible nodes bar width transition. 22 | requestAnimationFrame(() => { 23 | const index = { nodes: 0, edges: 0 }; 24 | graph.forEachNode((_, { hidden }) => !hidden && index.nodes++); 25 | graph.forEachEdge( 26 | (_, _2, _3, _4, source, target) => 27 | !source.hidden && !target.hidden && index.edges++ 28 | ); 29 | setVisibleItems(index); 30 | }); 31 | }, [filters]); 32 | 33 | return ( 34 |
    <div className="graph-title">
      <h1>A cartography of Wikipedia pages around data visualization</h1>
      <h2>
        <i>
          {graph.order} node{graph.order > 1 ? "s" : ""}{" "}
          {visibleItems.nodes !== graph.order
            ? ` (only ${prettyPercentage(
                visibleItems.nodes / graph.order
              )} visible)`
            : ""}
          , {graph.size} edge
          {graph.size > 1 ? "s" : ""}{" "}
          {visibleItems.edges !== graph.size
            ? ` (only ${prettyPercentage(
                visibleItems.edges / graph.size
              )} visible)`
            : ""}
        </i>
      </h2>
    </div>
  );
};

export default GraphTitle;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/Panel.tsx: --------------------------------------------------------------------------------
import React, { FC, useEffect, useRef, useState } from "react";
import { MdExpandLess, MdExpandMore } from "react-icons/md";
import AnimateHeight from "react-animate-height";

const DURATION = 300;

const Panel: FC<{ title: JSX.Element | string; initiallyDeployed?: boolean }> = ({
  title,
  initiallyDeployed,
  children,
}) => {
  const [isDeployed, setIsDeployed] = useState(initiallyDeployed || false);
  const dom = useRef<HTMLDivElement>(null);

  useEffect(() => {
    if (isDeployed)
      setTimeout(() => {
        if (dom.current) dom.current.parentElement!.scrollTo({ top: dom.current.offsetTop - 5, behavior: "smooth" });
      }, DURATION);
  }, [isDeployed]);

  return (
    <div className="panel" ref={dom}>
      <h2>
        {title}{" "}
        <button type="button" onClick={() => setIsDeployed((v) => !v)}>
          {isDeployed ? <MdExpandLess /> : <MdExpandMore />}
        </button>
      </h2>
      <AnimateHeight duration={DURATION} height={isDeployed ? "auto" : 0}>
        {children}
      </AnimateHeight>
    </div>
  );
};

export default Panel;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/SearchField.tsx: --------------------------------------------------------------------------------
import React, { KeyboardEvent, ChangeEvent, FC, useEffect, useState } from "react";
import { useSigma } from "react-sigma-v2";
import { Attributes } from "graphology-types";
import { BsSearch } from "react-icons/bs";

import { FiltersState } from "./others/types";

/**
 * This component is basically a fork from React-sigma-v2's SearchControl
 * component, to get some minor adjustments:
 * 1. We need to hide hidden nodes from results
 * 2. We need custom markup
 */
const SearchField: FC<{ filters: FiltersState }> = ({ filters }) => {
  const sigma = useSigma();

  const [search, setSearch] = useState<string>("");
  const [values, setValues] = useState<Array<{ id: string; label: string }>>([]);
  const [selected, setSelected] = useState<string | null>(null);

  const refreshValues = () => {
    const newValues: Array<{ id: string; label: string }> = [];
    const lcSearch = search.toLowerCase();
    if (!selected && search.length > 1) {
      sigma.getGraph().forEachNode((key: string, attributes: Attributes): void => {
        if (!attributes.hidden && attributes.label && attributes.label.toLowerCase().indexOf(lcSearch) === 0)
          newValues.push({ id: key, label: attributes.label });
      });
    }
    setValues(newValues);
  };

  // Refresh values when search is updated:
  useEffect(() => refreshValues(), [search]);

  // Refresh values when filters are updated (but wait a frame first):
  useEffect(() => {
    requestAnimationFrame(refreshValues);
  }, [filters]);

  useEffect(() => {
    if (!selected) return;

    sigma.getGraph().setNodeAttribute(selected, "highlighted", true);
    const nodeDisplayData = sigma.getNodeDisplayData(selected);

    if (nodeDisplayData)
      sigma.getCamera().animate(
        { ...nodeDisplayData, ratio: 0.05 },
        {
          duration: 600,
        },
      );

    return () => {
      sigma.getGraph().setNodeAttribute(selected, "highlighted", false);
    };
  }, [selected]);

  const onInputChange = (e: ChangeEvent<HTMLInputElement>) => {
    const searchString = e.target.value;
    const valueItem = values.find((value) => value.label === searchString);
    if (valueItem) {
      setSearch(valueItem.label);
      setValues([]);
      setSelected(valueItem.id);
    } else {
      setSelected(null);
      setSearch(searchString);
    }
  };

  const onKeyPress = (e: KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Enter" && values.length) {
      setSearch(values[0].label);
      setSelected(values[0].id);
    }
  };

  return (
    <div className="search-wrapper">
      <input
        type="search"
        placeholder="Search in nodes..."
        list="nodes"
        value={search}
        onChange={onInputChange}
        onKeyPress={onKeyPress}
      />
      <BsSearch className="icon" />
      <datalist id="nodes">
        {values.map((value: { id: string; label: string }) => (
          <option key={value.id} value={value.label}>
            {value.label}
          </option>
        ))}
      </datalist>
    </div>
  );
};

export default SearchField;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/TagsPanel.tsx: --------------------------------------------------------------------------------
import React, { FC, useEffect, useMemo, useState } from "react";
import { useSigma } from "react-sigma-v2";
import { MdCategory } from "react-icons/md";
import { keyBy, mapValues, sortBy, values } from "lodash";
import { AiOutlineCheckCircle, AiOutlineCloseCircle } from "react-icons/ai";

import { FiltersState, Tag } from "./others/types";
import Panel from "./Panel";

const TagsPanel: FC<{
  tags: Tag[];
  filters: FiltersState;
  toggleTag: (tag: string) => void;
  setTags: (tags: Record<string, boolean>) => void;
}> = ({ tags, filters, toggleTag, setTags }) => {
  const sigma = useSigma();
  const graph = sigma.getGraph();

  const nodesPerTag = useMemo(() => {
    const index: Record<string, number> = {};
    graph.forEachNode((_, { tag }) => (index[tag] = (index[tag] || 0) + 1));
    return index;
  }, []);

  const maxNodesPerTag = useMemo(() => Math.max(...values(nodesPerTag)), [nodesPerTag]);
  const visibleTagsCount = useMemo(() => Object.keys(filters.tags).length, [filters]);

  const [visibleNodesPerTag, setVisibleNodesPerTag] = useState<Record<string, number>>(nodesPerTag);
  useEffect(() => {
    // To ensure the graphology instance has up-to-date "hidden" values for
    // nodes, we wait for next frame before reindexing. This won't matter in the
    // UX, because of the visible nodes bar width transition.
    requestAnimationFrame(() => {
      const index: Record<string, number> = {};
      graph.forEachNode((_, { tag, hidden }) => !hidden && (index[tag] = (index[tag] || 0) + 1));
      setVisibleNodesPerTag(index);
    });
  }, [filters]);

  const sortedTags = useMemo(
    () => sortBy(tags, (tag) => (tag.key === "unknown" ? Infinity : -nodesPerTag[tag.key])),
    [tags, nodesPerTag],
  );

  return (
    <Panel
      initiallyDeployed
      title={
        <>
          <MdCategory className="text-muted" /> Categories
          {visibleTagsCount < tags.length ? (
            <span className="text-muted text-small">
              {" "}
              ({visibleTagsCount} / {tags.length})
            </span>
          ) : (
            ""
          )}
        </>
      }
    >
      <p>
        <i className="text-muted">Click a category to show/hide related pages from the network.</i>
      </p>
      <p className="buttons">
        <button className="btn" onClick={() => setTags(mapValues(keyBy(tags, "key"), () => true))}>
          <AiOutlineCheckCircle /> Check all
        </button>{" "}
        <button className="btn" onClick={() => setTags({})}>
          <AiOutlineCloseCircle /> Uncheck all
        </button>
      </p>
      <ul>
        {sortedTags.map((tag) => {
          const nodesCount = nodesPerTag[tag.key];
          const visibleNodesCount = visibleNodesPerTag[tag.key] || 0;
          return (
            <li
              className="caption-row"
              key={tag.key}
              title={`${nodesCount} page${nodesCount > 1 ? "s" : ""}${
                visibleNodesCount !== nodesCount ? ` (only ${visibleNodesCount} visible)` : ""
              }`}
            >
              <input
                type="checkbox"
                checked={filters.tags[tag.key] || false}
                onChange={() => toggleTag(tag.key)}
                id={`tag-${tag.key}`}
              />
              <label htmlFor={`tag-${tag.key}`}>
                <span
                  className="circle"
                  style={{ backgroundImage: `url(${process.env.PUBLIC_URL}/images/${tag.image})` }}
                />{" "}
                <div className="node-label">
                  <span>{tag.key}</span>
                  <div className="bar" style={{ width: (100 * nodesCount) / maxNodesPerTag + "%" }}>
                    <div
                      className="inside-bar"
                      style={{
                        width: (100 * visibleNodesCount) / nodesCount + "%",
                      }}
                    />
                  </div>
                </div>
              </label>
            </li>
          );
        })}
      </ul>
    </Panel>
  );
};

export default TagsPanel;
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/others/canvas-utils.ts: --------------------------------------------------------------------------------
import { NodeDisplayData, PartialButFor, PlainObject } from "sigma/types";
import { Settings } from "sigma/settings";

const TEXT_COLOR = "#000000";

/**
 * This function draws a rectangle in the input canvas 2D context.
 * It only deals with tracing the path, and does not fill or stroke.
 */
export function drawRoundRect(
  ctx: CanvasRenderingContext2D,
  x: number,
  y: number,
  width: number,
  height: number,
  radius: number,
): void {
  ctx.beginPath();
  ctx.moveTo(x + radius, y);
  ctx.lineTo(x + width - radius, y);
  ctx.quadraticCurveTo(x + width, y, x + width, y + radius);
  ctx.lineTo(x + width, y + height - radius);
  ctx.quadraticCurveTo(x + width, y + height, x + width - radius, y + height);
  ctx.lineTo(x + radius, y + height);
  ctx.quadraticCurveTo(x, y + height, x, y + height - radius);
  ctx.lineTo(x, y + radius);
  ctx.quadraticCurveTo(x, y, x + radius, y);
  ctx.closePath();
}

/**
 * Custom hover renderer
 */
export function drawHover(context: CanvasRenderingContext2D, data: PlainObject, settings: PlainObject) {
  const size = settings.labelSize;
  const font = settings.labelFont;
  const weight = settings.labelWeight;
  const subLabelSize = size - 2;

  const label = data.label;
  const subLabel = data.tag !== "unknown" ? data.tag : "";
  const clusterLabel = data.clusterLabel;

  // Then we draw the label background
  context.beginPath();
  context.fillStyle = "#fff";
  context.shadowOffsetX = 0;
  context.shadowOffsetY = 2;
  context.shadowBlur = 8;
  context.shadowColor = "#000";

  context.font = `${weight} ${size}px ${font}`;
  const labelWidth = context.measureText(label).width;
  context.font = `${weight} ${subLabelSize}px ${font}`;
  const subLabelWidth = subLabel ? context.measureText(subLabel).width : 0;
  context.font = `${weight} ${subLabelSize}px ${font}`;
  const clusterLabelWidth = clusterLabel ? context.measureText(clusterLabel).width : 0;

  const textWidth = Math.max(labelWidth, subLabelWidth, clusterLabelWidth);

  const x = Math.round(data.x);
  const y = Math.round(data.y);
  const w = Math.round(textWidth + size / 2 + data.size + 3);
  const hLabel = Math.round(size / 2 + 4);
  const hSubLabel = subLabel ?
Math.round(subLabelSize / 2 + 9) : 0;
  const hClusterLabel = Math.round(subLabelSize / 2 + 9);

  drawRoundRect(context, x, y - hSubLabel - 12, w, hClusterLabel + hLabel + hSubLabel + 12, 5);
  context.closePath();
  context.fill();

  context.shadowOffsetX = 0;
  context.shadowOffsetY = 0;
  context.shadowBlur = 0;

  // And finally we draw the labels
  context.fillStyle = TEXT_COLOR;
  context.font = `${weight} ${size}px ${font}`;
  context.fillText(label, data.x + data.size + 3, data.y + size / 3);

  if (subLabel) {
    context.fillStyle = TEXT_COLOR;
    context.font = `${weight} ${subLabelSize}px ${font}`;
    context.fillText(subLabel, data.x + data.size + 3, data.y - (2 * size) / 3 - 2);
  }

  context.fillStyle = data.color;
  context.font = `${weight} ${subLabelSize}px ${font}`;
  context.fillText(clusterLabel, data.x + data.size + 3, data.y + size / 3 + 3 + subLabelSize);
}

/**
 * Custom label renderer
 */
export default function drawLabel(
  context: CanvasRenderingContext2D,
  data: PartialButFor<NodeDisplayData, "x" | "y" | "size" | "label" | "color">,
  settings: Settings,
): void {
  if (!data.label) return;

  const size = settings.labelSize,
    font = settings.labelFont,
    weight = settings.labelWeight;

  context.font = `${weight} ${size}px ${font}`;
  const width = context.measureText(data.label).width + 8;

  context.fillStyle = "#ffffffcc";
  context.fillRect(data.x + data.size, data.y + size / 3 - 15, width, 20);

  context.fillStyle = "#000";
  context.fillText(data.label, data.x + data.size + 3, data.y + size / 3);
}
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/others/types.ts: --------------------------------------------------------------------------------
export interface NodeData {
  key: string;
  label: string;
  tag: string;
  URL: string;
  cluster: string;
  x: number;
  y: number;
}

export interface Cluster {
  key: string;
  color: string;
  clusterLabel: string;
}

export interface Tag {
  key: string;
  image: string;
}

export interface Dataset {
  nodes: NodeData[];
  edges: [string, string, string][];
  clusters: Cluster[];
  tags: Tag[];
  question: string;
  answer: string;
}

export interface FiltersState {
  clusters: Record<string, boolean>;
  tags: Record<string, boolean>;
}
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/DisplayGraph/others/use-debounce.ts: --------------------------------------------------------------------------------
import { useEffect, useState } from "react";

function useDebounce<T>(value: T, delay: number): T {
  // State and setters for debounced value
  const [debouncedValue, setDebouncedValue] = useState<T>(value);

  useEffect(
    () => {
      // Update debounced value after delay
      const handler = setTimeout(() => {
        if (value !== debouncedValue) setDebouncedValue(value);
      }, delay);

      // Cancel the timeout if value changes (also on delay change or unmount)
      // This is how we prevent debounced value from updating if value is changed ...
      // .. within the delay period. Timeout gets cleared and restarted.
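      // Usage sketch (hypothetical caller, not part of this repo): debounce a
      // text input so downstream work only runs after typing pauses:
      //   const debouncedQuery = useDebounce(query, 300);
      //   useEffect(() => { runFilter(debouncedQuery); }, [debouncedQuery]);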
17 | return () => { 18 | clearTimeout(handler); 19 | }; 20 | }, 21 | [value, delay] // Only re-call effect if value or delay changes 22 | ); 23 | 24 | return debouncedValue; 25 | } 26 | 27 | export default useDebounce; 28 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/components/ErrorBoundary.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | interface ErrorBoundaryState { 4 | error: Error | null; 5 | errorInfo: React.ErrorInfo | null; 6 | } 7 | 8 | class ErrorBoundary extends React.Component<{}, ErrorBoundaryState> { 9 | constructor(props: {}) { 10 | super(props); 11 | this.state = { error: null, errorInfo: null }; 12 | } 13 | 14 | componentDidCatch(error: Error, errorInfo: React.ErrorInfo) { 15 | // Catch errors in any components below and re-render with error message 16 | this.setState({ 17 | error: error, 18 | errorInfo: errorInfo, 19 | }); 20 | // You can also log error messages to an error reporting service here 21 | } 22 | 23 | render() { 24 | if (this.state.errorInfo) { 25 | // Error path 26 | return ( 27 |
        <div>
          <h2>Something went wrong.</h2>
          <details style={{ whiteSpace: "pre-wrap" }}>
            {this.state.error && this.state.error.toString()}
            <br />
            {this.state.errorInfo.componentStack}
          </details>
        </div>
35 | ); 36 | } 37 | // Normally, just render children 38 | return this.props.children; 39 | } 40 | } 41 | 42 | export default ErrorBoundary; 43 | -------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/index.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import ReactDOM from "react-dom"; 3 | import "./styles.css"; 4 | 5 | import ChatGPTForKRAGENLoc from "./components/ChatGPTForKRAGENLoc"; 6 | // import AlertExplainPurpose from "./components/AlertExplainPurpose"; 7 | // import * as Sentry from "@sentry/react"; 8 | 9 | import ErrorBoundary from "./components/ErrorBoundary"; 10 | 11 | // Set up global error handler 12 | window.onerror = function (message, source, lineno, colno, error) { 13 | // Add error logging logic here 14 | console.log("Captured in window.onerror:", message); 15 | // Return true to prevent default browser handling 16 | return true; 17 | }; 18 | 19 | function App() { 20 | const [isAlertOpen, setIsAlertOpen] = useState(true); 21 | 22 | return ( 23 |
    <div className="App">
      {/* NOTE: the original JSX markup here was lost during text extraction.
          Based on the imports and state above, it rendered the
          ChatGPTForKRAGENLoc chat interface, with the AlertExplainPurpose
          alert (driven by isAlertOpen) left commented out. */}
      <ChatGPTForKRAGENLoc />
    </div>
  );
}

// Render tree reconstructed (the element tags were stripped during
// extraction); ErrorBoundary is imported above and wraps the app here.
ReactDOM.render(
  <React.StrictMode>
    <ErrorBoundary>
      <App />
    </ErrorBoundary>
  </React.StrictMode>,
  document.getElementById("root")
);
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/src/react-app-env.d.ts: --------------------------------------------------------------------------------
/// <reference types="react-scripts" />
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/Frontend/tsconfig.json: --------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "es5",
    "lib": [
      "dom",
      "dom.iterable",
      "esnext"
    ],
    "allowJs": true,
    "skipLibCheck": true,
    "esModuleInterop": true,
    "allowSyntheticDefaultImports": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noFallthroughCasesInSwitch": true,
    "module": "esnext",
    "moduleResolution": "node",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "noEmit": true,
    "jsx": "react-jsx"
  },
  "include": [
    "src"
  ]
}
-------------------------------------------------------------------------------- /KRAGEN_Dashboard/readme.md: --------------------------------------------------------------------------------
# Please see the readme.md files in the Frontend and Backend folders for instructions on running the application.
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Epistasis Lab at Cedars Sinai

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
22 | -------------------------------------------------------------------------------- /config/class.json: -------------------------------------------------------------------------------- 1 | { 2 | "class": "AlzKB", 3 | "properties": [ 4 | { 5 | "name": "knowledge", 6 | "dataType": ["text"] 7 | }, 8 | { 9 | "name": "knowledge_num_token", 10 | "dataType": ["int"] 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /config/kragen.env: -------------------------------------------------------------------------------- 1 | # convert.py 2 | CONVERT_OUTPUT_DIR=target/convert 3 | CONVERT_OUTPUT_FILENAME=converted.csv 4 | CONVERT_CHUNK_SIZE=5 5 | 6 | # parse.py settings 7 | INPUT_CSV_FILE_LOCATION='./target/convert/converted.csv' 8 | OUTPUT_DIR='./target/parse' 9 | 10 | # make_vector.py 11 | INPUT_DIR_FOR_EMBEDDING='./target/parse' 12 | OUTPUT_DIR_FOR_EMBEDDING='./target/embed' 13 | 14 | # addTokenInfo.py 15 | INPUT_DIR_FOR_ADDING_TOKEN_INFO='./target/embed' 16 | OUTPUT_DIR_FOR_ADDING_TOKEN_INFO='./target/tokens' 17 | 18 | # upload.py 19 | # TODO: Update to real value 20 | # INPUT_DIR_DB_UPLOAD='./input_dir/' 21 | INPUT_DIR_DB_UPLOAD='./target/tokens' 22 | 23 | # Backend Config 24 | # The following variables will be used to generate a config.json file 25 | # based on BACKEND_CONFIG_FILENAME inside KRAGEN_BACKEND_PATH 26 | # TODO: Rename BACKEND_CONFIG_FILENAME as BACKEND_CONFIG_SAMPLE_FILE 27 | # Inspect the BACKEND_CONFIG_FILENAME to review the file structure 28 | KRAGEN_BACKEND_PATH='KRAGEN_Dashboard/Backend' 29 | BACKEND_CONFIG_FILENAME='config.json.sample' 30 | # inspect BACKEND_CONFIG_FILENAME for a list of available GPT_API_SERVICEs 31 | GPT_API_SERVICE=chatgpt 32 | # GPT API_BASE is required only when GPT_API_SERVICE is set to 'azuregpt' 33 | GPT_API_BASE=your_api_base 34 | # GPT API_VERSION is required only when GPT_API_SERVICE is set to 'azuregpt' 35 | GPT_API_VERSION='2023-07-01-preview' 36 | # no need to source .env, docker-compose-flask.yml will use 37 | # both kragen.env and .env 38 | # source .env 39 | MODEL_ID="gpt-3.5-turbo" 40 | PROMPT_TOKEN_COST=0.001 41 | RESPONSE_TOKEN_COST=0.002 42 | TEMPERATURE=1.0 43 | MAX_TOKENS=1536 44 | STOP=null 45 | ORGANIZATION="" 46 | # API_KEY=$OPENAI_API_KEY 47 | EMBEDDING_ID=$OPENAI_EMBEDDING_MODEL 48 | 49 | # Weaviate DB Config 50 | # The weaviate url and key are already in .env 51 | # The docker-compose-flask.yml file will use both kragen.env and .env 52 | # WEAVIATE_URL=$WEAVIATE_URL 53 | # WEAVIATE_API_KEY=$WEAVIATE_API_KEY 54 | WEAVIATE_DB=AlzKB 55 | WEAVIATE_LIMIT=200 56 | -------------------------------------------------------------------------------- /conversion.md: -------------------------------------------------------------------------------- 1 | # Data Conversion 2 | ## General Conversion Process 3 | Data conversion is a multi-step process that involves retrieving 4 | data from a source, transforming it into a format that is compatible with a 5 | Vector Store, and ultimately uploading it into a Weaviate Vector Database. 6 | The source can be a structured or unstructured data source. The only 7 | requirement is that the data be in a format that can be transformed into 8 | natural language. 
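To make the transformation step concrete, here is a minimal sketch of how one
extracted knowledge-graph relationship can be rendered as the kind of
natural-language statement that later gets embedded. The column names, relation
label, and sentence template are illustrative assumptions, not the exact logic
in `src/convert.py`:

```python
import pandas as pd

# Hypothetical relationship export: one (subject, relation, object) triple per row.
rows = pd.DataFrame([
    {"source": "APOE", "relation": "GENEASSOCIATESWITHDISEASE", "target": "Alzheimer's Disease"},
])

# Each relation type maps to an English sentence template (illustrative only).
TEMPLATES = {
    "GENEASSOCIATESWITHDISEASE": "The gene {source} is associated with the disease {target}.",
}

def to_sentence(row: pd.Series) -> str:
    """Render one extracted triple as a natural-language statement."""
    return TEMPLATES[row["relation"]].format(source=row["source"], target=row["target"])

print(rows.apply(to_sentence, axis=1).tolist())
# ["The gene APOE is associated with the disease Alzheimer's Disease."]
```

Statements produced this way are what the embedding step turns into vectors.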
Here is a depiction of one way the conversion process can be implemented
with a Knowledge Graph as the source:

[![Logo](https://raw.githubusercontent.com/EpistasisLab/KRAGEN/main/images/KG2VectorDB_Process.png)]()

The data extraction is done using a query language that is specific to the
Knowledge Graph (e.g. Cypher for Neo4j). Once the data is extracted, KRAGEN
will handle the transformation of the data to make it compatible with a
Weaviate Vector Database. See the [Installation Instructions](https://github.com/EpistasisLab/KRAGEN?tab=readme-ov-file#installation)
for more information on how to run this process.


## Use Cases
### Alzheimer's Knowledgebase
For a description of the Alzheimer's Knowledgebase, please see our [AlzKB GitHub
repository](https://github.com/EpistasisLab/AlzKB).

The conversion process consisted of the following steps:
1. Relationships were extracted using Cypher Query Language and output as a CSV
file. [source](https://github.com/EpistasisLab/KRAGEN/blob/main/src/extract_data.ipynb)
2. The CSV file was then parsed and converted into English sentences using
language that is specific to the existing structure and relationships in the
Alzheimer's Knowledgebase. [source](https://github.com/EpistasisLab/KRAGEN/blob/main/src/convert.py)
3. This natural language was then vectorized using the `text-embedding-ada-002`
model via OpenAI's API. [source](https://github.com/EpistasisLab/KRAGEN/blob/main/src/make_vector.py)
4. The vectorized data was then uploaded into a Weaviate Vector Database. [source](https://github.com/EpistasisLab/KRAGEN/blob/main/src/upload.py)

-------------------------------------------------------------------------------- /dev_guide.md: --------------------------------------------------------------------------------
# Developers Guide
## Building Docker Images
These are some notes for developers. While following the Installation notes in readme.md, keep the following points in mind:
- Any time the source code is updated, **rebuild** the docker images.
  - This is currently necessary since the source code is copied into the docker image when it's built. This needs to be improved (we can use volumes in development instead of copying the code).
- Updating a config variable (e.g. in .env) should not require a rebuild of the docker image. Just make sure to stop the currently running container and restart it.
7 | - To see what environmental variables a particular container is using, you can run `docker inspect ` -------------------------------------------------------------------------------- /docker-compose-flask.yml: -------------------------------------------------------------------------------- 1 | services: 2 | execgpt: 3 | build: 4 | context: ./KRAGEN_Dashboard/Backend 5 | dockerfile: Dockerfile 6 | # image: moorelab/${IMAGE_NAME}:${TAG} 7 | image: kragen-server:${TAG} 8 | container_name: kragen-server 9 | ports: 10 | - 5050:5050 11 | # entrypoint: ["python", "src/kragen.py"] 12 | env_file: 13 | - .env 14 | - config/kragen.env 15 | -------------------------------------------------------------------------------- /docker-compose-gui.yml: -------------------------------------------------------------------------------- 1 | services: 2 | gui: 3 | build: 4 | context: ./KRAGEN_Dashboard/Frontend 5 | dockerfile: Dockerfile 6 | # image: moorelab/${IMAGE_NAME}-gui:${TAG} 7 | image: kragen-gui:${TAG} 8 | container_name: kragen-gui 9 | ports: 10 | - 3000:3000 11 | env_file: 12 | - .env 13 | - config/kragen.env 14 | # command: bash -c "npm install && npm start" 15 | -------------------------------------------------------------------------------- /docker-compose-kragen.yml: -------------------------------------------------------------------------------- 1 | services: 2 | kragen: 3 | build: 4 | context: . 5 | dockerfile: docker/Dockerfile 6 | args: 7 | docker_files: 'docker' 8 | image: kragen:${TAG} 9 | volumes: 10 | - "./:/usr/src/app" 11 | working_dir: /usr/src/app 12 | entrypoint: ["python", "src/kragen.py"] 13 | env_file: 14 | - .env 15 | - config/kragen.env 16 | command: test.csv 17 | -------------------------------------------------------------------------------- /docker-compose-weaviate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3.4' 3 | services: 4 | weaviate: 5 | command: 6 | - --host 7 | - 0.0.0.0 8 | - --port 9 | - '8080' 10 | - --scheme 11 | - http 12 | image: cr.weaviate.io/semitechnologies/weaviate:1.22.3 13 | ports: 14 | - 8080:8080 15 | - 50051:50051 # port for gRPC 16 | volumes: 17 | - ./weaviate_data:/var/lib/weaviate 18 | restart: on-failure:0 19 | environment: 20 | QUERY_DEFAULTS_LIMIT: 25 21 | AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false' 22 | PERSISTENCE_DATA_PATH: '/var/lib/weaviate' 23 | AUTHENTICATION_APIKEY_ENABLED: 'true' 24 | AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'hashkey1,hashkey2' 25 | AUTHENTICATION_APIKEY_USERS: 'user1@email.com,user2@email.com' 26 | AUTHORIZATION_ADMINLIST_ENABLED: 'true' 27 | AUTHORIZATION_ADMINLIST_USERS: 'user1@email.com' 28 | AUTHORIZATION_ADMINLIST_READONLY_USERS: 'user2@gmail.com' 29 | DEFAULT_VECTORIZER_MODULE: 'none' 30 | ENABLE_MODULES: '' 31 | CLUSTER_HOSTNAME: 'node1' 32 | volumes: 33 | weaviate_data: 34 | ... 
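# A note on the sample auth settings above: Weaviate matches each entry in
# AUTHENTICATION_APIKEY_ALLOWED_KEYS positionally with the identities in
# AUTHENTICATION_APIKEY_USERS, so requests sent with 'hashkey1' act as
# user1@email.com and requests sent with 'hashkey2' act as user2@email.com.
# That is why .env.sample ships with WEAVIATE_API_KEY=hashkey1, the key paired
# with the admin-list user.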
35 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | kragen: 3 | extends: 4 | file: docker-compose-kragen.yml 5 | service: kragen 6 | depends_on: 7 | - weaviate 8 | networks: 9 | - kragen-net 10 | 11 | weaviate: 12 | extends: 13 | file: docker-compose-weaviate.yml 14 | service: weaviate 15 | networks: 16 | - kragen-net 17 | 18 | execgpt: 19 | extends: 20 | file: docker-compose-flask.yml 21 | service: execgpt 22 | depends_on: 23 | - weaviate 24 | networks: 25 | - kragen-net 26 | 27 | gui: 28 | extends: 29 | file: docker-compose-gui.yml 30 | service: gui 31 | depends_on: 32 | - execgpt 33 | networks: 34 | - kragen-net 35 | 36 | networks: 37 | kragen-net: 38 | driver: bridge 39 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | ARG docker_files=docker 3 | WORKDIR /usr/src/app 4 | 5 | # WORKDIR /app 6 | # COPY src/*.py . 7 | 8 | COPY ${docker_files}/requirements.txt requirements.txt 9 | RUN apt update --fix-missing \ 10 | && apt upgrade -y \ 11 | && apt install -y gcc python3-dev \ 12 | && pip install --upgrade pip \ 13 | && pip install -r requirements.txt 14 | 15 | ENTRYPOINT [ "python", "/usr/src/app/kragen.py" ] 16 | -------------------------------------------------------------------------------- /docker/prod.Dockerfile: -------------------------------------------------------------------------------- 1 | # Need to fully test this 2 | # Made this version, since we're focusing on the dev version first. 3 | FROM python:3.10-slim 4 | ARG docker_files=docker 5 | WORKDIR /usr/src/app 6 | COPY src/*.py . 
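# NOTE: in contrast with the dev image (docker/Dockerfile), which gets the
# source via the volume mount in docker-compose-kragen.yml, this production
# image bakes src/*.py into the image itself, so it must be rebuilt whenever
# the source changes (see dev_guide.md).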
COPY ${docker_files}/requirements.txt requirements.txt
RUN apt update && apt upgrade -y && pip install --upgrade pip && pip install -r requirements.txt
ENTRYPOINT [ "python", "/usr/src/app/kragen.py" ]
-------------------------------------------------------------------------------- /docker/requirements.txt: --------------------------------------------------------------------------------
dask==2024.1.0
pandas==2.1.4
weaviate-client==3.25.3
python-dotenv==1.0.0
openai==1.55.3
httpx==0.27.2
matplotlib==3.8.1
plotly==5.16.1
scipy==1.10.1
scikit-learn==1.3.2
distributed==2024.1.0
nltk==3.8.1
-------------------------------------------------------------------------------- /images/KG2Diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/images/KG2Diagram.png
-------------------------------------------------------------------------------- /images/KG2VectorDB_Process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/images/KG2VectorDB_Process.png
-------------------------------------------------------------------------------- /images/OIG4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/images/OIG4.jpeg
-------------------------------------------------------------------------------- /images/radar_chart_kragen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/images/radar_chart_kragen.png
-------------------------------------------------------------------------------- /kragen-gui.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Copy .env file and replace the value of the 11th line
# cp ./KRAGEN_Dashboard/Backend/config.json.sample ./KRAGEN_Dashboard/Backend/config.json

# openaikey=$(awk 'NR == 11 {print}' .env)
# openaikey=$(echo $openaikey | cut -d'=' -f2)

# weaviatekey=$(awk 'NR == 16 {print}' .env)
# weaviatekey=$(echo $weaviatekey | cut -d'=' -f2)

# weaviateurl=$(awk 'NR == 15 {print}' .env)
# weaviateurl=$(echo $weaviateurl | cut -d'=' -f2)

# awk -v value="$openaikey" 'NR == 28 {$0 = "\"api_key\": \"" value "\", "} 1' ./KRAGEN_Dashboard/Backend/config.json > temp && mv temp ./KRAGEN_Dashboard/Backend/config.json

# awk -v value="$weaviatekey" 'NR == 15 {$0 = "\"api_key\": \"" value "\", "} 1' ./KRAGEN_Dashboard/Backend/config.json > temp && mv temp ./KRAGEN_Dashboard/Backend/config.json

# awk -v value="$weaviateurl" 'NR == 16 {$0 = "\"url\": \"" value "\", "} 1' ./KRAGEN_Dashboard/Backend/config.json > temp && mv temp ./KRAGEN_Dashboard/Backend/config.json

# Start Flask server in Docker container
# cd ./KRAGEN_Dashboard/Backend

# IMAGE_NAME="kragen-flask-server"
# # Check if the Docker image already exists
# if docker images "$IMAGE_NAME" | grep -q "$IMAGE_NAME"; then
#   echo "Docker image $IMAGE_NAME already exists. Skipping build."
# else
#   echo "Docker image $IMAGE_NAME does not exist. Building..."
#   docker build -t "$IMAGE_NAME" .
# fi

# Source environment variables from the .env file
source .env

# Configure the backend config.json file:
docker compose run kragen config_backend

source config/kragen.env
# Display the Weaviate URL for verification
echo "WEAVIATE_URL: $WEAVIATE_URL"

# Start the Docker container for the Flask server with docker compose
docker compose run -d -p 5050:5050 execgpt

# # Navigate to the Frontend directory
# cd KRAGEN_Dashboard/Frontend
#
# # Install npm dependencies and start the React application in the background
# npm install
# npm start &
# REACT_PID=$!
#
# # Define a function to clean up background processes on script exit
# function cleanup_react() {
#   echo "Exiting..."
#   kill $REACT_PID
#   exit
# }
#
# # Trap EXIT signal to ensure the cleanup function runs on script exit
# trap cleanup_react EXIT
#
# # Wait for the React process to finish before exiting the script
# wait $REACT_PID

docker compose run -d -p 3000:3000 gui
-------------------------------------------------------------------------------- /release/deploy_production_release.sh: --------------------------------------------------------------------------------
#!/bin/bash

source .env
echo "tag: $TAG"

echo "Pushing images to DockerHub"
docker push moorelab/kragen:${TAG}

git tag -fa "v${TAG}" -m "v${TAG}"
git push --tags
-------------------------------------------------------------------------------- /release/generate_production_release.sh: --------------------------------------------------------------------------------
#! /bin/bash

source .env
echo "tag: $TAG"

docker compose -f docker-compose-prod.yml build

docker tag kragen_convert:${TAG} moorelab/kragen:${TAG}
-------------------------------------------------------------------------------- /release/readme.md: --------------------------------------------------------------------------------
# Release process
1. **Update the `.env` file with a new version number.** Update the TAG environment variable in `.env` to the current production version
as per [semantic versioning](https://semver.org/) and the python package
version specification [PEP440](https://www.python.org/dev/peps/pep-0440).
Development images should have a tag indicating a
[pre-release](https://www.python.org/dev/peps/pep-0440/#pre-releases)
(for example, `a0`).
2. **Build production docker images with:**
`bash release/generate_production_release.sh`
3. **Check that the docker images were pushed to DockerHub and tag the production git branch by
running:**
`bash release/deploy_production_release.sh`
-------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/KRAGEN/149a62d9fe0da6273a86957e5703e7d6aa5c6341/src/__init__.py
-------------------------------------------------------------------------------- /src/addTokenInfo.py: --------------------------------------------------------------------------------
# 3. add token counts for the files in divided_data_embedded

# read csv files from divided_data_embedded
import os
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from dotenv import load_dotenv
# Download the necessary NLTK models (if not already downloaded)
nltk.download('punkt')

# Specify the path to 'kragen.env' file instead of the default '.env' file
dotenv_path = os.path.join(os.getcwd(), 'config', 'kragen.env')
load_dotenv(dotenv_path)  # This loads the variables from 'kragen.env'


# INPUT_DIR_FOR_ADDING_TOKEN_INFO
input_dir = os.getenv('INPUT_DIR_FOR_ADDING_TOKEN_INFO')
# OUTPUT_DIR_FOR_ADDING_TOKEN_INFO
output_dir = os.getenv('OUTPUT_DIR_FOR_ADDING_TOKEN_INFO')

def main():
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # collect the csv files in input_dir
    csv_files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.csv')]
    csv_files.sort()


    for file in csv_files:
        df = pd.read_csv(file)

        # Here we assume 'query' is the name of the column containing the text to be tokenized.
        # df['query_num_token'] = df['query'].apply(lambda x: len(x.split()) if pd.notnull(x) else 0)

        # Use NLTK's word_tokenize(sentence) to get the length in tokens
        df['query_num_token'] = df['query'].apply(lambda x: len(word_tokenize(x)) if pd.notnull(x) else 0)


        # df['statement_num_token'] = df['statement'].apply(lambda x: len(x.split()) if pd.notnull(x) else 0)
        # output_dir + '/' + os.path.basename(file)

        # Use the tokenizer on the statement column as well
        df['statement_num_token'] = df['statement'].apply(lambda x: len(word_tokenize(x)) if pd.notnull(x) else 0)

        df.to_csv(output_dir + '/' + os.path.basename(file), index=False)  # Saving the DataFrame back to CSV, without the index.
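    # Illustrative example (made-up sentence, not from the data):
    # word_tokenize("What gene is associated with Alzheimer's?") returns
    # ['What', 'gene', 'is', 'associated', 'with', 'Alzheimer', "'s", '?'],
    # so the token count is 8, whereas a plain str.split() would report 6;
    # NLTK counts punctuation and clitics like 's as separate tokens.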


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /src/config.py: --------------------------------------------------------------------------------
import os


def config(input_file):
    # Build the conversion settings from environment variables
    # (loaded from config/kragen.env).
    kragen_config = {
        'output_directory': os.getenv('CONVERT_OUTPUT_DIR'),
        'convert_chunk_size': os.getenv('CONVERT_CHUNK_SIZE'),
        'output_filename': os.getenv('CONVERT_OUTPUT_FILENAME'),
        'input_file': input_file,
    }
    return kragen_config
-------------------------------------------------------------------------------- /src/k_setup.py: --------------------------------------------------------------------------------
import click
import os
import sys
import json
import convert as convert
from config import config
from parse import main as parse
from make_vector import main as embed
from addTokenInfo import main as tokenize
from upload import create_class, upload


def setup(filename, configure_backend=True):

    pad = '*'
    frame = pad * 80

    steps = [
        {
            'name': 'Confirm',
            'msg': 'Ensure that all variables in config/kragen.env and .env are correct',
            'func': None,
            'output': '',
        },
        {
            'name': 'Convert',
            'func': convert.run,
            'arg': config(filename),
            'output': os.path.join(os.getenv('CONVERT_OUTPUT_DIR'),
                                   os.getenv('CONVERT_OUTPUT_FILENAME')),
        },
        {
            'name': 'Parse',
            'func': parse,
            'output': os.getenv('OUTPUT_DIR'),
        },
        {
            'name': 'Vectorize',
            'func': embed,
            'output': os.getenv('OUTPUT_DIR_FOR_EMBEDDING'),
        },
        {
            'name': 'Tokenize',
            'func': tokenize,
            'output': os.getenv('OUTPUT_DIR_FOR_ADDING_TOKEN_INFO'),
        },
        {
            'name': 'Upload',
            'func': upload,
            'output': f"Vector database: {os.getenv('WEAVIATE_URL')}",
        }
    ]

    for step in steps:
        msg = f"Executing Step: {step['name']}"
        if 'msg' in step and step['msg']:
            msg += f"\n{step['msg']}"
        info = f'{frame}\n{pad} {msg}\n{frame}'
        if not prompt(info):
            sys.exit(0)
        if 'func' in step and step['func']:
            if 'arg' in step and step['arg']:
                step['func'](step['arg'])
            else:
                step['func']()
        msg = f"Step: {step['name']} complete"
        if 'output' in step and step['output']:
            msg += f"\nCheck Output: {step['output']}"
        info = f'{frame}\n{pad} {msg}\n{frame}'
        if 'output' in step and step['output']:
            if not prompt(info):
                sys.exit(0)
        else:
            print(info)

    if configure_backend:
        config_backend()

    info = f'{frame}\n{pad} Process Complete!\n{frame}'
    print(info)


def prompt(msg):
    print(msg)
    return click.confirm('Continue?', default=False)


def config_backend(service='chatgpt'):

    print(f'configuring service: {service}')

    backend_path = os.getenv('KRAGEN_BACKEND_PATH')
    sample_filename = os.getenv('BACKEND_CONFIG_FILENAME')
    openai_api_key = os.getenv('OPENAI_API_KEY')
    embedding_model = os.getenv('OPENAI_EMBEDDING_MODEL')
    # prompt_token_cost = os.getenv('PROMPT_TOKEN_COST')
    # the response_token_cost is currently defined as a different
    # value for multiple models in the config.json file.
    # response_token_cost = os.getenv('RESPONSE_TOKEN_COST')
    gpt_api_version = os.getenv('GPT_API_VERSION')
    gpt_api_base = os.getenv('GPT_API_BASE')
    # weaviate variables
    weaviate_url = os.getenv('WEAVIATE_URL')
    weaviate_api_key = os.getenv('WEAVIATE_API_KEY')

    sample_config = os.path.join(backend_path, sample_filename)
    config_file = os.path.join(backend_path, 'config.json')

    with open(sample_config, 'r') as f:
        data = json.load(f)
    if service not in data:
        print(f'{service} is not a key in {sample_config}')
        return

    if 'api_key' not in data[service]:
        print(f"'api_key' is not a key in {service}: {data[service]}")
        return

    data[service]['api_key'] = openai_api_key

    if embedding_model is not None and 'embedding_id' in data[service]:
        data[service]['embedding_id'] = embedding_model

    if gpt_api_version is not None and 'api_version' in data[service]:
        data[service]['api_version'] = gpt_api_version

    if gpt_api_base is not None and 'api_base' in data[service]:
        data[service]['api_base'] = gpt_api_base

    # set weaviate variables
    if 'weaviate' in data:
        if weaviate_url is not None and 'url' in data['weaviate']:
            data['weaviate']['url'] = weaviate_url
        if weaviate_api_key is not None and 'api_key' in data['weaviate']:
            data['weaviate']['api_key'] = weaviate_api_key

    json_data = json.dumps(data, indent=2)

    with open(config_file, 'w') as outfile:
        outfile.write(json_data)
    print(f'file: {config_file} configured.')
-------------------------------------------------------------------------------- /src/kragen.py: --------------------------------------------------------------------------------
import sys
import os
import convert as convert
from parse import main as parse
from make_vector import main as embed
from addTokenInfo import main as tokenize
from upload import create_class, upload
from k_setup import setup, config_backend
from config import config


def mk_dir(directory):
    os.makedirs(directory, exist_ok=True)


def main():
    command = sys.argv[1]
    # print('command:', command)
    # print("env vars:")
    # for env_var in os.environ.items():
    #     print(env_var)
    # sys.exit(0)
    if command == 'convert':
        if len(sys.argv) != 3:
            print("Usage: docker-compose run kragen convert <input_csv_file>")
            sys.exit(1)
        # check that the input file exists
        input_csv_file = sys.argv[2]
        if not os.path.isfile(input_csv_file):
            # print("File {} does not exist".format(input_csv_file))
            print(f"File {input_csv_file} does not exist")
            sys.exit(1)
        convert.run(config(input_csv_file))
    elif command == 'parse':
        parse()
    elif command == 'vectorize':
        embed()
    elif command == 'tokenize':
        tokenize()
    elif command == 'create_class':
        create_class()
    elif command == 'upload':
        upload()
    elif command == 'config_backend':
        gpt_service = os.getenv('GPT_API_SERVICE')
        config_backend(gpt_service)
    elif command == 'setup':
        if len(sys.argv) != 3:
            print("Usage: docker-compose run kragen setup <input_csv_file>")
            sys.exit(1)
        # check that the input file exists
        input_csv_file = sys.argv[2]
        if not os.path.isfile(input_csv_file):
            print(f"File {input_csv_file} does not exist")
            sys.exit(1)
        setup(input_csv_file)
else: 58 | print("Usage: docker-compose run kragen []") 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /src/make_vector.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pandas as pd 4 | import openai 5 | from dotenv import load_dotenv 6 | # from langchain.embeddings import OpenAIEmbeddings 7 | import numpy as np 8 | from openai import OpenAI 9 | # import tiktoken 10 | import ast 11 | import time 12 | from dask.distributed import Client, as_completed 13 | import dask.dataframe as dd 14 | 15 | import logging 16 | 17 | 18 | # Specify the path to 'kragen.env' file instead of the default '.env' file 19 | kragen_env_path = os.path.join(os.getcwd(), 'config', 'kragen.env') 20 | load_dotenv(kragen_env_path) # This loads the variables from 'kragen.env' 21 | 22 | 23 | input_dir = os.getenv('INPUT_DIR_FOR_EMBEDDING') 24 | print("7-input_dir:", input_dir) 25 | # OUTPUT_DIR_FOR_EMBEDDING 26 | output_dir = os.getenv('OUTPUT_DIR_FOR_EMBEDDING') 27 | print("7-output_dir:", output_dir) 28 | 29 | dotenv_path = os.path.join(os.getcwd(), '.env') 30 | load_dotenv(dotenv_path, override=True) 31 | 32 | openai.api_key = os.getenv('OPENAI_API_KEY') 33 | openai_embedding_model = os.getenv('OPENAI_EMBEDDING_MODEL') 34 | 35 | # Setup logging 36 | logging.basicConfig(level=logging.INFO) 37 | logger = logging.getLogger(__name__) 38 | 39 | def cosine_similarity(a, b): 40 | return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) 41 | 42 | def get_embedding(text, engine="text-embedding-3-small"): 43 | client = OpenAI() 44 | 45 | response = client.embeddings.create( 46 | input=text, 47 | model=engine 48 | ) 49 | 50 | return response.data[0].embedding 51 | 52 | # search through the reviews for a specific product 53 | def search_docs(df, user_query, top_n=3, to_print=False): 54 | 55 | embedding = get_embedding(user_query, engine=openai_embedding_model) 56 | 57 | df['query_embedding'] = df['query_embedding'].apply(ast.literal_eval) 58 | df["similarities"] = df['query_embedding'].apply(lambda x: cosine_similarity(x, embedding)) 59 | 60 | res = ( 61 | df.sort_values("similarities", ascending=False) 62 | .head(top_n) 63 | ) 64 | # if to_print: 65 | # display(res) 66 | return res 67 | 68 | 69 | def extractUsefulInfoFromData(data_origin_converted, query): 70 | print("query:", query) 71 | print("data_origin_converted[train][0]:", data_origin_converted["train"][0]) 72 | 73 | len_data_origin_converted = len(data_origin_converted["train"]) 74 | 75 | # opanai text embedding for query and each in data_origin_converted["train"] 76 | # openai_text_embedding = openai.Completion.create( 77 | 78 | 79 | 80 | 81 | def process_csv_dask(file_path): 82 | try: 83 | df = pd.read_csv(file_path) 84 | df["query_embedding"] = df["query"].apply( 85 | lambda x: get_embedding(x, engine=openai_embedding_model) 86 | ) 87 | df["statement_embedding"] = df["statement"].apply( 88 | lambda x: get_embedding(x, engine=openai_embedding_model) 89 | ) 90 | 91 | # check if the output directory exists 92 | if not os.path.exists(output_dir): 93 | os.makedirs(output_dir, exist_ok=True) 94 | 95 | output_file_path = os.path.join(output_dir, os.path.basename(file_path)) 96 | df.to_csv(output_file_path, index=False) 97 | 98 | return output_file_path 99 | except Exception as e: 100 | logger.error(f"Error processing {file_path}: {e}") 101 | return None 102 | 103 | def process_all_csv_files(directory): 104 | client = 
--------------------------------------------------------------------------------
/src/parse.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | from dotenv import load_dotenv
 4 | 
 5 | # Specify the path to 'kragen.env' file instead of the default '.env' file
 6 | dotenv_path = os.path.join(os.getcwd(), 'config', 'kragen.env')
 7 | load_dotenv(dotenv_path)  # This loads the variables from 'kragen.env'
 8 | 
 9 | # Change: Instead of getting the location of a JSON file, get the location of a CSV file from the environment variables.
10 | input_csv_file_location = os.getenv('INPUT_CSV_FILE_LOCATION')
11 | output_dir = os.getenv('OUTPUT_DIR')
12 | 
13 | def main():
14 |     print("input_csv_file_location:", input_csv_file_location)
15 |     # Load the DataFrame from a CSV file instead of a JSON file
16 |     df = pd.read_csv(input_csv_file_location)
17 | 
18 |     # Create the output directory if it doesn't exist
19 |     if not os.path.exists(output_dir):
20 |         os.makedirs(output_dir)
21 | 
22 |     # Divide the DataFrame and save each part as a CSV file
23 |     for i in range(0, df.shape[0], 100):
24 |         df[i:i+100].to_csv(f'{output_dir}/data_{i//100}.csv', index=False)
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     main()
29 | 
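The chunking rule in parse.py maps rows 0-99 to data_0.csv, rows 100-199 to data_1.csv, and so on. A tiny self-contained sketch of the same slicing arithmetic on toy data (all names here are hypothetical):

import pandas as pd

# Toy frame with 250 rows; the loop mirrors parse.py's main().
df = pd.DataFrame({"statement": [f"fact {n}" for n in range(250)]})

for i in range(0, df.shape[0], 100):
    # rows i..i+99 -> data_{i//100}.csv, i.e. data_0.csv, data_1.csv, data_2.csv
    chunk = df[i:i+100]
    print(f"data_{i//100}.csv:", len(chunk), "rows")

On 250 rows this yields two full 100-row files and one 50-row remainder, which is why the downstream embedding step treats each CSV independently.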
--------------------------------------------------------------------------------
/src/upload.py:
--------------------------------------------------------------------------------
  1 | import weaviate
  2 | import json
  3 | import pandas as pd
  4 | import os
  5 | 
  6 | from dotenv import load_dotenv
  7 | 
  8 | from weaviate.util import generate_uuid5
  9 | 
 10 | dotenv_path = os.path.join(os.getcwd(), '.env')
 11 | load_dotenv(dotenv_path, override=True)
 12 | 
 13 | weaviate_client = None
 14 | weaviate_url = os.getenv('WEAVIATE_URL')
 15 | weaviate_apikey = os.getenv('WEAVIATE_API_KEY')
 16 | input_dir = os.getenv('INPUT_DIR_DB_UPLOAD')
 17 | 
 18 | 
 19 | def get_client(url=weaviate_url):
 20 |     auth_config = weaviate.AuthApiKey(api_key=weaviate_apikey)
 21 |     global weaviate_client
 22 |     if weaviate_client is None:
 23 |         weaviate_client = weaviate.Client(
 24 |             url=url,
 25 |             auth_client_secret=auth_config
 26 |         )
 27 |         # weaviate_client = weaviate.Client(url=url)
 28 |     return weaviate_client
 29 | 
 30 | 
 31 | def get_class(class_name):
 32 |     return get_client().schema.get(class_name)
 33 | 
 34 | 
 35 | # def create_class(class_obj):
 36 | def create_class():
 37 |     class_obj = read_json()
 38 |     print('got class_obj:', class_obj)
 39 |     get_client().schema.create_class(class_obj)
 40 | 
 41 | 
 42 | # def delete_class(class_name):
 43 | #     get_client().schema.delete_class(class_name)
 44 | 
 45 | 
 46 | # def update_class(class_name, class_obj):
 47 | #     get_client().schema.update_config(class_name, class_obj)
 48 | 
 49 | 
 50 | counter = 0
 51 | interval = 50
 52 | 
 53 | def add_object(class_name, obj) -> None:
 54 |     global counter
 55 |     properties = {
 56 |         "knowledge": obj["statement"],
 57 |         "knowledge_num_token": obj["statement_num_token"]
 58 |     }
 59 | 
 60 |     # Convert the string representation of the vector to a list
 61 |     vector = [float(val) for val in obj["statement_embedding"].strip('[]').split(',')]
 62 |     # print("vector:...")
 63 |     # print(vector)
 64 | 
 65 |     get_client().batch.configure(batch_size=200)
 66 |     with get_client().batch as batch:
 67 |         batch.add_data_object(
 68 |             data_object=properties,
 69 |             class_name=class_name,
 70 |             vector=vector,
 71 |             uuid=generate_uuid5(obj["statement"])
 72 |         )
 73 | 
 74 |     counter += 1
 75 |     if counter % interval == 0:
 76 |         print(f'Imported {counter} objects...')
 77 | 
 78 | 
 79 | def batch_load_csv(class_name, csv_filename):
 80 |     with pd.read_csv(
 81 |         csv_filename,
 82 |         usecols=["statement", "statement_embedding", "statement_num_token"],
 83 |         chunksize=100,
 84 |     ) as csv_iterator:
 85 |         for chunk in csv_iterator:
 86 |             for index, row in chunk.iterrows():
 87 |                 add_object(class_name, row)
 88 | 
 89 |     print(f'Finished importing {counter} objects.')
 90 | 
 91 | 
 92 | def process_directory(class_name, dir_path=os.getcwd()):
 93 |     for filename in os.listdir(dir_path):
 94 |         if filename.endswith(".csv"):
 95 |             csv_filepath = os.path.join(dir_path, filename)
 96 |             batch_load_csv(class_name, csv_filepath)
 97 | 
 98 | 
 99 | # def process_upload(upload_type):
100 | def upload():
101 |     # client = get_client()
102 |     class_obj = read_json()
103 |     class_name = class_obj["class"]
104 |     # csv_filename = "knowledge.csv"
105 |     # if upload_type == "csv":
106 |     #     batch_load_csv(class_name, csv_filename)
107 |     process_directory(
108 |         class_name,
109 |         dir_path=input_dir
110 |     )
111 | 
112 | 
113 | def read_json(filename='config/class.json'):
114 |     with open(filename, 'r') as file:
115 |         data = json.load(file)
116 |     return data
117 | 
118 | 
119 | # def main():
120 | #     # read the json file from the filesystem
121 | #     class_obj = read_json()
122 | 
123 | #     class_name = class_obj['class']
124 | 
125 | #     # check if the class already exists
126 | #     existing_class = get_class(class_name)
127 | 
128 | #     print(existing_class)
129 | 
130 | #     # create_class(class_obj)
131 | #     # process_upload()
132 | 
133 | 
134 | # if __name__ == '__main__':
135 | #     main()
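upload.py is driven by config/class.json plus the INPUT_DIR_DB_UPLOAD directory of embedded CSVs. A minimal driver sketch, not part of the repository: it assumes a reachable Weaviate instance configured as in .env.sample, that the target class does not already exist, and that it is run from the repository root with src/ on the import path.

# Hypothetical driver script, under the assumptions stated above.
from upload import create_class, upload, get_class, read_json

create_class()   # create the schema class defined in config/class.json
print(get_class(read_json()["class"]))  # confirm the class exists in Weaviate
upload()         # batch-import every CSV found in INPUT_DIR_DB_UPLOAD

Because add_object derives each UUID with generate_uuid5 from the statement text, re-running upload() on the same data overwrites rather than duplicates objects.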
--------------------------------------------------------------------------------
/test_data/OpenEnded_genes.json:
--------------------------------------------------------------------------------
  1 | [
  2 |   {
  3 |     "question": "What is the name of the gene with symbol FCF1P11?",
  4 |     "answer": "FCF1 pseudogene 11"
  5 |   },
  6 |   {
  7 |     "question": "What is the name of the gene with symbol YEATS2?",
  8 |     "answer": "YEATS domain containing 2"
  9 |   },
 10 |   {
 11 |     "question": "What is the name of the gene with symbol PCDHA13?",
 12 |     "answer": "protocadherin alpha 13"
 13 |   },
 14 |   {
 15 |     "question": "What is the name of the gene with symbol KRT8P42?",
 16 |     "answer": "keratin 8 pseudogene 42"
 17 |   },
 18 |   {
 19 |     "question": "What is the name of the gene with symbol DHX38?",
 20 |     "answer": "DEAH-box helicase 38"
 21 |   },
 22 |   {
 23 |     "question": "What is the name of the gene with symbol MRM3?",
 24 |     "answer": "mitochondrial rRNA methyltransferase 3"
 25 |   },
 26 |   {
 27 |     "question": "What is the name of the gene with symbol NETO1?",
 28 |     "answer": "neuropilin and tolloid like 1"
 29 |   },
 30 |   {
 31 |     "question": "What is the name of the gene with symbol CRADD-AS1?",
 32 |     "answer": "CRADD antisense RNA 1"
 33 |   },
 34 |   {
 35 |     "question": "What is the name of the gene with symbol SCARB2?",
 36 |     "answer": "scavenger receptor class B member 2"
 37 |   },
 38 |   {
 39 |     "question": "What is the name of the gene with symbol SOCS2P1?",
 40 |     "answer": "suppressor of cytokine signaling 2 pseudogene 1"
 41 |   },
 42 |   {
 43 |     "question": "What is the gene symbol of gene peptidylprolyl isomerase H pseudogene 1?",
 44 |     "answer": "PPIHP1"
 45 |   },
 46 |   {
 47 |     "question": "What is the gene symbol of gene heparan sulfate 6-O-sulfotransferase 2?",
 48 |     "answer": "HS6ST2"
 49 |   },
 50 |   {
 51 |     "question": "What is the gene symbol of gene elongin C pseudogene 13?",
 52 |     "answer": "ELOCP13"
 53 |   },
 54 |   {
 55 |     "question": "What is the gene symbol of gene Yes1 associated transcriptional regulator?",
 56 |     "answer": "YAP1"
 57 |   },
 58 |   {
 59 |     "question": "What is the gene symbol of gene mitochondrial ribosomal protein L47?",
 60 |     "answer": "MRPL47"
 61 |   },
 62 |   {
 63 |     "question": "What is the gene symbol of gene immunoglobulin lambda variable 3-15 (pseudogene)?",
 64 |     "answer": "IGLV3-15"
 65 |   },
 66 |   {
 67 |     "question": "What is the gene symbol of gene ubiquitin conjugating enzyme E2 N pseudogene 1?",
 68 |     "answer": "UBE2NP1"
 69 |   },
 70 |   {
 71 |     "question": "What is the gene symbol of gene tetrapeptide repeat homeobox like (pseudogene)?",
 72 |     "answer": "TPRXL"
 73 |   },
 74 |   {
 75 |     "question": "What is the gene symbol of gene mucolipin TRP cation channel 1?",
 76 |     "answer": "MCOLN1"
 77 |   },
 78 |   {
 79 |     "question": "What is the gene symbol of gene IQ motif containing H?",
 80 |     "answer": "IQCH"
 81 |   },
 82 |   {
 83 |     "question": "What is the type of gene LOC126653360?",
 84 |     "answer": "biological-region gene"
 85 |   },
 86 |   {
 87 |     "question": "What is the type of gene LOC127897247?",
 88 |     "answer": "biological-region gene"
 89 |   },
 90 |   {
 91 |     "question": "What is the type of gene IL15RA?",
 92 |     "answer": "protein-coding gene"
 93 |   },
 94 |   {
 95 |     "question": "What is the type of gene LOC127891619?",
 96 |     "answer": "biological-region gene"
 97 |   },
 98 |   {
 99 |     "question": "What is the type of gene LOC127886254?",
100 |     "answer": "biological-region gene"
101 |   },
102 |   {
103 |     "question": "What is the type of gene PABPC3?",
104 |     "answer": "protein-coding gene"
105 |   },
106 |   {
107 |     "question": "What is the type of gene LOC127815850?",
108 |     "answer": "biological-region gene"
109 |   },
110 |   {
111 |     "question": "What is the type of gene ARL2BPP9?",
112 |     "answer": "pseudo gene"
113 |   },
114 |   {
115 |     "question": "What is the type of gene LOC101927664?",
116 |     "answer": "ncRNA gene"
117 |   },
118 |   {
119 |     "question": "What is the type of gene LOC126807504?",
120 |     "answer": "biological-region gene"
121 |   }
122 | ]
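Each file under test_data/ is a JSON array of {question, answer} objects, so one loader covers all nine benchmark files. A small hypothetical scoring sketch (predict_fn is a placeholder for whatever system is being benchmarked; the exact-match comparison is an illustrative choice, not the repository's evaluation protocol):

import json

def load_qa(path):
    # Each test_data file is a JSON array of {"question": ..., "answer": ...} objects.
    with open(path) as f:
        return json.load(f)

def accuracy(path, predict_fn):
    # predict_fn: question string -> answer string (placeholder for the model under test)
    pairs = load_qa(path)
    hits = sum(1 for p in pairs if predict_fn(p["question"]).strip() == p["answer"])
    return hits / len(pairs)

# Example: accuracy("test_data/OpenEnded_genes.json", my_model)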
"question": "What is the gene symbol of gene mucolipin TRP cation channel 1?", 76 | "answer": "MCOLN1" 77 | }, 78 | { 79 | "question": "What is the gene symbol of gene IQ motif containing H?", 80 | "answer": "IQCH" 81 | }, 82 | { 83 | "question": "What is the type of gene LOC126653360?", 84 | "answer": "biological-region gene" 85 | }, 86 | { 87 | "question": "What is the type of gene LOC127897247?", 88 | "answer": "biological-region gene" 89 | }, 90 | { 91 | "question": "What is the type of gene IL15RA?", 92 | "answer": "protein-coding gene" 93 | }, 94 | { 95 | "question": "What is the type of gene LOC127891619?", 96 | "answer": "biological-region gene" 97 | }, 98 | { 99 | "question": "What is the type of gene LOC127886254?", 100 | "answer": "biological-region gene" 101 | }, 102 | { 103 | "question": "What is the type of gene PABPC3?", 104 | "answer": "protein-coding gene" 105 | }, 106 | { 107 | "question": "What is the type of gene LOC127815850?", 108 | "answer": "biological-region gene" 109 | }, 110 | { 111 | "question": "What is the type of gene ARL2BPP9?", 112 | "answer": "pseudo gene" 113 | }, 114 | { 115 | "question": "What is the type of gene LOC101927664?", 116 | "answer": "ncRNA gene" 117 | }, 118 | { 119 | "question": "What is the type of gene LOC126807504?", 120 | "answer": "biological-region gene" 121 | } 122 | ] -------------------------------------------------------------------------------- /test_data/True_or_False_genes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "question": "True or False Question: COA7 is the gene symbol of cytochrome c oxidase assembly factor 7", 4 | "answer": "True" 5 | }, 6 | { 7 | "question": "True or False Question: ZNF462 is the gene symbol of zinc finger protein 462", 8 | "answer": "True" 9 | }, 10 | { 11 | "question": "True or False Question: KCNK10 is the gene symbol of potassium two pore domain channel subfamily K member 10", 12 | "answer": "True" 13 | }, 14 | { 15 | "question": "True or False Question: MAGI2-AS3 is the gene symbol of MAGI2 antisense RNA 3", 16 | "answer": "True" 17 | }, 18 | { 19 | "question": "True or False Question: JKAMP is the gene symbol of JNK1/MAPK8 associated membrane protein", 20 | "answer": "True" 21 | }, 22 | { 23 | "question": "True or False Question: BAAT is the gene symbol of PRELI domain containing 2", 24 | "answer": "False" 25 | }, 26 | { 27 | "question": "True or False Question: PRELID2 is the gene symbol of RNA, 7SL, cytoplasmic 30, pseudogene", 28 | "answer": "False" 29 | }, 30 | { 31 | "question": "True or False Question: RN7SL30P is the gene symbol of inorganic pyrophosphatase 1", 32 | "answer": "False" 33 | }, 34 | { 35 | "question": "True or False Question: PPA1 is the gene symbol of interleukin 12 receptor subunit beta 2", 36 | "answer": "False" 37 | }, 38 | { 39 | "question": "True or False Question: IL12RB2 is the gene symbol of bile acid-CoA:amino acid N-acyltransferase", 40 | "answer": "False" 41 | }, 42 | { 43 | "question": "True or False Question: ADCP1 is a unknown gene", 44 | "answer": "True" 45 | }, 46 | { 47 | "question": "True or False Question: SLC9B1P1 is a pseudo gene", 48 | "answer": "True" 49 | }, 50 | { 51 | "question": "True or False Question: LOC107986921 is a ncRNA gene", 52 | "answer": "True" 53 | }, 54 | { 55 | "question": "True or False Question: LOC127817117 is a biological-region gene", 56 | "answer": "True" 57 | }, 58 | { 59 | "question": "True or False Question: LOC127403461 is a biological-region gene", 60 | "answer": 
"True" 61 | }, 62 | { 63 | "question": "True or False Question: LOC125338463 is a protein-coding gene", 64 | "answer": "False" 65 | }, 66 | { 67 | "question": "True or False Question: MYT1L-AS1 is a protein-coding gene", 68 | "answer": "False" 69 | }, 70 | { 71 | "question": "True or False Question: LOC127898507 is a protein-coding gene", 72 | "answer": "False" 73 | }, 74 | { 75 | "question": "True or False Question: LOC127826008 is a protein-coding gene", 76 | "answer": "False" 77 | }, 78 | { 79 | "question": "True or False Question: RNU6-988P is a protein-coding gene", 80 | "answer": "False" 81 | }, 82 | { 83 | "question": "True or False Question: CIBAR1P1 is not the gene symbol of cytochrome c oxidase subunit 5B pseudogene 7", 84 | "answer": "True" 85 | }, 86 | { 87 | "question": "True or False Question: COX5BP7 is not the gene symbol of RAD21 pseudogene 1", 88 | "answer": "True" 89 | }, 90 | { 91 | "question": "True or False Question: RAD21P1 is not the gene symbol of FKBP prolyl isomerase 4 pseudogene 2", 92 | "answer": "True" 93 | }, 94 | { 95 | "question": "True or False Question: FKBP4P2 is not the gene symbol of carnosine dipeptidase 1", 96 | "answer": "True" 97 | }, 98 | { 99 | "question": "True or False Question: CNDP1 is not the gene symbol of CIBAR1 pseudogene 1", 100 | "answer": "True" 101 | }, 102 | { 103 | "question": "True or False Question: GNG5 is not the gene symbol of G protein subunit gamma 5", 104 | "answer": "False" 105 | }, 106 | { 107 | "question": "True or False Question: LINC02405 is not the gene symbol of long intergenic non-protein coding RNA 2405", 108 | "answer": "False" 109 | }, 110 | { 111 | "question": "True or False Question: CHRNA2 is not the gene symbol of cholinergic receptor nicotinic alpha 2 subunit", 112 | "answer": "False" 113 | }, 114 | { 115 | "question": "True or False Question: CD46P1 is not the gene symbol of CD46 molecule pseudogene 1", 116 | "answer": "False" 117 | }, 118 | { 119 | "question": "True or False Question: FKTN is not the gene symbol of fukutin", 120 | "answer": "False" 121 | }, 122 | { 123 | "question": "True or False Question: MIRLET7A1HG is not a protein-coding gene", 124 | "answer": "True" 125 | }, 126 | { 127 | "question": "True or False Question: LOC127270765 is not a protein-coding gene", 128 | "answer": "True" 129 | }, 130 | { 131 | "question": "True or False Question: LOC129389367 is not a protein-coding gene", 132 | "answer": "True" 133 | }, 134 | { 135 | "question": "True or False Question: LOC127828818 is not a protein-coding gene", 136 | "answer": "True" 137 | }, 138 | { 139 | "question": "True or False Question: LOC127897139 is not a protein-coding gene", 140 | "answer": "True" 141 | }, 142 | { 143 | "question": "True or False Question: LOC127826911 is not a biological-region gene", 144 | "answer": "False" 145 | }, 146 | { 147 | "question": "True or False Question: LOC121392924 is not a biological-region gene", 148 | "answer": "False" 149 | }, 150 | { 151 | "question": "True or False Question: NBPF26 is not a protein-coding gene", 152 | "answer": "False" 153 | }, 154 | { 155 | "question": "True or False Question: LOC127885808 is not a biological-region gene", 156 | "answer": "False" 157 | }, 158 | { 159 | "question": "True or False Question: MIRLET7A2 is not a ncRNA gene", 160 | "answer": "False" 161 | } 162 | ] --------------------------------------------------------------------------------