├── apis
│   ├── guardrails.log
│   ├── .env
│   ├── requirements.txt
│   ├── output_files
│   │   └── Nurses_and_Social_Workers_-_Certificated_Salary_Schedules_-_2022-23.pdf
│   ├── app.py
│   └── scripts
│       └── generators.py
├── example.gif
├── frontend
│   ├── public
│   │   ├── robots.txt
│   │   ├── favicon.ico
│   │   ├── logo192.png
│   │   ├── logo512.png
│   │   ├── manifest.json
│   │   └── index.html
│   ├── src
│   │   ├── index.js
│   │   ├── App.js
│   │   ├── index.css
│   │   ├── App.css
│   │   ├── components
│   │   │   ├── fileUpload
│   │   │   │   ├── fileUpload.css
│   │   │   │   └── fileUpload.jsx
│   │   │   └── dictTable
│   │   │       └── dictTable.jsx
│   │   └── pages
│   │       └── Home.js
│   ├── package.json
│   └── README.md
├── LICENSE
├── README.md
├── logo.svg
└── notebook
    └── example.ipynb
/apis/guardrails.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apis/.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="{your API key here}" -------------------------------------------------------------------------------- /example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightshifted/TablesGPT/HEAD/example.gif -------------------------------------------------------------------------------- /frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightshifted/TablesGPT/HEAD/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightshifted/TablesGPT/HEAD/frontend/public/logo192.png -------------------------------------------------------------------------------- /frontend/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightshifted/TablesGPT/HEAD/frontend/public/logo512.png -------------------------------------------------------------------------------- /apis/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | llama-index 3 | PyPDF2 4 | guardrails-ai 5 | langchain 6 | kor 7 | Werkzeug==2.2.2 8 | flask 9 | python-dotenv 10 | flask-cors -------------------------------------------------------------------------------- /apis/output_files/Nurses_and_Social_Workers_-_Certificated_Salary_Schedules_-_2022-23.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightshifted/TablesGPT/HEAD/apis/output_files/Nurses_and_Social_Workers_-_Certificated_Salary_Schedules_-_2022-23.pdf -------------------------------------------------------------------------------- /frontend/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import './index.css'; 4 | import App from './App'; 5 | 6 | const root = ReactDOM.createRoot(document.getElementById('root')); 7 | root.render( 8 | <React.StrictMode> 9 | <App /> 10 | </React.StrictMode> 11 | ); 12 | 13 | 14 | 
-------------------------------------------------------------------------------- /frontend/src/App.js: -------------------------------------------------------------------------------- 1 | import Home from './pages/Home'; 2 | import { BrowserRouter as Router, Route, Switch } from 'react-router-dom'; 3 | import './App.css'; 4 | 5 | 6 | function App() { 7 | return ( 8 | 9 | 10 | 11 | 12 | 13 | ); 14 | } 15 | 16 | export default App; 17 | -------------------------------------------------------------------------------- /frontend/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 5 | sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 12 | monospace; 13 | } 14 | -------------------------------------------------------------------------------- /frontend/src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-header { 6 | background-color: #282c34; 7 | min-height: 100vh; 8 | display: flex; 9 | flex-direction: column; 10 | align-items: center; 11 | justify-content: center; 12 | font-size: calc(10px + 2vmin); 13 | color: white; 14 | } 15 | 16 | .App-link { 17 | color: #61dafb; 18 | } 19 | 20 | @keyframes App-header-spin { 21 | from { 22 | transform: rotate(0deg); 23 | } 24 | to { 25 | transform: rotate(360deg); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 lightshifted 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pdftotable", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@emotion/react": "^11.10.6", 7 | "@emotion/styled": "^11.10.6", 8 | "@material-ui/core": "^4.12.4", 9 | "@mui/material": "^5.11.16", 10 | "@testing-library/jest-dom": "^5.16.5", 11 | "@testing-library/react": "^13.4.0", 12 | "@testing-library/user-event": "^13.5.0", 13 | "react": "^18.2.0", 14 | "react-data-table-component": "^7.5.3", 15 | "react-dom": "^18.2.0", 16 | "react-pdf": "^6.2.2", 17 | "react-router": "^5.2.1", 18 | "react-router-dom": "^5.3.0", 19 | "react-scripts": "5.0.1", 20 | "styled-components": "^5.3.9", 21 | "web-vitals": "^2.1.4" 22 | }, 23 | "scripts": { 24 | "start": "react-scripts start", 25 | "build": "react-scripts build", 26 | "test": "react-scripts test", 27 | "eject": "react-scripts eject" 28 | }, 29 | "eslintConfig": { 30 | "extends": [ 31 | "react-app", 32 | "react-app/jest" 33 | ] 34 | }, 35 | "browserslist": { 36 | "production": [ 37 | ">0.2%", 38 | "not dead", 39 | "not op_mini all" 40 | ], 41 | "development": [ 42 | "last 1 chrome version", 43 | "last 1 firefox version", 44 | "last 1 safari version" 45 | ] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /frontend/src/components/fileUpload/fileUpload.css: -------------------------------------------------------------------------------- 1 | .file-upload-container { 2 | width: 400px; 3 | height: 250px; 4 | border: 2px dashed #b7b7b7; 5 | border-radius: 10px; 6 | display: flex; 7 | justify-content: center; 8 | align-items: center; 9 | transition: border 0.2s ease-in-out; 10 | } 11 | 12 | .file-upload-container.dragging { 13 | border: 2px dashed #2196f3; 14 | } 15 | 16 | .file-upload-message { 17 | text-align: center; 18 | } 19 | 20 | .drag-drop-message { 21 | margin: 0; 22 | font-size: 20px; 23 | font-weight: bold; 24 | } 25 | 26 | .or-message { 27 | margin: 10px 0; 28 | font-size: 16px; 29 | } 30 | 31 | .browse-link { 32 | display: inline-block; 33 | padding: 8px 16px; 34 | border: 2px solid #2196f3; 35 | border-radius: 5px; 36 | color: #2196f3; 37 | text-decoration: none; 38 | transition: background-color 0.2s ease-in-out, color 0.2s ease-in-out; 39 | cursor: pointer; 40 | } 41 | 42 | .browse-link:hover { 43 | background-color: #2196f3; 44 | color: #fff; 45 | } 46 | 47 | .file-name { 48 | margin: 5px; 49 | font-size: 20px; 50 | font-weight: bold; 51 | } 52 | 53 | .submit-button { 54 | display:inline-block; 55 | margin-top: 10px; 56 | padding: 8px 16px; 57 | border: 2px solid #8BC34A; 58 | border-radius: 5px; 59 | color: #8BC34A; 60 | font-size: 16px; 61 | background-color: #fff; 62 | text-decoration: none; 63 | transition: background-color 0.2s ease-in-out, color 0.2s ease-in-out; 64 | cursor: pointer; 65 | } 66 | 67 | .submit-button:hover { 68 | background-color: #8BC34A; 69 | color: #fff; 70 | } -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 
13 | 17 | 18 | 27 | React App 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /frontend/src/components/dictTable/dictTable.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | import DataTable from 'react-data-table-component'; 3 | 4 | async function fetchData(setTableData, setIsLoading, setError) { 5 | try { 6 | const response = await fetch('http://127.0.0.1:5000/get_json'); 7 | if (!response.ok) { 8 | throw new Error('Network response was not ok'); 9 | } 10 | const data = await response.json(); 11 | setTableData(data); 12 | } catch (error) { 13 | console.error(error); 14 | setError(error); 15 | } finally { 16 | setIsLoading(false); 17 | } 18 | } 19 | 20 | function DictTable({ data: initialData, file }) { 21 | const [tableData, setTableData] = useState([]); 22 | const [isLoading, setIsLoading] = useState(true); 23 | const [error, setError] = useState(null); 24 | 25 | useEffect(() => { 26 | fetchData(setTableData, setIsLoading, setError); 27 | }, []); 28 | 29 | const data = tableData.length > 0 ? tableData : initialData; 30 | const headers = tableData.length > 0 ? Object.keys(tableData[0]) : []; 31 | const columns = headers.map((header) => ({ 32 | name: header, 33 | selector: header, 34 | sortable: true, 35 | })); 36 | 37 | if (isLoading) { 38 | return
<div>Loading...</div>; 39 | } 40 | 41 | if (error) { 42 | return <div>Error: {error.message}</div>; 43 | } 44 | 45 | return ( 46 | <> 47 |
53 | {file.name} 54 |
55 | 64 | 65 | ); 66 | } 67 | 68 | export default DictTable; 69 | -------------------------------------------------------------------------------- /apis/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from flask import ( 3 | Flask, 4 | request, 5 | render_template, 6 | redirect, 7 | send_from_directory, 8 | jsonify 9 | ) 10 | from flask_cors import cross_origin 11 | from werkzeug.utils import secure_filename 12 | import shutil 13 | import dotenv 14 | 15 | 16 | import json 17 | import openai 18 | import glob 19 | import sys 20 | 21 | sys.path.append("./scripts") 22 | from generators import llm_json_generator 23 | 24 | 25 | # load environment variables 26 | try: 27 | dotenv.load_dotenv() 28 | except Exception as e: 29 | print(f"An error occurred while loading environment variables: {e}") 30 | 31 | # check if OpenAI API key is set 32 | if os.getenv("OPENAI_API_KEY") is not None: 33 | api_key_set = True 34 | else: 35 | api_key_set = False 36 | 37 | # routes 38 | app = Flask(__name__) 39 | 40 | @app.route('/upload', methods=['POST']) 41 | @cross_origin() 42 | def upload_file(): 43 | 44 | # empty output_files directory 45 | shutil.rmtree('output_files') 46 | os.mkdir('output_files') 47 | 48 | if 'file' not in request.files: 49 | return jsonify({'error': 'No file part in the request'}), 400 50 | 51 | file = request.files['file'] 52 | if file.filename == '': 53 | return jsonify({'error': 'No file selected for uploading'}), 400 54 | 55 | if file and file.filename.endswith('.pdf'): 56 | filename = secure_filename(file.filename) 57 | file_path = os.path.join('output_files', filename) 58 | file.save(file_path) 59 | 60 | output_list = llm_json_generator() 61 | 62 | with open('./output_files/example.json', 'w') as file: 63 | file.write(json.dumps(output_list)) 64 | 65 | return "200" 66 | 67 | return jsonify({'error': 'Invalid file type, only PDF files are allowed'}), 400 68 | 69 | 70 | @app.route('/get_json', methods=['GET']) 71 | @cross_origin() 72 | def get_json(): 73 | with open('./output_files/example.json', 'r') as file: 74 | return file.read() 75 | 76 | 77 | if __name__ == "__main__": 78 | port = int(os.environ.get("PORT", 5000)) 79 | app.run(debug=True, host='0.0.0.0', port=port) 80 | -------------------------------------------------------------------------------- /frontend/src/pages/Home.js: -------------------------------------------------------------------------------- 1 | import FileUpload from '../components/fileUpload/fileUpload'; 2 | import CssBaseline from '@mui/material/CssBaseline'; 3 | import Grid from '@mui/material/Grid'; 4 | import Paper from '@mui/material/Paper'; 5 | import Box from '@mui/material/Box'; 6 | import { createTheme, ThemeProvider } from '@mui/material/styles'; 7 | 8 | const theme = createTheme(); 9 | 10 | function Home() { 11 | 12 | return ( 13 | 14 | 15 | 16 | 28 | t.palette.mode === 'light' ?
t.palette.grey[50] : t.palette.grey[900], 29 | backgroundSize: 'cover', 30 | backgroundPosition: 'center', 31 | }} 32 | /> 33 | 34 | 41 | 42 | 43 | 44 | 45 | 46 | ); 47 | } 48 | 49 | export default Home; 50 | -------------------------------------------------------------------------------- /frontend/src/components/fileUpload/fileUpload.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import './fileUpload.css'; 3 | import DictTable from '../dictTable/dictTable'; 4 | 5 | const FileUpload = () => { 6 | const [dragging, setDragging] = useState(false); 7 | const [file, setFile] = useState(null); 8 | const [showDictTable, setShowDictTable] = useState(false); 9 | const [errorMessage, setErrorMessage] = useState(null); 10 | const [isLoading, setIsLoading] = useState(false); 11 | 12 | const handleDragEnter = (e) => { 13 | e.preventDefault(); 14 | setDragging(true); 15 | }; 16 | 17 | const handleDragLeave = (e) => { 18 | e.preventDefault(); 19 | setDragging(false); 20 | }; 21 | 22 | const handleDrop = (e) => { 23 | e.preventDefault(); 24 | setDragging(false); 25 | const file = e.dataTransfer.files[0]; 26 | setFile(file) 27 | }; 28 | 29 | const handleSubmit = (event) => { 30 | event.preventDefault(); 31 | setIsLoading(true); 32 | const formData = new FormData(); 33 | formData.append('file', file); 34 | fetch("http://127.0.0.1:5000/upload", { 35 | method: "POST", 36 | body: formData, 37 | }) 38 | .then((response) => response.json()) 39 | .then((data) => { 40 | console.log(data); 41 | setShowDictTable(true); 42 | setErrorMessage(null); // Reset the error message 43 | setIsLoading(false); 44 | }) 45 | .catch((error) => { 46 | console.error(error); 47 | setErrorMessage(error.message); 48 | setIsLoading(false); 49 | }); 50 | }; 51 | 52 | return ( 53 |
54 | {!showDictTable && ( 55 |
62 | 63 |
64 | {file ? ( 65 | <> 66 |

{file.name}

67 | 68 | {isLoading &&

Loading table data...

} 69 | {errorMessage &&

{errorMessage}

} 70 | 71 | ) : ( 72 | <> 73 |

Drag and drop your file here

74 |

or

75 | 78 | setFile(e.target.files[0])} 82 | style={{ display: 'none' }} 83 | /> 84 | 85 | )} 86 |
87 |
88 | )} 89 |
90 | {showDictTable && } 91 |
92 |
93 | ); 94 | }; 95 | 96 | export default FileUpload; 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | TablesGPT 3 |
TablesGPT 4 |

5 | 6 |
7 | 8 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md) 9 | 10 |
11 | 12 |

TablesGPT is a powerful and versatile open-source application that allows users to extract, organize, analyze, and visualize tabular data from files using natural language commands. 13 |
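Under the hood, the current prototype exposes this through a small Flask API (`apis/app.py`). A minimal client-side sketch of that flow, assuming the API is running locally on port 5000 and that the `requests` package is installed (it is not listed in `apis/requirements.txt`):

```python
# upload_example.py -- illustrative client sketch, not part of the repository.
# Assumes the Flask API from apis/app.py is running on http://127.0.0.1:5000
# and that the `requests` package is installed.
import json

import requests

API_BASE = "http://127.0.0.1:5000"


def extract_tables(pdf_path: str) -> list:
    # POST the PDF as multipart form data under the field name "file",
    # exactly as the React fileUpload component does.
    with open(pdf_path, "rb") as fh:
        resp = requests.post(f"{API_BASE}/upload", files={"file": fh})
    resp.raise_for_status()

    # The extracted rows are then served back by the /get_json endpoint.
    return requests.get(f"{API_BASE}/get_json").json()


if __name__ == "__main__":
    rows = extract_tables(
        "Nurses_and_Social_Workers_-_Certificated_Salary_Schedules_-_2022-23.pdf"
    )
    print(json.dumps(rows, indent=2))
```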

14 | 15 |
16 | 17 | 18 | Data Extraction 19 |
20 | 21 | ## Features Roadmap 22 | 23 | 24 | ### Extract 25 | 26 | - [x] PDF Table Extraction: Enable users to efficiently extract structured tabular data from PDF documents. The implementation will be designed to handle varying table formats, with the capability to accurately identify and parse data from both simple and complex table structures.
27 | 28 | ### Export 29 | - [ ] SQL Query Export: Provide users with the ability to generate SQL queries tailored for inserting the generated JSON data into their preferred database system. 30 | 31 | ## Getting Started 32 | ### Setting your OpenAI API Key 33 | 1️⃣ Navigate to the `apis` directory 34 | 35 | 2️⃣ Update the `.env` file with your OpenAI API key. 36 | 37 | ### Starting the React Frontend 38 | 1️⃣ Navigate to the `frontend` directory: 39 | ```bash 40 | $ cd frontend 41 | ``` 42 | 2️⃣ Install the required dependencies (if you haven't already): 43 | ```bash 44 | $ npm install 45 | ``` 46 | 3️⃣Start the React frontend: 47 | ```bash 48 | $ npm start 49 | ``` 50 | 51 | ### Starting the Flask API 52 | 1️⃣ Navigate to the `apis` directory: 53 | ```bash 54 | $ cd apis 55 | ``` 56 | 2️⃣ Install the required dependencies (if you haven't already): 57 | ```bash 58 | $ python -m pip install -r requirements.txt 59 | ``` 60 | 3️⃣ Run the Flask API server: 61 | ```bash 62 | $ python app.py 63 | ``` 64 | 65 | ### Interested in Contributing? 66 | We welcome and appreciate any contributions to our community-driven project. Our goals are to enhance the accessibility and usability of tabular data manipulation, and to make it significantly easier for users with limited technical expertise to interact with and analyze complex datasets. 🚀 67 | 68 | #### Types of Contributions 69 | Discussions, bug reports, issues, and pull requests are all welcome. If you're interested in contributing through pull requests, please follow the guidelines below. 70 | 71 | #### Pull Request Guidelines 72 | 73 | * Review [Issues](https://github.com/lightshifted/TablesGPT/issues) and [Pull Requests](https://github.com/lightshifted/TablesGPT/pulls) before submitting your own. 74 | * Fork the repository and create your branch from the master branch. 75 | * Make sure your code is well-documented, easy to understand, and readable. 76 | * Provide a clear and concise description of the changes made in the pull request. 77 | * Ensure that your pull request has a descriptive title and a reference to the relevant issue(s). 78 | * Be responsive and open to feedback and suggestions from the community. 79 | * Please be patient, as we have a small team at the moment and only one person reviewing the pull requests. 80 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Create React App 2 | 3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm start` 10 | 11 | Runs the app in the development mode.\ 12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser. 13 | 14 | The page will reload when you make changes.\ 15 | You may also see any lint errors in the console. 16 | 17 | ### `npm test` 18 | 19 | Launches the test runner in the interactive watch mode.\ 20 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 21 | 22 | ### `npm run build` 23 | 24 | Builds the app for production to the `build` folder.\ 25 | It correctly bundles React in production mode and optimizes the build for the best performance. 26 | 27 | The build is minified and the filenames include the hashes.\ 28 | Your app is ready to be deployed! 
29 | 30 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 31 | 32 | ### `npm run eject` 33 | 34 | **Note: this is a one-way operation. Once you `eject`, you can't go back!** 35 | 36 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 37 | 38 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own. 39 | 40 | You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it. 41 | 42 | ## Learn More 43 | 44 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 45 | 46 | To learn React, check out the [React documentation](https://reactjs.org/). 47 | 48 | ### Code Splitting 49 | 50 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting) 51 | 52 | ### Analyzing the Bundle Size 53 | 54 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size) 55 | 56 | ### Making a Progressive Web App 57 | 58 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app) 59 | 60 | ### Advanced Configuration 61 | 62 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration) 63 | 64 | ### Deployment 65 | 66 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment) 67 | 68 | ### `npm run build` fails to minify 69 | 70 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify) 71 | -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /apis/scripts/generators.py: -------------------------------------------------------------------------------- 1 | from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader 2 | from llama_index.output_parsers import GuardrailsOutputParser 3 | from llama_index.llm_predictor import StructuredLLMPredictor 4 | from llama_index.prompts.prompts import QuestionAnswerPrompt, 
RefinePrompt 5 | from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL 6 | import json 7 | import dotenv 8 | 9 | 10 | # take environment variable from .env 11 | try: 12 | dotenv.load_dotenv() 13 | except Exception as e: 14 | print(f"Error loading the .env file: {e}") 15 | 16 | def create_llm_predictor(model: str="gpt-3.5-turbo"): 17 | return StructuredLLMPredictor(llm=model) 18 | 19 | def load_documents(input_dir='./output_files'): 20 | return SimpleDirectoryReader(input_dir=input_dir).load_data() 21 | 22 | def create_index(documents): 23 | return GPTSimpleVectorIndex.from_documents(documents) 24 | 25 | def create_output_parser(rail_spec, llm): 26 | return GuardrailsOutputParser.from_rail_string(rail_spec, llm=llm) 27 | 28 | def create_qa_prompt(fmt_qa_tmpl, output_parser): 29 | return QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser) 30 | 31 | def create_refine_prompt(fmt_refine_tmpl, output_parser): 32 | return RefinePrompt(fmt_refine_tmpl, output_parser=output_parser) 33 | 34 | def llm_index_generator( 35 | prompt="What are the values in column 'Pay Scale Area'?", 36 | llm_predictor=None, 37 | documents=None, 38 | index=None, 39 | model="gpt-4", 40 | ): 41 | if not llm_predictor: 42 | llm_predictor = create_llm_predictor(model=model) 43 | if not documents: 44 | documents = load_documents() 45 | if not index: 46 | index = create_index(documents) 47 | 48 | rail_spec = (""" 49 | 50 | 51 | 52 | 53 | 54 | Return your response as a string. 55 | 56 | @xml_prefix_prompt 57 | 58 | {output_schema} 59 | 60 | @json_suffix_prompt_v2_wo_none 61 | 62 | 63 | """) 64 | 65 | output_parser = create_output_parser(rail_spec, llm_predictor.llm) 66 | 67 | fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL) 68 | fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL) 69 | 70 | qa_prompt = create_qa_prompt(fmt_qa_tmpl, output_parser) 71 | refine_prompt = create_refine_prompt(fmt_refine_tmpl, output_parser) 72 | 73 | response = index.query( 74 | prompt, 75 | text_qa_template=qa_prompt, 76 | refine_template=refine_prompt, 77 | ) 78 | 79 | print(response.response) 80 | 81 | return json.loads(response.response)['step_names'] 82 | 83 | 84 | def llm_json_generator(prompt="What is the salary schedule in this document?", llm_predictor=None, documents=None, index=None): 85 | if not llm_predictor: 86 | llm_predictor = create_llm_predictor() 87 | if not documents: 88 | documents = load_documents() 89 | if not index: 90 | index = create_index(documents) 91 | 92 | print(documents) 93 | 94 | output_list = [] 95 | 96 | rail_spec = (""" 97 | 98 | 99 | 100 | 101 | 102 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | Return all numbers in your response in the Float format. Remove the commas from the numeric values since commas are not allowed in JSON numbers. 
111 | @xml_prefix_prompt 112 | 113 | {output_schema} 114 | 115 | @json_suffix_prompt_v2_wo_none 116 | 117 | 118 | """) 119 | 120 | step_names = llm_index_generator(llm_predictor=llm_predictor, documents=documents, index=index) 121 | 122 | for area in step_names: 123 | formatted_spec = rail_spec.format(area=area, output_schema="{output_schema}") 124 | output_parser = create_output_parser(formatted_spec, llm_predictor.llm) 125 | 126 | fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL) 127 | fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL) 128 | 129 | qa_prompt = create_qa_prompt(fmt_qa_tmpl, output_parser) 130 | refine_prompt = create_refine_prompt(fmt_refine_tmpl, output_parser) 131 | 132 | response = index.query( 133 | prompt, 134 | text_qa_template=qa_prompt, 135 | refine_template=refine_prompt, 136 | ) 137 | 138 | try: 139 | output_list.append( 140 | json.loads(response.response)['salary_schedule'] 141 | ) 142 | 143 | except json.JSONDecodeError as e: 144 | print(f"JSONDecodeError: {e}") 145 | output_list.append(response.response) 146 | 147 | 148 | return output_list 149 | -------------------------------------------------------------------------------- /notebook/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/" 9 | }, 10 | "id": "tZqCAfn6L5ib", 11 | "outputId": "ee1188a0-9111-40f5-cad2-4e793f8557b5" 12 | }, 13 | "outputs": [ 14 | { 15 | "name": "stdout", 16 | "output_type": "stream", 17 | "text": [ 18 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 19 | "Collecting openai\n", 20 | " Downloading openai-0.27.4-py3-none-any.whl (70 kB)\n", 21 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.3/70.3 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 22 | "\u001b[?25hRequirement already satisfied: requests>=2.20 in /usr/local/lib/python3.9/dist-packages (from openai) (2.27.1)\n", 23 | "Collecting aiohttp\n", 24 | " Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", 25 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 26 | "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai) (4.65.0)\n", 27 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai) (2022.12.7)\n", 28 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai) (1.26.15)\n", 29 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai) (3.4)\n", 30 | "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai) (2.0.12)\n", 31 | "Collecting yarl<2.0,>=1.0\n", 32 | " Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", 33 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 KB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 34 | "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in 
/usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (22.2.0)\n", 35 | "Collecting async-timeout<5.0,>=4.0.0a3\n", 36 | " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", 37 | "Collecting aiosignal>=1.1.2\n", 38 | " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", 39 | "Collecting multidict<7.0,>=4.5\n", 40 | " Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", 41 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 KB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 42 | "\u001b[?25hCollecting frozenlist>=1.1.1\n", 43 | " Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n", 44 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 KB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 45 | "\u001b[?25hInstalling collected packages: multidict, frozenlist, async-timeout, yarl, aiosignal, aiohttp, openai\n", 46 | "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 frozenlist-1.3.3 multidict-6.0.4 openai-0.27.4 yarl-1.8.2\n", 47 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 48 | "Collecting llama-index\n", 49 | " Downloading llama_index-0.5.9.tar.gz (166 kB)\n", 50 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.2/166.2 KB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 51 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 52 | "Collecting dataclasses_json\n", 53 | " Downloading dataclasses_json-0.5.7-py3-none-any.whl (25 kB)\n", 54 | "Collecting langchain\n", 55 | " Downloading langchain-0.0.134-py3-none-any.whl (510 kB)\n", 56 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.7/510.7 KB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 57 | "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from llama-index) (1.22.4)\n", 58 | "Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /usr/local/lib/python3.9/dist-packages (from llama-index) (8.2.2)\n", 59 | "Requirement already satisfied: openai>=0.26.4 in /usr/local/lib/python3.9/dist-packages (from llama-index) (0.27.4)\n", 60 | "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from llama-index) (1.4.4)\n", 61 | "Collecting tiktoken\n", 62 | " Downloading tiktoken-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", 63 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m53.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 64 | "\u001b[?25hRequirement already satisfied: requests>=2.20 in /usr/local/lib/python3.9/dist-packages (from openai>=0.26.4->llama-index) (2.27.1)\n", 65 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai>=0.26.4->llama-index) (4.65.0)\n", 66 | "Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from openai>=0.26.4->llama-index) (3.8.4)\n", 67 | "Collecting marshmallow-enum<2.0.0,>=1.5.1\n", 68 | " Downloading marshmallow_enum-1.5.1-py2.py3-none-any.whl (4.2 kB)\n", 69 | "Collecting marshmallow<4.0.0,>=3.3.0\n", 70 | " Downloading marshmallow-3.19.0-py3-none-any.whl (49 kB)\n", 71 | 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.1/49.1 KB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 72 | "\u001b[?25hCollecting typing-inspect>=0.4.0\n", 73 | " Downloading typing_inspect-0.8.0-py3-none-any.whl (8.7 kB)\n", 74 | "Requirement already satisfied: pydantic<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain->llama-index) (1.10.7)\n", 75 | "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.9/dist-packages (from langchain->llama-index) (4.0.2)\n", 76 | "Requirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.9/dist-packages (from langchain->llama-index) (6.0)\n", 77 | "Requirement already satisfied: SQLAlchemy<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain->llama-index) (1.4.47)\n", 78 | "Collecting openapi-schema-pydantic<2.0,>=1.2\n", 79 | " Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)\n", 80 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 KB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 81 | "\u001b[?25hRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->llama-index) (2022.7.1)\n", 82 | "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->llama-index) (2.8.2)\n", 83 | "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.9/dist-packages (from tiktoken->llama-index) (2022.10.31)\n", 84 | "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (2.0.12)\n", 85 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (1.3.3)\n", 86 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (1.8.2)\n", 87 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (1.3.1)\n", 88 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (6.0.4)\n", 89 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai>=0.26.4->llama-index) (22.2.0)\n", 90 | "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.9/dist-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses_json->llama-index) (23.0)\n", 91 | "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic<2,>=1->langchain->llama-index) (4.5.0)\n", 92 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas->llama-index) (1.16.0)\n", 93 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai>=0.26.4->llama-index) (1.26.15)\n", 94 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai>=0.26.4->llama-index) (2022.12.7)\n", 95 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai>=0.26.4->llama-index) (3.4)\n", 96 | "Requirement already satisfied: greenlet!=0.4.17 in 
/usr/local/lib/python3.9/dist-packages (from SQLAlchemy<2,>=1->langchain->llama-index) (2.0.2)\n", 97 | "Collecting mypy-extensions>=0.3.0\n", 98 | " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", 99 | "Building wheels for collected packages: llama-index\n", 100 | " Building wheel for llama-index (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 101 | " Created wheel for llama-index: filename=llama_index-0.5.9-py3-none-any.whl size=248723 sha256=522deb04a157adfec1e610705ff9a3120f7fa537b86590b750f557fd5a7b47f7\n", 102 | " Stored in directory: /root/.cache/pip/wheels/8d/85/5d/cb632e565ac6b0394b211bf311b3819fb96b4640cdf3cf172f\n", 103 | "Successfully built llama-index\n", 104 | "Installing collected packages: mypy-extensions, marshmallow, typing-inspect, tiktoken, openapi-schema-pydantic, marshmallow-enum, dataclasses_json, langchain, llama-index\n", 105 | "Successfully installed dataclasses_json-0.5.7 langchain-0.0.134 llama-index-0.5.9 marshmallow-3.19.0 marshmallow-enum-1.5.1 mypy-extensions-1.0.0 openapi-schema-pydantic-1.2.4 tiktoken-0.3.3 typing-inspect-0.8.0\n", 106 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 107 | "Collecting PyPDF2\n", 108 | " Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", 109 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 KB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 110 | "\u001b[?25hRequirement already satisfied: typing_extensions>=3.10.0.0 in /usr/local/lib/python3.9/dist-packages (from PyPDF2) (4.5.0)\n", 111 | "Installing collected packages: PyPDF2\n", 112 | "Successfully installed PyPDF2-3.0.1\n", 113 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 114 | "Collecting guardrails-ai\n", 115 | " Downloading guardrails_ai-0.1.5-py2.py3-none-any.whl (46 kB)\n", 116 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.1/46.1 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 117 | "\u001b[?25hRequirement already satisfied: openai in /usr/local/lib/python3.9/dist-packages (from guardrails-ai) (0.27.4)\n", 118 | "Requirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (from guardrails-ai) (1.10.7)\n", 119 | "Requirement already satisfied: lxml in /usr/local/lib/python3.9/dist-packages (from guardrails-ai) (4.9.2)\n", 120 | "Requirement already satisfied: rich in /usr/local/lib/python3.9/dist-packages (from guardrails-ai) (13.3.3)\n", 121 | "Collecting eliot\n", 122 | " Downloading eliot-1.14.0-py2.py3-none-any.whl (114 kB)\n", 123 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.8/114.8 KB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 124 | "\u001b[?25hCollecting eliot-tree\n", 125 | " Downloading eliot_tree-21.0.0-py3-none-any.whl (40 kB)\n", 126 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.1/40.1 KB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 127 | "\u001b[?25hRequirement already satisfied: pyrsistent>=0.11.8 in /usr/local/lib/python3.9/dist-packages (from eliot->guardrails-ai) (0.19.3)\n", 128 | "Collecting boltons>=19.0.1\n", 129 | " Downloading boltons-23.0.0-py2.py3-none-any.whl (194 kB)\n", 130 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 KB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", 131 | "\u001b[?25hCollecting zope.interface\n", 132 | " Downloading zope.interface-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (246 kB)\n", 133 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m246.1/246.1 KB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 134 | "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.9/dist-packages (from eliot->guardrails-ai) (1.16.0)\n", 135 | "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.9/dist-packages (from eliot-tree->guardrails-ai) (0.12.0)\n", 136 | "Collecting colored>=1.4.2\n", 137 | " Downloading colored-1.4.4.tar.gz (36 kB)\n", 138 | " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 139 | "Collecting jmespath>=0.7.1\n", 140 | " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", 141 | "Collecting iso8601>=0.1.10\n", 142 | " Downloading iso8601-1.1.0-py3-none-any.whl (9.9 kB)\n", 143 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai->guardrails-ai) (4.65.0)\n", 144 | "Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from openai->guardrails-ai) (3.8.4)\n", 145 | "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.9/dist-packages (from openai->guardrails-ai) (2.27.1)\n", 146 | "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic->guardrails-ai) (4.5.0)\n", 147 | "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.9/dist-packages (from rich->guardrails-ai) (2.14.0)\n", 148 | "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.9/dist-packages (from rich->guardrails-ai) (2.2.0)\n", 149 | "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.9/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->guardrails-ai) (0.1.2)\n", 150 | "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai->guardrails-ai) (2.0.12)\n", 151 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai->guardrails-ai) (2022.12.7)\n", 152 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai->guardrails-ai) (1.26.15)\n", 153 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.20->openai->guardrails-ai) (3.4)\n", 154 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) (1.8.2)\n", 155 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) (22.2.0)\n", 156 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) (6.0.4)\n", 157 | "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) (4.0.2)\n", 158 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) (1.3.3)\n", 159 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai->guardrails-ai) 
(1.3.1)\n", 160 | "Requirement already satisfied: setuptools in /usr/local/lib/python3.9/dist-packages (from zope.interface->eliot->guardrails-ai) (67.6.1)\n", 161 | "Building wheels for collected packages: colored\n", 162 | " Building wheel for colored (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 163 | " Created wheel for colored: filename=colored-1.4.4-py3-none-any.whl size=14268 sha256=13115acc1933e01bec33b7a0d5fe7014dea513fb4df88626954643358f293c77\n", 164 | " Stored in directory: /root/.cache/pip/wheels/7a/f5/27/54cfa98930f018369067d8d02e508e053b1fec3704c258916b\n", 165 | "Successfully built colored\n", 166 | "Installing collected packages: colored, boltons, zope.interface, jmespath, iso8601, eliot, eliot-tree, guardrails-ai\n", 167 | "Successfully installed boltons-23.0.0 colored-1.4.4 eliot-1.14.0 eliot-tree-21.0.0 guardrails-ai-0.1.5 iso8601-1.1.0 jmespath-1.0.1 zope.interface-6.0\n", 168 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 169 | "Requirement already satisfied: langchain in /usr/local/lib/python3.9/dist-packages (0.0.134)\n", 170 | "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.9/dist-packages (from langchain) (4.0.2)\n", 171 | "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /usr/local/lib/python3.9/dist-packages (from langchain) (0.5.7)\n", 172 | "Requirement already satisfied: SQLAlchemy<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain) (1.4.47)\n", 173 | "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain) (1.22.4)\n", 174 | "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.9/dist-packages (from langchain) (2.27.1)\n", 175 | "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /usr/local/lib/python3.9/dist-packages (from langchain) (1.2.4)\n", 176 | "Requirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.9/dist-packages (from langchain) (6.0)\n", 177 | "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.9/dist-packages (from langchain) (3.8.4)\n", 178 | "Requirement already satisfied: pydantic<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain) (1.10.7)\n", 179 | "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.9/dist-packages (from langchain) (8.2.2)\n", 180 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", 181 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.2.0)\n", 182 | "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.0.12)\n", 183 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n", 184 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.8.2)\n", 185 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", 186 | "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (1.5.1)\n", 187 | "Requirement already satisfied: 
marshmallow<4.0.0,>=3.3.0 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (3.19.0)\n", 188 | "Requirement already satisfied: typing-inspect>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (0.8.0)\n", 189 | "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic<2,>=1->langchain) (4.5.0)\n", 190 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain) (2022.12.7)\n", 191 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain) (1.26.15)\n", 192 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain) (3.4)\n", 193 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.9/dist-packages (from SQLAlchemy<2,>=1->langchain) (2.0.2)\n", 194 | "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.9/dist-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (23.0)\n", 195 | "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (1.0.0)\n", 196 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 197 | "Collecting kor\n", 198 | " Downloading kor-0.7.0-py3-none-any.whl (24 kB)\n", 199 | "Requirement already satisfied: langchain>=0.0.110 in /usr/local/lib/python3.9/dist-packages (from kor) (0.0.134)\n", 200 | "Requirement already satisfied: openai<0.28,>=0.27 in /usr/local/lib/python3.9/dist-packages (from kor) (0.27.4)\n", 201 | "Collecting pandas<2.0.0,>=1.5.3\n", 202 | " Downloading pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)\n", 203 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.2/12.2 MB\u001b[0m \u001b[31m83.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 204 | "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (1.22.4)\n", 205 | "Requirement already satisfied: pydantic<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (1.10.7)\n", 206 | "Requirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (6.0)\n", 207 | "Requirement already satisfied: SQLAlchemy<2,>=1 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (1.4.47)\n", 208 | "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (4.0.2)\n", 209 | "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (3.8.4)\n", 210 | "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (1.2.4)\n", 211 | "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (8.2.2)\n", 212 | "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (2.27.1)\n", 213 | "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in 
/usr/local/lib/python3.9/dist-packages (from langchain>=0.0.110->kor) (0.5.7)\n", 214 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai<0.28,>=0.27->kor) (4.65.0)\n", 215 | "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas<2.0.0,>=1.5.3->kor) (2.8.2)\n", 216 | "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas<2.0.0,>=1.5.3->kor) (2022.7.1)\n", 217 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (22.2.0)\n", 218 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.3.1)\n", 219 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (6.0.4)\n", 220 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.3.3)\n", 221 | "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (2.0.12)\n", 222 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.8.2)\n", 223 | "Requirement already satisfied: marshmallow<4.0.0,>=3.3.0 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (3.19.0)\n", 224 | "Requirement already satisfied: typing-inspect>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (0.8.0)\n", 225 | "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /usr/local/lib/python3.9/dist-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (1.5.1)\n", 226 | "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic<2,>=1->langchain>=0.0.110->kor) (4.5.0)\n", 227 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas<2.0.0,>=1.5.3->kor) (1.16.0)\n", 228 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain>=0.0.110->kor) (1.26.15)\n", 229 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain>=0.0.110->kor) (3.4)\n", 230 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests<3,>=2->langchain>=0.0.110->kor) (2022.12.7)\n", 231 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.9/dist-packages (from SQLAlchemy<2,>=1->langchain>=0.0.110->kor) (2.0.2)\n", 232 | "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.9/dist-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (23.0)\n", 233 | "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (1.0.0)\n", 234 | "Installing collected packages: pandas, kor\n", 235 | " Attempting uninstall: pandas\n", 236 | " Found existing installation: pandas 1.4.4\n", 237 | " 
Uninstalling pandas-1.4.4:\n", 238 | " Successfully uninstalled pandas-1.4.4\n", 239 | "Successfully installed kor-0.7.0 pandas-1.5.3\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "!pip install openai\n", 245 | "!pip install llama-index\n", 246 | "!pip install PyPDF2\n", 247 | "!pip install guardrails-ai\n", 248 | "!pip install langchain\n", 249 | "!pip install kor" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": { 255 | "id": "o1XKAFM_vXkR" 256 | }, 257 | "source": [ 258 | "## LLM Creates Rail Spec" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "id": "Gl_ttb82825s" 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader\n", 270 | "from llama_index.output_parsers import GuardrailsOutputParser\n", 271 | "from llama_index.llm_predictor import StructuredLLMPredictor\n", 272 | "from llama_index.prompts.prompts import QuestionAnswerPrompt, RefinePrompt\n", 273 | "from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL\n", 274 | "\n", 275 | "import json\n", 276 | "import os\n", 277 | "\n", 278 | "# Set your OpenAI API key\n", 279 | "os.environ[\"OPENAI_API_KEY\"] = \"{your key here}\" # place your key here" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "id": "DBqLNrYe1dPD" 286 | }, 287 | "source": [ 288 | "I first use an LLM to extract the values of column 'Pay Scale Area (Verifiable Years of Service). The idea is to use these values as an index our LLM can use to extract information from each row. To ensure LLM outputs a list object, I use Guardrails." 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "colab": { 296 | "base_uri": "https://localhost:8080/" 297 | }, 298 | "id": "vbUTnebmvkzH", 299 | "outputId": "4f6cc9bf-4e1f-4afd-b8b6-abff55c6dbab" 300 | }, 301 | "outputs": [ 302 | { 303 | "name": "stderr", 304 | "output_type": "stream", 305 | "text": [ 306 | "/usr/local/lib/python3.9/dist-packages/guardrails/schema.py:198: UserWarning: Validator list is not valid for element string.\n", 307 | " warnings.warn(\n" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "llm_predictor = StructuredLLMPredictor()\n", 313 | "documents = SimpleDirectoryReader(input_dir='/content').load_data()\n", 314 | "index = GPTSimpleVectorIndex.from_documents(documents)\n", 315 | "\n", 316 | "rail_spec = (\"\"\"\n", 317 | "\n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | "\n", 322 | " \n", 323 | " Return your response as a string. 
\n", 324 | "\n", 325 | " @xml_prefix_prompt\n", 326 | "\n", 327 | " {output_schema}\n", 328 | "\n", 329 | " @json_suffix_prompt_v2_wo_none\n", 330 | " \n", 331 | "\n", 332 | "\"\"\")\n", 333 | "\n", 334 | "output_parser = GuardrailsOutputParser.from_rail_string(rail_spec, llm=llm_predictor.llm)\n", 335 | "\n", 336 | "fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n", 337 | "fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)\n", 338 | "\n", 339 | "qa_prompt = QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser)\n", 340 | "refine_prompt = RefinePrompt(fmt_refine_tmpl, output_parser=output_parser)\n", 341 | "\n", 342 | "response = index.query(\n", 343 | " \"What are the values in column 'Pay Scale Area'?\",\n", 344 | " text_qa_template=qa_prompt,\n", 345 | " refine_template=refine_prompt,\n", 346 | " )\n", 347 | "\n", 348 | "step_names = json.loads(response.response)['step_names']" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": { 354 | "id": "5q44bIPl2OwK" 355 | }, 356 | "source": [ 357 | "The output is a list we will iterate through to generate variations of rail_spec; one spec for each row." 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "colab": { 365 | "base_uri": "https://localhost:8080/" 366 | }, 367 | "id": "e1idUebi2fFp", 368 | "outputId": "e2c56ba8-fa2a-4631-9fbe-8b5a666577d2" 369 | }, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', 'Career Increment A', 'Career Increment B', 'Career Increment C', 'Career Increment D', 'Career Increment E']\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "print(step_names)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "id": "qBHtzNXCvbxA" 387 | }, 388 | "source": [ 389 | "## LLM Outputs JSON" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "ye5bLZo02-9y" 396 | }, 397 | "source": [ 398 | "We next create a rail specification for LLM to output JSON objects." 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": { 405 | "id": "VpdChCo93ZEq" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "rail_spec = (\"\"\"\n", 410 | "\n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | "\n", 422 | " \n", 423 | " Return all numbers in your response in the Float format. 
Remove the commas from the numeric values since commas are not allowed in JSON numbers.\n", 424 | " Each row should be returned only once.\n", 425 | "\n", 426 | " @xml_prefix_prompt\n", 427 | "\n", 428 | " {output_schema}\n", 429 | "\n", 430 | " @json_suffix_prompt_v2_wo_none\n", 431 | " \n", 432 | "\n", 433 | "\"\"\")" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": { 440 | "colab": { 441 | "base_uri": "https://localhost:8080/" 442 | }, 443 | "id": "08hP-Yz2XvNn", 444 | "outputId": "bb20b485-dc67-4d2f-ddc1-9d1b567fc790" 445 | }, 446 | "outputs": [ 447 | { 448 | "name": "stderr", 449 | "output_type": "stream", 450 | "text": [ 451 | "/usr/local/lib/python3.9/dist-packages/guardrails/schema.py:198: UserWarning: Validator max-len is not valid for element string.\n", 452 | " warnings.warn(\n", 453 | "/usr/local/lib/python3.9/dist-packages/guardrails/schema.py:198: UserWarning: Validator float is not valid for element float.\n", 454 | " warnings.warn(\n" 455 | ] 456 | }, 457 | { 458 | "name": "stdout", 459 | "output_type": "stream", 460 | "text": [ 461 | "\n", 462 | "{\n", 463 | " \"salary_schedule\": {\n", 464 | " \"pay_scale_area\": 1.0,\n", 465 | " \"base_salary\": 61159.88,\n", 466 | " \"qtea_addon\": 4885.0,\n", 467 | " \"fwea_addon\": 3884.0,\n", 468 | " \"total_annual_salary\": 69928.88,\n", 469 | " \"per_diem\": 380.05\n", 470 | " }\n", 471 | "}\n", 472 | "\n", 473 | "{\n", 474 | " \"salary_schedule\": {\n", 475 | " \"pay_scale_area\": 2.0,\n", 476 | " \"base_salary\": 64764.94,\n", 477 | " \"qtea_addon\": 3724.0,\n", 478 | " \"fwea_addon\": 4112.0,\n", 479 | " \"total_annual_salary\": 72600.94,\n", 480 | " \"per_diem\": 394.57\n", 481 | " }\n", 482 | "}\n", 483 | "\n", 484 | "{\n", 485 | " \"salary_schedule\": {\n", 486 | " \"pay_scale_area\": 3.0,\n", 487 | " \"base_salary\": 68373.18,\n", 488 | " \"qtea_addon\": 3962.0,\n", 489 | " \"fwea_addon\": 4342.0,\n", 490 | " \"total_annual_salary\": 76677.18,\n", 491 | " \"per_diem\": 416.72\n", 492 | " }\n", 493 | "}\n", 494 | "\n", 495 | "{\n", 496 | " \"salary_schedule\": {\n", 497 | " \"pay_scale_area\": 4.0,\n", 498 | " \"base_salary\": 71980.36,\n", 499 | " \"qtea_addon\": 4022.0,\n", 500 | " \"fwea_addon\": 4571.0,\n", 501 | " \"total_annual_salary\": 80573.36,\n", 502 | " \"per_diem\": 437.9\n", 503 | " }\n", 504 | "}\n", 505 | "\n", 506 | "{\n", 507 | " \"salary_schedule\": {\n", 508 | " \"pay_scale_area\": 5.0,\n", 509 | " \"base_salary\": 75585.42,\n", 510 | " \"qtea_addon\": 4042.0,\n", 511 | " \"fwea_addon\": 4799.0,\n", 512 | " \"total_annual_salary\": 84426.42,\n", 513 | " \"per_diem\": 458.84\n", 514 | " }\n", 515 | "}\n", 516 | "\n", 517 | "{\n", 518 | " \"salary_schedule\": {\n", 519 | " \"pay_scale_area\": 6.0,\n", 520 | " \"base_salary\": 79194.72,\n", 521 | " \"qtea_addon\": 4710.0,\n", 522 | " \"fwea_addon\": 5029.0,\n", 523 | " \"total_annual_salary\": 88933.72,\n", 524 | " \"per_diem\": 483.34\n", 525 | " }\n", 526 | "}\n", 527 | "\n", 528 | "{\n", 529 | " \"salary_schedule\": {\n", 530 | " \"pay_scale_area\": 7.0,\n", 531 | " \"base_salary\": 82.80084,\n", 532 | " \"qtea_addon\": 3.655,\n", 533 | " \"fwea_addon\": 5.258,\n", 534 | " \"total_annual_salary\": 91.71384,\n", 535 | " \"per_diem\": 498.44\n", 536 | " }\n", 537 | "}\n", 538 | "\n", 539 | "{\n", 540 | " \"salary_schedule\": {\n", 541 | " \"pay_scale_area\": 8.0,\n", 542 | " \"base_salary\": 86405.9,\n", 543 | " \"qtea_addon\": 2601.0,\n", 544 | " \"fwea_addon\": 5487.0,\n", 545 | " \"total_annual_salary\": 
94493.9,\n", 546 | " \"per_diem\": 513.55\n", 547 | " }\n", 548 | "}\n", 549 | "\n", 550 | "{\n", 551 | " \"salary_schedule\": {\n", 552 | " \"pay_scale_area\": 9.0,\n", 553 | " \"base_salary\": 90015.2,\n", 554 | " \"qtea_addon\": 1544.0,\n", 555 | " \"fwea_addon\": 5716.0,\n", 556 | " \"total_annual_salary\": 97275.2,\n", 557 | " \"per_diem\": 528.67\n", 558 | " }\n", 559 | "}\n", 560 | "\n", 561 | "{\n", 562 | " \"salary_schedule\": {\n", 563 | " \"pay_scale_area\": 10,\n", 564 | " \"base_salary\": 93015.2,\n", 565 | " \"qtea_addon\": 1544,\n", 566 | " \"fwea_addon\": 5716,\n", 567 | " \"total_annual_salary\": 97275.2,\n", 568 | " \"per_diem\": 528.67\n", 569 | " }\n", 570 | "}\n", 571 | "\n", 572 | "{\n", 573 | " \"salary_schedule\": {\n", 574 | " \"pay_scale_area\": 11,\n", 575 | " \"base_salary\": 97226.38,\n", 576 | " \"qtea_addon\": 2252.0,\n", 577 | " \"fwea_addon\": 6174.0,\n", 578 | " \"total_annual_salary\": 105652.38,\n", 579 | " \"per_diem\": 574.2\n", 580 | " }\n", 581 | "}\n", 582 | "\n", 583 | "{\n", 584 | " \"salary_schedule\": {\n", 585 | " \"pay_scale_area\": 12.0,\n", 586 | " \"base_salary\": 97226.38,\n", 587 | " \"qtea_addon\": 2252.0,\n", 588 | " \"fwea_addon\": 6174.0,\n", 589 | " \"total_annual_salary\": 105652.38,\n", 590 | " \"per_diem\": 574.2\n", 591 | " }\n", 592 | "}\n", 593 | "\n", 594 | "{\n", 595 | " \"salary_schedule\": {\n", 596 | " \"pay_scale_area\": 13,\n", 597 | " \"base_salary\": 97226.38,\n", 598 | " \"qtea_addon\": 2252.0,\n", 599 | " \"fwea_addon\": 6174.0,\n", 600 | " \"total_annual_salary\": 105652.38,\n", 601 | " \"per_diem\": 574.2\n", 602 | " }\n", 603 | "}\n", 604 | "\n", 605 | "{\n", 606 | " \"salary_schedule\": {\n", 607 | " \"pay_scale_area\": 1.0,\n", 608 | " \"base_salary\": 61159.88,\n", 609 | " \"qtea_addon\": 4885.0,\n", 610 | " \"fwea_addon\": 3884.0,\n", 611 | " \"total_annual_salary\": 69928.88,\n", 612 | " \"per_diem\": 380.05\n", 613 | " }\n", 614 | "}\n", 615 | "\n", 616 | "{\n", 617 | " \"salary_schedule\": {\n", 618 | " \"pay_scale_area\": \"Career Increment B\",\n", 619 | " \"base_salary\": 104017.8,\n", 620 | " \"qtea_addon\": 2056.0,\n", 621 | " \"fwea_addon\": 6605.0,\n", 622 | " \"total_annual_salary\": 112678.8,\n", 623 | " \"per_diem\": 612.38\n", 624 | " }\n", 625 | "}\n", 626 | "\n", 627 | "{\n", 628 | " \"salary_schedule\": {\n", 629 | " \"pay_scale_area\": 12.0,\n", 630 | " \"base_salary\": 97226.38,\n", 631 | " \"qtea_addon\": 2252.0,\n", 632 | " \"fwea_addon\": 6174.0,\n", 633 | " \"total_annual_salary\": 105652.38,\n", 634 | " \"per_diem\": 574.2\n", 635 | " }\n", 636 | "}\n", 637 | "\n", 638 | "{\n", 639 | " \"salary_schedule\": {\n", 640 | " \"pay_scale_area\": 12.0,\n", 641 | " \"base_salary\": 97226.38,\n", 642 | " \"qtea_addon\": 2252.0,\n", 643 | " \"fwea_addon\": 6174.0,\n", 644 | " \"total_annual_salary\": 105652.38,\n", 645 | " \"per_diem\": 574.2\n", 646 | " }\n", 647 | "}\n", 648 | "\n", 649 | "{\n", 650 | " \"salary_schedule\": {\n", 651 | " \"pay_scale_area\": \"Career Increment E\",\n", 652 | " \"base_salary\": 114203.34,\n", 653 | " \"qtea_addon\": 3494.00,\n", 654 | " \"fwea_addon\": 7252.00,\n", 655 | " \"total_annual_salary\": 124949.34,\n", 656 | " \"per_diem\": 679.07\n", 657 | " }\n", 658 | "}\n" 659 | ] 660 | } 661 | ], 662 | "source": [ 663 | "output_dict = {}\n", 664 | "output_list = []\n", 665 | "\n", 666 | "for area in step_names:\n", 667 | " formatted_spec = rail_spec.format(area=area, output_schema=\"{output_schema}\")\n", 668 | " output_parser = 
GuardrailsOutputParser.from_rail_string(formatted_spec, llm=llm_predictor.llm)\n", 669 | "\n", 670 | " fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n", 671 | " fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)\n", 672 | "\n", 673 | " qa_prompt = QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser)\n", 674 | " refine_prompt = RefinePrompt(fmt_refine_tmpl, output_parser=output_parser)\n", 675 | "\n", 676 | " response = index.query(\n", 677 | " \"What is the salary schedule in this document?\",\n", 678 | " text_qa_template=qa_prompt,\n", 679 | " refine_template=refine_prompt,\n", 680 | " )\n", 681 | " \n", 682 | " try:\n", 683 | " output_list.append(\n", 684 | " json.loads(response.response)['salary_schedule']\n", 685 | " )\n", 686 | " print(response)\n", 687 | "\n", 688 | " except json.JSONDecodeError as e:\n", 689 | " print(f\"JSONDecodeError: {e}\")\n", 690 | " output_list.append(response.response)\n", 691 | " \n", 692 | "with open('example.json', 'w') as file:\n", 693 | " file.write(json.dumps(output_list))" 694 | ] 695 | } 696 | ], 697 | "metadata": { 698 | "colab": { 699 | "provenance": [] 700 | }, 701 | "kernelspec": { 702 | "display_name": "Python 3", 703 | "name": "python3" 704 | }, 705 | "language_info": { 706 | "name": "python" 707 | } 708 | }, 709 | "nbformat": 4, 710 | "nbformat_minor": 0 711 | } 712 | --------------------------------------------------------------------------------
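A quick way to sanity-check the table the notebook writes to example.json is to load it back into pandas and confirm that base_salary, qtea_addon, and fwea_addon sum to total_annual_salary for every row, and that no row was emitted twice. The snippet below is only a minimal sketch: it assumes example.json sits in the current working directory, that every entry was parsed into a dict with the field names shown in the notebook output, and that pandas is installed.

import json

import pandas as pd

# Load the rows written by the notebook's final cell.
# Assumption: every entry is a dict (no raw strings left over from a JSONDecodeError fallback).
with open("example.json") as f:
    rows = json.load(f)

df = pd.DataFrame(rows)

# Coerce every column to numbers; non-numeric values such as "Career Increment B"
# become NaN and are simply skipped by the consistency check below.
numeric = df.apply(pd.to_numeric, errors="coerce")

# The base salary plus the two add-ons should reproduce the reported total.
expected_total = numeric["base_salary"] + numeric["qtea_addon"] + numeric["fwea_addon"]
mismatch = (expected_total - numeric["total_annual_salary"]).abs() > 0.01

print("Rows whose components do not sum to the reported total:")
print(df[mismatch])

print("Rows after dropping exact duplicates:")
print(df.drop_duplicates())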