├── .gitignore ├── LICENSE ├── README.md ├── example.ipynb ├── pip_library_etl ├── __init__.py └── main.py ├── requirements.txt ├── setup.py └── version.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | dist/ 10 | egg-info/ 11 | *.egg-info/ 12 | *.egg 13 | *.whl 14 | 15 | # Installer logs 16 | pip-log.txt 17 | pip-delete-this-directory.txt 18 | 19 | # Virtual environments 20 | venv/ 21 | env/ 22 | ENV/ 23 | 24 | # IDE files 25 | .vscode/ 26 | .idea/ 27 | *.sublime-project 28 | *.sublime-workspace 29 | 30 | # Compiled Python files 31 | *.pyc 32 | *.pyo 33 | *.pyd 34 | 35 | # macOS 36 | .DS_Store 37 | 38 | # Windows 39 | Thumbs.db 40 | 41 | # Jupyter Notebook temporary files 42 | .ipynb_checkpoints/ 43 | 44 | # Package logs 45 | logs/ 46 | 47 | # Ignore any local development settings 48 | *.env 49 | 50 | # Ignore your editor's temporary/backup files 51 | *~ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Pipable INC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pip_library_etl 2 | 3 | `pip_library_etl` is a Python package designed to streamline the creation of docstrings for functions and methods within specified modules, while also providing the capability to effortlessly generate SQL queries tailored to a designated schema. With its intuitive functionality, users can seamlessly generate function calls from natural language input, existing function signatures, or craft new ones using the integrated model. This versatile tool not only enhances documentation generation but also empowers users to compose sophisticated SQL queries with the finesse expected from cutting-edge Large Language Models (LLMs). Elevate your development and data management workflows with the comprehensive capabilities of pip_library_etl. 4 | 5 | It harnesses the power of the [PipableAI/pip-library-etl-1.3b](https://huggingface.co/PipableAI/pip-library-etl-1.3b) language model to do all the tasks. 6 | 7 | For more examples: [notebook](https://colab.research.google.com/drive/17PyMU_3QN9LROy7x-jmaema0cuLRzBvc?usp=sharing) 8 | 9 | ## Installation 10 | 11 | ```bash 12 | pip3 install git+https://github.com/PipableAI/pip-library-etl.git 13 | ``` 14 | 15 | ## Usage 16 | 17 | 18 | ### NOTE 19 | 20 | If you want to try this model without using your GPU, we have hosted the model on our end.
21 | To run the library using the playground hosted model, initialize the generator in the following way: 22 | 23 | ```python 24 | generator = PipEtl(device="cloud") 25 | ``` 26 | 27 | If you want to use your own GPU of the local machine (at least 10-12 GB VRAM): 28 | 29 | ```python 30 | generator = PipEtl(device="cuda") 31 | ``` 32 | 33 | If you want to infer on the CPU of the local machine: 34 | 35 | ```python 36 | generator = PipEtl(device="cpu") 37 | ``` 38 | 39 | ### Example: Function Calling 40 | ```python 41 | docstring = """ 42 | Function Name: make_get_req 43 | Description: This function is used to make a GET request. 44 | Parameters: 45 | - path (str): The path of the URL to be requested. 46 | - data (dict): The data to be sent in the body of the request. 47 | - flags (dict): The flags to be sent in the request. 48 | - params (dict): The parameters to be sent in the request. 49 | - headers (dict): The headers to be sent in the request. 50 | - not_json_response (bool): OPTIONAL: If set to True, the function will return the raw response content instead of trying to parse it as JSON. 51 | - trailing (str): OPTIONAL: For wrapping slash symbol in the end of string. 52 | - absolute (bool): OPTIONAL: If set to True, the function will not prefix the URL with the base URL. 53 | - advanced_mode (bool): OPTIONAL: If set to True, the function will return the raw response instead of trying to parse it as JSON. 54 | Returns: 55 | - Union[str, dict, list, None]: The response content as a string, a dictionary, a list, or None if the response was not successful. 56 | """ 57 | 58 | question = """ 59 | Make a GET request for the URL parameter using variable_2. For the params parameter, use 'weight' as one of the keys with variable_3 as its value, and 'width' as another key with a value of 10. For the data parameter, use variable_1. Prefix the URL with the base URL, and ensure the response is in raw format. 
60 | """ 61 | 62 | function_call = generator.generate_function_call(docstring=docstring, question=question) 63 | 64 | print(function_call) 65 | ``` 66 | 67 | ```python 68 | code = """ 69 | def _query_model(prompt: str, max_new_tokens: int) -> str: 70 | if device == "cloud": 71 | payload = { 72 | "model_name": "PipableAI/pip-library-etl-1.3b", 73 | "prompt": prompt, 74 | "max_new_tokens": max_new_tokens, 75 | } 76 | response = requests.request( 77 | method="POST", url=url, data=payload, timeout=120 78 | ) 79 | if response.status_code == 200: 80 | return json.loads(response.text)["response"] 81 | else: 82 | raise Exception(f"Error generating response using url.") 83 | else: 84 | inputs = tokenizer(prompt, return_tensors="pt").to("cuda") 85 | outputs = model.generate(**inputs, max_new_tokens=max_new_tokens) 86 | return tokenizer.decode(outputs[0], skip_special_tokens=True) 87 | """ 88 | 89 | question = """ 90 | I want to query model with prompt = "What is 2 + 2", and use 200 as maximum token limit. 
91 | """ 92 | 93 | function_call = generator.generate_function_call(code=code, question=question) 94 | 95 | print(function_call) 96 | ``` 97 | 98 | 99 | ### Example: Generate Docstrings for Functions and Methods 100 | 101 | ```python 102 | from pip_library_etl import PipEtl 103 | 104 | # Instantiate the PipEtl 105 | generator = PipEtl() 106 | 107 | # Replace 'your_module' and 'YourModule' with the actual module and module name 108 | module_name = 'your_module' 109 | module = __import__(module_name) 110 | 111 | # Generate docstrings for the module's functions and methods 112 | docs = generator.generate_module_docs(module, module_name) 113 | 114 | # 'docs' now contains a dictionary mapping function/method names to their generated docstrings 115 | ``` 116 | 117 | ### Example: Generate Docstring for a Single Code snippet 118 | 119 | ```python 120 | from pip_library_etl import PipEtl 121 | 122 | # Instantiate the PipEtl 123 | generator = PipEtl() 124 | 125 | code_snippet = """ 126 | def example_function(x): 127 | return x * 2 128 | """ 129 | 130 | docstring = generator.generate_docstring(code_snippet) 131 | print("Generated Docstring:") 132 | print(docstring) 133 | ``` 134 | 135 | ### Example: Adding Docstrings to Python File 136 | 137 | ```python 138 | from pip_library_etl import PipEtl 139 | 140 | # Instantiate the PipEtl 141 | generator = PipEtl() 142 | 143 | # Specify the path to the Python file 144 | file_path = 'your_file.py' 145 | 146 | # Add docstrings to functions in the Python file 147 | # If overwrite is set to True, the existing file will be overwritten with the modified content. 148 | # If overwrite is set to False (default), a new file with "_docstring" appended to its name will be created. 149 | overwrite = False 150 | generator.add_docstrings_to_file(file_path, overwrite) 151 | ``` 152 | 153 | 154 | 155 | ### Example: Generate SQL queries 156 | ```python 157 | 158 | instructions = """ 159 | 1. 
In department table, column Budget_in_Billions is in billions, so 1 will represent 1 billion 160 | """ 161 | 162 | schema = f""" 163 | 164 | CREATE TABLE department ( 165 | Department_ID number, -- Unique identifier for the department 166 | Name text, -- Name of the department 167 | Creation text, -- Date of creation or establishment 168 | Ranking number, -- Ranking of the department 169 | Budget_in_Billions number, -- Budget of the department in billions 170 | Num_Employees number -- Number of employees in the department 171 | ); 172 | 173 | CREATE TABLE head ( 174 | head_ID number, -- Unique identifier for the head 175 | name text, -- Name of the head 176 | born_state text, -- State where the head was born 177 | age number -- Age of the head 178 | ); 179 | 180 | CREATE TABLE management ( 181 | department_ID number, -- Foreign key referencing Department_ID in department table 182 | head_ID number, -- Foreign key referencing head_ID in head table 183 | temporary_acting text -- Indicates if the head is temporarily acting 184 | ); 185 | 186 | """ 187 | 188 | question = "What are the names of the heads who are born outside the California state ?" 189 | 190 | generator = PipEtl() 191 | 192 | query = generator.generate_sql(schema=schema, question=question, instructions=instructions) 193 | print("Generated SQL:") 194 | print(query) 195 | ``` 196 | 197 | ### Changing Model and Device 198 | 199 | The `PipEtl` class allows you to change the huggingface pip model and device while initializing the object. By default, it uses the model key `PipableAI/pip-library-etl-1.3b` and the device `cuda`. You can specify different models and devices by providing arguments during initialization. 
(Make sure the prompt of the new model is same as that of `PipableAI/pip-library-etl-1.3b`) 200 | 201 | ```python 202 | # Example: Instantiate PipEtl with a different model and device 203 | generator = PipEtl(model_key="your_custom_model", device="cpu") 204 | ``` 205 | 206 | ## How It Works 207 | 208 | - `generate_docstring`: Utilizes a GPU-based language model to analyze Python code and generate corresponding docstrings. 209 | - `generate_module_docstrings`: Generates documentation for all methods and functions in a specified module. 210 | - `generate_sql`: Generate SQL queries based on the provided schema and question. 211 | - `generate_function_call`: Generate a function call based on question, and either a undocumented code or docstring of the related function. 212 | - `add_docstrings_to_file`: Adds docstrings to functions in a Python file and writes the modified content to a new file. If `overwrite` is `True`, the existing file will be overwritten; otherwise, a new file will be created. 213 | 214 | 215 | ## Dependencies 216 | 217 | - `transformers` from Hugging Face 218 | 219 | ## Contributing 220 | 221 | Feel free to contribute to the project by opening issues or submitting pull requests. 222 | 223 | ## License 224 | 225 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 226 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Examples using model hosted by PipableAI" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/opt/homebrew/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 20 | " from .autonotebook import tqdm as notebook_tqdm\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "from pip_library_etl import PipEtl" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "generator = PipEtl(device=\"cloud\")" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 6, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Generated SQL:\n", 47 | "SELECT head.name FROM head WHERE head.born_state <> 'California';\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "instructions = \"\"\"\n", 53 | "1. In department table, column Budget_in_Billions is in billions, so 1 will represent 1 billion\n", 54 | "\"\"\"\n", 55 | "\n", 56 | "schema = f\"\"\"\n", 57 | "\n", 58 | "CREATE TABLE department (\n", 59 | " Department_ID number, -- Unique identifier for the department\n", 60 | " Name text, -- Name of the department\n", 61 | " Creation text, -- Date of creation or establishment\n", 62 | " Ranking number, -- Ranking of the department\n", 63 | " Budget_in_Billions number, -- Budget of the department in billions\n", 64 | " Num_Employees number -- Number of employees in the department\n", 65 | ");\n", 66 | "\n", 67 | "CREATE TABLE head (\n", 68 | " head_ID number, -- Unique identifier for the head\n", 69 | " name text, -- Name of the head\n", 70 | " born_state text, -- State where the head was born\n", 71 | " age number -- Age of the head\n", 72 | ");\n", 73 | "\n", 74 | "CREATE TABLE management (\n", 75 | " department_ID number, -- Foreign key referencing Department_ID in department table\n", 76 | " head_ID number, -- Foreign key referencing head_ID in head table\n", 77 | " temporary_acting text -- Indicates if the head is temporarily acting\n", 78 | ");\n", 79 | "\n", 80 | "\"\"\"\n", 81 | "\n", 82 | "question = \"What are the 
names of the heads who are born outside the California state ?\"\n", 83 | "\n", 84 | "query = generator.generate_sql(schema=schema, question=question, instructions=instructions)\n", 85 | "print(\"Generated SQL:\")\n", 86 | "print(query)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "\n", 99 | " \n", 100 | "make_get_req(path='https://example.com', data=variable_1, params={'weight': variable_3, 'width': 10}, headers={'Content-Type': 'application/json'}, absolute=True, not_json_response=True)\n", 101 | " \n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "docstring = \"\"\"\n", 107 | "Function Name: make_get_req\n", 108 | "Description: This function is used to make a GET request.\n", 109 | "Parameters:\n", 110 | "- path (str): The path of the URL to be requested.\n", 111 | "- data (dict): The data to be sent in the body of the request.\n", 112 | "- flags (dict): The flags to be sent in the request.\n", 113 | "- params (dict): The parameters to be sent in the request.\n", 114 | "- headers (dict): The headers to be sent in the request.\n", 115 | "- not_json_response (bool): OPTIONAL: If set to True, the function will return the raw response content instead of trying to parse it as JSON.\n", 116 | "- trailing (str): OPTIONAL: For wrapping slash symbol in the end of string.\n", 117 | "- absolute (bool): OPTIONAL: If set to True, the function will not prefix the URL with the base URL.\n", 118 | "- advanced_mode (bool): OPTIONAL: If set to True, the function will return the raw response instead of trying to parse it as JSON.\n", 119 | "Returns:\n", 120 | "- Union[str, dict, list, None]: The response content as a string, a dictionary, a list, or None if the response was not successful.\n", 121 | "\"\"\"\n", 122 | "\n", 123 | "question = \"\"\"\n", 124 | "Make a GET request for the URL parameter using variable_2. 
For the params parameter, use 'weight' as one of the keys with variable_3 as its value, and 'width' as another key with a value of 10. For the data parameter, use variable_1. Prefix the URL with the base URL, and ensure the response is in raw format.\n", 125 | "\"\"\"\n", 126 | "\n", 127 | "function_call = generator.generate_function_call(docstring=docstring, question=question)\n", 128 | "\n", 129 | "print(function_call)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "\n", 142 | " \n", 143 | "query_model(\"What is 2 + 2\", 200)\n", 144 | "\n", 145 | " \n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "code = \"\"\"\n", 151 | "def _query_model(prompt: str, max_new_tokens: int) -> str:\n", 152 | " if device == \"cloud\":\n", 153 | " payload = {\n", 154 | " \"model_name\": \"PipableAI/pip-library-etl-1.3b\",\n", 155 | " \"prompt\": prompt,\n", 156 | " \"max_new_tokens\": max_new_tokens,\n", 157 | " }\n", 158 | " response = requests.request(\n", 159 | " method=\"POST\", url=url, data=payload, timeout=120\n", 160 | " )\n", 161 | " if response.status_code == 200:\n", 162 | " return json.loads(response.text)[\"response\"]\n", 163 | " else:\n", 164 | " raise Exception(f\"Error generating response using url.\")\n", 165 | " else:\n", 166 | " inputs = tokenizer(prompt, return_tensors=\"pt\").to(\"cuda\")\n", 167 | " outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)\n", 168 | " return tokenizer.decode(outputs[0], skip_special_tokens=True)\n", 169 | "\"\"\"\n", 170 | "\n", 171 | "question = \"\"\"\n", 172 | "I want to query model with prompt \"What is 2 + 2\", and use 200 as maximum token limit.\n", 173 | "\"\"\"\n", 174 | "\n", 175 | "function_call = generator.generate_function_call(code=code, question=question)\n", 176 | "\n", 177 | "print(function_call)" 178 | ] 179 | }, 180 | { 181 | "cell_type": 
"code", 182 | "execution_count": 3, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "Generated Docstring:\n", 190 | "\n", 191 | " Description: This function divides a given number by 2.\n", 192 | " Parameters:\n", 193 | " - x (float): The input value to be divided by 2.\n", 194 | " Returns:\n", 195 | " - float: The result of x divided by 2.\n", 196 | " Example:\n", 197 | " divide_by_two(1.0)\n", 198 | " \n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "code_snippet = \"\"\"\n", 204 | "def example_function(x):\n", 205 | " return x * 2\n", 206 | "\"\"\"\n", 207 | "\n", 208 | "docstring = generator.generate_docstring(code_snippet)\n", 209 | "print(\"Generated Docstring:\")\n", 210 | "print(docstring)" 211 | ] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.10.14" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /pip_library_etl/__init__.py: -------------------------------------------------------------------------------- 1 | from pip_library_etl.main import ( 2 | PipEtl 3 | ) 4 | 5 | __all__ = [ 6 | "PipEtl" 7 | ] 8 | -------------------------------------------------------------------------------- /pip_library_etl/main.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | import json 4 | from typing import Any 5 | import os 6 | import ast 7 | 8 | import requests 9 | from transformers import AutoModelForCausalLM, 
INFERENCE_URL = "https://playground.pipable.ai/infer"


class PipEtl:
    """
    Generate docstrings, SQL queries, and Python function calls using the
    PipableAI/pip-library-etl-1.3b model, either on a local device
    ("cuda"/"cpu") or via the hosted cloud endpoint ("cloud").
    """

    def __init__(
        self,
        model_key="PipableAI/pip-library-etl-1.3b",
        device="cuda",
        url=INFERENCE_URL,
    ):
        """
        Initialize the generator.

        Args:
        - model_key (str): Hugging Face model key to load or query.
        - device (str): "cuda" or "cpu" for local inference, or "cloud" to
          use the hosted playground endpoint instead of local weights.
        - url (str): Inference endpoint URL; used only when device == "cloud".
        """
        self.device = device
        self.model_key = model_key
        self.model = None
        self.tokenizer = None
        self.url = None
        if self.device == "cloud":
            # Cloud mode: nothing is loaded locally; prompts are POSTed to `url`.
            self.url = url
        else:
            self._load_model()

    def _query_model(self, prompt: str, max_new_tokens: int) -> str:
        """
        Run a single generation request and return the raw decoded text.

        Uses the cloud endpoint when device == "cloud", otherwise the locally
        loaded model/tokenizer.

        Raises:
        - Exception: if the cloud endpoint returns a non-200 status.
        """
        if self.device == "cloud":
            payload = {
                "model_name": self.model_key,
                "prompt": prompt,
                "max_new_tokens": max_new_tokens,
            }
            response = requests.request(
                method="POST", url=self.url, data=payload, timeout=120
            )
            if response.status_code == 200:
                return json.loads(response.text)["response"]
            raise Exception(f"Error generating response using {self.url}.")
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _load_model(self):
        """Lazily load the model and tokenizer onto `self.device` (local mode only)."""
        if self.model is None or self.tokenizer is None:
            self.model = AutoModelForCausalLM.from_pretrained(self.model_key).to(
                self.device
            )
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_key)

    def generate_docstring(self, code: str) -> str:
        """
        Generate a docstring for a Python code snippet.

        Args:
        - code (str): The Python code for which a docstring needs to be generated.

        Returns:
        - str: The generated docstring for the input code.

        Raises:
        - ValueError: If the model fails to produce a usable response.

        Note:
            The prompt follows the tag protocol the pip-library-etl-1.3b model
            was tuned on (<example_response>/<function_code>/<doc> markers).
        """
        try:
            prompt = f"""
<example_response>
--code:def divide_by_two(x: float) -> float: return x / 2
--question:Document the python code above giving function description ,parameters and return type and example on how to call the function
--doc:
Description: This function divides a given number by 2.
Parameters:
- x (float): The input value to be divided by 2.
Returns:
- float: The result of x divided by 2.
Example:
divide_by_two(1.0)
</example_response>
<function_code>{code}</function_code>
<instructions>
1. In the examples while calling function use the name mentioned after `def ` in the above function_code.
2. In the generated docs use valid python type hints as per PEP 484.
</instructions>
<question>Document the python code above giving function description ,parameters and return type and example on how to call the function</question>
<doc>"""
            res = self._query_model(prompt, 450)
            # Keep only the text between the last opening <doc> tag and its close,
            # then strip any residual markup the model may have emitted.
            doc = res.split("<doc>")[-1].split("</doc>")[0]
            doc = (
                doc.replace("<doc>", "")
                .replace("</doc>", "")
                .replace("<p>", "")
                .replace("</p>", "")
            )
            return doc
        except Exception as e:
            message = f"Unable to generate the docs using model with error: {e}"
            raise ValueError(message) from e

    def generate_module_docstrings(self, module: Any, module_name: str) -> dict:
        """
        Generate documentation for all methods and functions in a given module.

        Args:
        - module (Any): The module or package to inspect.
        - module_name (str): The name of the module or package.

        Returns:
        - dict: A dictionary mapping method/function names to their corresponding
          generated docstrings. On KeyboardInterrupt, the docs generated so far
          are returned.
        """
        complete_docs = {}

        code_data = self._get_all_methods_and_functions(module, module_name)

        try:
            for function, code in code_data.items():
                print(f"Generating docs for {function}:")

                try:
                    doc = self.generate_docstring(code)
                except ValueError as e:
                    print(e)
                    # BUG FIX: on failure, skip this function instead of storing
                    # an undefined (first iteration) or stale `doc`.
                    continue

                complete_docs[function] = doc

                print(f"Doc for {function}:\n{doc}\n")

        except KeyboardInterrupt:
            print("\nKeyboardInterrupt: Returning the latest complete_docs.")
            return complete_docs
        else:
            return complete_docs

    def generate_sql(
        self, schema: str, question: str, instructions: str = None, examples: str = None
    ) -> str:
        """
        Generate SQL queries based on the provided schema and question.

        Args:
            schema (str): The schema for the SQL query.
            question (str): The question related to the SQL query.
            instructions (str, optional): Additional instructions for generating the SQL query. Defaults to None.
            examples (str, optional): Examples for generating the SQL query. Defaults to None.

        Returns:
            str: The generated SQL query.

        Raises:
            ValueError: If unable to generate the SQL query using the model.
        """
        try:
            prompt = "Generate simple SQL queries from the schema mentioned for the following questions."

            if instructions:
                prompt += f"\n<instructions>{instructions}</instructions>"

            if examples:
                prompt += f"\n<example>{examples}</example>"

            prompt += f"""
<schema>{schema}</schema>
<question>{question}</question>
<sql>"""
            res = self._query_model(prompt, 300)
            # Extract the answer between the <sql> tags; strip any residual markup.
            sql_section = res.split("<sql>")[1].split("</sql>")[0]

            sql_section = sql_section.replace("<p>", "").replace("</p>", "")

            return sql_section

        except Exception as e:
            message = f"Unable to generate the SQL query using model with error: {e}"
            raise ValueError(message) from e

    def _get_all_methods_and_functions(self, module: Any, module_name: str):
        """
        Retrieve methods and functions along with their source code from a module or package.

        Args:
        - module (Any): The module or package to inspect.
        - module_name (str): The name of the module or package.

        Returns:
        - dict: A dictionary mapping dotted method/function paths to their source code.

        Note:
            Recursively explores submodules; names starting with "_" are skipped.
            Classes whose members cannot be introspected print a warning instead
            of raising.
        """
        function_to_code_data = {}
        already_done = {}

        def _helper_function(module_or_class: Any, path: str):
            try:
                for name, obj in inspect.getmembers(module_or_class):
                    if name.startswith("_"):
                        continue
                    complete_path = path + "." + name
                    if inspect.isclass(obj) or inspect.ismodule(obj):
                        if type(obj).__name__ == "module":
                            try:
                                importlib.import_module(complete_path)
                            except ModuleNotFoundError:
                                # Only recurse into members that belong to this
                                # package and haven't been visited yet.
                                # NOTE(review): the `name.startswith(f"{module_name}.")`
                                # guard looks like it rarely matches bare member
                                # names — confirm intended recursion behavior.
                                if name in already_done or not name.startswith(
                                    f"{module_name}."
                                ):
                                    continue
                                already_done[name] = 1
                                _helper_function(obj, path + "." + name)
                    elif inspect.ismethod(obj) or inspect.isfunction(obj):
                        function_to_code_data[complete_path] = str(
                            inspect.getsource(obj)
                        )
            except TypeError as e:
                print(f"Unable to extract code for {path} with Error: {e}")

        _helper_function(module, module_name)
        return function_to_code_data

    def generate_function_call(
        self,
        question: str,
        docstring: str = None,
        code: str = None,
    ) -> str:
        """
        Generate a Python function call from a natural-language question and
        either the function's docstring or its (undocumented) code.

        Args:
            question (str): The question prompting the function call generation.
            docstring (str, optional): The documentation string of the function.
            code (str, optional): The code of the function; used to generate a
                docstring first when `docstring` is not provided.

        Returns:
            str: The Python function call generated based on the question and
            the provided docstring template.

        Raises:
            RuntimeError: If neither input is given or generation fails.
        """
        try:
            if docstring is None and code is None:
                raise ValueError("Provide either code or docstring.")
            if docstring is None:
                docstring = self.generate_docstring(code=code)
            # Prompt text (including the "langugae" typo) kept verbatim — this
            # is the phrasing the model was prompted with during tuning.
            prompt = f"""
Give a function call in python langugae for the following question:
<example_response>
--doc:
Description: This function logs a curl command in debug mode.
Parameters:
- method (str): The HTTP method to use for the request.
- url (str): The URL to send the request to.
- data (dict, optional): The data to send in the request. Defaults to None.
- headers (dict, optional): The headers to send with the request. Defaults to None.
- level (int, optional): The log level to use for this log message. Defaults to logging.DEBUG.
Returns:
- None
Example:
log_curl_debug('GET', 'https://example.com')
--question: log a curl PUT request for url https://web.io/
--function_call: log_curl_debug(method='PUT', url = 'https://web.io')
</example_response>
<doc>
{docstring}
</doc>
<instruction>
1. Strictly use named parameters mentioned in the doc to generate function calls.
2. Only return the response as python parsable string version of function call.
3. mention the 'self' parameter if required.
</instruction>
<question>
{question}
</question>
<function_call>
"""
            res = self._query_model(prompt, 200)
            result = res.split("<function_call>")[1].split("</function_call>")[0]
            return result
        except Exception as e:
            raise RuntimeError(f"An error occurred: {e}")

    def add_docstrings_to_file(self, file_path, overwrite=False):
        """
        Read a Python file, generate docstrings for its functions that lack one,
        insert them, and write the modified content back out.

        Args:
        - file_path (str): The path to the original Python file.
        - overwrite (bool): If True, overwrite the existing file. If False,
          write a new file with "_docstring" appended to its name.
        """
        base_path = os.path.dirname(file_path)
        file_name, ext = os.path.splitext(os.path.basename(file_path))
        output_file_path = (
            file_path
            if overwrite
            else os.path.join(base_path, f"{file_name}_docstring{ext}")
        )

        # Read the original Python file.
        with open(file_path, "r") as file:
            code = file.read()

        # Collect every function definition in the module.
        tree = ast.parse(code)
        function_code_map = {}
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                function_code_map[node.name] = node

        for func_name, func_node in function_code_map.items():
            # Skip functions that already have a docstring.
            # (ast.Constant replaces the deprecated ast.Str, removed in 3.12.)
            if (
                func_node.body
                and isinstance(func_node.body[0], ast.Expr)
                and isinstance(func_node.body[0].value, ast.Constant)
                and isinstance(func_node.body[0].value.value, str)
            ):
                continue

            # Generate and insert the docstring as the first statement.
            docstring = self.generate_docstring(ast.unparse(func_node))
            func_node.body.insert(0, ast.Expr(value=ast.Constant(value=docstring)))

        # Reconstruct and write the modified source.
        ast.fix_missing_locations(tree)
        modified_code = ast.unparse(tree)

        with open(output_file_path, "w") as output_file:
            output_file.write(modified_code)
packages=find_packages(), 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | install_requires=[ 15 | "transformers", 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.4" 2 | --------------------------------------------------------------------------------