├── .github
│   └── workflows
│       └── pylint.yml
├── .gitignore
├── LICENSE
├── README.md
├── cli.py
├── hypertune
│   ├── __init__.py
│   ├── core.py
│   ├── database.py
│   ├── predictor.py
│   └── scoring.py
└── setup.py

/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pylint
    - name: Analysing the code with pylint
      run: |
        pylint $(git ls-files '*.py')

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 geeknik

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# HyperTune

HyperTune is an advanced tool for optimizing and analyzing text generation using OpenAI's GPT models. It explores various hyperparameter combinations to produce high-quality responses to given prompts, and provides comprehensive analysis of the results.

## Features

- Generate multiple responses to a given prompt using different hyperparameter settings
- Score responses based on coherence, relevance, and complexity
- Analyze common themes and unique insights across responses
- Visualize the impact of hyperparameters on response quality
- Provide detailed explanations of scoring and recommendations for further tuning

## Installation

1. Clone this repository:

   ```
   git clone https://github.com/geeknik/hypertune
   cd hypertune
   python -m venv venv && source venv/bin/activate
   ```

2. Install the required dependencies:

   ```
   pip install openai scikit-learn nltk matplotlib seaborn tabulate pandas
   ```

3. Set up your OpenAI API key as an environment variable:

   ```
   export OPENAI_API_KEY='your-api-key-here'
   ```

## Usage

Run the CLI script with your desired prompt and number of iterations:

```
python cli.py --prompt "Your prompt here" --iterations 10
```

The script will generate responses, analyze them, and provide detailed output including:

- Top 3 responses with score breakdowns
- Key concepts and their frequencies
- Unique insights from the responses
- Hyperparameter analysis and trends
- Recommendations for further tuning

The script also generates several visualization charts:

- `score_comparison.png`: Comparison of the top 3 responses' scores
- `word_frequency.png`: Bar chart of the most frequent words
- `hyperparameter_impact.png`: Scatter plots showing the impact of each hyperparameter on the total score

## How It Works

HyperTune uses a combination of natural language processing techniques and machine learning to generate and analyze text responses:

1. It generates multiple responses using OpenAI's GPT model with varying hyperparameters.
2. Each response is scored based on coherence, relevance to the prompt, and language complexity.
3. The tool then analyzes the responses collectively to identify common themes, unique insights, and the impact of different hyperparameters.
4. Finally, it provides a comprehensive report with visualizations to help understand the results.
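
The same pipeline can also be driven directly from Python instead of through the CLI. Here is a minimal sketch (the prompt is illustrative, and `OPENAI_API_KEY` must be set in your environment); it mirrors what `cli.py` does internally:

```
from hypertune.core import HyperTune

# Generates 5 responses with randomly sampled hyperparameters,
# then scores and ranks them (best first).
ht = HyperTune("Explain quantum entanglement in simple terms.", iterations=5)
results = ht.run()

best = results[0]
print(best['total_score'])      # 0.4*coherence + 0.4*relevance + 0.2*complexity
print(best['hyperparameters'])  # temperature, top_p, frequency_penalty, presence_penalty
print(best['text'])
```

Note that each iteration issues one chat completion request, so API usage (and cost) scales linearly with the number of iterations.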

![hyperparameter_impact](https://github.com/user-attachments/assets/73490098-4333-479f-9b3f-094944b34acd)

![word_frequency](https://github.com/user-attachments/assets/e0cd3271-78d3-406e-98a6-b39e75205dbf)

![score_comparison](https://github.com/user-attachments/assets/e6ad5ace-2632-404f-99c7-c50a88d328f7)

## Contributing

Contributions to HyperTune are welcome! Please feel free to submit a PR.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## Disclaimer

This tool interacts with OpenAI's GPT models. The authors are not responsible for any misuse or for any offensive content that may be generated.

--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
import argparse
from hypertune.core import HyperTune
from collections import Counter
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
import numpy as np
import pandas as pd

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

def analyze_results(results):
    all_text = " ".join([result['text'] for result in results])
    words = word_tokenize(all_text.lower())
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word.isalnum() and word not in stop_words]

    word_freq = Counter(words)
    common_terms = word_freq.most_common(10)

    unique_concepts = set()
    for result in results:
        sentences = nltk.sent_tokenize(result['text'])
        for sentence in sentences:
            if any(term in sentence.lower() for term, _ in common_terms):
                unique_concepts.add(sentence)

    return common_terms, list(unique_concepts)

def explain_score(result):
    explanation = f"Total Score: {result['total_score']:.2f}\n"
    explanation += f"  Coherence: {result['coherence_score']:.2f} (40% weight) - Measures how well the sentences flow and connect.\n"
    explanation += f"  Relevance: {result['relevance_score']:.2f} (40% weight) - Measures how closely the response relates to the prompt.\n"
    explanation += f"  Complexity: {result['complexity_score']:.2f} (20% weight) - Measures the sophistication of language used.\n"
    return explanation

def plot_score_comparison(results):
    labels = ['Coherence', 'Relevance', 'Complexity']
    data = np.array([[r['coherence_score'], r['relevance_score'], r['complexity_score']] for r in results[:3]])

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(labels))
    width = 0.25

    # Iterate over however many responses are available (up to 3),
    # so fewer than 3 results no longer raises an IndexError.
    for i in range(len(data)):
        ax.bar(x + i * width, data[i], width, label=f'Response {i+1}')

    ax.set_ylabel('Scores')
    ax.set_title('Comparison of Top 3 Responses')
    ax.set_xticks(x + width)
    ax.set_xticklabels(labels)
    ax.legend()

    plt.tight_layout()
    plt.savefig('score_comparison.png')
    plt.close()

def plot_word_frequency(common_terms):
    words, frequencies = zip(*common_terms)

    plt.figure(figsize=(12, 6))
    sns.barplot(x=list(words), y=list(frequencies))
    plt.title('Top 10 Most Frequent Words')
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('word_frequency.png')
    plt.close()

def plot_hyperparameter_impact(results):
    df = pd.DataFrame([
        {
            'temperature': r['hyperparameters']['temperature'],
            'top_p': r['hyperparameters']['top_p'],
            'frequency_penalty': r['hyperparameters']['frequency_penalty'],
            'presence_penalty': r['hyperparameters']['presence_penalty'],
            'total_score': r['total_score']
        } for r in results
    ])

    fig, axs = plt.subplots(2, 2, figsize=(15, 15))
    fig.suptitle('Impact of Hyperparameters on Total Score')

    for i, param in enumerate(['temperature', 'top_p', 'frequency_penalty', 'presence_penalty']):
        ax = axs[i // 2, i % 2]
        sns.scatterplot(data=df, x=param, y='total_score', ax=ax)
        ax.set_title(f'{param.capitalize()} vs Total Score')

    plt.tight_layout()
    plt.savefig('hyperparameter_impact.png')
    plt.close()

def main():
    parser = argparse.ArgumentParser(description="HyperTune CLI")
    parser.add_argument("--prompt", required=True, help="Input prompt")
    parser.add_argument("--iterations", type=int, default=5, help="Number of iterations")
    args = parser.parse_args()

    ht = HyperTune(args.prompt, args.iterations)
    results = ht.run()

    print("HyperTune Analysis:")
    print(f"\nPrompt: '{args.prompt}'")
    print(f"Number of iterations: {args.iterations}")

    print("\nTop 3 Responses (by score):")
    for i, result in enumerate(results[:3], 1):
        print(f"\n{i}. Score Breakdown:")
        print(explain_score(result))
        print("Hyperparameters:")
        for param, value in result['hyperparameters'].items():
            print(f"  {param}: {value}")
        print("Response:")
        print(result['text'])

    plot_score_comparison(results)
    print("\nScore comparison chart saved as 'score_comparison.png'")

    plot_hyperparameter_impact(results)
    print("Hyperparameter impact chart saved as 'hyperparameter_impact.png'")

    common_terms, unique_concepts = analyze_results(results)

    print("\nKey Concepts and Frequency:")
    table = tabulate(common_terms, headers=['Term', 'Frequency'], tablefmt='grid')
    print(table)

    plot_word_frequency(common_terms)
    print("Word frequency chart saved as 'word_frequency.png'")

    print("\nUnique Insights:")
    for i, concept in enumerate(unique_concepts, 1):
        print(f"{i}. {concept}")

    print("\nHyperparameter Analysis:")
    best_result = results[0]
    print("Best performing hyperparameters:")
    for param, value in best_result['hyperparameters'].items():
        print(f"  {param}: {value}")

    print("\nHyperparameter Trends:")
    # Dict unpacking instead of the `|` merge operator keeps this
    # compatible with Python 3.8, which the CI matrix still tests.
    df = pd.DataFrame([{**r['hyperparameters'], 'total_score': r['total_score']} for r in results])
    for param in ['temperature', 'top_p', 'frequency_penalty', 'presence_penalty']:
        correlation = df[param].corr(df['total_score'])
        print(f"  {param}: {'Positive' if correlation > 0 else 'Negative'} correlation ({correlation:.2f}) with total score")

    print("\nKey Takeaways:")
    print("1. The top response provides the most balanced explanation, scoring well across all metrics.")
    print("2. Common themes across responses include: " + ", ".join([term for term, _ in common_terms[:5]]))
    print("3. The complexity of explanations varies, with some responses using more technical language than others.")
    print("4. All top responses maintain high relevance to the prompt, ensuring focused explanations.")
    print(f"5. The best performing set of hyperparameters achieved a total score of {best_result['total_score']:.2f}")

    print("\nRecommendations for Further Tuning:")
    print("1. Experiment with narrower ranges of hyperparameters around the best performing values.")
    print("2. Consider increasing the number of iterations to explore more hyperparameter combinations.")
    print("3. Analyze the trade-offs between different scoring components (coherence, relevance, complexity) and adjust weights if needed.")

    print("\nMethodology:")
    print("Scoring is based on three key factors:")
    print("- Coherence (40%): How well the ideas connect and flow")
    print("- Relevance (40%): How closely the response aligns with the prompt")
    print("- Complexity (20%): The sophistication of language and concepts used")
    print("Hyperparameters are randomly selected for each iteration to explore different generation settings.")

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/hypertune/__init__.py:
--------------------------------------------------------------------------------
from .core import HyperTune
from .database import Database
from .predictor import HyperPredictor

--------------------------------------------------------------------------------
/hypertune/core.py:
--------------------------------------------------------------------------------
import random
import string

import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# The client reads OPENAI_API_KEY from the environment.
client = OpenAI()

class HyperTune:
    def __init__(self, prompt, iterations):
        self.prompt = prompt
        self.iterations = iterations
        self.stop_words = set(stopwords.words('english'))

    def generate(self):
        results = []
        for _ in range(self.iterations):
            # Sample a fresh hyperparameter combination for each request.
            temperature = round(random.uniform(0.1, 1.0), 2)
            top_p = round(random.uniform(0.1, 1.0), 2)
            frequency_penalty = round(random.uniform(0.0, 2.0), 2)
            presence_penalty = round(random.uniform(0.0, 2.0), 2)

            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": self.prompt}
                ],
                temperature=temperature,
                top_p=top_p,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty)
            results.append({
                'text': response.choices[0].message.content,
                'hyperparameters': {
                    'temperature': temperature,
                    'top_p': top_p,
                    'frequency_penalty': frequency_penalty,
                    'presence_penalty': presence_penalty
                }
            })
        return results

    def score(self, results):
        scored_results = []
        for result in results:
            coherence_score = self.evaluate_coherence(result['text'])
            relevance_score = self.evaluate_relevance(result['text'], self.prompt)
            complexity_score = self.evaluate_complexity(result['text'])
            total_score = (coherence_score * 0.4 +
                           relevance_score * 0.4 +
                           complexity_score * 0.2)
            scored_results.append({
                'text': result['text'],
                'total_score': total_score,
                'coherence_score': coherence_score,
                'relevance_score': relevance_score,
                'complexity_score': complexity_score,
                'hyperparameters': result['hyperparameters']
            })
        return sorted(scored_results, key=lambda x: x['total_score'], reverse=True)

    def evaluate_coherence(self, text):
        sentences = sent_tokenize(text)
        if len(sentences) < 2:
            return 0
        sentence_vectors = []
        for sentence in sentences:
            words = [w.lower() for w in word_tokenize(sentence) if w.lower() not in self.stop_words]
            sentence_vectors.append(' '.join(words))
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(sentence_vectors)
        # Coherence is the mean cosine similarity between adjacent sentences.
        coherence_scores = []
        for i in range(len(sentences) - 1):
            coherence_scores.append(cosine_similarity(tfidf_matrix[i], tfidf_matrix[i+1])[0][0])
        return np.mean(coherence_scores)

    def evaluate_relevance(self, text, prompt):
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform([prompt, text])
        return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

    def evaluate_complexity(self, text):
        words = word_tokenize(text.lower())
        words = [word for word in words if word not in string.punctuation]
        sentences = sent_tokenize(text)
        if not words or not sentences:
            # Guard against empty responses, which would otherwise
            # produce NaN from np.mean and a ZeroDivisionError below.
            return 0
        avg_word_length = np.mean([len(word) for word in words])
        unique_words_ratio = len(set(words)) / len(words)
        avg_sentence_length = np.mean([len(word_tokenize(sentence)) for sentence in sentences])
        # Normalize each component
        norm_word_length = min(avg_word_length / 10, 1)  # Assume max avg word length is 10
        # unique_words_ratio is already between 0 and 1
        norm_sentence_length = min(avg_sentence_length / 30, 1)  # Assume max avg sentence length is 30
        complexity_score = (norm_word_length * 0.3 +
                            unique_words_ratio * 0.3 +
                            norm_sentence_length * 0.4)
        return complexity_score  # Already normalized between 0 and 1

    def run(self):
        results = self.generate()
        scored_results = self.score(results)
        return scored_results

--------------------------------------------------------------------------------
/hypertune/database.py:
--------------------------------------------------------------------------------
import sqlite3

class Database:
    def __init__(self, db_name="hypertune.db"):
        self.conn = sqlite3.connect(db_name)
        self.create_tables()

    def create_tables(self):
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY,
                prompt TEXT,
                output TEXT,
                temperature REAL,
                top_p REAL,
                score REAL
            )
        """)
        self.conn.commit()

    def insert_result(self, prompt, output, temperature, top_p, score):
        self.conn.execute("""
            INSERT INTO results (prompt, output, temperature, top_p, score)
            VALUES (?, ?, ?, ?, ?)
        """, (prompt, output, temperature, top_p, score))
        self.conn.commit()

    def get_best_params(self, prompt):
        # Query to get best params based on similar prompts
        pass

--------------------------------------------------------------------------------
/hypertune/predictor.py:
--------------------------------------------------------------------------------
from sklearn.ensemble import RandomForestRegressor
import numpy as np

class HyperPredictor:
    def __init__(self):
        self.model = RandomForestRegressor()

    def train(self, X, y):
        self.model.fit(X, y)

    def predict(self, prompt_features):
        return self.model.predict(prompt_features)

if __name__ == "__main__":
    # Usage: train with data from the database, e.g.
    # predictor.train(X_train, y_train)
    predictor = HyperPredictor()

--------------------------------------------------------------------------------
/hypertune/scoring.py:
--------------------------------------------------------------------------------
import nltk
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def perplexity_score(text):
    # Implement perplexity calculation
    pass

def semantic_coherence(text):
    # Loading the model on every call is slow; cache it if called often.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    sentences = nltk.sent_tokenize(text)
    if len(sentences) < 2:
        # A single sentence has no adjacent pairs to compare.
        return 0.0
    embeddings = model.encode(sentences)
    scores = [cosine_similarity([embeddings[i]], [embeddings[i+1]])[0][0] for i in range(len(embeddings)-1)]
    return sum(scores) / len(scores)

def factual_accuracy(text):
    # Implement fact-checking logic
    pass

# Add more scoring functions

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# setup.py

from setuptools import setup, find_packages

setup(
    name="hypertune",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        'openai',
        'nltk',
        'scikit-learn',
        'sentence-transformers',
        'matplotlib',
        'seaborn',
        'tabulate',
        'pandas',
        'numpy',
    ],
    entry_points={
        'console_scripts': [
            # cli.py defines main(), not run()
            'hypertune=cli:main',
        ],
    },
)
--------------------------------------------------------------------------------