├── .github
│   └── workflows
│       └── pylint.yml
├── .gitignore
├── LICENSE
├── README.md
├── cli.py
├── hypertune
│   ├── __init__.py
│   ├── core.py
│   ├── database.py
│   ├── predictor.py
│   └── scoring.py
└── setup.py

/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pylint
    - name: Analysing the code with pylint
      run: |
        pylint $(git ls-files '*.py')

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 geeknik

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# HyperTune

HyperTune is an advanced tool for optimizing and analyzing text generation using OpenAI's GPT models. It explores various hyperparameter combinations to produce high-quality responses to given prompts, and provides comprehensive analysis of the results.

## Features

- Generate multiple responses to a given prompt using different hyperparameter settings
- Score responses based on coherence, relevance, and complexity
- Analyze common themes and unique insights across responses
- Visualize the impact of hyperparameters on response quality
- Provide detailed explanations of scoring and recommendations for further tuning

## Installation

1. Clone this repository:

   ```
   git clone https://github.com/geeknik/hypertune
   cd hypertune
   python -m venv venv && source venv/bin/activate
   ```

2. Install the required dependencies:

   ```
   pip install openai scikit-learn nltk matplotlib seaborn tabulate pandas
   ```

3. Set up your OpenAI API key as an environment variable:

   ```
   export OPENAI_API_KEY='your-api-key-here'
   ```

## Usage

Run the CLI script with your desired prompt and number of iterations:

```
python cli.py --prompt "Your prompt here" --iterations 10
```

The script will generate responses, analyze them, and provide detailed output including:

- Top 3 responses with score breakdowns
- Key concepts and their frequencies
- Unique insights from the responses
- Hyperparameter analysis and trends
- Recommendations for further tuning

The script also generates several visualization charts:

- `score_comparison.png`: Comparison of the top 3 responses' scores
- `word_frequency.png`: Bar chart of the most frequent words
- `hyperparameter_impact.png`: Scatter plots showing the impact of each hyperparameter on the total score

## How It Works

HyperTune uses a combination of natural language processing techniques and machine learning to generate and analyze text responses:

1. It generates multiple responses using OpenAI's GPT model with varying hyperparameters.
2. Each response is scored based on coherence, relevance to the prompt, and language complexity.
3. The tool then analyzes the responses collectively to identify common themes, unique insights, and the impact of different hyperparameters.
4. Finally, it provides a comprehensive report with visualizations to help understand the results.
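
The same pipeline can also be driven directly from Python instead of through the CLI. Here is a minimal sketch (the prompt is illustrative, and `OPENAI_API_KEY` must be set in your environment); it mirrors what `cli.py` does internally:

```
from hypertune.core import HyperTune

# Generates 5 responses with randomly sampled hyperparameters,
# then scores and ranks them (best first).
ht = HyperTune("Explain quantum entanglement in simple terms.", iterations=5)
results = ht.run()

best = results[0]
print(best['total_score'])      # 0.4*coherence + 0.4*relevance + 0.2*complexity
print(best['hyperparameters'])  # temperature, top_p, frequency_penalty, presence_penalty
print(best['text'])
```

Note that each iteration issues one chat completion request, so API usage (and cost) scales linearly with the number of iterations.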

![hyperparameter_impact](https://github.com/user-attachments/assets/73490098-4333-479f-9b3f-094944b34acd)

![word_frequency](https://github.com/user-attachments/assets/e0cd3271-78d3-406e-98a6-b39e75205dbf)

![score_comparison](https://github.com/user-attachments/assets/e6ad5ace-2632-404f-99c7-c50a88d328f7)

## Contributing

Contributions to HyperTune are welcome! Please feel free to submit a PR.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## Disclaimer

This tool interacts with OpenAI's GPT models. The authors are not responsible for any misuse or for any offensive content that may be generated.

--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
import argparse
from hypertune.core import HyperTune
from collections import Counter
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
import numpy as np
import pandas as pd

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

def analyze_results(results):
    all_text = " ".join([result['text'] for result in results])
    words = word_tokenize(all_text.lower())
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word.isalnum() and word not in stop_words]

    word_freq = Counter(words)
    common_terms = word_freq.most_common(10)

    unique_concepts = set()
    for result in results:
        sentences = nltk.sent_tokenize(result['text'])
        for sentence in sentences:
            if any(term in sentence.lower() for term, _ in common_terms):
                unique_concepts.add(sentence)

    return common_terms, list(unique_concepts)

def explain_score(result):
    explanation = f"Total Score: {result['total_score']:.2f}\n"
    explanation += f"  Coherence: {result['coherence_score']:.2f} (40% weight) - Measures how well the sentences flow and connect.\n"
    explanation += f"  Relevance: {result['relevance_score']:.2f} (40% weight) - Measures how closely the response relates to the prompt.\n"
    explanation += f"  Complexity: {result['complexity_score']:.2f} (20% weight) - Measures the sophistication of language used.\n"
    return explanation

def plot_score_comparison(results):
    labels = ['Coherence', 'Relevance', 'Complexity']
    data = np.array([[r['coherence_score'], r['relevance_score'], r['complexity_score']] for r in results[:3]])

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(labels))
    width = 0.25

    # Iterate over however many responses are available (up to 3),
    # so fewer than 3 results no longer raises an IndexError.
    for i in range(len(data)):
        ax.bar(x + i * width, data[i], width, label=f'Response {i+1}')

    ax.set_ylabel('Scores')
    ax.set_title('Comparison of Top 3 Responses')
    ax.set_xticks(x + width)
    ax.set_xticklabels(labels)
    ax.legend()

    plt.tight_layout()
    plt.savefig('score_comparison.png')
    plt.close()

def plot_word_frequency(common_terms):
    words, frequencies = zip(*common_terms)

    plt.figure(figsize=(12, 6))
    sns.barplot(x=list(words), y=list(frequencies))
    plt.title('Top 10 Most Frequent Words')
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('word_frequency.png')
    plt.close()

def plot_hyperparameter_impact(results):
    df = pd.DataFrame([
        {
            'temperature': r['hyperparameters']['temperature'],
            'top_p': r['hyperparameters']['top_p'],
            'frequency_penalty': r['hyperparameters']['frequency_penalty'],
            'presence_penalty': r['hyperparameters']['presence_penalty'],
            'total_score': r['total_score']
        } for r in results
    ])

    fig, axs = plt.subplots(2, 2, figsize=(15, 15))
    fig.suptitle('Impact of Hyperparameters on Total Score')

    for i, param in enumerate(['temperature', 'top_p', 'frequency_penalty', 'presence_penalty']):
        ax = axs[i // 2, i % 2]
        sns.scatterplot(data=df, x=param, y='total_score', ax=ax)
        ax.set_title(f'{param.capitalize()} vs Total Score')

    plt.tight_layout()
    plt.savefig('hyperparameter_impact.png')
    plt.close()

def main():
    parser = argparse.ArgumentParser(description="HyperTune CLI")
    parser.add_argument("--prompt", required=True, help="Input prompt")
    parser.add_argument("--iterations", type=int, default=5, help="Number of iterations")
    args = parser.parse_args()

    ht = HyperTune(args.prompt, args.iterations)
    results = ht.run()

    print("HyperTune Analysis:")
    print(f"\nPrompt: '{args.prompt}'")
    print(f"Number of iterations: {args.iterations}")

    print("\nTop 3 Responses (by score):")
    for i, result in enumerate(results[:3], 1):
        print(f"\n{i}. Score Breakdown:")
        print(explain_score(result))
        print("Hyperparameters:")
        for param, value in result['hyperparameters'].items():
            print(f"  {param}: {value}")
        print("Response:")
        print(result['text'])

    plot_score_comparison(results)
    print("\nScore comparison chart saved as 'score_comparison.png'")

    plot_hyperparameter_impact(results)
    print("Hyperparameter impact chart saved as 'hyperparameter_impact.png'")

    common_terms, unique_concepts = analyze_results(results)

    print("\nKey Concepts and Frequency:")
    table = tabulate(common_terms, headers=['Term', 'Frequency'], tablefmt='grid')
    print(table)

    plot_word_frequency(common_terms)
    print("Word frequency chart saved as 'word_frequency.png'")

    print("\nUnique Insights:")
    for i, concept in enumerate(unique_concepts, 1):
        print(f"{i}. {concept}")

    print("\nHyperparameter Analysis:")
    best_result = results[0]
    print("Best performing hyperparameters:")
    for param, value in best_result['hyperparameters'].items():
        print(f"  {param}: {value}")

    print("\nHyperparameter Trends:")
    # Dict unpacking instead of the `|` merge operator keeps this
    # compatible with Python 3.8, which the CI matrix still tests.
    df = pd.DataFrame([{**r['hyperparameters'], 'total_score': r['total_score']} for r in results])
    for param in ['temperature', 'top_p', 'frequency_penalty', 'presence_penalty']:
        correlation = df[param].corr(df['total_score'])
        print(f"  {param}: {'Positive' if correlation > 0 else 'Negative'} correlation ({correlation:.2f}) with total score")

    print("\nKey Takeaways:")
    print("1. The top response provides the most balanced explanation, scoring well across all metrics.")
    print("2. Common themes across responses include: " + ", ".join([term for term, _ in common_terms[:5]]))
    print("3. The complexity of explanations varies, with some responses using more technical language than others.")
    print("4. All top responses maintain high relevance to the prompt, ensuring focused explanations.")
    print(f"5. The best performing set of hyperparameters achieved a total score of {best_result['total_score']:.2f}")

    print("\nRecommendations for Further Tuning:")
    print("1. Experiment with narrower ranges of hyperparameters around the best performing values.")
    print("2. Consider increasing the number of iterations to explore more hyperparameter combinations.")
    print("3. Analyze the trade-offs between different scoring components (coherence, relevance, complexity) and adjust weights if needed.")

    print("\nMethodology:")
    print("Scoring is based on three key factors:")
    print("- Coherence (40%): How well the ideas connect and flow")
    print("- Relevance (40%): How closely the response aligns with the prompt")
    print("- Complexity (20%): The sophistication of language and concepts used")
    print("Hyperparameters are randomly selected for each iteration to explore different generation settings.")

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/hypertune/__init__.py:
--------------------------------------------------------------------------------
from .core import HyperTune
from .database import Database
from .predictor import HyperPredictor

--------------------------------------------------------------------------------
/hypertune/core.py:
--------------------------------------------------------------------------------
import random
import string

import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# The client reads OPENAI_API_KEY from the environment.
client = OpenAI()

class HyperTune:
    def __init__(self, prompt, iterations):
        self.prompt = prompt
        self.iterations = iterations
        self.stop_words = set(stopwords.words('english'))

    def generate(self):
        results = []
        for _ in range(self.iterations):
            # Sample a fresh hyperparameter combination for each request.
            temperature = round(random.uniform(0.1, 1.0), 2)
            top_p = round(random.uniform(0.1, 1.0), 2)
            frequency_penalty = round(random.uniform(0.0, 2.0), 2)
            presence_penalty = round(random.uniform(0.0, 2.0), 2)

            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": self.prompt}
                ],
                temperature=temperature,
                top_p=top_p,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty)
            results.append({
                'text': response.choices[0].message.content,
                'hyperparameters': {
                    'temperature': temperature,
                    'top_p': top_p,
                    'frequency_penalty': frequency_penalty,
                    'presence_penalty': presence_penalty
                }
            })
        return results

    def score(self, results):
        scored_results = []
        for result in results:
            coherence_score = self.evaluate_coherence(result['text'])
            relevance_score = self.evaluate_relevance(result['text'], self.prompt)
            complexity_score = self.evaluate_complexity(result['text'])
            total_score = (coherence_score * 0.4 +
                           relevance_score * 0.4 +
                           complexity_score * 0.2)
            scored_results.append({
                'text': result['text'],
                'total_score': total_score,
                'coherence_score': coherence_score,
                'relevance_score': relevance_score,
                'complexity_score': complexity_score,
                'hyperparameters': result['hyperparameters']
            })
        return sorted(scored_results, key=lambda x: x['total_score'], reverse=True)

    def evaluate_coherence(self, text):
        sentences = sent_tokenize(text)
        if len(sentences) < 2:
            return 0
        sentence_vectors = []
        for sentence in sentences:
            words = [w.lower() for w in word_tokenize(sentence) if w.lower() not in self.stop_words]
            sentence_vectors.append(' '.join(words))
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(sentence_vectors)
        # Coherence is the mean cosine similarity between adjacent sentences.
        coherence_scores = []
        for i in range(len(sentences) - 1):
            coherence_scores.append(cosine_similarity(tfidf_matrix[i], tfidf_matrix[i+1])[0][0])
        return np.mean(coherence_scores)

    def evaluate_relevance(self, text, prompt):
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform([prompt, text])
        return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

    def evaluate_complexity(self, text):
        words = word_tokenize(text.lower())
        words = [word for word in words if word not in string.punctuation]
        sentences = sent_tokenize(text)
        if not words or not sentences:
            # Guard against empty responses, which would otherwise
            # produce NaN from np.mean and a ZeroDivisionError below.
            return 0
        avg_word_length = np.mean([len(word) for word in words])
        unique_words_ratio = len(set(words)) / len(words)
        avg_sentence_length = np.mean([len(word_tokenize(sentence)) for sentence in sentences])
        # Normalize each component
        norm_word_length = min(avg_word_length / 10, 1)  # Assume max avg word length is 10
        # unique_words_ratio is already between 0 and 1
        norm_sentence_length = min(avg_sentence_length / 30, 1)  # Assume max avg sentence length is 30
        complexity_score = (norm_word_length * 0.3 +
                            unique_words_ratio * 0.3 +
                            norm_sentence_length * 0.4)
        return complexity_score  # Already normalized between 0 and 1

    def run(self):
        results = self.generate()
        scored_results = self.score(results)
        return scored_results

--------------------------------------------------------------------------------
/hypertune/database.py:
--------------------------------------------------------------------------------
import sqlite3

class Database:
    def __init__(self, db_name="hypertune.db"):
        self.conn = sqlite3.connect(db_name)
        self.create_tables()

    def create_tables(self):
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY,
                prompt TEXT,
                output TEXT,
                temperature REAL,
                top_p REAL,
                score REAL
            )
        """)
        self.conn.commit()

    def insert_result(self, prompt, output, temperature, top_p, score):
        self.conn.execute("""
            INSERT INTO results (prompt, output, temperature, top_p, score)
            VALUES (?, ?, ?, ?, ?)
        """, (prompt, output, temperature, top_p, score))
        self.conn.commit()

    def get_best_params(self, prompt):
        # Query to get best params based on similar prompts
        pass

--------------------------------------------------------------------------------
/hypertune/predictor.py:
--------------------------------------------------------------------------------
from sklearn.ensemble import RandomForestRegressor
import numpy as np

class HyperPredictor:
    def __init__(self):
        self.model = RandomForestRegressor()

    def train(self, X, y):
        self.model.fit(X, y)

    def predict(self, prompt_features):
        return self.model.predict(prompt_features)

if __name__ == "__main__":
    # Usage: train with data from the database, e.g.
    # predictor.train(X_train, y_train)
    predictor = HyperPredictor()

--------------------------------------------------------------------------------
/hypertune/scoring.py:
--------------------------------------------------------------------------------
import nltk
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def perplexity_score(text):
    # Implement perplexity calculation
    pass

def semantic_coherence(text):
    # Loading the model on every call is slow; cache it if called often.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    sentences = nltk.sent_tokenize(text)
    if len(sentences) < 2:
        # A single sentence has no adjacent pairs to compare.
        return 0.0
    embeddings = model.encode(sentences)
    scores = [cosine_similarity([embeddings[i]], [embeddings[i+1]])[0][0] for i in range(len(embeddings)-1)]
    return sum(scores) / len(scores)

def factual_accuracy(text):
    # Implement fact-checking logic
    pass

# Add more scoring functions

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# setup.py

from setuptools import setup, find_packages

setup(
    name="hypertune",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        'openai',
        'nltk',
        'scikit-learn',
        'sentence-transformers',
        'matplotlib',
        'seaborn',
        'tabulate',
        'pandas',
        'numpy',
    ],
    entry_points={
        'console_scripts': [
            # cli.py defines main(), not run()
            'hypertune=cli:main',
        ],
    },
)
--------------------------------------------------------------------------------