├── vectorizer.pkl
├── Naive Bayes.pkl
├── Logistic Regression.pkl
├── LICENSE
├── train_models.py
├── app.py
└── templates
    └── index.html


/vectorizer.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kvcops/AI-Text-Classification/main/vectorizer.pkl


--------------------------------------------------------------------------------
/Naive Bayes.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kvcops/AI-Text-Classification/main/Naive%20Bayes.pkl


--------------------------------------------------------------------------------
/Logistic Regression.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kvcops/AI-Text-Classification/main/Logistic%20Regression.pkl


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 KARRI VAMSI KRISHNA
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/train_models.py:
--------------------------------------------------------------------------------
 1 | # train_models.py
 2 | import pandas as pd
 3 | import joblib
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.linear_model import LogisticRegression
 6 | from sklearn.ensemble import RandomForestClassifier
 7 | from sklearn.svm import SVC
 8 | from sklearn.naive_bayes import MultinomialNB
 9 | from sklearn.feature_extraction.text import TfidfVectorizer
10 | 
11 | # Load the CSV file
12 | df = pd.read_csv('data.csv')
13 | 
14 | # Preprocess data
15 | X = df['text']
16 | y = df['generated']
17 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
18 | 
19 | # TF-IDF Vectorization
20 | vectorizer = TfidfVectorizer(max_features=10000)
21 | X_train_vec = vectorizer.fit_transform(X_train)
22 | X_test_vec = vectorizer.transform(X_test)
23 | 
24 | # Save vectorizer
25 | joblib.dump(vectorizer, 'vectorizer.pkl')
26 | 
27 | # Train and save models
28 | models = {
29 |     'Logistic Regression': LogisticRegression(max_iter=200),
30 |     'Random Forest': RandomForestClassifier(n_estimators=100),
31 |     'SVM': SVC(kernel='linear'),
32 |     'Naive Bayes': MultinomialNB()
33 | }
34 | 
35 | for name, model in models.items():
36 |     model.fit(X_train_vec, y_train)
37 |     joblib.dump(model, f'{name}.pkl')
38 | 
39 | print("Models and vectorizer saved successfully.")
40 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template, request
 2 | import pandas as pd
 3 | import joblib
 4 | from sklearn.metrics import accuracy_score
 5 | import matplotlib.pyplot as plt
 6 | import io
 7 | import base64
 8 | from sklearn.model_selection import train_test_split
 9 | app = Flask(__name__)
10 | 
11 | # Load the CSV file
12 | df = pd.read_csv('data.csv')
13 | 
14 | # Load models and vectorizer
15 | vectorizer = joblib.load('vectorizer.pkl')
16 | models = {
17 |     'Logistic Regression': joblib.load('Logistic Regression.pkl'),
18 |     'Random Forest': joblib.load('Random Forest.pkl'),
19 |     'SVM': joblib.load('SVM.pkl'),
20 |     'Naive Bayes': joblib.load('Naive Bayes.pkl')
21 | }
22 | 
23 | # Preprocess data for accuracy calculation
24 | X = df['text']
25 | y = df['generated']
26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
27 | X_test_vec = vectorizer.transform(X_test)
28 | 
29 | @app.route('/')
30 | def index():
31 |     return render_template('index.html', prediction=None, plot_url=None, 
32 |                            head_data=None, tail_data=None, 
33 |                            performance_data=None)
34 | 
35 | 
36 | @app.route('/analyze', methods=['POST'])
37 | def analyze():
38 |     text = request.form['text']
39 |     model_choice = request.form['model']
40 |     viz_choice = request.form['visualization']
41 |     show_head = request.form.get('head')
42 |     show_tail = request.form.get('tail')
43 |     show_performance = request.form.get('performance')
44 | 
45 |     # Model Selection
46 |     model = models.get(model_choice, models['Logistic Regression'])
47 | 
48 |     # Prediction
49 |     text_vec = vectorizer.transform([text])
50 |     prediction_num = model.predict(text_vec)[0]
51 |     prediction = "Human-generated" if prediction_num == 0 else "AI-generated"
52 |     accuracy = accuracy_score(y_test, model.predict(X_test_vec))
53 | 
54 |     # Visualization
55 |     plot_url = None
56 |     if viz_choice == 'Pie Chart':
57 |         plt.figure(figsize=(6, 6))
58 |         df['generated'].value_counts().plot.pie(autopct='%1.1f%%')
59 |         img = io.BytesIO()
60 |         plt.savefig(img, format='png')
61 |         img.seek(0)
62 |         plot_url = base64.b64encode(img.getvalue()).decode()
63 | 
64 |     elif viz_choice == 'Box Plot':
65 |         plt.figure(figsize=(6, 6))
66 |         df.boxplot(column=['generated'])
67 |         img = io.BytesIO()
68 |         plt.savefig(img, format='png')
69 |         img.seek(0)
70 |         plot_url = base64.b64encode(img.getvalue()).decode()
71 | 
72 |     # Prepare Data for Display
73 |     head_data = df.head().to_html() if show_head else None
74 |     tail_data = df.tail().to_html() if show_tail else None
75 |     performance_data = f"Accuracy: {accuracy:.2f}" if show_performance else None
76 | 
77 |     return render_template('index.html', prediction=prediction, plot_url=plot_url,
78 |                            head_data=head_data, tail_data=tail_data, 
79 |                            performance_data=performance_data)
80 | 
81 | if __name__ == '__main__':
82 |     app.run(debug=True)
83 | 


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 |     <title>AI Text Classification</title>
  7 |     <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
  8 |     <style>
  9 |         body {
 10 |             font-family: 'Roboto', sans-serif;
 11 |             background: linear-gradient(to right, #ece9e6, #ffffff);
 12 |             color: #333;
 13 |             margin: 0;
 14 |             padding: 0;
 15 |             display: flex;
 16 |             justify-content: center;
 17 |             align-items: center;
 18 |             height: 100vh;
 19 |             overflow: hidden;
 20 |         }
 21 |         /* The card scrolls internally because the body itself hides overflow. */
 22 |         .container {
 23 |             background: #fff;
 24 |             padding: 2em;
 25 |             border-radius: 10px;
 26 |             box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
 27 |             max-width: 600px;
 28 |             width: 100%;
 29 |             overflow-y: auto;
 30 |             max-height: 90vh;
 31 |             animation: fadeIn 1s ease-in-out;
 32 |         }
 33 |         h1, h2 {
 34 |             color: #4A90E2;
 35 |             text-align: center;
 36 |         }
 37 |         form {
 38 |             display: flex;
 39 |             flex-direction: column;
 40 |             gap: 1em;
 41 |         }
 42 |         label {
 43 |             font-weight: bold;
 44 |         }
 45 |         textarea, select, input[type="text"], input[type="submit"] {
 46 |             padding: 0.5em;
 47 |             border: 1px solid #ccc;
 48 |             border-radius: 5px;
 49 |             font-size: 1em;
 50 |             width: 100%;
 51 |             box-sizing: border-box;
 52 |         }
 53 |         input[type="submit"] {
 54 |             background: #4A90E2;
 55 |             color: #fff;
 56 |             border: none;
 57 |             cursor: pointer;
 58 |             transition: background 0.3s;
 59 |         }
 60 |         input[type="submit"]:hover {
 61 |             background: #357ABD;
 62 |         }
 63 |         .checkbox-group {
 64 |             display: flex;
 65 |             flex-direction: column;
 66 |             gap: 0.5em;
 67 |         }
 68 |         /* Keep each checkbox on the same row as its label. */
 69 |         .checkbox-item {
 70 |             display: flex;
 71 |             align-items: center;
 72 |             gap: 0.5em;
 73 |         }
 74 |         /* The chart PNG can be wider than the card's content area; scale it to fit. */
 75 |         img {
 76 |             max-width: 100%;
 77 |             height: auto;
 78 |         }
 79 |         /* Data previews may be wider than the card; let them scroll sideways. */
 80 |         .table-wrap {
 81 |             overflow-x: auto;
 82 |         }
 83 |         @keyframes fadeIn {
 84 |             from {
 85 |                 opacity: 0;
 86 |                 transform: translateY(-20px);
 87 |             }
 88 |             to {
 89 |                 opacity: 1;
 90 |                 transform: translateY(0);
 91 |             }
 92 |         }
 93 |         .fade-in {
 94 |             animation: fadeIn 1s ease-in-out;
 95 |         }
 96 |     </style>
 97 | </head>
 98 | <body>
 99 |     <div class="container fade-in">
100 |         <h1>AI Text Classification</h1>
101 |         {# Field names below are read by the /analyze view in app.py. #}
102 |         <form action="/analyze" method="post">
103 |             <label for="text">Enter text:</label>
104 |             <textarea id="text" name="text" rows="4" required></textarea>
105 | 
106 |             <label for="model">Choose a model:</label>
107 |             <select id="model" name="model">
108 |                 <option value="Logistic Regression">Logistic Regression</option>
109 |                 <option value="Random Forest">Random Forest</option>
110 |                 <option value="SVM">SVM</option>
111 |                 <option value="Naive Bayes">Naive Bayes</option>
112 |             </select>
113 | 
114 |             <label for="visualization">Choose a visualization:</label>
115 |             <select id="visualization" name="visualization">
116 |                 <option value="None">None</option>
117 |                 <option value="Pie Chart">Pie Chart</option>
118 |                 <option value="Box Plot">Box Plot</option>
119 |             </select>
120 | 
121 |             <div class="checkbox-group">
122 |                 <div class="checkbox-item">
123 |                     <input type="checkbox" id="head" name="head">
124 |                     <label for="head">Show head</label>
125 |                 </div>
126 | 
127 |                 <div class="checkbox-item">
128 |                     <input type="checkbox" id="tail" name="tail">
129 |                     <label for="tail">Show tail</label>
130 |                 </div>
131 | 
132 |                 <div class="checkbox-item">
133 |                     <input type="checkbox" id="performance" name="performance">
134 |                     <label for="performance">Show performance</label>
135 |                 </div>
136 |             </div>
137 | 
138 |             <input type="submit" value="Analyze">
139 |         </form>
140 | 
141 |         {# Each result section is rendered only when the view passed a value for it. #}
142 |         {% if prediction %}
143 |             <h2>Prediction: {{ prediction }}</h2>
144 |         {% endif %}
145 | 
146 |         {% if performance_data %}
147 |             <h2>Performance: {{ performance_data }}</h2>
148 |         {% endif %}
149 | 
150 |         {% if plot_url %}
151 |             <h2>Visualization:</h2>
152 |             <img src="data:image/png;base64,{{ plot_url }}" alt="Visualization">
153 |         {% endif %}
154 | 
155 |         {# head_data / tail_data are HTML tables built with DataFrame.to_html() in app.py, hence |safe. #}
156 |         {% if head_data %}
157 |             <h2>Head of the Data:</h2>
158 |             <div class="table-wrap">{{ head_data|safe }}</div>
159 |         {% endif %}
160 | 
161 |         {% if tail_data %}
162 |             <h2>Tail of the Data:</h2>
163 |             <div class="table-wrap">{{ tail_data|safe }}</div>
164 |         {% endif %}
165 |     </div>
166 | </body>
167 | </html>
168 | 


--------------------------------------------------------------------------------