├── Fake News Detection ├── Fake_News_Detection.ipynb ├── fake_news_detection (1).py └── fake_news_detection.py └── README.md /Fake News Detection/Fake_News_Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "source": [ 20 | "%pip install numpy pandas scikit-learn" 21 | ], 22 | "metadata": { 23 | "colab": { 24 | "base_uri": "https://localhost:8080/" 25 | }, 26 | "id": "DymWhyqXXXuT", 27 | "outputId": "8c796f75-c7b0-4397-b688-d9048a230b87" 28 | }, 29 | "execution_count": 3, 30 | "outputs": [ 31 | { 32 | "output_type": "stream", 33 | "name": "stdout", 34 | "text": [ 35 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 36 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.21.6)\n", 37 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (1.3.5)\n", 38 | "Requirement already satisfied: sklearn in /usr/local/lib/python3.7/dist-packages (0.0.post1)\n", 39 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2.8.2)\n", 40 | "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2022.5)\n", 41 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n" 42 | ] 43 | } 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "source": [ 49 | "# Make necessary imports:" 50 | ], 51 | "metadata": { 52 | "id": "xVW6a526Yk56" 53 | } 54 | }, 55 | { 56 | "cell_type": "code", 57 | "source": [ 58 | "import numpy as np\n", 59 | "import pandas
as pd\n", 60 | "import itertools\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 63 | "from sklearn.linear_model import PassiveAggressiveClassifier\n", 64 | "from sklearn.metrics import accuracy_score, confusion_matrix" 65 | ], 66 | "metadata": { 67 | "id": "De3DTqkEXpHD" 68 | }, 69 | "execution_count": 2, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "source": [ 75 | "# read the data into a DataFrame" 76 | ], 77 | "metadata": { 78 | "id": "jIj1ySqzYg6a" 79 | } 80 | }, 81 | { 82 | "cell_type": "code", 83 | "source": [ 84 | "#Read the data\n", 85 | "df=pd.read_csv('news.csv')\n", 86 | "#Get shape and head\n", 87 | "df.shape\n", 88 | "df.head()" 89 | ], 90 | "metadata": { 91 | "colab": { 92 | "base_uri": "https://localhost:8080/", 93 | "height": 1288 94 | }, 95 | "id": "3nFXzJVLYOtz", 96 | "outputId": "470cf152-96c4-4006-fa31-688f200f9019" 97 | }, 98 | "execution_count": 8, 99 | "outputs": [ 100 | { 101 | "output_type": "execute_result", 102 | "data": { 103 | "text/plain": [ 104 | " Unnamed: 0 title \\\n", 105 | "0 8476 You Can Smell Hillary’s Fear \n", 106 | "1 10294 Watch The Exact Moment Paul Ryan Committed Pol... \n", 107 | "2 3608 Kerry to go to Paris in gesture of sympathy \n", 108 | "3 10142 Bernie supporters on Twitter erupt in anger ag... \n", 109 | "4 875 The Battle of New York: Why This Primary Matters \n", 110 | "\n", 111 | " text label \n", 112 | "0 Daniel Greenfield, a Shillman Journalism Fello... FAKE \n", 113 | "1 Google Pinterest Digg Linkedin Reddit Stumbleu... FAKE \n", 114 | "2 U.S. Secretary of State John F. Kerry said Mon... REAL \n", 115 | "3 — Kaydee King (@KaydeeKing) November 9, 2016 T... FAKE \n", 116 | "4 It's primary day in New York and front-runners... REAL " 117 | ], 118 | "text/html": [ 119 | "\n", 120 | "
\n", 121 | "
\n", 122 | "
\n", 123 | "\n", 136 | "\n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
Unnamed: 0titletextlabel
08476You Can Smell Hillary’s FearDaniel Greenfield, a Shillman Journalism Fello...FAKE
110294Watch The Exact Moment Paul Ryan Committed Pol...Google Pinterest Digg Linkedin Reddit Stumbleu...FAKE
23608Kerry to go to Paris in gesture of sympathyU.S. Secretary of State John F. Kerry said Mon...REAL
310142Bernie supporters on Twitter erupt in anger ag...— Kaydee King (@KaydeeKing) November 9, 2016 T...FAKE
4875The Battle of New York: Why This Primary MattersIt's primary day in New York and front-runners...REAL
\n", 184 | "
\n", 185 | " \n", 195 | " \n", 196 | " \n", 233 | "\n", 234 | " \n", 258 | "
\n", 259 | "
\n", 260 | " " 261 | ] 262 | }, 263 | "metadata": {}, 264 | "execution_count": 8 265 | } 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "source": [ 271 | "# labels from the DataFrame." 272 | ], 273 | "metadata": { 274 | "id": "VvpHD5dEYrST" 275 | } 276 | }, 277 | { 278 | "cell_type": "code", 279 | "source": [ 280 | "#DataFlair - Get the labels\n", 281 | "labels=df.label\n", 282 | "labels.head()" 283 | ], 284 | "metadata": { 285 | "colab": { 286 | "base_uri": "https://localhost:8080/" 287 | }, 288 | "id": "fdtp93K0Yce7", 289 | "outputId": "a6599e43-358c-41d8-b4a9-2945e01c80f2" 290 | }, 291 | "execution_count": 9, 292 | "outputs": [ 293 | { 294 | "output_type": "execute_result", 295 | "data": { 296 | "text/plain": [ 297 | "0 FAKE\n", 298 | "1 FAKE\n", 299 | "2 REAL\n", 300 | "3 FAKE\n", 301 | "4 REAL\n", 302 | "Name: label, dtype: object" 303 | ] 304 | }, 305 | "metadata": {}, 306 | "execution_count": 9 307 | } 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "source": [ 313 | "# Split the dataset into training and testing sets." 314 | ], 315 | "metadata": { 316 | "id": "1t8io4vQZEoS" 317 | } 318 | }, 319 | { 320 | "cell_type": "code", 321 | "source": [ 322 | "x_train,x_test,y_train,y_test=train_test_split(df['text'], labels, test_size=0.2, random_state=7)" 323 | ], 324 | "metadata": { 325 | "id": "oBeQY99WZqwC" 326 | }, 327 | "execution_count": 10, 328 | "outputs": [] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "source": [ 333 | "# fit and transform the vectorizer on the train set, and transform the vectorizer on the test set." 
334 | ], 335 | "metadata": { 336 | "id": "SG7sTmaBZIMy" 337 | } 338 | }, 339 | { 340 | "cell_type": "code", 341 | "source": [ 342 | "#DataFlair - Initialize a TfidfVectorizer\n", 343 | "tfidf_vectorizer=TfidfVectorizer(stop_words='english', max_df=0.7)\n", 344 | "#DataFlair - Fit and transform train set, transform test set\n", 345 | "tfidf_train=tfidf_vectorizer.fit_transform(x_train) \n", 346 | "tfidf_test=tfidf_vectorizer.transform(x_test)" 347 | ], 348 | "metadata": { 349 | "id": "5RAQbQ_BZ60q" 350 | }, 351 | "execution_count": 11, 352 | "outputs": [] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "source": [ 357 | "# initialize a PassiveAggressiveClassifier" 358 | ], 359 | "metadata": { 360 | "id": "wVSEDdquZNyJ" 361 | } 362 | }, 363 | { 364 | "cell_type": "code", 365 | "source": [ 366 | "pac=PassiveAggressiveClassifier(max_iter=50)\n", 367 | "pac.fit(tfidf_train,y_train)\n", 368 | "#DataFlair - Predict on the test set and calculate accuracy\n", 369 | "y_pred=pac.predict(tfidf_test)\n", 370 | "score=accuracy_score(y_test,y_pred)\n", 371 | "print(f'Accuracy: {round(score*100,2)}%')" 372 | ], 373 | "metadata": { 374 | "colab": { 375 | "base_uri": "https://localhost:8080/" 376 | }, 377 | "id": "PYOeAHYIaCRK", 378 | "outputId": "9aecd13c-b483-4318-a4f6-8858db36bd5b" 379 | }, 380 | "execution_count": 12, 381 | "outputs": [ 382 | { 383 | "output_type": "stream", 384 | "name": "stdout", 385 | "text": [ 386 | "Accuracy: 92.66%\n" 387 | ] 388 | } 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "source": [ 394 | "# confusion matrix" 395 | ], 396 | "metadata": { 397 | "id": "Ppxx1I6uaIqS" 398 | } 399 | }, 400 | { 401 | "cell_type": "code", 402 | "source": [ 403 | "confusion_matrix(y_test,y_pred, labels=['FAKE','REAL'])" 404 | ], 405 | "metadata": { 406 | "colab": { 407 | "base_uri": "https://localhost:8080/" 408 | }, 409 | "id": "xPvxpmgkaIDC", 410 | "outputId": "371823c2-7724-475b-9f81-7b1cc20b2eaf" 411 | }, 412 | "execution_count": 13, 413 | 
"outputs": [ 414 | { 415 | "output_type": "execute_result", 416 | "data": { 417 | "text/plain": [ 418 | "array([[589, 49],\n", 419 | " [ 44, 585]])" 420 | ] 421 | }, 422 | "metadata": {}, 423 | "execution_count": 13 424 | } 425 | ] 426 | } 427 | ] 428 | } -------------------------------------------------------------------------------- /Fake News Detection/fake_news_detection (1).py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Fake_News_Detection.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1KYptjQmV4HiFoERmKTuSTCjgl9fuf3IX 8 | """ 9 | 10 | !pip install numpy pandas sklearn 11 | 12 | """# Make necessary imports:""" 13 | 14 | import numpy as np 15 | import pandas as pd 16 | import itertools 17 | from sklearn.model_selection import train_test_split 18 | from sklearn.feature_extraction.text import TfidfVectorizer 19 | from sklearn.linear_model import PassiveAggressiveClassifier 20 | from sklearn.metrics import accuracy_score, confusion_matrix 21 | 22 | """# read the data into a DataFrame""" 23 | 24 | #Read the data 25 | df=pd.read_csv('news.csv') 26 | #Get shape and head 27 | df.shape 28 | df.head() 29 | 30 | """# labels from the DataFrame.""" 31 | 32 | #DataFlair - Get the labels 33 | labels=df.label 34 | labels.head() 35 | 36 | """# Split the dataset into training and testing sets.""" 37 | 38 | x_train,x_test,y_train,y_test=train_test_split(df['text'], labels, test_size=0.2, random_state=7) 39 | 40 | """# fit and transform the vectorizer on the train set, and transform the vectorizer on the test set.""" 41 | 42 | #DataFlair - Initialize a TfidfVectorizer 43 | tfidf_vectorizer=TfidfVectorizer(stop_words='english', max_df=0.7) 44 | #DataFlair - Fit and transform train set, transform test set 45 | tfidf_train=tfidf_vectorizer.fit_transform(x_train) 46 | tfidf_test=tfidf_vectorizer.transform(x_test) 47 | 48 | 
"""# initialize a PassiveAggressiveClassifier""" 49 | 50 | pac=PassiveAggressiveClassifier(max_iter=50) 51 | pac.fit(tfidf_train,y_train) 52 | #DataFlair - Predict on the test set and calculate accuracy 53 | y_pred=pac.predict(tfidf_test) 54 | score=accuracy_score(y_test,y_pred) 55 | print(f'Accuracy: {round(score*100,2)}%') 56 | 57 | """# confusion matrix""" 58 | 59 | confusion_matrix(y_test,y_pred, labels=['FAKE','REAL']) -------------------------------------------------------------------------------- /Fake News Detection/fake_news_detection.py: -------------------------------------------------------------------------------- 1 | pip install numpy pandas sklear -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data_Science_projects 2 | These are some of the ML project ideas along with the source code 3 | --------------------------------------------------------------------------------