├── Fake News Detection
├── Fake_News_Detection.ipynb
├── fake_news_detection (1).py
└── fake_news_detection.py
└── README.md
/Fake News Detection/Fake_News_Detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "code",
19 | "source": [
20 | "%pip install numpy pandas scikit-learn"
21 | ],
22 | "metadata": {
23 | "colab": {
24 | "base_uri": "https://localhost:8080/"
25 | },
26 | "id": "DymWhyqXXXuT",
27 | "outputId": "8c796f75-c7b0-4397-b688-d9048a230b87"
28 | },
29 | "execution_count": 3,
30 | "outputs": [
31 | {
32 | "output_type": "stream",
33 | "name": "stdout",
34 | "text": [
35 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
36 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.21.6)\n",
37 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (1.3.5)\n",
38 | "Requirement already satisfied: sklearn in /usr/local/lib/python3.7/dist-packages (0.0.post1)\n",
39 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2.8.2)\n",
40 | "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2022.5)\n",
41 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n"
42 | ]
43 | }
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "source": [
49 | "# Make the necessary imports"
50 | ],
51 | "metadata": {
52 | "id": "xVW6a526Yk56"
53 | }
54 | },
55 | {
56 | "cell_type": "code",
57 | "source": [
58 | "import numpy as np\n",
59 | "import pandas as pd\n",
60 | "import itertools\n",
61 | "from sklearn.model_selection import train_test_split\n",
62 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
63 | "from sklearn.linear_model import PassiveAggressiveClassifier\n",
64 | "from sklearn.metrics import accuracy_score, confusion_matrix"
65 | ],
66 | "metadata": {
67 | "id": "De3DTqkEXpHD"
68 | },
69 | "execution_count": 2,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "source": [
75 | "# Read the data into a DataFrame"
76 | ],
77 | "metadata": {
78 | "id": "jIj1ySqzYg6a"
79 | }
80 | },
81 | {
82 | "cell_type": "code",
83 | "source": [
84 | "# Load the dataset; news.csv is read from the current working directory\n",
85 | "df = pd.read_csv('news.csv')\n",
86 | "# A cell only auto-displays its last expression, so print the shape explicitly\n",
87 | "print(df.shape)\n",
88 | "df.head()"
89 | ],
90 | "metadata": {
91 | "colab": {
92 | "base_uri": "https://localhost:8080/",
93 | "height": 1288
94 | },
95 | "id": "3nFXzJVLYOtz",
96 | "outputId": "470cf152-96c4-4006-fa31-688f200f9019"
97 | },
98 | "execution_count": 8,
99 | "outputs": [
100 | {
101 | "output_type": "execute_result",
102 | "data": {
103 | "text/plain": [
104 | " Unnamed: 0 title \\\n",
105 | "0 8476 You Can Smell Hillary’s Fear \n",
106 | "1 10294 Watch The Exact Moment Paul Ryan Committed Pol... \n",
107 | "2 3608 Kerry to go to Paris in gesture of sympathy \n",
108 | "3 10142 Bernie supporters on Twitter erupt in anger ag... \n",
109 | "4 875 The Battle of New York: Why This Primary Matters \n",
110 | "\n",
111 | " text label \n",
112 | "0 Daniel Greenfield, a Shillman Journalism Fello... FAKE \n",
113 | "1 Google Pinterest Digg Linkedin Reddit Stumbleu... FAKE \n",
114 | "2 U.S. Secretary of State John F. Kerry said Mon... REAL \n",
115 | "3 — Kaydee King (@KaydeeKing) November 9, 2016 T... FAKE \n",
116 | "4 It's primary day in New York and front-runners... REAL "
117 | ],
118 | "text/html": [
119 | "\n",
120 | "
\n",
121 | "
\n",
122 | "
\n",
123 | "\n",
136 | "
\n",
137 | " \n",
138 | " \n",
139 | " | \n",
140 | " Unnamed: 0 | \n",
141 | " title | \n",
142 | " text | \n",
143 | " label | \n",
144 | "
\n",
145 | " \n",
146 | " \n",
147 | " \n",
148 | " 0 | \n",
149 | " 8476 | \n",
150 | " You Can Smell Hillary’s Fear | \n",
151 | " Daniel Greenfield, a Shillman Journalism Fello... | \n",
152 | " FAKE | \n",
153 | "
\n",
154 | " \n",
155 | " 1 | \n",
156 | " 10294 | \n",
157 | " Watch The Exact Moment Paul Ryan Committed Pol... | \n",
158 | " Google Pinterest Digg Linkedin Reddit Stumbleu... | \n",
159 | " FAKE | \n",
160 | "
\n",
161 | " \n",
162 | " 2 | \n",
163 | " 3608 | \n",
164 | " Kerry to go to Paris in gesture of sympathy | \n",
165 | " U.S. Secretary of State John F. Kerry said Mon... | \n",
166 | " REAL | \n",
167 | "
\n",
168 | " \n",
169 | " 3 | \n",
170 | " 10142 | \n",
171 | " Bernie supporters on Twitter erupt in anger ag... | \n",
172 | " — Kaydee King (@KaydeeKing) November 9, 2016 T... | \n",
173 | " FAKE | \n",
174 | "
\n",
175 | " \n",
176 | " 4 | \n",
177 | " 875 | \n",
178 | " The Battle of New York: Why This Primary Matters | \n",
179 | " It's primary day in New York and front-runners... | \n",
180 | " REAL | \n",
181 | "
\n",
182 | " \n",
183 | "
\n",
184 | "
\n",
185 | "
\n",
195 | " \n",
196 | " \n",
233 | "\n",
234 | " \n",
258 | "
\n",
259 | "
\n",
260 | " "
261 | ]
262 | },
263 | "metadata": {},
264 | "execution_count": 8
265 | }
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "source": [
271 | "# Get the labels from the DataFrame"
272 | ],
273 | "metadata": {
274 | "id": "VvpHD5dEYrST"
275 | }
276 | },
277 | {
278 | "cell_type": "code",
279 | "source": [
280 | "# Pull the target column (FAKE / REAL) out of the DataFrame\n",
281 | "labels = df['label']\n",
282 | "labels.head()"
283 | ],
284 | "metadata": {
285 | "colab": {
286 | "base_uri": "https://localhost:8080/"
287 | },
288 | "id": "fdtp93K0Yce7",
289 | "outputId": "a6599e43-358c-41d8-b4a9-2945e01c80f2"
290 | },
291 | "execution_count": 9,
292 | "outputs": [
293 | {
294 | "output_type": "execute_result",
295 | "data": {
296 | "text/plain": [
297 | "0 FAKE\n",
298 | "1 FAKE\n",
299 | "2 REAL\n",
300 | "3 FAKE\n",
301 | "4 REAL\n",
302 | "Name: label, dtype: object"
303 | ]
304 | },
305 | "metadata": {},
306 | "execution_count": 9
307 | }
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "source": [
313 | "# Split the dataset into training and testing sets."
314 | ],
315 | "metadata": {
316 | "id": "1t8io4vQZEoS"
317 | }
318 | },
319 | {
320 | "cell_type": "code",
321 | "source": [
322 | "x_train, x_test, y_train, y_test = train_test_split(df['text'], labels, random_state=7, test_size=0.2)"
323 | ],
324 | "metadata": {
325 | "id": "oBeQY99WZqwC"
326 | },
327 | "execution_count": 10,
328 | "outputs": []
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "source": [
333 | "# Fit the TF-IDF vectorizer on the training set, then transform both the training and test sets"
334 | ],
335 | "metadata": {
336 | "id": "SG7sTmaBZIMy"
337 | }
338 | },
339 | {
340 | "cell_type": "code",
341 | "source": [
342 | "# Build the TF-IDF vectorizer: English stop words removed, max_df capped at 0.7\n",
343 | "tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')\n",
344 | "# Fit on the training text only, then reuse the fitted vectorizer for the test text\n",
345 | "tfidf_train = tfidf_vectorizer.fit_transform(x_train)\n",
346 | "tfidf_test = tfidf_vectorizer.transform(x_test)"
347 | ],
348 | "metadata": {
349 | "id": "5RAQbQ_BZ60q"
350 | },
351 | "execution_count": 11,
352 | "outputs": []
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "source": [
357 | "# Initialize a PassiveAggressiveClassifier, fit it, and measure accuracy on the test set"
358 | ],
359 | "metadata": {
360 | "id": "wVSEDdquZNyJ"
361 | }
362 | },
363 | {
364 | "cell_type": "code",
365 | "source": [
366 | "pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)  # seeded so the reported accuracy is reproducible\n",
367 | "pac.fit(tfidf_train, y_train)\n",
368 | "# Predict on the held-out test set and report accuracy as a percentage\n",
369 | "y_pred = pac.predict(tfidf_test)\n",
370 | "score = accuracy_score(y_test, y_pred)\n",
371 | "print(f'Accuracy: {round(score*100,2)}%')"
372 | ],
373 | "metadata": {
374 | "colab": {
375 | "base_uri": "https://localhost:8080/"
376 | },
377 | "id": "PYOeAHYIaCRK",
378 | "outputId": "9aecd13c-b483-4318-a4f6-8858db36bd5b"
379 | },
380 | "execution_count": 12,
381 | "outputs": [
382 | {
383 | "output_type": "stream",
384 | "name": "stdout",
385 | "text": [
386 | "Accuracy: 92.66%\n"
387 | ]
388 | }
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "source": [
394 | "# Confusion matrix (label order: FAKE, REAL)"
395 | ],
396 | "metadata": {
397 | "id": "Ppxx1I6uaIqS"
398 | }
399 | },
400 | {
401 | "cell_type": "code",
402 | "source": [
403 | "confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL'])"
404 | ],
405 | "metadata": {
406 | "colab": {
407 | "base_uri": "https://localhost:8080/"
408 | },
409 | "id": "xPvxpmgkaIDC",
410 | "outputId": "371823c2-7724-475b-9f81-7b1cc20b2eaf"
411 | },
412 | "execution_count": 13,
413 | "outputs": [
414 | {
415 | "output_type": "execute_result",
416 | "data": {
417 | "text/plain": [
418 | "array([[589, 49],\n",
419 | " [ 44, 585]])"
420 | ]
421 | },
422 | "metadata": {},
423 | "execution_count": 13
424 | }
425 | ]
426 | }
427 | ]
428 | }
--------------------------------------------------------------------------------
/Fake News Detection/fake_news_detection (1).py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Fake_News_Detection.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1KYptjQmV4HiFoERmKTuSTCjgl9fuf3IX
8 | """
9 |
10 | !pip install numpy pandas sklearn
11 |
12 | """# Make necessary imports:"""
13 |
14 | import numpy as np
15 | import pandas as pd
16 | import itertools
17 | from sklearn.model_selection import train_test_split
18 | from sklearn.feature_extraction.text import TfidfVectorizer
19 | from sklearn.linear_model import PassiveAggressiveClassifier
20 | from sklearn.metrics import accuracy_score, confusion_matrix
21 |
22 | """# read the data into a DataFrame"""
23 |
24 | #Read the data
25 | df=pd.read_csv('news.csv')
26 | #Get shape and head
27 | df.shape
28 | df.head()
29 |
30 | """# labels from the DataFrame."""
31 |
32 | #DataFlair - Get the labels
33 | labels=df.label
34 | labels.head()
35 |
36 | """# Split the dataset into training and testing sets."""
37 |
38 | x_train,x_test,y_train,y_test=train_test_split(df['text'], labels, test_size=0.2, random_state=7)
39 |
40 | """# fit and transform the vectorizer on the train set, and transform the vectorizer on the test set."""
41 |
42 | #DataFlair - Initialize a TfidfVectorizer
43 | tfidf_vectorizer=TfidfVectorizer(stop_words='english', max_df=0.7)
44 | #DataFlair - Fit and transform train set, transform test set
45 | tfidf_train=tfidf_vectorizer.fit_transform(x_train)
46 | tfidf_test=tfidf_vectorizer.transform(x_test)
47 |
48 | """# initialize a PassiveAggressiveClassifier"""
49 |
50 | pac=PassiveAggressiveClassifier(max_iter=50)
51 | pac.fit(tfidf_train,y_train)
52 | #DataFlair - Predict on the test set and calculate accuracy
53 | y_pred=pac.predict(tfidf_test)
54 | score=accuracy_score(y_test,y_pred)
55 | print(f'Accuracy: {round(score*100,2)}%')
56 |
57 | """# confusion matrix"""
58 |
59 | confusion_matrix(y_test,y_pred, labels=['FAKE','REAL'])
--------------------------------------------------------------------------------
/Fake News Detection/fake_news_detection.py:
--------------------------------------------------------------------------------
1 | pip install numpy pandas sklear
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data_Science_projects
2 | A collection of machine learning (ML) project ideas, each with its source code.
3 |
--------------------------------------------------------------------------------