├── README.md
├── Text Classification.pdf
└── Text Classification.py

/README.md:
--------------------------------------------------------------------------------
# Text Classification

We have implemented Text Classification in *Python* using the Naive Bayes classifier approach.

We have used the 20 Newsgroups dataset to train the model.

--------------------------------------------------------------------------------
/Text Classification.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codewrestling/TextClassification/32e3f7c1cd9e316f71c94efe38c5606676093981/Text Classification.pdf
--------------------------------------------------------------------------------
/Text Classification.py:
--------------------------------------------------------------------------------
import numpy as np
import sklearn.datasets as skd
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Restrict the task to four of the twenty newsgroups.
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

# Load the train and test splits from local copies of the 20 Newsgroups data.
news_train = skd.load_files('/home/ayush/Desktop/Fetch20newsgroup/train', categories=categories, encoding='ISO-8859-1')
news_test = skd.load_files('/home/ayush/Desktop/Fetch20newsgroup/test/', categories=categories, encoding='ISO-8859-1')

# Pipeline: TF-IDF features followed by a multinomial Naive Bayes classifier.
text_clf = Pipeline([('vect', TfidfVectorizer()),
                     ('clf', MultinomialNB())])

# Train the model.
text_clf.fit(news_train.data, news_train.target)
# Predict the test cases.
predicted = text_clf.predict(news_test.data)

# Report accuracy, per-class metrics, and the confusion matrix.
print('Accuracy achieved is ' + str(np.mean(predicted == news_test.target)))
print(metrics.classification_report(news_test.target, predicted, target_names=news_test.target_names))
print(metrics.confusion_matrix(news_test.target, predicted))

--------------------------------------------------------------------------------
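Note: the script above loads the dataset from hardcoded local paths, so it will only run on the original author's machine. As a minimal portable sketch (assuming network access for the first download), scikit-learn's fetch_20newsgroups can fetch and cache the same data directly; the docs_new sentences below are made-up inputs used purely to illustrate predicting on unseen text.

# Sketch: same pipeline, but fetching 20 Newsgroups via scikit-learn instead
# of reading from a local folder. fetch_20newsgroups downloads the corpus on
# first use and caches it; 'subset' selects the train or test split.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
news_train = fetch_20newsgroups(subset='train', categories=categories)
news_test = fetch_20newsgroups(subset='test', categories=categories)

text_clf = Pipeline([('vect', TfidfVectorizer()),
                     ('clf', MultinomialNB())])
text_clf.fit(news_train.data, news_train.target)

# Classify a couple of unseen sentences with the trained pipeline
# (hypothetical example inputs, not part of the original script).
docs_new = ['OpenGL on the GPU is fast', 'The patient was given a new drug']
for doc, category in zip(docs_new, text_clf.predict(docs_new)):
    print('%r => %s' % (doc, news_train.target_names[category]))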