├── PICKLE FILES ├── pickled_algos_documents.pickle ├── pickled_algos_MNB_classifier5k.pickle ├── pickled_algos_SGDC_classifier5k.pickle ├── pickled_algos_word_features5k.pickle ├── pickled_algos_LinearSVC_classifier5k.pickle ├── pickled_algos_originalnaivebayes5k.pickle ├── pickled_algos_BernoulliNB_classifier5k.pickle └── pickled_algos_LogisticRegression_classifier5k.pickle ├── README.md ├── AUDIO SCRIPT.py ├── INSTALLATION GUIDE ├── sentiment.py └── classifier.py /PICKLE FILES/pickled_algos_documents.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_documents.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_MNB_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_MNB_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_word_features5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_word_features5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # -sentiment-analysis-using-speech-recognition 2 | SENTIMENT ANALYSIS WILL BE DONE USING SPEECH RECOGNITION 3 | 4 | HELLO EVERYONE :-) 5 | IN THIS PROJECT WE HAVE CREATED A SENTIMENT CLASSIFIER WITH THE HELP OF PYTHON. HERE WE HAVE USED AUDIO THROUGH A MICROPHONE AS THE INPUT. 6 | 7 | WE CAN USE THIS SOFTWARE FOR REAL-TIME ANALYSIS OF ANYTHING (PRODUCTS, THOUGHTS... ANYTHING). 
8 | -------------------------------------------------------------------------------- /AUDIO SCRIPT.py: -------------------------------------------------------------------------------- 1 | #DEPENDENCIES 2 | import speech_recognition as sr 3 | import sentiment as s 4 | 5 | #RECORD SOUND 6 | r = sr.Recognizer() 7 | with sr.Microphone() as source: 8 | print("Say something!") 9 | audio = r.listen(source) 10 | #RECOGNISE AUDIO 11 | text = r.recognize_google(audio) 12 | print(text) 13 | try: 14 | #PRINT THE SENTIMENT AS WELL AS CONFIDENCE 15 | print(s.sentiment(text)) 16 | except sr.UnknownValueError: 17 | print("Google Speech Recognition could not understand audio") 18 | except sr.RequestError as e: 19 | print("Could not request results from Google Speech Recognition service; {0}".format(e)) -------------------------------------------------------------------------------- /INSTALLATION GUIDE: -------------------------------------------------------------------------------- 1 | HELLO EVERYONE TO USE THIS SOFTWARE YOU HAVE TWO CHOICES EITHER USE IT WITH HELP OF TRAINED DATASET THAT I HAVE PROVIDED 2 | OR USE IT TO TRAIN ON YOUR OWN DATASET. 3 | 4 | DEPENDENCIES:- 5 | NLTK 6 | Sklearn 7 | speech_recognition 8 | pyaudio 9 | statistics 10 | pickle 11 | 12 | USING THE PRETRAINED CLASSIFIER:- 13 | 1. COPY THE PICKLE FILES AND THE SENTIMENT SCRIPT AS WELL AS THE INTERFACE SCRIPT IN YOUR WORKING DIRECTORY. 14 | 2. THEN CHANGE THE PATH OF PICKLE FILES IN SENTIMENT SCRIPT. 15 | 3. THROUGH THE Audio SCRIPT YOU CAN USE THE PROGRAM. 16 | 17 | 18 | USING YOUR OWN DATASET:- 19 | 1.COPY THE THREE SCRIPTS. 20 | 2.OPEN THE CLASSIFIER SCRIPT AND THEN PLACE YOUR DATASET PATH. 21 | 3.THROUGH audio SCRIPT YOU CAN RUN THE PROGRAM. 
22 | 23 | -------------------------------------------------------------------------------- /sentiment.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from nltk.classify import ClassifierI 3 | from statistics import mode 4 | from nltk.tokenize import word_tokenize 5 | 6 | # VoteClassifier: majority-vote ensemble over several trained classifiers; confidence() is the fraction of members that agree with the winning label. 7 | class VoteClassifier(ClassifierI): 8 | def __init__(self, *classifiers): 9 | self._classifiers = classifiers 10 | 11 | def classify(self, features): 12 | votes = [] 13 | for c in self._classifiers: 14 | v = c.classify(features) 15 | votes.append(v) 16 | return mode(votes) 17 | 18 | def confidence(self, features): 19 | votes = [] 20 | for c in self._classifiers: 21 | v = c.classify(features) 22 | votes.append(v) 23 | 24 | choice_votes = votes.count(mode(votes)) 25 | conf = choice_votes / len(votes) 26 | return conf 27 | 28 | #READS THE PICKLED WORD FEATURES AS WELL AS SAVED CLASSIFIERS 29 | # NOTE(review): pickle.load runs arbitrary code when deserializing — these pickle files must come from a trusted source. 30 | documents_f = open("PICKLE FILES/pickled_algos_documents.pickle", "rb") 31 | documents = pickle.load(documents_f) 32 | documents_f.close() 33 | 34 | word_features5k_f = open("PICKLE FILES/pickled_algos_word_features5k.pickle", "rb") 35 | word_features = pickle.load(word_features5k_f) 36 | word_features5k_f.close() 37 | 38 | # find_features: bag-of-words dict {feature_word: feature_word in document} over the pickled 5k word_features. 39 | def find_features(document): 40 | words = word_tokenize(document) 41 | features = {} 42 | for w in word_features: 43 | features[w] = (w in words) 44 | 45 | return features 46 | 47 | open_file = open("PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle", "rb") 48 | classifier = pickle.load(open_file) 49 | open_file.close() 50 | 51 | 52 | open_file = open("PICKLE FILES/pickled_algos_MNB_classifier5k.pickle", "rb") 53 | MNB_classifier = pickle.load(open_file) 54 | open_file.close() 55 | 56 | 57 | 58 | open_file = open("PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle", "rb") 59 | BernoulliNB_classifier = pickle.load(open_file) 60 | open_file.close() 61 | 62 | 63 | open_file = open("PICKLE 
FILES/pickled_algos_LogisticRegression_classifier5k.pickle", "rb") 64 | LogisticRegression_classifier = pickle.load(open_file) 65 | open_file.close() 66 | 67 | 68 | open_file = open("PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle", "rb") 69 | LinearSVC_classifier = pickle.load(open_file) 70 | open_file.close() 71 | 72 | 73 | open_file = open("PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle", "rb") 74 | SGDC_classifier = pickle.load(open_file) 75 | open_file.close() 76 | 77 | # Five-member ensemble (SGDC_classifier is loaded above but not included here). 78 | voted_classifier = VoteClassifier( 79 | classifier, 80 | LinearSVC_classifier, 81 | MNB_classifier, 82 | BernoulliNB_classifier, 83 | LogisticRegression_classifier) 84 | 85 | 86 | # sentiment(text) -> (label, confidence) tuple from the five-classifier majority vote. 87 | def sentiment(text): 88 | feats = find_features(text) 89 | return voted_classifier.classify(feats),voted_classifier.confidence(feats) 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /classifier.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | import random 3 | from nltk.classify.scikitlearn import SklearnClassifier 4 | import pickle 5 | from sklearn.naive_bayes import MultinomialNB, BernoulliNB 6 | from sklearn.linear_model import LogisticRegression, SGDClassifier 7 | from sklearn.svm import SVC, LinearSVC, NuSVC 8 | from nltk.classify import ClassifierI 9 | from statistics import mode 10 | from nltk.tokenize import word_tokenize 11 | 12 | # Same majority-vote ensemble wrapper as in sentiment.py (duplicated here for training-time use). 13 | class VoteClassifier(ClassifierI): 14 | def __init__(self, *classifiers): 15 | self._classifiers = classifiers 16 | 17 | def classify(self, features): 18 | votes = [] 19 | for c in self._classifiers: 20 | v = c.classify(features) 21 | votes.append(v) 22 | return mode(votes) 23 | 24 | def confidence(self, features): 25 | votes = [] 26 | for c in self._classifiers: 27 | v = c.classify(features) 28 | votes.append(v) 29 | 30 | choice_votes = votes.count(mode(votes)) 31 | conf = choice_votes / len(votes) 32 | return conf 33 | 34 | 35 | short_pos = 
open("DATASET USED/pos.txt", "r").read() 36 | short_neg = open("DATASET USED/neg.txt", "r").read() 37 | 38 | # move this up here 39 | all_words = [] 40 | documents = [] 41 | 42 | # j is adject, r is adverb, and v is verb 43 | # allowed_word_types = ["J","R","V"] 44 | allowed_word_types = ["J"] 45 | # Build (sentence, label) documents and collect candidate feature words (adjectives only, lowercased). 46 | for p in short_pos.split('\n'): 47 | documents.append((p, "pos")) 48 | words = word_tokenize(p) 49 | pos = nltk.pos_tag(words) 50 | for w in pos: 51 | if w[1][0] in allowed_word_types: 52 | all_words.append(w[0].lower()) 53 | 54 | for p in short_neg.split('\n'): 55 | documents.append((p, "neg")) 56 | words = word_tokenize(p) 57 | pos = nltk.pos_tag(words) 58 | for w in pos: 59 | if w[1][0] in allowed_word_types: 60 | all_words.append(w[0].lower()) 61 | 62 | save_documents = open("PICKLE FILES/pickled_algos_documents.pickle", "wb") 63 | pickle.dump(documents, save_documents) 64 | save_documents.close() 65 | 66 | all_words = nltk.FreqDist(all_words) 67 | # NOTE(review): list(all_words.keys())[:5000] takes the first 5000 keys in dict order, NOT the 5000 most frequent words — all_words.most_common(5000) was probably intended; kept as-is because the shipped pickled features depend on this order. 68 | word_features = list(all_words.keys())[:5000] 69 | 70 | save_word_features = open("PICKLE FILES/pickled_algos_word_features5k.pickle", "wb") 71 | pickle.dump(word_features, save_word_features) 72 | save_word_features.close() 73 | 74 | # find_features: bag-of-words dict {feature_word: feature_word in document} — mirrors sentiment.py. 75 | def find_features(document): 76 | words = word_tokenize(document) 77 | features = {} 78 | for w in word_features: 79 | features[w] = (w in words) 80 | 81 | return features 82 | 83 | 84 | featuresets = [(find_features(rev), category) for (rev, category) in documents] 85 | 86 | random.shuffle(featuresets) 87 | print(len(featuresets)) 88 | # NOTE(review): hard-coded 10000-document split assumes the dataset has more than 10000 documents — verify against the dataset actually used. 89 | testing_set = featuresets[10000:] 90 | training_set = featuresets[:10000] 91 | 92 | classifier = nltk.NaiveBayesClassifier.train(training_set) 93 | print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set)) * 100) 94 | classifier.show_most_informative_features(15) 95 | 96 | ############### 97 | save_classifier = open("PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle", "wb") 98 | 
pickle.dump(classifier, save_classifier) 99 | save_classifier.close() 100 | # For each sklearn model below: wrap it in SklearnClassifier, train on training_set, print held-out accuracy, and pickle it into PICKLE FILES/. 101 | MNB_classifier = SklearnClassifier(MultinomialNB()) 102 | MNB_classifier.train(training_set) 103 | print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100) 104 | 105 | save_classifier = open("PICKLE FILES/pickled_algos_MNB_classifier5k.pickle", "wb") 106 | pickle.dump(MNB_classifier, save_classifier) 107 | save_classifier.close() 108 | 109 | BernoulliNB_classifier = SklearnClassifier(BernoulliNB()) 110 | BernoulliNB_classifier.train(training_set) 111 | print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set)) * 100) 112 | 113 | save_classifier = open("PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle", "wb") 114 | pickle.dump(BernoulliNB_classifier, save_classifier) 115 | save_classifier.close() 116 | 117 | LogisticRegression_classifier = SklearnClassifier(LogisticRegression()) 118 | LogisticRegression_classifier.train(training_set) 119 | print("LogisticRegression_classifier accuracy percent:", 120 | (nltk.classify.accuracy(LogisticRegression_classifier, testing_set)) * 100) 121 | 122 | save_classifier = open("PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle", "wb") 123 | pickle.dump(LogisticRegression_classifier, save_classifier) 124 | save_classifier.close() 125 | 126 | LinearSVC_classifier = SklearnClassifier(LinearSVC()) 127 | LinearSVC_classifier.train(training_set) 128 | print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set)) * 100) 129 | 130 | save_classifier = open("PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle", "wb") 131 | pickle.dump(LinearSVC_classifier, save_classifier) 132 | save_classifier.close() 133 | 134 | ##NuSVC_classifier = SklearnClassifier(NuSVC()) 135 | ##NuSVC_classifier.train(training_set) 136 | ##print("NuSVC_classifier accuracy percent:", 
(nltk.classify.accuracy(NuSVC_classifier, testing_set))*100) 137 | # NuSVC block above is intentionally commented out. 138 | 139 | SGDC_classifier = SklearnClassifier(SGDClassifier()) 140 | SGDC_classifier.train(training_set) 141 | print("SGDClassifier accuracy percent:", nltk.classify.accuracy(SGDC_classifier, testing_set) * 100) 142 | 143 | save_classifier = open("PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle", "wb") 144 | pickle.dump(SGDC_classifier, save_classifier) 145 | save_classifier.close() 146 | --------------------------------------------------------------------------------