├── PICKLE FILES ├── pickled_algos_documents.pickle ├── pickled_algos_MNB_classifier5k.pickle ├── pickled_algos_SGDC_classifier5k.pickle ├── pickled_algos_word_features5k.pickle ├── pickled_algos_LinearSVC_classifier5k.pickle ├── pickled_algos_originalnaivebayes5k.pickle ├── pickled_algos_BernoulliNB_classifier5k.pickle └── pickled_algos_LogisticRegression_classifier5k.pickle ├── README.md ├── AUDIO SCRIPT.py ├── INSTALLATION GUIDE ├── sentiment.py └── classifier.py /PICKLE FILES/pickled_algos_documents.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_documents.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_MNB_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_MNB_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_word_features5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_word_features5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle -------------------------------------------------------------------------------- /PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chestnut3108/SENTIMENT-ANALYSIS-ON-AUDIO/HEAD/PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # -sentiment-analysis-using-speech-recognition 2 | SENTIMENT ANALYSIS WILL BE DONE USING SPEECH RECOGNITION 3 | 4 | HELLO EVERYONE :-) 5 | IN THIS PROJECT WE HAVE CREATED A SENTIMENT CLASSIFIER WITH THE HELP OF PYTHON. HERE WE HAVE USED AUDIO THROUGH A MICROPHONE AS THE INPUT. 6 | 7 | WE CAN USE THIS SOFTWARE FOR REAL-TIME ANALYSIS OF ANYTHING (PRODUCTS, THOUGHTS... ANYTHING). 
8 | -------------------------------------------------------------------------------- /AUDIO SCRIPT.py: -------------------------------------------------------------------------------- 1 | #DEPENDENCIES 2 | import speech_recognition as sr 3 | import sentiment as s 4 | 5 | #RECORD SOUND 6 | r = sr.Recognizer() 7 | with sr.Microphone() as source: 8 | print("Say something!") 9 | audio = r.listen(source) 10 | #RECOGNISE AUDIO 11 | text = r.recognize_google(audio) 12 | print(text) 13 | try: 14 | #PRINT THE SENTIMENT AS WELL AS CONFIDENCE 15 | print(s.sentiment(text)) 16 | except sr.UnknownValueError: 17 | print("Google Speech Recognition could not understand audio") 18 | except sr.RequestError as e: 19 | print("Could not request results from Google Speech Recognition service; {0}".format(e)) -------------------------------------------------------------------------------- /INSTALLATION GUIDE: -------------------------------------------------------------------------------- 1 | HELLO EVERYONE TO USE THIS SOFTWARE YOU HAVE TWO CHOICES EITHER USE IT WITH HELP OF TRAINED DATASET THAT I HAVE PROVIDED 2 | OR USE IT TO TRAIN ON YOUR OWN DATASET. 3 | 4 | DEPENDENCIES:- 5 | NLTK 6 | Sklearn 7 | speech_recognition 8 | pyaudio 9 | statistics 10 | pickle 11 | 12 | USING THE PRETRAINED CLASSIFIER:- 13 | 1. COPY THE PICKLE FILES AND THE SENTIMENT SCRIPT AS WELL AS THE INTERFACE SCRIPT IN YOUR WORKING DIRECTORY. 14 | 2. THEN CHANGE THE PATH OF PICKLE FILES IN SENTIMENT SCRIPT. 15 | 3. THROUGH THE Audio SCRIPT YOU CAN USE THE PROGRAM. 16 | 17 | 18 | USING YOUR OWN DATASET:- 19 | 1.COPY THE THREE SCRIPTS. 20 | 2.OPEN THE CLASSIFIER SCRIPT AND THEN PLACE YOUR DATASET PATH. 21 | 3.THROUGH audio SCRIPT YOU CAN RUN THE PROGRAM. 
22 | 23 | -------------------------------------------------------------------------------- /sentiment.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from nltk.classify import ClassifierI 3 | from statistics import mode 4 | from nltk.tokenize import word_tokenize 5 | 6 | # VoteClassifier: majority-vote ensemble over several trained classifiers; confidence() is the fraction of members that agree with the winning label. 7 | class VoteClassifier(ClassifierI): 8 | def __init__(self, *classifiers): 9 | self._classifiers = classifiers 10 | 11 | def classify(self, features): 12 | votes = [] 13 | for c in self._classifiers: 14 | v = c.classify(features) 15 | votes.append(v) 16 | return mode(votes) 17 | 18 | def confidence(self, features): 19 | votes = [] 20 | for c in self._classifiers: 21 | v = c.classify(features) 22 | votes.append(v) 23 | 24 | choice_votes = votes.count(mode(votes)) 25 | conf = choice_votes / len(votes) 26 | return conf 27 | 28 | #READS THE PICKLED WORD FEATURES AS WELL AS SAVED CLASSIFIERS 29 | # NOTE(review): pickle.load runs arbitrary code when deserializing — these pickle files must come from a trusted source. 30 | documents_f = open("PICKLE FILES/pickled_algos_documents.pickle", "rb") 31 | documents = pickle.load(documents_f) 32 | documents_f.close() 33 | 34 | word_features5k_f = open("PICKLE FILES/pickled_algos_word_features5k.pickle", "rb") 35 | word_features = pickle.load(word_features5k_f) 36 | word_features5k_f.close() 37 | 38 | # find_features: bag-of-words dict {feature_word: feature_word in document} over the pickled 5k word_features. 39 | def find_features(document): 40 | words = word_tokenize(document) 41 | features = {} 42 | for w in word_features: 43 | features[w] = (w in words) 44 | 45 | return features 46 | 47 | open_file = open("PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle", "rb") 48 | classifier = pickle.load(open_file) 49 | open_file.close() 50 | 51 | 52 | open_file = open("PICKLE FILES/pickled_algos_MNB_classifier5k.pickle", "rb") 53 | MNB_classifier = pickle.load(open_file) 54 | open_file.close() 55 | 56 | 57 | 58 | open_file = open("PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle", "rb") 59 | BernoulliNB_classifier = pickle.load(open_file) 60 | open_file.close() 61 | 62 | 63 | open_file = open("PICKLE 
FILES/pickled_algos_LogisticRegression_classifier5k.pickle", "rb") 64 | LogisticRegression_classifier = pickle.load(open_file) 65 | open_file.close() 66 | 67 | 68 | open_file = open("PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle", "rb") 69 | LinearSVC_classifier = pickle.load(open_file) 70 | open_file.close() 71 | 72 | 73 | open_file = open("PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle", "rb") 74 | SGDC_classifier = pickle.load(open_file) 75 | open_file.close() 76 | 77 | # Five-member ensemble (SGDC_classifier is loaded above but not included here). 78 | voted_classifier = VoteClassifier( 79 | classifier, 80 | LinearSVC_classifier, 81 | MNB_classifier, 82 | BernoulliNB_classifier, 83 | LogisticRegression_classifier) 84 | 85 | 86 | # sentiment(text) -> (label, confidence) tuple from the five-classifier majority vote. 87 | def sentiment(text): 88 | feats = find_features(text) 89 | return voted_classifier.classify(feats),voted_classifier.confidence(feats) 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /classifier.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | import random 3 | from nltk.classify.scikitlearn import SklearnClassifier 4 | import pickle 5 | from sklearn.naive_bayes import MultinomialNB, BernoulliNB 6 | from sklearn.linear_model import LogisticRegression, SGDClassifier 7 | from sklearn.svm import SVC, LinearSVC, NuSVC 8 | from nltk.classify import ClassifierI 9 | from statistics import mode 10 | from nltk.tokenize import word_tokenize 11 | 12 | # Same majority-vote ensemble wrapper as in sentiment.py (duplicated here for training-time use). 13 | class VoteClassifier(ClassifierI): 14 | def __init__(self, *classifiers): 15 | self._classifiers = classifiers 16 | 17 | def classify(self, features): 18 | votes = [] 19 | for c in self._classifiers: 20 | v = c.classify(features) 21 | votes.append(v) 22 | return mode(votes) 23 | 24 | def confidence(self, features): 25 | votes = [] 26 | for c in self._classifiers: 27 | v = c.classify(features) 28 | votes.append(v) 29 | 30 | choice_votes = votes.count(mode(votes)) 31 | conf = choice_votes / len(votes) 32 | return conf 33 | 34 | 35 | short_pos = 
open("DATASET USED/pos.txt", "r").read() 36 | short_neg = open("DATASET USED/neg.txt", "r").read() 37 | 38 | # move this up here 39 | all_words = [] 40 | documents = [] 41 | 42 | # j is adject, r is adverb, and v is verb 43 | # allowed_word_types = ["J","R","V"] 44 | allowed_word_types = ["J"] 45 | # Build (sentence, label) documents and collect candidate feature words (adjectives only, lowercased). 46 | for p in short_pos.split('\n'): 47 | documents.append((p, "pos")) 48 | words = word_tokenize(p) 49 | pos = nltk.pos_tag(words) 50 | for w in pos: 51 | if w[1][0] in allowed_word_types: 52 | all_words.append(w[0].lower()) 53 | 54 | for p in short_neg.split('\n'): 55 | documents.append((p, "neg")) 56 | words = word_tokenize(p) 57 | pos = nltk.pos_tag(words) 58 | for w in pos: 59 | if w[1][0] in allowed_word_types: 60 | all_words.append(w[0].lower()) 61 | 62 | save_documents = open("PICKLE FILES/pickled_algos_documents.pickle", "wb") 63 | pickle.dump(documents, save_documents) 64 | save_documents.close() 65 | 66 | all_words = nltk.FreqDist(all_words) 67 | # NOTE(review): list(all_words.keys())[:5000] takes the first 5000 keys in dict order, NOT the 5000 most frequent words — all_words.most_common(5000) was probably intended; kept as-is because the shipped pickled features depend on this order. 68 | word_features = list(all_words.keys())[:5000] 69 | 70 | save_word_features = open("PICKLE FILES/pickled_algos_word_features5k.pickle", "wb") 71 | pickle.dump(word_features, save_word_features) 72 | save_word_features.close() 73 | 74 | # find_features: bag-of-words dict {feature_word: feature_word in document} — mirrors sentiment.py. 75 | def find_features(document): 76 | words = word_tokenize(document) 77 | features = {} 78 | for w in word_features: 79 | features[w] = (w in words) 80 | 81 | return features 82 | 83 | 84 | featuresets = [(find_features(rev), category) for (rev, category) in documents] 85 | 86 | random.shuffle(featuresets) 87 | print(len(featuresets)) 88 | # NOTE(review): hard-coded 10000-document split assumes the dataset has more than 10000 documents — verify against the dataset actually used. 89 | testing_set = featuresets[10000:] 90 | training_set = featuresets[:10000] 91 | 92 | classifier = nltk.NaiveBayesClassifier.train(training_set) 93 | print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set)) * 100) 94 | classifier.show_most_informative_features(15) 95 | 96 | ############### 97 | save_classifier = open("PICKLE FILES/pickled_algos_originalnaivebayes5k.pickle", "wb") 98 | 
pickle.dump(classifier, save_classifier) 99 | save_classifier.close() 100 | # For each sklearn model below: wrap it in SklearnClassifier, train on training_set, print held-out accuracy, and pickle it into PICKLE FILES/. 101 | MNB_classifier = SklearnClassifier(MultinomialNB()) 102 | MNB_classifier.train(training_set) 103 | print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100) 104 | 105 | save_classifier = open("PICKLE FILES/pickled_algos_MNB_classifier5k.pickle", "wb") 106 | pickle.dump(MNB_classifier, save_classifier) 107 | save_classifier.close() 108 | 109 | BernoulliNB_classifier = SklearnClassifier(BernoulliNB()) 110 | BernoulliNB_classifier.train(training_set) 111 | print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set)) * 100) 112 | 113 | save_classifier = open("PICKLE FILES/pickled_algos_BernoulliNB_classifier5k.pickle", "wb") 114 | pickle.dump(BernoulliNB_classifier, save_classifier) 115 | save_classifier.close() 116 | 117 | LogisticRegression_classifier = SklearnClassifier(LogisticRegression()) 118 | LogisticRegression_classifier.train(training_set) 119 | print("LogisticRegression_classifier accuracy percent:", 120 | (nltk.classify.accuracy(LogisticRegression_classifier, testing_set)) * 100) 121 | 122 | save_classifier = open("PICKLE FILES/pickled_algos_LogisticRegression_classifier5k.pickle", "wb") 123 | pickle.dump(LogisticRegression_classifier, save_classifier) 124 | save_classifier.close() 125 | 126 | LinearSVC_classifier = SklearnClassifier(LinearSVC()) 127 | LinearSVC_classifier.train(training_set) 128 | print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set)) * 100) 129 | 130 | save_classifier = open("PICKLE FILES/pickled_algos_LinearSVC_classifier5k.pickle", "wb") 131 | pickle.dump(LinearSVC_classifier, save_classifier) 132 | save_classifier.close() 133 | 134 | ##NuSVC_classifier = SklearnClassifier(NuSVC()) 135 | ##NuSVC_classifier.train(training_set) 136 | ##print("NuSVC_classifier accuracy percent:", 
(nltk.classify.accuracy(NuSVC_classifier, testing_set))*100) 137 | # NuSVC block above is intentionally commented out. 138 | 139 | SGDC_classifier = SklearnClassifier(SGDClassifier()) 140 | SGDC_classifier.train(training_set) 141 | print("SGDClassifier accuracy percent:", nltk.classify.accuracy(SGDC_classifier, testing_set) * 100) 142 | 143 | save_classifier = open("PICKLE FILES/pickled_algos_SGDC_classifier5k.pickle", "wb") 144 | pickle.dump(SGDC_classifier, save_classifier) 145 | save_classifier.close() 146 | --------------------------------------------------------------------------------