├── requirements.txt
├── demo
    └── demo.py
├── README.md
└── predict_stock.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | tweepy
4 | textblob
5 | requests
6 | pyyaml
7 | tensorflow
8 | keras
9 | 


--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from keras.models import Sequential
 3 | from keras.layers import Dense
 4 | 
 5 | 
 6 | # Teach "Table 3" to the network 
 7 | trainX = np.array([1, 2 ,3 ,4 ,  5 , 6 , 7,  8,  9, 10])
 8 | trainY = np.array([3, 6, 9, 12, 15, 18, 21, 24, 27, 30])
 9 | 
10 | model = Sequential()
11 | model.add(Dense(8, input_dim=1, activation='relu'))
12 | model.add(Dense(1))
13 | model.compile(loss='mean_squared_error', optimizer='adam')
14 | model.fit(trainX, trainY, nb_epoch=1200, batch_size=2, verbose=2)
15 | 
16 | 
17 | # Predict  3x20, answer = 60
18 | dataPrediction = model.predict(np.array([20]))
19 | print int(dataPrediction[0][0]), '<--- Predicted number'
20 | print '60 <-- Correct answer \n'
21 | 
22 | # Predict  3x25, answer = 75
23 | dataPrediction = model.predict(np.array([25]))
24 | print int(dataPrediction[0][0]), '<--- Predicted number'
25 | print '75 <-- Correct answer \n'
26 | 
27 | # Predict  3x345, answer = 1035
28 | dataPrediction = model.predict(np.array([345]))
29 | print int(dataPrediction[0][0]), '<--- Predicted number'
30 | print '1035 <-- Correct answer \n'
31 | 
32 | # Predict  3x2, answer = 6
33 | dataPrediction = model.predict(np.array([2]))
34 | print int(dataPrediction[0][0]), '<--- Predicted number'
35 | print '6 <-- Correct answer \n'
36 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # predict_stock_py
 2 | This is a submission for the "Predicting Stock Prices challenge" by @Sirajology on [Youtube](https://www.youtube.com/watch?v=SSu00IRRraY).
 3 | 
 4 | 
 5 | ## Overview
 6 | The python script "predict_stock.py" does the following:
 7 | 
 8 | 1. Asks the user for a stock quote from NASDAQ (e.g. AAPL, FB, GOOGL).
 9 | 2. Uses Tweepy to retrieve tweets about that stock.
10 | 3. Uses TextBlob to determine if the majority of the tweets are positive using sentiment analysis.
11 | 4. If the majority of the tweets are positive, downloads the last year of prices for that stock and trains a neural net with that data to predict the price for tomorrow.
12 | 
13 | The folder "demo" contains a test that teaches 'Table 3' (the multiplication table of 3) to the same network that is used to predict the price.
14 | 
15 | 
16 | ## Dependencies
17 | * numpy (http://www.numpy.org/)
18 | * tweepy (http://www.tweepy.org/)
19 | * textblob (https://textblob.readthedocs.io/en/dev/)
20 | * requests (http://docs.python-requests.org/en/master/)
21 | * keras (https://keras.io/) Runs with [TensorFlow](https://www.tensorflow.org/) or [Theano](http://deeplearning.net/software/theano/), so you will need one of them.
22 | 
23 | 
24 | # Usage
25 | Install all the necessary dependencies.
26 | Then just run:
27 | ```
28 | python predict_stock.py
29 | ```
30 | It will ask you for a NASDAQ quote, e.g. AAPL; then, if the sentiment is positive and the stock you entered exists, it will start training the network and give you a result.
31 | 
32 | 
33 | # Credits
34 | Credits to [Siraj](https://github.com/llSourcell) and to this [blog post](http://machinelearningmastery.com/time-series-prediction-with-deep-learning-in-python-with-keras/).
35 | 
36 | 
37 | # Disclaimer
38 | Do not use this code to invest in the stock market. If you are interested in stocks, start by reading "The Intelligent Investor" by Benjamin Graham.
39 | 


--------------------------------------------------------------------------------
/predict_stock.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import tweepy
  4 | import requests
  5 | import numpy as np
  6 | 
  7 | from keras.models import Sequential
  8 | from keras.layers import Dense
  9 | from textblob import TextBlob
 10 | 
 11 | 
 12 | # First we login into twitter
 13 | consumer_key = ''
 14 | consumer_secret = ''
 15 | access_token = ''
 16 | access_token_secret = ''
 17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 18 | auth.set_access_token(access_token, access_token_secret)
 19 | user = tweepy.API(auth)
 20 | 
 21 | # Where the csv file will live
 22 | FILE_NAME = 'historical.csv'
 23 | 
 24 | 
 25 | def stock_sentiment(quote, num_tweets):
 26 |     # Checks if the sentiment for our quote is
 27 |     # positive or negative, returns True if
 28 |     # majority of valid tweets have positive sentiment
 29 |     list_of_tweets = user.search(quote, count=num_tweets)
 30 |     positive, null = 0, 0
 31 | 
 32 |     for tweet in list_of_tweets:
 33 |         blob = TextBlob(tweet.text).sentiment
 34 |         if blob.subjectivity == 0:
 35 |             null += 1
 36 |             next
 37 |         if blob.polarity > 0:
 38 |             positive += 1
 39 | 
 40 |     if positive > ((num_tweets - null)/2):
 41 |         return True
 42 | 
 43 | 
 44 | def get_historical(quote):
 45 |     # Download our file from google finance
 46 |     url = 'http://www.google.com/finance/historical?q=NASDAQ%3A'+quote+'&output=csv'
 47 |     r = requests.get(url, stream=True)
 48 | 
 49 |     if r.status_code != 400:
 50 |         with open(FILE_NAME, 'wb') as f:
 51 |             for chunk in r:
 52 |                 f.write(chunk)
 53 | 
 54 |         return True
 55 | 
 56 | 
 57 | def stock_prediction():
 58 | 
 59 |     # Collect data points from csv
 60 |     dataset = []
 61 | 
 62 |     with open(FILE_NAME) as f:
 63 |         for n, line in enumerate(f):
 64 |             if n != 0:
 65 |                 dataset.append(float(line.split(',')[1]))
 66 | 
 67 |     dataset = np.array(dataset)
 68 | 
 69 |     # Create dataset matrix (X=t and Y=t+1)
 70 |     def create_dataset(dataset):
 71 |         dataX = [dataset[n+1] for n in range(len(dataset)-2)]
 72 |         return np.array(dataX), dataset[2:]
 73 |         
 74 |     trainX, trainY = create_dataset(dataset)
 75 | 
 76 |     # Create and fit Multilinear Perceptron model
 77 |     model = Sequential()
 78 |     model.add(Dense(8, input_dim=1, activation='relu'))
 79 |     model.add(Dense(1))
 80 |     model.compile(loss='mean_squared_error', optimizer='adam')
 81 |     model.fit(trainX, trainY, nb_epoch=200, batch_size=2, verbose=2)
 82 | 
 83 |     # Our prediction for tomorrow
 84 |     prediction = model.predict(np.array([dataset[0]]))
 85 |     result = 'The price will move from %s to %s' % (dataset[0], prediction[0][0])
 86 | 
 87 |     return result
 88 | 
 89 |     
 90 | # Ask user for a stock quote
 91 | stock_quote = raw_input('Enter a stock quote from NASDAQ (e.j: AAPL, FB, GOOGL): ').upper()
 92 | 
 93 | # Check if the stock sentiment is positve
 94 | if not stock_sentiment(stock_quote, num_tweets=100):
 95 |     print 'This stock has bad sentiment, please re-run the script'
 96 |     sys.exit()
 97 | 
 98 | # Check if we got te historical data
 99 | if not get_historical(stock_quote):
100 |     print 'Google returned a 404, please re-run the script and'
101 |     print 'enter a valid stock quote from NASDAQ'
102 |     sys.exit()
103 | 
104 | # We have our file so we create the neural net and get the prediction
105 | print stock_prediction()
106 | 
107 | # We are done so we delete the csv file
108 | os.remove(FILE_NAME)
109 | 


--------------------------------------------------------------------------------