├── requirements.txt ├── demo └── demo.py ├── README.md └── predict_stock.py /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tweepy 4 | textblob 5 | requests 6 | pyyaml 7 | tensorflow 8 | keras 9 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.models import Sequential 3 | from keras.layers import Dense 4 | 5 | 6 | # Teach "Table 3" to the network 7 | trainX = np.array([1, 2 ,3 ,4 , 5 , 6 , 7, 8, 9, 10]) 8 | trainY = np.array([3, 6, 9, 12, 15, 18, 21, 24, 27, 30]) 9 | 10 | model = Sequential() 11 | model.add(Dense(8, input_dim=1, activation='relu')) 12 | model.add(Dense(1)) 13 | model.compile(loss='mean_squared_error', optimizer='adam') 14 | model.fit(trainX, trainY, nb_epoch=1200, batch_size=2, verbose=2) 15 | 16 | 17 | # Predict 3x20, answer = 60 18 | dataPrediction = model.predict(np.array([20])) 19 | print int(dataPrediction[0][0]), '<--- Predicted number' 20 | print '60 <-- Correct answer \n' 21 | 22 | # Predict 3x25, answer = 75 23 | dataPrediction = model.predict(np.array([25])) 24 | print int(dataPrediction[0][0]), '<--- Predicted number' 25 | print '75 <-- Correct answer \n' 26 | 27 | # Predict 3x345, answer = 1035 28 | dataPrediction = model.predict(np.array([345])) 29 | print int(dataPrediction[0][0]), '<--- Predicted number' 30 | print '1035 <-- Correct answer \n' 31 | 32 | # Predict 3x2, answer = 6 33 | dataPrediction = model.predict(np.array([2])) 34 | print int(dataPrediction[0][0]), '<--- Predicted number' 35 | print '6 <-- Correct answer \n' 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # predict_stock_py 2 | This is a submission for the "Predicting Stock Prices challenge" 
by @Sirajology on [YouTube](https://www.youtube.com/watch?v=SSu00IRRraY). 3 | 4 | 5 | ## Overview 6 | The Python script "predict_stock.py" does the following: 7 | 8 | 1. Asks the user for a stock quote from NASDAQ (e.g. AAPL, FB, GOOGL). 9 | 2. Uses Tweepy to retrieve tweets about that stock. 10 | 3. Uses TextBlob to determine whether the majority of the tweets are positive, using sentiment analysis. 11 | 4. If the sentiment is positive, downloads the last year of prices for that stock and trains a neural net with that data to predict the price for tomorrow. 12 | 13 | The folder "demo" contains a test that teaches the multiplication table of 3 ('Table 3') to the same network that is used to predict the price. 14 | 15 | 16 | ## Dependencies 17 | * numpy (http://www.numpy.org/) 18 | * tweepy (http://www.tweepy.org/) 19 | * textblob (https://textblob.readthedocs.io/en/dev/) 20 | * requests (http://docs.python-requests.org/en/master/) 21 | * keras (https://keras.io/) Runs with [TensorFlow](https://www.tensorflow.org/) or [Theano](http://deeplearning.net/software/theano/), so you will need one of them. 22 | 23 | 24 | # Usage 25 | Install all the necessary dependencies. 26 | Then just run: 27 | ``` 28 | python predict_stock.py 29 | ``` 30 | It will ask you for a NASDAQ quote (e.g. AAPL); then, if the sentiment is positive and the stock you entered exists, it will start training the network and give you a result. 31 | 32 | 33 | # Credits 34 | Credits to [Siraj](https://github.com/llSourcell) and to this [blog post](http://machinelearningmastery.com/time-series-prediction-with-deep-learning-in-python-with-keras/). 35 | 36 | 37 | # Disclaimer 38 | Do not use this code to invest in the stock market. If you are interested in stocks, start by reading "The Intelligent Investor" by Benjamin Graham. 
39 | -------------------------------------------------------------------------------- /predict_stock.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tweepy 4 | import requests 5 | import numpy as np 6 | 7 | from keras.models import Sequential 8 | from keras.layers import Dense 9 | from textblob import TextBlob 10 | 11 | 12 | # First we login into twitter 13 | consumer_key = '' 14 | consumer_secret = '' 15 | access_token = '' 16 | access_token_secret = '' 17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 18 | auth.set_access_token(access_token, access_token_secret) 19 | user = tweepy.API(auth) 20 | 21 | # Where the csv file will live 22 | FILE_NAME = 'historical.csv' 23 | 24 | 25 | def stock_sentiment(quote, num_tweets): 26 | # Checks if the sentiment for our quote is 27 | # positive or negative, returns True if 28 | # majority of valid tweets have positive sentiment 29 | list_of_tweets = user.search(quote, count=num_tweets) 30 | positive, null = 0, 0 31 | 32 | for tweet in list_of_tweets: 33 | blob = TextBlob(tweet.text).sentiment 34 | if blob.subjectivity == 0: 35 | null += 1 36 | next 37 | if blob.polarity > 0: 38 | positive += 1 39 | 40 | if positive > ((num_tweets - null)/2): 41 | return True 42 | 43 | 44 | def get_historical(quote): 45 | # Download our file from google finance 46 | url = 'http://www.google.com/finance/historical?q=NASDAQ%3A'+quote+'&output=csv' 47 | r = requests.get(url, stream=True) 48 | 49 | if r.status_code != 400: 50 | with open(FILE_NAME, 'wb') as f: 51 | for chunk in r: 52 | f.write(chunk) 53 | 54 | return True 55 | 56 | 57 | def stock_prediction(): 58 | 59 | # Collect data points from csv 60 | dataset = [] 61 | 62 | with open(FILE_NAME) as f: 63 | for n, line in enumerate(f): 64 | if n != 0: 65 | dataset.append(float(line.split(',')[1])) 66 | 67 | dataset = np.array(dataset) 68 | 69 | # Create dataset matrix (X=t and Y=t+1) 70 | def 
create_dataset(dataset): 71 | dataX = [dataset[n+1] for n in range(len(dataset)-2)] 72 | return np.array(dataX), dataset[2:] 73 | 74 | trainX, trainY = create_dataset(dataset) 75 | 76 | # Create and fit Multilinear Perceptron model 77 | model = Sequential() 78 | model.add(Dense(8, input_dim=1, activation='relu')) 79 | model.add(Dense(1)) 80 | model.compile(loss='mean_squared_error', optimizer='adam') 81 | model.fit(trainX, trainY, nb_epoch=200, batch_size=2, verbose=2) 82 | 83 | # Our prediction for tomorrow 84 | prediction = model.predict(np.array([dataset[0]])) 85 | result = 'The price will move from %s to %s' % (dataset[0], prediction[0][0]) 86 | 87 | return result 88 | 89 | 90 | # Ask user for a stock quote 91 | stock_quote = raw_input('Enter a stock quote from NASDAQ (e.j: AAPL, FB, GOOGL): ').upper() 92 | 93 | # Check if the stock sentiment is positve 94 | if not stock_sentiment(stock_quote, num_tweets=100): 95 | print 'This stock has bad sentiment, please re-run the script' 96 | sys.exit() 97 | 98 | # Check if we got te historical data 99 | if not get_historical(stock_quote): 100 | print 'Google returned a 404, please re-run the script and' 101 | print 'enter a valid stock quote from NASDAQ' 102 | sys.exit() 103 | 104 | # We have our file so we create the neural net and get the prediction 105 | print stock_prediction() 106 | 107 | # We are done so we delete the csv file 108 | os.remove(FILE_NAME) 109 | --------------------------------------------------------------------------------