├── README.md ├── Section-5 ├── Importing data from local machine.py ├── 3 D Plotting.py ├── Importing data from web sources.py ├── Candle Sticks .py └── 2 D plotting.py ├── Section-2 ├── Modules.py └── My First Python code.py ├── Section-4 ├── Dictionary.py ├── Lists.py ├── Stacks, Queues, Graphs, Trees.py └── Tuples and Sets.py ├── Section-6 ├── Lambda.py └── Function.py ├── Section-3 └── Expressions .py ├── Section-9 ├── Conditional Statement .py └── Loops.py ├── Section-8 ├── Pandas - Series .py ├── Pandas - Dataframe & Basic Functionality.py ├── Pandas- Grouping and Reshaping.py ├── Pandas - Descriptive Statistical Functions .py └── Pandas - Indexing, Reindexing & Missing values .py ├── Section-7 ├── Vectorization and Broadcasting in Arrays.py ├── Indexing and Slicing.py └── Numpy Introduction to arrays.py └── Pandas- Grouping and Reshaping.py /README.md: -------------------------------------------------------------------------------- 1 | # python-for-trading-basic -------------------------------------------------------------------------------- /Section-5/Importing data from local machine.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Notebook Instructions 5 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter. While a cell is running, a [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook [8]. 6 | # 7 | # Enter edit mode by pressing `Enter` or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 8 | 9 | # ## Pandas.read_csv 10 | # 11 | # Pandas.read_csv() function helps you to read Comma Seperated Files using Python and converts it into a dataframe. 12 | 13 | # In[1]: 14 | 15 | 16 | # You have to download the 'Infosys' company's CSV file from www.nseindia.com. 17 | 18 | import numpy as np 19 | import pandas as pd 20 | 21 | infy = pd.read_csv ('infy_data.csv') 22 | 23 | # This code will work only if you have stored the 'infy.csv' in the same folder where this notebook is saved. 24 | 25 | # If you store it at some other location, then the line of code would have to specify the location. 26 | 27 | # infy = pd.read_csv ('C:/Users/academy/Desktop/infy_data.csv') 28 | 29 | 30 | # In[2]: 31 | 32 | 33 | infy # this is our entire "Infosys" stock data frame 34 | 35 | 36 | # In[3]: 37 | 38 | 39 | infy.head () # You will see the top 5 rows 40 | 41 | 42 | # In[4]: 43 | 44 | 45 | infy.tail () # You will see the bottom 5 rows 46 | 47 | 48 | # The reason why we are studying this seperately is because it is important to understand this function. You will be using this function the most while making financial trading strategies.
49 | #
50 | # Another reason is that once you download a CSV file, it becomes a stable data source, unlike data fetched from web sources, which can change or become unavailable.
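# If the date column should drive the index (as it usually does for price data), pandas can parse it while reading the file. A minimal sketch, assuming the same 'infy_data.csv' and that its date column is literally named 'Date' (an assumption, not confirmed by the original file):

import pandas as pd

infy = pd.read_csv ('infy_data.csv', parse_dates = ['Date'], index_col = 'Date')

infy.head ()  # same data, but now indexed by date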
51 | #
52 | # We will see more of this in the Pandas section of our course. 53 | 54 | # ### In the upcoming iPython notebook: 55 | # 56 | # We will learn about 2D plotting of financial market data, but before that let us solve an exercise on this. 57 | # 58 | # Happy Learning! 59 | -------------------------------------------------------------------------------- /Section-5/3 D Plotting.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## 3D plotting 11 | # 12 | # (Optional Read) 13 | # 14 | # We are going to plot a 3 dimensional figure using 3 datasets.
15 | #
16 | # Not many financial data visualisations benefit from 3-D plotting, but one useful application is the volatility surface, which shows implied volatilities across strikes and maturities simultaneously.
17 | #
18 | # You may just go through the codes. It is not a problem if you do not understand them and the the motive of this notebook is more for representation purposes and show you the power of data visualisation in Python by plotting even 3D plots. 19 | 20 | # In[99]: 21 | 22 | 23 | # Random data creation using the numpy library 24 | 25 | import numpy as np 26 | 27 | strike_price = np.linspace (50,150,25) # Strike values between 100 to 150 28 | time = np.linspace (0.5, 2, 25) # Time to maturity between 0.5 to 2.5 years 29 | 30 | # The numpy's meshgrid() function helps us to create a rectangular grid out of an array of x values and y values 31 | 32 | strike_price, time = np.meshgrid (strike_price, time) 33 | 34 | 35 | # In[100]: 36 | 37 | 38 | strike_price, time [:] # Printing the mesh grid array 39 | 40 | 41 | # In[101]: 42 | 43 | 44 | # generate fake implied volatilities 45 | 46 | implied_volatility = (strike_price - 100) ** 2/ (100 * strike_price)/ time 47 | 48 | 49 | # In[102]: 50 | 51 | 52 | # Plotting a 3D figure 53 | 54 | import matplotlib.pyplot as plt 55 | 56 | # Importing the required packages for 3D plotting 57 | from mpl_toolkits.mplot3d import Axes3D 58 | 59 | fig = plt.figure (figsize = (9,6)) 60 | 61 | # If 'fig' is a variable holding a figure, fig.gca() returns the axes associated with the figure. 62 | # With this 3 dimensional axes is enabled 63 | axis = fig.gca (projection = '3d') 64 | 65 | # To plot the surface and passing the required arguments 66 | surface = axis.plot_surface (strike_price, time, implied_volatility, rstride = 1, cstride = 1, cmap = plt.cm.coolwarm, linewidth = 0.5, antialiased = False) 67 | 68 | axis.set_xlabel ('strike') 69 | axis.set_ylabel ('time-to-maturity') 70 | axis.set_zlabel ('implied volatility') 71 | 72 | # Adding a colorbar which maps values to colors 73 | fig.colorbar (surface, shrink = 0.5, aspect=5) 74 | 75 | plt.show() 76 | 77 | 78 | # ### In the upcoming iPython notebook: 79 | # 80 | # We will learn about Candlesticks in Python. Even that is an optional read. 81 | # 82 | # Happy Learning! 83 | -------------------------------------------------------------------------------- /Section-2/Modules.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Modules 11 | # 12 | # Any file in python which has a .py extension can be a module. A module can consist of arbitrary objects, classes, attributes or functions which can be imported by users. 13 | 14 | # ### Importing Modules 15 | # 16 | # There are different ways to import modules. Let us begin by importing the 'math' module. 17 | 18 | # In[67]: 19 | 20 | 21 | import math 22 | 23 | 24 | # Math module which consists of mathematical constants and functions like math.pi, math.sine, math.cosine etc. 
25 | 26 | # In[68]: 27 | 28 | 29 | math.pi # The value of pi 30 | 31 | 32 | # In[69]: 33 | 34 | 35 | math.cos (1) # The cosine value of 1 36 | 37 | 38 | # In[70]: 39 | 40 | 41 | math.sin (1) # The sine value of 1 42 | 43 | 44 | # ### The dir () function 45 | # 46 | # The built-in function called dir() is used to find out what functions a module defines. It returns a sorted list of strings. 47 | 48 | # In[71]: 49 | 50 | 51 | dir (math) 52 | 53 | 54 | # If you require only certain objects from the module then: 55 | 56 | # In[72]: 57 | 58 | 59 | from scipy import mean # We will import only the 'mean' object from the 'scipy' package 60 | 61 | 62 | # In[73]: 63 | 64 | 65 | mean ([1,2,3,4,5]) # This will give arithmetic mean of the numbers 66 | 67 | 68 | # But if we want to find out, the harmonic mean. The following cells is the piece of code. 69 | 70 | # In[74]: 71 | 72 | 73 | from scipy import stats 74 | 75 | 76 | # In[75]: 77 | 78 | 79 | stats.hmean ([1,2,3,4,5]) 80 | 81 | 82 | # If at all you require to import all the objects from the module, you may use * 83 | 84 | # In[76]: 85 | 86 | 87 | from numpy import * 88 | 89 | 90 | # In[77]: 91 | 92 | 93 | sin (1) 94 | 95 | 96 | # In[78]: 97 | 98 | 99 | diag([1,5,9,6]) 100 | 101 | 102 | # One can even import a module/package as an alias and prefix it before using the objects. 103 | 104 | # In[79]: 105 | 106 | 107 | import numpy as np 108 | 109 | 110 | # In[80]: 111 | 112 | 113 | dir (np) 114 | 115 | 116 | # In[81]: 117 | 118 | 119 | np.median([4,5,6,3,4,5,9,8,7,12]) # Will return the median of the number set 120 | 121 | 122 | # In[82]: 123 | 124 | 125 | np.min([4,5,6,3,4,5,9,8,7,12]) # Will return the minimum number of the number set 126 | 127 | 128 | # In[83]: 129 | 130 | 131 | np.max([4,5,6,3,4,5,9,8,7,12]) # Will return the maximum number of the number set 132 | 133 | 134 | # ### Stay tuned for more on python. 135 | -------------------------------------------------------------------------------- /Section-4/Dictionary.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Dictionaries 11 | # 12 | # A dictionary is generally used for mapping. Similarly, a dictionary in Python also has mapping between its “Key” and “Value” pairs. You can access the dictionary using ‘keys’ to get the information or ‘value’ stored within these ‘keys’. 13 | # 14 | # 15 | # ## Creating & Printing Dictionaries 16 | # 17 | # Dictionaries are enclosed in brace brackets and the key:value pair should be separated by a comma. 
18 | 19 | # In[165]: 20 | 21 | 22 | new_dict = { } # Empty Dictionary 23 | 24 | type (new_dict) 25 | 26 | 27 | # In[166]: 28 | 29 | 30 | # Creating a new dictionary 31 | 32 | new_dict = {'Jack': 2563, 'Rose': 8965, 'Hockley': 7412, 'Fabrizo':9632, 'Molly Brown': 4563} 33 | 34 | type (new_dict) 35 | 36 | 37 | # In[167]: 38 | 39 | 40 | # Printing the dictionary 41 | 42 | print (new_dict) 43 | 44 | 45 | # In[168]: 46 | 47 | 48 | # Printing the value for a particular key 49 | 50 | new_dict ['Jack'] 51 | 52 | 53 | # In[169]: 54 | 55 | 56 | # Printing multiple values of various keys 57 | 58 | new_dict ['Rose'], new_dict ['Hockley'] 59 | 60 | 61 | # ## Dictionary Manipulations 62 | # 63 | # Let us have a look at the few functions for accessing or manipulating dictionaries. 64 | 65 | # len (x_dict)
66 | # To know the number of key:value pairs in the dictionary. 67 | 68 | # In[170]: 69 | 70 | 71 | print (new_dict) 72 | 73 | 74 | # In[171]: 75 | 76 | 77 | len (new_dict) 78 | 79 | 80 | # x_dict.keys ( )
81 | # Returns all the 'keys' of dictionaries 82 | 83 | # In[172]: 84 | 85 | 86 | new_dict.keys () 87 | 88 | 89 | # x_dict.values ( )
90 | # Returns all the 'values' of dictionaries 91 | 92 | # In[173]: 93 | 94 | 95 | new_dict.values () 96 | 97 | 98 | # The del statement
99 | # It is used for deleting any keys from the dictionary. 100 | 101 | # In[174]: 102 | 103 | 104 | del new_dict ['Hockley'] 105 | 106 | print (new_dict) 107 | 108 | 109 | # x_dict.pop (key)
110 | # It will pop a 'value' of the required key. 111 | # 112 | 113 | # In[175]: 114 | 115 | 116 | new_dict.pop ('Fabrizo') 117 | 118 | 119 | # In[176]: 120 | 121 | 122 | print (new_dict) # Our latest dictionary 123 | 124 | 125 | # sorted (x_dict)
126 | # 127 | # It returns a list of the dictionary's keys in sorted order; to sort by values instead, you would sort new_dict.items() with a key function. 128 | 129 | # In[177]: 130 | 131 | 132 | print (new_dict) 133 | 134 | 135 | # In[178]: 136 | 137 | 138 | sorted (new_dict) # returns the keys in sorted order 139 | 140 | 141 | # x_dict.clear ()
142 | # Clears all the content of the dictionary 143 | 144 | # In[179]: 145 | 146 | 147 | new_dict.clear () 148 | 149 | print (new_dict) 150 | 151 | 152 | # ### In the upcoming iPython Notebook 153 | # 154 | # We will see, how 'Tuples' and 'Sets' are used. 155 | -------------------------------------------------------------------------------- /Section-6/Lambda.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## What is lambda? 11 | # 12 | # The lambda operator is a way to create small anonymous functions i.e. functions without a name.
13 | #
14 | # They are temporary functions i.e. they are needed only where they have been created.
15 | #
16 | # The lambda feature was added in Python due to a high demand from the Lisp programmers (Lisp is a programming language). 17 | 18 | # ## A Simple Lambda Example 19 | # 20 | # The general syntax for Lambda is as follows:
21 | #
lambda argument_list: expression
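# For intuition, a lambda is simply a function body with no name attached. As a rough sketch (the names below are made up for this comparison), the two definitions behave the same way:

add_lambda = lambda x, y : x + y

def add_def (x, y):
    return x + y

# add_lambda (2, 3) and add_def (2, 3) both return 5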
22 | # Let us have a look at some of the examples. 23 | 24 | # In[101]: 25 | 26 | 27 | sum = lambda x,y : x + y 28 | 29 | 30 | # In[102]: 31 | 32 | 33 | sum (2,3) 34 | 35 | 36 | # It is similar to defining a function where x and y are the parameters and x + y is the operation performed in the block of codes.
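# A small caution about the example above: binding the lambda to the name 'sum' hides Python's built-in sum() function for the rest of the session. A safer, purely illustrative alternative (the name 'add' is hypothetical):

add = lambda x, y : x + y

add (2, 3)  # returns 5

# If you have already run the 'sum = lambda ...' cell, running 'del sum' restores the built-in sum() function.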
37 | #
38 | # You can even observe, that the usage lambda is same as a function call. 39 | 40 | # In[103]: 41 | 42 | 43 | # Another example 44 | 45 | product = lambda x,y : x * y 46 | 47 | 48 | # In[104]: 49 | 50 | 51 | product (2,3) 52 | 53 | 54 | # In[105]: 55 | 56 | 57 | # One more example 58 | 59 | my_operation = lambda x,y,z : x + y - z 60 | 61 | 62 | # In[106]: 63 | 64 | 65 | my_operation (10,20,30) 66 | 67 | 68 | # ### map () 69 | # 70 | # One of the advantages of using a lambda is the map() function.
71 | #
 map (lambda, sequence of lists)
72 | # map() applies the lambda function to all the elements within the sequence. These elements are generally lists. 73 | 74 | # In[107]: 75 | 76 | 77 | # The lists have to be of same length to apply the map () function in lambda. 78 | 79 | list_1 = [1,2,3,4] 80 | 81 | list_2 = [10,20,30,40] 82 | 83 | list_3 = [100,200,300,400] 84 | 85 | 86 | # In[108]: 87 | 88 | 89 | map (lambda x,y : x + y, list_1, list_2 ) 90 | 91 | 92 | # In[109]: 93 | 94 | 95 | map (lambda x,y,z : x + y + z, list_1, list_2, list_3 ) 96 | 97 | 98 | # In[110]: 99 | 100 | 101 | map (lambda y,z : y + z, list_2, list_3 ) 102 | 103 | 104 | # ### filter () 105 | # Another advantage of using a lambda is the filter() function.
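# A version note on the map() calls above and the filter() calls that follow: this notebook appears to have been written for Python 2, where both return plain lists. In Python 3 they return lazy iterator objects, so wrap them in list() to see the values. A minimal sketch:

list_1 = [1,2,3,4]
list_2 = [10,20,30,40]
fib = [0,1,1,2,3,5,8,13,21,34,55]

print (list (map (lambda x, y : x + y, list_1, list_2)))  # [11, 22, 33, 44]
print (list (filter (lambda x : x > 8, fib)))             # [13, 21, 34, 55]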
106 | #
 filter (lambda, list)
107 | # It is an elegant way to filter out the required elements from a list. 108 | 109 | # In[111]: 110 | 111 | 112 | fib = [0,1,1,2,3,5,8,13,21,34,55] # This is a list 113 | 114 | 115 | # In[112]: 116 | 117 | 118 | filter (lambda x: x > 8, fib) 119 | 120 | 121 | # In[113]: 122 | 123 | 124 | filter (lambda x: x < 8, fib) 125 | 126 | 127 | # In[114]: 128 | 129 | 130 | signals = ['Buy','Sell','Sell','Buy','Buy','Sell','Buy'] # This is a list 131 | 132 | 133 | # In[115]: 134 | 135 | 136 | filter (lambda x: x == 'Buy', signals) 137 | 138 | 139 | # ### In the upcoming iPython Notebooks: 140 | # 141 | # We will understand about the Numpy library, in python. 142 | -------------------------------------------------------------------------------- /Section-6/Function.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## A Simple User-Defined Function 11 | # 12 | # Let us create a simple mathematical function. 13 | # 14 | # The syntax for constructing a function is: 15 | #
 16 | # def function_name (parameter-list):
 17 | # 	Statements, i.e function body
 18 | #     return a value, if required
 19 | # 
20 | # Let us create ‘my_function’. 21 | # 22 | 23 | # In[28]: 24 | 25 | 26 | def my_function(x, n): 27 | output = x ** n 28 | return output 29 | 30 | 31 | # This is a simple function which we have created to calculate the exponential of any number. Now, whenever we need to perform this particular calculation, all we need to do is call this function and insert the values for ‘x’ and ‘n’. You may have a look it. 32 | 33 | # In[29]: 34 | 35 | 36 | my_function (10, 2) ## 10 raise to 2 = 100 37 | 38 | 39 | # In[30]: 40 | 41 | 42 | my_function (5,3) ## 5 raise to 3 = 125 43 | 44 | 45 | # ## Bollinger Band Function 46 | # 47 | # This is the function which we discussed in the video unit. 48 | 49 | # In[31]: 50 | 51 | 52 | def Bollinger_Bands (data, n): 53 | 54 | #MA = data['Close'].rolling(window=n).mean() # Calculating the moving average 55 | MA = pd.rolling_mean(data['Close'],n) 56 | 57 | #SD = data['Close'].rolling(window=n).std() # Calculating the standard deviation 58 | SD = pd.rolling_std(data['Close'],n) 59 | 60 | data['Lower_BB'] = MA - (2 * SD) # Lower Bollinger Band 61 | data['Upper_BB'] = MA + (2 * SD) # Upper Bollinger Band 62 | 63 | return data 64 | 65 | 66 | # In[32]: 67 | 68 | 69 | ## Load and view Nifty data 70 | 71 | import pandas as pd 72 | 73 | nifty = pd.read_csv('nifty_data.csv') 74 | nifty.head() 75 | 76 | 77 | 78 | # In[33]: 79 | 80 | 81 | # Calling Bollinger Bands for 'Nifty' index price data 82 | 83 | n = 21 # We have kept the window of the moving average as 21 days 84 | 85 | nifty_bb = Bollinger_Bands(nifty, n) # Calling the Bollinger Bands function cerated by us 86 | 87 | nifty_bb.tail() 88 | 89 | 90 | # In[34]: 91 | 92 | 93 | # Plotting the Bollinger Bands for "Nifty' index 94 | 95 | import matplotlib.pyplot as plt 96 | get_ipython().magic(u'matplotlib inline') 97 | 98 | plt.figure(figsize=(20,10)) 99 | 100 | plt.plot(nifty_bb.Close) 101 | plt.plot(nifty_bb.Lower_BB) 102 | plt.plot(nifty_bb.Upper_BB) 103 | plt.grid(True) 104 | 105 | plt.show() 106 | 107 | 108 | # In[35]: 109 | 110 | 111 | # Calling Bollinger Bands for 'Infosys' price data 112 | 113 | import pandas as pd 114 | 115 | infy = pd.read_csv ('infy_data_bb.csv') # Loading 'Nifty Index' data 116 | 117 | n = 21 # We have kept the window of the moving average as 21 days 118 | 119 | infy_bb = Bollinger_Bands(infy, n) # Calling the Bollinger Bands function cerated by us 120 | 121 | infy_bb.tail() 122 | 123 | 124 | # In[36]: 125 | 126 | 127 | # Plotting the Bollinger Bands for "Infosys" stock 128 | 129 | import matplotlib.pyplot as plt 130 | get_ipython().magic(u'matplotlib inline') 131 | 132 | plt.figure(figsize=(20,10)) 133 | 134 | plt.plot(infy_bb.Close) 135 | plt.plot(infy_bb.Lower_BB) 136 | plt.plot(infy_bb.Upper_BB) 137 | plt.grid(True) 138 | 139 | plt.show() 140 | 141 | 142 | # ### In the upcoming iPython Notebook: 143 | # 144 | # We will understand the Lambda operator and its relation with functions. 145 | -------------------------------------------------------------------------------- /Section-3/Expressions .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. 
Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Expressions 11 | # 12 | # 'Expressions' are generally a combination of numbers, variables and operators.
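# A tiny illustration before the TVM examples (the numbers below are arbitrary):

price = 100                  # a variable
value = price * (1 + 0.05)   # numbers, a variable, and the * and + operators form an expression

print (value)  # 105.0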
13 | #
14 | # In this iPython notebook, we will make use of Expressions to understand the TVM concepts. 15 | 16 | # ### Future Value (FV) 17 | # 18 | # What would be the FV, if I have $1000 with me now and I will be investing it for 1 year, at an annual return of 5%? 19 | 20 | # In[52]: 21 | 22 | 23 | PV = 1000 24 | r = 0.05 25 | n = 1 26 | 27 | FV = PV * ((1+r) ** n) # Formula for calculating Future Value 28 | 29 | print (FV) 30 | 31 | 32 | # ### Present Value 33 | # 34 | # What would be the PV, if I have to discount $1050 at 5% annual rate for a period of 1 year? 35 | 36 | # In[53]: 37 | 38 | 39 | FV = 1050 40 | r = 0.05 41 | n = 1 42 | 43 | PV = FV / ((1 + r) ** n) # Formula for calculating Present Value 44 | 45 | print (PV) 46 | 47 | 48 | # ### Compounding 49 | # 50 | # Assume that the 5% annual interest rate bond makes semiannual payments. That is, for an investment of $1000, you will get 25 dollars, after first 6 months and another 25 dollars after 1 year. The annual rate of interest is 5%. What would be the FV, if I hold the bond for 1 year? 51 | 52 | # In[54]: 53 | 54 | 55 | PV = 1000 56 | r = 0.05 57 | n = 2 # number of periods = 2 since bond makes semiannual payments 58 | t = 1 # number of years 59 | 60 | FV = PV * ((1+(r/n)) ** (n*t)) # Formula for compounding 61 | 62 | print (FV) 63 | 64 | 65 | # ### Annuity Payments 66 | # 67 | # What would be the annual periodic saving amount, if you want a lumsum of $9476.96 at the end of 3 years? The rate of return is 10%?
68 | #
69 | # (This is one of the required calculations from 'PDF : TVM Applications' unit) 70 | 71 | # In[55]: 72 | 73 | 74 | r = 0.1 75 | n = 3 76 | PV = 0 77 | FV = 9476.96 78 | 79 | AP = (FV * r) / (((1 + r) ** n) - 1) # Formula for Annuity payments, given Future Value 80 | 81 | print (AP) 82 | 83 | 84 | # What would be the PV, given a cash outflow of $2500 for a period of 5 years and rate of return being 10%?
85 | #
86 | # (This is one of the required calculations from 'PDF : TVM Applications' unit) 87 | 88 | # In[56]: 89 | 90 | 91 | r = 0.1 92 | n = 5 93 | AP = 2500 94 | 95 | PV = (AP * (1 - ((1 + r) ** -n))) / r # Formula for PV, given Annuity payments 96 | 97 | print (PV) 98 | 99 | 100 | # What would be the PV, given a cash outflow of $30,000 for a period of 45 years and rate of return being 8%?
101 | #
102 | # (This is one of the required calculations from 'PDF : TVM Applications' unit) 103 | 104 | # In[57]: 105 | 106 | 107 | r = 0.08 108 | n = 45 109 | AP1 = 30000 110 | 111 | PV = (AP1 * (1 - ((1 + r) ** -n))) / r # Formula for PV, given Annuity payments 112 | 113 | print (PV) 114 | 115 | 116 | # What would be the annual saving amount (AP), if you want to save a lumpsum of $363252.045 in 25 years and rate of return being 15%?
117 | #
118 | # (This is one of the required calculations from 'PDF : TVM Applications' unit) 119 | 120 | # In[58]: 121 | 122 | 123 | r = 0.15 124 | n = 25 125 | PV = 0 126 | FV = 363252.045095 127 | 128 | AP = (FV * r) / (((1 + r) ** n) - 1) # Formula to calculate Annuity Payments, given FV 129 | 130 | #AP = (r * PV) / (1 - ((1 + r) ** -n)) # Formula to calculate Annuity Payments, given PV 131 | 132 | print (AP) 133 | 134 | 135 | # These are some of the ways one can use expressions.
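# As one more illustration, the same compounding expression handles monthly compounding by simply setting n = 12 (the figures below are arbitrary):

PV = 1000
r = 0.05  # annual rate
n = 12    # number of compounding periods per year
t = 1     # number of years

FV = PV * ((1 + (r/n)) ** (n*t))

print (FV)  # roughly 1051.16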
136 | # 137 | # ### Stay tuned for more on python. 138 | -------------------------------------------------------------------------------- /Section-9/Conditional Statement .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## If and elif 11 | # 12 | # We have seen the working of an 'if' statement in the previous video unit. Let us go through it once again. 13 | # 14 | # In python, the syntax for an ‘if' conditional statement is as follows: 15 | # 16 | #
if (condition_1):
17 | # statement_block_1
18 | # elif (condition_2):
19 | # statement_block_2
20 | # elif (condition_3):
21 | # statement_block_3
22 | #
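# One small addition to the syntax above: an optional final else: branch catches every case that none of the conditions match (the if-else form is covered in more detail below). A minimal sketch reusing the stock example that follows:

stock_price_ABC = 310

if (stock_price_ABC < 300):
    print ("We will buy 500 shares of ABC")
elif (stock_price_ABC == 300):
    print ("We will buy 200 shares of ABC")
else:
    print ("We will buy 150 shares of ABC")  # runs for any price above 300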
23 | # Let us consider an example to understand the working of an 'if' statement. 24 | # 25 | 26 | # In[57]: 27 | 28 | 29 | stock_price_ABC = 299 # Variable value 30 | 31 | if (stock_price_ABC < 300): # if condition_1 is true then... 32 | print ("We will buy 500 shares of ABC") # statement_block_1 will get executed 33 | 34 | elif (stock_price_ABC == 300): 35 | print ("We will buy 200 shares of ABC") 36 | 37 | elif (stock_price_ABC > 300): 38 | print ("We will buy 150 shares of ABC") 39 | 40 | 41 | # If you change the value of the variable 'stock_price_ABC' to... 42 | 43 | # In[58]: 44 | 45 | 46 | stock_price_ABC = 300 # then... 47 | 48 | if (stock_price_ABC < 300): 49 | print ("We will buy 500 shares of ABC") 50 | 51 | elif (stock_price_ABC == 300): # if condition_2 is true then... 52 | print ("We will buy 200 shares of ABC") # statement_block_2 will get executed 53 | 54 | elif (stock_price_ABC > 300): 55 | print ("We will buy 150 shares of ABC") 56 | 57 | 58 | # If you change the value of the variable 'stock_price_ABC' to... 59 | 60 | # In[59]: 61 | 62 | 63 | stock_price_ABC = 301 # then... 64 | 65 | if (stock_price_ABC < 300): 66 | print ("We will buy 500 shares of ABC") 67 | 68 | elif (stock_price_ABC == 300): 69 | print ("We will buy 200 shares of ABC") 70 | 71 | elif (stock_price_ABC > 300): # if condition_3 is true then... 72 | print ("We will buy 150 shares of ABC") # statement_block_3 will get executed 73 | 74 | 75 | # ## If and else 76 | # 77 | # If - else block of conditional statements is similar to the working of 'if' statements. If the 'if' condition is true, then the statements inside the 'if' block will be executed. If the 'if condition is false, then the statements inside the 'else' block will be executed. 78 | # 79 | # In python, the syntax for an ‘if else' conditional statement is as follows: 80 | # 81 | #
if (condition_1):
82 | # statement_block_1
83 | # else:
84 | # statement_block_2
85 | #
86 | # Let us consider an example to understand the working of an 'if else' statement. 87 | # 88 | 89 | # In[60]: 90 | 91 | 92 | stock_price_ABC = 300 93 | 94 | if (stock_price_ABC > 250): # if condition 1 is true then.... 95 | print ("We will sell the stock and book the profit") # this block of code will be executed 96 | 97 | else: 98 | print ("We will keep buying the stock") 99 | 100 | 101 | 102 | # If you change the value of the variable 'stock_price_ABC' to... 103 | 104 | # In[61]: 105 | 106 | 107 | stock_price_ABC = 200 # then... 108 | 109 | if (stock_price_ABC > 250): # if condition 1 is false then.... 110 | print ("We will sell the stock and book the profit") 111 | 112 | else: 113 | print (" We will keep buying the stock") # this block of code will be executed 114 | 115 | 116 | # ### In the upcoming iPython Notebook: 117 | # 118 | # We will understand about Loops. 119 | -------------------------------------------------------------------------------- /Section-9/Loops.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## For Loop 11 | # 12 | # In the programming languages, there are many situations when you need to execute a block of code several number of times. A loop statement allows us to execute a statement or group of statements multiple times 13 | # 14 | # The general syntax for a ‘for’ loop is as follows: 15 | # 16 | #
 17 | # for (variable) in sequence
 18 | #     block of statements
 19 | # 
20 | # 21 | # Here, the block of statements within the loop will get executed, until all ‘sequence’ elements get exhausted. Once all sequence elements are exhausted, the program, will come out of the loop. 22 | # 23 | # 24 | 25 | # In[6]: 26 | 27 | 28 | # Closing Prices of the ABC Stock over 10 days 29 | 30 | Close_Price_ABC = [300,305,287,298,335,300,297,300,295,310] # Our sequence 31 | 32 | for i in Close_Price_ABC: 33 | 34 | if i < 300: 35 | print ("We Buy") 36 | 37 | if i == 300: 38 | print ("No new positions") 39 | 40 | if i > 300: 41 | print ("We Sell") 42 | 43 | print ("We are now out of the loop") 44 | 45 | 46 | # Here, the output is what was discussed in the previous video unit.
47 | #
48 | # The variable ‘i’ first stores the value ‘300’ in it and runs it through the loop to execute the statements. Here, we have placed a condition that if ‘i == 300’ we will print “No new positions”. Hence, as you can see, this is the first statement in our output.
49 | #
50 | # Now, ‘i’ will move to the second element of the sequence, which is ‘305’, and run it through the statements of the loop. Since 305 > 300, the block ‘if i > 300: print ("We Sell")’ is executed. Check the second output.
51 | #
52 | # Similarly, it will keep executing all the elements of the loop. Observe the output. 53 | # 54 | 55 | # Let us take another example... 56 | 57 | # In[7]: 58 | 59 | 60 | import numpy as np 61 | import pandas as pd 62 | 63 | infy = pd.read_csv ('infy_twoweeks.csv') 64 | infy 65 | 66 | # We have delibrately taken a smaller dataframe to understand the output. 67 | # You may experiment using bigger data frames to understand the power of 'for' loop 68 | 69 | 70 | # In[8]: 71 | 72 | 73 | # We will just take the 'Close Price' Column to run the 'for' loop 74 | 75 | for i in range (len(infy)): 76 | 77 | if (infy.iloc[i]["Close Price"] < 1120): 78 | print ("We buy") 79 | 80 | elif ((infy.iloc[i]["Close Price"] > 1120) & (infy.iloc[i]["Close Price"] < 1150)): 81 | print ("We do nothing") 82 | 83 | elif (infy.iloc[i]["Close Price"] > 1150): 84 | print ("We Sell") 85 | 86 | 87 | # ## While Loop 88 | # 89 | # (Optional Read) 90 | # 91 | # The while construct consists of a condition and block of code. 92 | # 93 | # The general syntax for a ‘while’ loop is as follows: 94 | # 95 | #
 96 | # while condition/expression
 97 | #     block of statements
 98 | # 
99 | # 100 | # To begin, the condition is evaluated.
101 | #
102 | # If the condition is true, the 'block of statements' is executed. The condition is checked every time before the block of statements is executed.
103 | #
104 | # This keeps on repeating until the condition becomes false. Once the condition is false, it comes out of the loop to execute the other statements. 105 | 106 | # In[9]: 107 | 108 | 109 | a = 0 # variable 110 | 111 | while a <= 10: # this is the condition...the loop will execute until the condition becomes 'false' 112 | a = a + 1 113 | print (a) 114 | print ("We are now out of the loop") 115 | -------------------------------------------------------------------------------- /Section-5/Importing data from web sources.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Importing data from Investors Exchange (IEX) 11 | 12 | # To fetch data from IEX, you need to first pip install iexfinance. The 'pip' command is a tool for installing and managing Python packages. 13 | ! pip install iexfinance 14 | # Using iexfinance to access data from IEX is quite easy. First, you need to import the get_historical_data function from the iexfinance library.
15 | #
16 | # This will return the daily time series of the stock's ticker requested over the desired date range. You can select the date range using the datetime function. The output format (data frame creation, if pandas) is selected using the output_format parameter. 17 | # 18 | # The resulting DataFrame is indexed by date, with a column for each OHLCV datapoint as you can see in the below example. 19 | # 20 | 21 | # In[8]: 22 | 23 | 24 | from iexfinance import get_historical_data 25 | from datetime import datetime 26 | 27 | start = datetime(2017, 1, 1) # starting date: year-month-date 28 | end = datetime(2018, 1, 1) # ending date: year-month-date 29 | 30 | data = get_historical_data('AAPL', start=start, end=end, output_format='pandas') 31 | data.head() 32 | 33 | 34 | # In[9]: 35 | 36 | 37 | data.tail() 38 | 39 | 40 | # ## Importing data from NSEpy 41 | 42 | # Similar to IEX, you need to first pip install nsepy module to fetch the data. 43 | ! pip install nsepy 44 | # To fetch historical data of stocks from nsepy, you have to use get_history function which returns daily data of stock's ticker requested over the desired timeframe in a pandas format. 45 | # 46 | # Note: Only price data of Indain stocks/indices/derivatives can be fetched from nsepy. 47 | 48 | # In[10]: 49 | 50 | 51 | from nsepy import get_history 52 | from datetime import datetime 53 | 54 | start = datetime(2017, 1, 1) 55 | end = datetime(2018, 1, 1) 56 | 57 | data = get_history(symbol='SBIN',start=start,end=end) 58 | 59 | data.head() 60 | 61 | 62 | # In[11]: 63 | 64 | 65 | data.tail() 66 | 67 | 68 | # ## Importing data from Quandl 69 | 70 | # To fetch data from Quandl, first import quandl. Here, quandl.get function is used to fetch data for a security over a specific time period. 71 | 72 | # In[12]: 73 | 74 | 75 | import quandl 76 | from datetime import datetime 77 | 78 | # quantrautil is a module specific to Quantra to fetch stock data 79 | from quantrautil import get_quantinsti_api_key 80 | 81 | api_key = get_quantinsti_api_key() 82 | data = quandl.get('EOD/AAPL', start_date='2017-1-1', end_date='2018-1-1', api_key=api_key) 83 | 84 | # Note that you need to know the "Quandl code" of each dataset you download. In the above example, it is 'EOD/AAPL'. 85 | # To get your personal API key, sign up for a free Quandl account. Then, you can find your API key on Quandl account settings page. 86 | 87 | data.head() 88 | 89 | 90 | # ## Importing data from Yahoo 91 | 92 | # First you need to import data from pandas_datareader module. Here data.get_data_yahoo function is used to return the historical price of a stock's ticker, over a specifc time range. 93 | 94 | # In[13]: 95 | 96 | 97 | ## Yahoo recently has become an unstable data source. 98 | 99 | ## If it gives an error, you may run the cell again, or try again sometime later 100 | 101 | import pandas as pd 102 | from pandas_datareader import data 103 | data = data.get_data_yahoo('AAPL', '2017-01-01', '2018-01-01') 104 | data.head() 105 | 106 | 107 | # ### In the upcoming iPython notebook: 108 | # 109 | # We will learn about Importing Data from our local machine. Till then, get ready to solve some exercises. 110 | -------------------------------------------------------------------------------- /Section-5/Candle Sticks .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . 
While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Plotting Candle sticks 11 | # 12 | # The following code will help you to plot an interactive graph of the S&P 500 index using candlesticks. 13 | 14 | # In[ ]: 15 | 16 | 17 | from iexfinance import get_historical_data 18 | from datetime import datetime 19 | 20 | start = datetime(2017, 1, 1) # starting date: year-month-date 21 | end = datetime(2018, 5, 13) # ending date: year-month-date 22 | 23 | df = get_historical_data('SPY', start=start, end=end, output_format='pandas') 24 | df.head() 25 | 26 | 27 | # In[ ]: 28 | 29 | 30 | # Importing the necessary packages 31 | 32 | import matplotlib.pyplot as plt 33 | import matplotlib.finance as mpf 34 | from matplotlib.finance import candlestick_ohlc 35 | from bokeh.plotting import figure, show, output_file 36 | 37 | 38 | # In[ ]: 39 | 40 | 41 | # Indexing 42 | import pandas as pd 43 | w = 12*60*60*1000 # half day in ms 44 | df.index = pd.to_datetime(df.index) 45 | 46 | 47 | # ## Remember: 48 | # 49 | # 1. If the opening price is greater than the closing price then a green candle stick has to be created to represent the day. 50 | # 2. If the opening price is less than the closing price then a red candlestick is to be created to represent the day. 51 | # 1. We will use 'inc' and 'dec' as the varieble to capture this facr further in the code 52 | 53 | # In[ ]: 54 | 55 | 56 | inc = df.close > df.open 57 | dec = df.open > df.close 58 | 59 | 60 | # In[ ]: 61 | 62 | 63 | # The various 'interactions' we want in our candlestick graph. This is an argument to be passed in figure () from bokeh.plotting 64 | 65 | TOOLS = "pan,wheel_zoom,box_zoom,reset,save" 66 | 67 | # Pan: It helps you pan/move the plot 68 | 69 | # Wheel Zoom: You can zoom in using the wheel of your mouse 70 | 71 | # Box Zoom: You can zoom in by creating a box on the specific area of the plot. Use the mouse, click and drag to create the box 72 | 73 | # Reset: If you want to reset the visualisation of the plot 74 | 75 | # Save: Saving the plot (entire or the part which you want) as an image file 76 | 77 | 78 | # In[ ]: 79 | 80 | 81 | # Passing the arguments of our bokeh plot 82 | 83 | p = figure(x_axis_type="datetime", tools= TOOLS, plot_width=1000, title="SPY Candlestick") 84 | 85 | 86 | # In[ ]: 87 | 88 | 89 | from math import pi 90 | 91 | # The orientation of major tick labels can be controlled with the major_label_orientation property. 92 | # This property accepts the values "horizontal" or "vertical" or a floating point number that gives 93 | # the angle (in radians) to rotate from the horizontal. 94 | 95 | p.xaxis.major_label_orientation = pi/4 96 | 97 | 98 | # In[ ]: 99 | 100 | 101 | # Alpha signifies the floating point between 0 (transparent) and 1 (opaque). 102 | # The line specifies the alpha for the grid lines in the plot. 
103 | 104 | p.grid.grid_line_alpha = 0.3 105 | 106 | 107 | # In[ ]: 108 | 109 | 110 | # Configure and add segment glyphs to the figure 111 | 112 | p.segment(df.index,df.high,df.index,df.low,color="red") 113 | 114 | 115 | # In[ ]: 116 | 117 | 118 | # Adds vbar glyphs to the Figure 119 | 120 | p.vbar(df.index[inc],w,df.open[inc],df.close[inc], fill_color="#1ED837",line_color="black") 121 | p.vbar(df.index[dec],w,df.open[dec],df.close[dec], fill_color="#F2583E",line_color="black") 122 | 123 | 124 | # In[ ]: 125 | 126 | 127 | # Generates simple standalone HTML documents for Bokeh visualization 128 | 129 | output_file("candlestick.html", title="candlestick.py example") 130 | 131 | 132 | # In[ ]: 133 | 134 | 135 | # The graph will open in another tab of the browser 136 | 137 | show(p) 138 | 139 | # The code ends here 140 | 141 | 142 | # ### In the upcoming iPython notebook: 143 | # 144 | # We will learn about Functions in Python 145 | # 146 | # Happy Learning! 147 | -------------------------------------------------------------------------------- /Section-4/Lists.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Lists 11 | # 12 | # Lists in Python, are used to store heterogeneous types of data. Lists are mutable i.e. one can change the content within a list, without changing its identity. 13 | # 14 | # ## Creating Lists 15 | # List are enclosed by square brackets and elements should be separated by comma. 16 | 17 | # In[59]: 18 | 19 | 20 | new_list = [ ] # Empty List 21 | type (new_list) 22 | 23 | 24 | # In[60]: 25 | 26 | 27 | new_list = [10, 20, 30, 40] # A list of integers 28 | type (new_list) 29 | 30 | 31 | # In[61]: 32 | 33 | 34 | new_list = [10, 20.2, "thirty", 40] # A list of mixed data types 35 | type (new_list) 36 | 37 | 38 | # In[62]: 39 | 40 | 41 | new_list = [[10,20,30], [10.1, 20.2, 30.3],["ten", "twenty", "thirty"]] # A nested list 42 | type (new_list) 43 | 44 | 45 | # In[63]: 46 | 47 | 48 | new_list = [10,[20.2,["thirty",[40]]]] # A deeply nested list 49 | type (new_list) 50 | 51 | 52 | # ## Different Methods for List Manipulation 53 | # Let us have a look at few of the methods, with which we can manipulate lists.
54 | #
55 | # Please Note: A function or a method is a block of code which is used to perform a single task or a set of tasks repeatedly. 56 | 57 | # In[64]: 58 | 59 | 60 | my_list = [10,20,30,40] # This is the 'original' list which you have created 61 | 62 | print (my_list) 63 | 64 | 65 | # list.append (x)
66 | # Add an item to the end of the list. 67 | 68 | # In[65]: 69 | 70 | 71 | my_list.append (50) 72 | 73 | print (my_list) 74 | 75 | 76 | # list.extend (x)
77 | # Extend the list by appending all the items at the end of the list. 78 | 79 | # In[66]: 80 | 81 | 82 | my_list.extend ([60,70,80,90]) 83 | 84 | print (my_list) 85 | 86 | 87 | # list.insert (i,x)
88 | # Insert an item at any given position within the list. The first argument 'i', is the index of the item before which you want to insert something. To insert something at the beginning of the list, you may type list.insert (0,x) 89 | 90 | # In[67]: 91 | 92 | 93 | my_list.insert (0,0) # Inserting an item in the beginning 94 | 95 | print (my_list) 96 | 97 | 98 | # In[68]: 99 | 100 | 101 | my_list.insert (10,100) # Inserting an item at the end or at the integer location of 10 in this case 102 | 103 | print (my_list) 104 | 105 | 106 | # In[69]: 107 | 108 | 109 | my_list.insert (6,55) # Inserting an item at the 6th position in a list 110 | 111 | print (my_list) 112 | 113 | 114 | # list.remove (x)
115 | # Remove the first item from the list whose value is 'x'. It is an error if there is no such item. 116 | 117 | # In[70]: 118 | 119 | 120 | my_list.remove(0) 121 | 122 | print (my_list) 123 | 124 | 125 | # list.pop (i)
126 | # Remove any item from any given position (index) in the list. If no index is specified, it removes and returns the last element from the list. 127 | 128 | # In[71]: 129 | 130 | 131 | my_list.pop (5) # Removes and returns the '5th' element from the list 132 | 133 | 134 | # In[72]: 135 | 136 | 137 | print (my_list) 138 | 139 | 140 | # In[73]: 141 | 142 | 143 | my_list.pop () # Removes and returns the last element from the list 144 | 145 | 146 | # In[74]: 147 | 148 | 149 | print (my_list) 150 | 151 | 152 | # list.index (x)
153 | # It returns a zero-based index in the list of the first item whose value is x. Raises an error if there is no such item as 'x'. 154 | 155 | # In[75]: 156 | 157 | 158 | my_list.index (50) 159 | 160 | 161 | # In[76]: 162 | 163 | 164 | my_list.index(10) 165 | 166 | 167 | # In[77]: 168 | 169 | 170 | print (my_list) 171 | 172 | 173 | # list.count (x)
174 | # Returns the number of times 'x' appears in the list 175 | 176 | # In[78]: 177 | 178 | 179 | new_list = [10,10,10,20,30,40,50] # This is a new list 180 | 181 | new_list.count(10) 182 | 183 | 184 | # list.reverse ()
185 | # It reverses the items of the list. 186 | 187 | # In[79]: 188 | 189 | 190 | print (my_list) 191 | 192 | 193 | # In[80]: 194 | 195 | 196 | my_list.reverse () 197 | 198 | print (my_list) 199 | 200 | 201 | # list.sort ()
202 | # It sorts the items in the list. 203 | 204 | # In[81]: 205 | 206 | 207 | new_list = [12, 35, 76, 20, 56, 34, 65] 208 | print (new_list) 209 | 210 | 211 | # In[82]: 212 | 213 | 214 | new_list.sort() 215 | 216 | print (new_list) 217 | 218 | 219 | # ### In the upcoming iPython Notebook 220 | # 221 | # We will see, how Lists are used as: 222 | # 223 | # 1. 'Stacks' 224 | # 2. 'Queues' 225 | # 3. 'Graphs' 226 | # 4. 'Trees' 227 | # 228 | # So, Stay Tuned! 229 | -------------------------------------------------------------------------------- /Section-5/2 D plotting.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Notebook Instructions 5 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter. While a cell is running, a [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook [8]. 6 | # 7 | # Enter edit mode by pressing `Enter` or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 8 | 9 | # ## Data Visualization 10 | # 11 | # This python notebook is for understanding the capabilities of 'matplotlib' library. Matplotlib is a reliable, robust and easy to use library for standard plots and is flexible when it comes to complex plots and customizations. 12 | 13 | # In[74]: 14 | 15 | 16 | # Loading and viewing the dataframe 17 | 18 | import pandas as pd 19 | 20 | infy = pd.read_csv ('infy_dv.csv') 21 | 22 | # infy = pd.read_csv ('C:/Users/academy/Desktop/infy_dv.csv') 23 | 24 | infy.head () 25 | 26 | 27 | # In[75]: 28 | 29 | 30 | # Preparing Data to visualise 31 | 32 | infy_close = infy [['Date','Close Price']] # The columns which we require 33 | 34 | infy_close.set_index('Date', inplace=True) # Setting index as date 35 | 36 | # More on this in the upcoming section on 'Pandas' 37 | 38 | infy_close 39 | 40 | 41 | # ### Importing libraries 42 | # 43 | # To begin with, we will import the required libraries. The main plotting functions are found in the sublibrary matplotlib.pyplot. 44 | 45 | # In[76]: 46 | 47 | 48 | import matplotlib.pyplot as plt 49 | get_ipython().magic(u'matplotlib inline') 50 | 51 | plt.plot(infy_close) 52 | plt.show () 53 | 54 | 55 | # ### A better plot representation 56 | # 57 | # There are always different requirements and plotting style for presenting graphs/reports. Let us try out a few functions and customize it. 58 | 59 | # In[77]: 60 | 61 | 62 | import matplotlib.pyplot as plt 63 | get_ipython().magic(u'matplotlib inline') 64 | 65 | # This customizes the size of the plot as per the inputs. Here 14,5 represents the breadth and length of the plot. 66 | plt.figure(figsize = (14,5)) 67 | 68 | # This helps in plotting the blue color of the ‘infy_close’ series line graph. 69 | plt.plot(infy_close, 'b') 70 | # plt.plot (infy_close, 'g') # to plot green color 71 | 72 | # This helps in plotting the discrete red data points of the closing prices of ‘infy_close’ series. 73 | plt.plot(infy_close,'ro') 74 | # Here ‘r’ stands for ‘red’ and ‘o’ stands for circles while plotting our discrete data points. 75 | # That is why the points are colored red and default line color is blue. 76 | 77 | # This gives a grid layout to the plot. 78 | plt.grid(True) 79 | 80 | # This gives the title to the plot. 
81 | plt.title ('Infosys Close Price Representation') 82 | 83 | # This labels the x axis 84 | plt.xlabel ('Trading Days') 85 | 86 | # This labels the y axis 87 | plt.ylabel ('Infosys Close Price') 88 | 89 | 90 | # To plot and visualise the data 91 | plt.show () 92 | 93 | 94 | # ### Plot with labelled datasets 95 | # 96 | # Something that is different in this cell is the fact that we are plotting two datasets or columns in this case. 97 | 98 | # In[78]: 99 | 100 | 101 | # Preparing data 102 | 103 | import pandas as pd 104 | 105 | infy2 = pd.read_csv ('infy_dv.csv') 106 | 107 | #infy2 = pd.read_csv ('C:/Users/academy/Desktop/infy_dv.csv') 108 | 109 | infy2 = infy2 [['Date','Close Price', 'Open Price']] # Choosing more columns 110 | 111 | infy2.set_index('Date', inplace=True) # Setting 'Date' column as an index 112 | 113 | infy2 114 | 115 | 116 | # To read the plot better, we use the plt.legend() function. plt.legend() accepts different locality parameters where 0 stands for the best location of the legend, in the sense that little data is hidden by the legend. 117 | 118 | # In[79]: 119 | 120 | 121 | # PLotting data 122 | 123 | plt.figure(figsize=(20,7)) 124 | 125 | plt.plot(infy2["Close Price"], lw=1.5, label = 'Close Price') 126 | plt.plot(infy2["Open Price"], lw=1.5, label = 'Open Price') 127 | 128 | plt.plot(infy2,'ro') 129 | 130 | plt.grid(True) 131 | 132 | plt.legend(loc=0) 133 | 134 | #This helps us tighten the figure margins 135 | plt.axis ('tight') 136 | 137 | plt.xlabel('Time') 138 | plt.ylabel('Index') 139 | plt.title ('Representative plot with two datasets') 140 | 141 | plt.show() 142 | 143 | 144 | # ### Scatter Plots 145 | # 146 | # (Optional Read) 147 | # 148 | # In a scatter plot, the values of one data serve as the x values for the other data set. Such plots are usually used while plotting financial time series. Matplotlib provides a specific function to generate scatter plots known as the plt.scatter() function. 149 | 150 | # In[80]: 151 | 152 | 153 | import numpy as np 154 | 155 | y = np.random.standard_normal((100,2)) # Random data created 156 | 157 | plt.figure (figsize = (7,5)) 158 | 159 | # The function 'scatter' is called to our 'plt' object 160 | plt.scatter(y[:,0], y[:,1], marker='o') 161 | 162 | plt.grid(True) 163 | plt.xlabel ('1st dataset') 164 | plt.ylabel ('2nd dataset') 165 | plt.title('Scatter Plot') 166 | plt.show() 167 | 168 | 169 | # ### Plotting a histogram 170 | # 171 | # (Optional Read) 172 | # 173 | # Another type of plot apart from line graphs are histograms. They are often used in the context of financial returns. The code puts the frequency value of two datasets next to each other in the same plot. We use the plt.hist() function to plot the diagram. 174 | # 175 | 176 | # In[81]: 177 | 178 | 179 | # Random data created 180 | 181 | np.random.seed(100) 182 | y = np.random.standard_normal((25,2)).cumsum(axis=0) 183 | 184 | plt.figure(figsize=(10,5)) 185 | 186 | # The function 'hist' is called to our 'plt' object 187 | plt.hist(y, label = ['1st','2nd'], bins=25) 188 | 189 | plt.grid(True) 190 | plt.legend(loc=0) 191 | plt.xlabel('Index Returns') 192 | plt.ylabel ('Stock Returns') 193 | plt.title ('Histogram') 194 | plt.show() 195 | 196 | 197 | # ### In the upcoming iPython notebook: 198 | # 199 | # We will learn about 3-D plotting in Python. 3-D plotting is an optional read. 200 | # 201 | # Happy Learning! 
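# One optional footnote for this plotting notebook: any of the figures above can also be written to disk with plt.savefig() before calling plt.show(). A minimal sketch (the file name and data are arbitrary examples):

import matplotlib.pyplot as plt

plt.plot ([1, 2, 3], [10, 20, 15])
plt.savefig ('my_plot.png', dpi = 150, bbox_inches = 'tight')  # saves the current figure as a PNG
plt.show ()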
202 | -------------------------------------------------------------------------------- /Section-4/Stacks, Queues, Graphs, Trees.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Lists as Stacks 11 | # 12 | # A stack is a collection of objects which work as per the LIFO - Last in First Out principle. Consider a simple example: You are throwing a dinner party at your place. You will place a stack of plates at the buffet table. Whenever you are adding new plates to the stack, you will place the plates at the top. Similarly, whenever a guest takes a plate, he/she will take it from the top of the stack. This is the Last in, First Out principle. 13 | # 14 | # In the world of programming it, learning LIFO for data is very handy. We will do the same in the upcoming code. When you add items to the stack, it is known as push operation. You will do this using the append ( ) method. When you remove items from a stack, it is known as pop operation. You actually have a pop ( ) method in python. 15 | # 16 | # This is not something new which you have learnt. But understanding how data works in a stack (LIFO principle) is important, since this concept is used for evaluating expressions and syntax parsing, scheduling algorithms, routines, writing effective functions and so on. 17 | 18 | # In[54]: 19 | 20 | 21 | # Bottom --> 10, 20, 30, 40, 50 --> Top 22 | 23 | my_stack = [10, 20, 30, 40, 50] # List 24 | 25 | my_stack.append (60) # The PUSH OPERATION 26 | 27 | print (my_stack) 28 | 29 | 30 | # In[55]: 31 | 32 | 33 | # New Stack: Bottom ---> 10, 20, 30, 40, 50, 60 ---> Top 34 | 35 | my_stack.pop () # The POP OPERATION 36 | 37 | my_stack.pop () # The same operation 'twice' 38 | 39 | print (my_stack) # From the 'top', 50 and 60 will be removed. 40 | 41 | 42 | # ## Lists as Queues 43 | # 44 | # A queue is a collection of objects which works as per the FIFO - First in First Out principle. Consider a simple example: You are at the concert to listen to your favourite artist. The tickets for this concert are in great demand. Hence, all the fans form a queue outside the ticket collection centre. The fan to arrive first, will be the first one to get the ticket while the one to arrive last may or may not get the ticket. This is the First in, First Out principle. 45 | 46 | # In[56]: 47 | 48 | 49 | # 'collections' is a package which contains high performance container datatyes 50 | 51 | # 'deque' us a list-like container with fast appends and pops on either ends 52 | 53 | from collections import deque 54 | 55 | # This is your queue. "Roger Federer" is the first to arrive while "Novak Djokovic is the last. 
56 | 57 | my_queue = deque(["Roger Federer", "Rafael Nadal", "Novak Djokovic"]) 58 | 59 | my_queue.append ("Andre Agassi") # Now Andre Agassi arrives 60 | 61 | my_queue.append ("Pete Sampras") # Now Pete Sampras arrives 62 | 63 | print (my_queue) # You may have a look at the queue below 64 | 65 | 66 | # In[57]: 67 | 68 | 69 | my_queue.popleft() # The first to arrives leaves first 70 | 71 | 72 | # In[58]: 73 | 74 | 75 | my_queue.popleft() # The second to arrive leaves now 76 | 77 | 78 | # In[59]: 79 | 80 | 81 | print (my_queue) # This is your present queue in the order of arrival 82 | 83 | 84 | # Using deque from the collection module is one way of doing it. 85 | # 86 | # Another way of doing this is using the insert() and remove() functions. However, lists as queues are not that efficient. Adding and removing from the beginning of the list is slow since all the elements have to be shifted by one. 87 | 88 | # ## Graphs 89 | # 90 | # (Optional Read) 91 | # 92 | # A graph in computer science is a network consisting of different nodes or vertices. These nodes may or may not be connected to each other. The line that joins the nodes is called an edge. If the edge has a particular direction it is a directed graph. If not, it is an undirected graph. 93 | 94 | # This is an example of an Undirected Graph, where A, B, C, D and E are the various nodes. The following list shows that these five nodes are connected to which other nodes. For diagram, you may refer to the graph, taught in the video lecture. 95 | # 96 | # A <---> B,C
97 | # B <---> A,C,D
98 | # C <---> A,B,D,E
99 | # D <---> B,C,E
100 | # E <---> D,C
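# As a quick illustration (the dictionary below mirrors the adjacency list above; the variable name 'adjacency' is ours, not from the lecture), the same graph can be held in a Python dict, and an edge check is just a membership test on a node's neighbour list:

adjacency = {'A': ['B', 'C'], 'B': ['A', 'C', 'D'], 'C': ['A', 'B', 'D', 'E'], 'D': ['B', 'C', 'E'], 'E': ['D', 'C']}

print('B' in adjacency['A'])   # True  -> an edge A <---> B exists
print('E' in adjacency['A'])   # False -> there is no edge between A and E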
101 | 102 | # In[61]: 103 | 104 | 105 | # Please Note: At present we are using dictionaries, functions and loops which have not been taught. 106 | 107 | # We will take up all of these concepts in the upcoming units or sections of this course. 108 | 109 | 110 | # The following code is just to display all the different edges of the graph, as shown in the video lecture. 111 | 112 | my_graph = {'A' : ['B', 'C'], 'B': ['A','C','D'], 'C' : ['A','B','D','E'], 'D': ['B','C','E'], 'E': ['D','C']} 113 | 114 | 115 | # In[62]: 116 | 117 | 118 | def define_edges(my_graph): 119 | edges = [] 120 | for nodes in my_graph: 121 | for adjacent_nodes in my_graph [nodes]: 122 | edges.append((nodes, adjacent_nodes)) 123 | return edges 124 | 125 | print(define_edges(my_graph)) 126 | 127 | 128 | # ## Trees 129 | # 130 | # (Optional Read) 131 | # 132 | # A 'tree' in real world has roots below the ground, a trunk, and the branches that are spread all across the trunk in an organised way. These branches have leaves on them. 133 | # 134 | # In the programming world, a tree is upside down of what you see in the real world. At the top is the root node. The other node that follow the root node are called branch nodes. The final nodes of these branches are called leaf nodes. 135 | 136 | # In[63]: 137 | 138 | 139 | # In the code below, we have shown how to 'travel' through a tree. The tree is same as that shown in the video lecture. 140 | 141 | # We have done this wih the help of 'classes'. We have no covered classes. 142 | 143 | 144 | # In[64]: 145 | 146 | 147 | class Tree: 148 | def __init__(self, info, left=None, right=None): 149 | self.info = info 150 | self.left = left 151 | self.right = right 152 | 153 | def __str__(self): 154 | return (str(self.info) + ', Left node: ' + str(self.left) + ', Right node: ' + str(self.right)) 155 | 156 | tree = Tree("Root Node", Tree("Branch_1", "Leave_1", "Leave_2"), Tree("Branch_2", "Leave_3", "Leave_4")) 157 | print(tree) 158 | 159 | 160 | # ### In the upcoming iPython Notebook 161 | # 162 | # We will see a new data structure called 'Dictionary'. 163 | # 164 | # #### So, Stay Tuned! 165 | -------------------------------------------------------------------------------- /Section-8/Pandas - Series .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Notebook Contents 11 | # 12 | # ##### 1. Why are we studying series? 13 | # ##### 2. Series datastructure 14 | # ##### 3. Methods or Functions 15 | # ##### 4. pandas.Series.apply() 16 | 17 | # # Why are we studying Series? 18 | 19 | # In python, understanding Series is a natural predecessor to understanding dataframes.
20 | #
21 | # A Series is an indexed data frame with only one data column. It is easier to understand Series first before moving on to more complex data frames. 22 | # 23 | 24 | # # Series 25 | 26 | # A series is a one-dimensional labelled 'array-like' object. The labels are nothing but the index of the data.
27 | # Or
28 | # A series is a special case of a two-dimensional array, which has only 2 columns- one column is for the index and the other column is for data. 29 | 30 | # In[1]: 31 | 32 | 33 | import pandas as pd 34 | 35 | My_Series_int = pd.Series([10, 20, 30, 40, 50, 60]) # Series created using a list 36 | 37 | print (My_Series_int) 38 | 39 | 40 | # The constructor for Series data structure is pandas.Series (data=None, index=None, dtype=None, name=None). If you are using 'pd' as alias, then it would be pd.Series() 41 | 42 | # In[2]: 43 | 44 | 45 | import pandas 46 | 47 | My_Series_flt = pandas.Series ([10.1, 20.2, 30.4, 40.4, 50.5, 60.6]) # Series created using a list 48 | 49 | print (My_Series_flt) 50 | 51 | 52 | # You can see that it returns an indexed column and the data type of that column which is 'int' in this case. 53 | 54 | # A Series is capable of holding any data type. For e.g. integers, float, strings and so on. A series can contain multiple data types too. 55 | 56 | # In[3]: 57 | 58 | 59 | My_Series_mixed = pd.Series ([10.1, 20, 'jay' , 40.4]) # Series created using a list 60 | 61 | print (My_Series_mixed) 62 | 63 | 64 | # The above series returns an 'object' datatype since a Python object is created at this instance. 65 | 66 | # Let us have a look at few other ways of creating series objects. 67 | 68 | # In[4]: 69 | 70 | 71 | # Defining series objects with individual indices 72 | 73 | countries = ['India', 'USA', 'Japan', 'Russia', 'China'] 74 | leaders = ['Narendra Modi', 'Donald Trump', 'Shinzo Abe', 'Vladimir Putin', 'Xi Jinpin'] 75 | 76 | S = pd.Series (leaders, index=countries) # Index is explicitly defined here 77 | S 78 | 79 | 80 | # In[5]: 81 | 82 | 83 | # Have a look at the series S1 84 | 85 | stocks_set1 = ['Alphabet', 'IBM', 'Tesla', 'Infosys'] 86 | 87 | # Here, we are inserting data as a list in Series constructor, but the argument of its index is passed as a pre-defined list 88 | S1 = pd.Series([100, 250, 300, 500], index = stocks_set1) 89 | 90 | print (S1) 91 | print ("\n") 92 | 93 | # Now, have a look at the series S2 94 | 95 | stocks_set2 = ['Alphabet', 'IBM', 'Tesla', 'Infosys'] 96 | 97 | # Here, we are inserting data as a list in Series constructor, but the argument of its index is passed as a pre-defined list 98 | 99 | S2 = pd.Series([500, 400, 110, 700], index = stocks_set2) 100 | 101 | print (S2) 102 | print ("\n") 103 | 104 | # We will add Series S1 and S2 105 | 106 | print (S1 + S2) 107 | 108 | 109 | # In[6]: 110 | 111 | 112 | # Adding lists that have different indexes will create 'NaN' values 113 | 114 | stocks_set1 = ['Alphabet', 'IBM', 'Tesla', 'Infosys'] 115 | stocks_set2 = ['Alphabet', 'Facebook', 'Tesla', 'Infosys'] 116 | 117 | S3 = pd.Series([100, 250, 300, 500], index = stocks_set1) 118 | S4 = pd.Series([500, 700, 110, 700], index = stocks_set2) 119 | 120 | 121 | print (S3) 122 | print("\n") 123 | 124 | print (S4) 125 | print("\n") 126 | 127 | print(S3+S4) 128 | 129 | 130 | # 'NaN' is short for 'Not a Number'. It fills the space for missing or corrupt data.
131 | # It is important to understand how to deal with NaN values, because when you import actual time series data, you are bound to find some missing or corrupted data. 132 | 133 | # ## Methods or Functions 134 | # 135 | # We will have a look at few important methods or functions that can be applied on Series. 136 | 137 | # ##### Series.index 138 | # It is useful to know the range of the index when the series is large. 139 | 140 | # In[7]: 141 | 142 | 143 | My_Series = pd.Series ([10,20,30,40,50]) # Give a better example pls, maybe import data and show range for it? 144 | 145 | print (My_Series.index) 146 | 147 | 148 | # ##### Series.values 149 | # It returns the values of the series. 150 | 151 | # In[8]: 152 | 153 | 154 | My_Series = pd.Series ([10,20,30,40,50]) 155 | 156 | print (My_Series.values) 157 | 158 | 159 | # ##### Series.isnull() 160 | # We can check for missing values with this method. 161 | 162 | # In[9]: 163 | 164 | 165 | # Remember the (S3 + S4) series? You may have a look at it 166 | 167 | print (S3 + S4) 168 | 169 | 170 | # In[10]: 171 | 172 | 173 | # Returns whether the values are null or not. If it is 'True' then the value for that index is a 'NaN value 174 | 175 | (S3 + S4).isnull() 176 | 177 | 178 | # ##### Series.dropna() 179 | # One way to deal with the 'NaN' values is to drop them completely from the Series. This method filters out missing data. 180 | 181 | # In[11]: 182 | 183 | 184 | print ((S3 + S4).dropna()) 185 | 186 | 187 | # In the above output, we have produced the (S3 + S4) addition of the values and along with the series elements, and we have even dropped the 'NaN' values. 188 | 189 | # ##### Series.fillna(1) 190 | # Another way to deal with the 'NaN' values is to fill a custom value of your choice. Here, we are filling the 'NaN' values with the value '1'. 191 | 192 | # In[12]: 193 | 194 | 195 | print ((S3 + S4).fillna(1)) # The output is self-explanatory in this case 196 | 197 | 198 | # ## pandas.Series.apply() 199 | # 200 | # If at all one wants to 'apply' any functions on a particular series, for eg. one wants to 'sine' of each value in the series, then it is possible in pandas. 201 | #
202 | # Series.apply (func) 203 | #
204 | # func = A python function that will be applied to every single value of the series. 205 | 206 | # In[13]: 207 | 208 | 209 | import numpy as np #Create a new series as My_Series 210 | 211 | My_Series = pd.Series([10, 20, 30, 40, 50, 60]) 212 | 213 | print (My_Series) 214 | 215 | 216 | # In[14]: 217 | 218 | 219 | My_Series.apply(np.sin) # Find 'sine' of each value in the series 220 | 221 | 222 | # In[15]: 223 | 224 | 225 | My_Series.apply(np.tan) # Finding 'tan' of each value in the series 226 | 227 | -------------------------------------------------------------------------------- /Section-7/Vectorization and Broadcasting in Arrays.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Vectorization 11 | # 12 | # Vectorization of code helps us write complex codes in a compact way and execute them faster. 13 | # 14 | # It allows to **operate** or apply a function on a complex object, like an array, "at once" rather than iterating over the individual elements. Numpy supports vectorization in an efficient way. 15 | 16 | # # Notebook Contents 17 | # 18 | # ##### 1) 1D or 2D Array operations with a scalar 19 | # ##### 2) 2D Array operations with another 2D array 20 | # ##### 3) 2D Array operations with a 1D array or vector 21 | # ##### 4) Other operators: Compare & Logical 22 | # ##### 5) Just for fun 23 | 24 | # ### Array operations with a scalar 25 | # 26 | # Every element of the array is added/multiplied/operated with the given scalar. We will discuss: 27 | # - Addition 28 | # - Subtraction 29 | # - Multiplication 30 | 31 | # In[49]: 32 | 33 | 34 | import numpy as np #Start the notebook with importing the packing 35 | 36 | my_list = [1, 2, 3, 4, 5.5, 6.6, 7.123, 8.456] 37 | 38 | V = np.array(my_list) # Creating a 1D array or vector 39 | 40 | print (V) 41 | 42 | 43 | # #### Vectorization Using Scalars - Addition 44 | 45 | # In[50]: 46 | 47 | 48 | V_a = V + 2 #Every element is increased by 2. 49 | 50 | print(V_a) 51 | 52 | 53 | # #### Vectorization Using Scalars - Subtraction 54 | 55 | # In[51]: 56 | 57 | 58 | V_s = V - 2.4 #Every element is reduced by 2.4. 59 | 60 | print(V_s) 61 | 62 | 63 | # #### Vectorization Using Scalars - Multiplication 64 | 65 | # In[52]: 66 | 67 | 68 | V2 = np.array([ [1, 2, 3], [4,5,6], [7, 8, 9] ]) #Array of shape 3,3 69 | 70 | V_m = V2 * 10 #Every element is multiplied by 10. 71 | 72 | print(V2) 73 | print(V_m) 74 | 75 | 76 | # #### Try on your own 77 | 78 | # In[53]: 79 | 80 | 81 | V_e = V2 ** 2 #See the output and suggest what this operation is? 82 | 83 | print(V_e) 84 | 85 | 86 | # ### 2D Array operations with another 2D array 87 | # 88 | # This is only possible when the shape of the two arrays is the same. For example, a (2,2) array can be operated with another (2,2) array. 
89 | # 90 | 91 | # In[54]: 92 | 93 | 94 | A = np.array([ [1, 2, 3], [11, 22, 33], [111, 222, 333] ]) #Array of shape 3,3 95 | B = np.ones ((3,3)) #Array of shape 3,3 96 | C= np.ones ((4,4)) #Array of shape 4,4 97 | print (A) 98 | print (B) 99 | print (C) 100 | 101 | 102 | # In[55]: 103 | 104 | 105 | # Addition of 2 arrays of same dimensions (3, 3) 106 | 107 | print("Adding the arrays is element wise: ") 108 | 109 | print(A + B) 110 | 111 | 112 | # In[56]: 113 | 114 | 115 | # Addition of 2 arrays of different shapes or dimensions is NOT allowed 116 | 117 | print("Addition of 2 arrays of different shapes or dimensions will throw a ValueError.") 118 | 119 | print(A + C) 120 | 121 | 122 | # In[57]: 123 | 124 | 125 | # Subtraction of 2 arrays 126 | 127 | print("Subtracting array B from A is element wise: ") 128 | 129 | print(A - B) 130 | 131 | 132 | # In[58]: 133 | 134 | 135 | # Multiplication of 2 arrays 136 | 137 | A1 = np.array([ [1, 2, 3], [4, 5, 6] ]) # Array of shape 2,3 138 | A2 = np.array([ [1, 0, -1], [0, 1, -1] ]) # Array of shape 2,3 139 | 140 | print("Array 1", A1) 141 | print("Array 2", A2) 142 | print("Multiplying two arrays: ", A1 * A2) 143 | print("As you can see above, the multiplication happens element by element.") 144 | 145 | 146 | # You can further try out various combinations yourself, in combining scalars and arithmetic operations to get a hand on vectorization. 147 | 148 | # ### Broadcasting allows 2D Array operations with a 1D array or vector 149 | # 150 | # Numpy also supports broadcasting. Broadcasting allows us to combine objects of different shapes within a single operation. 151 | # 152 | # But, do remember that to perform this operation one of the matix needs to be a vector with its length equal to one of the dimensions of the other matrix. 153 | 154 | # #### Try changing the shape of B and observe the results 155 | 156 | # In[59]: 157 | 158 | 159 | import numpy as np 160 | 161 | A = np.array([ [1, 2, 3], [11, 22, 33], [111, 222, 333] ]) 162 | B = np.array ([1,2,3]) 163 | 164 | print (A) 165 | print (B) 166 | 167 | 168 | # In[60]: 169 | 170 | 171 | print( "Multiplication with broadcasting: " ) 172 | 173 | print (A * B) 174 | 175 | 176 | # In[61]: 177 | 178 | 179 | print( "... 
and now addition with broadcasting: " ) 180 | 181 | print (A + B) 182 | 183 | 184 | # In[62]: 185 | 186 | 187 | # Try to understand the difference between the two 'B' arrays 188 | 189 | B = np.array ([[1, 2, 3] * 3]) 190 | 191 | print (B) 192 | 193 | 194 | # In[63]: 195 | 196 | 197 | B = np.array([[1, 2, 3],] * 3) 198 | 199 | print(B) 200 | 201 | # Hint: look at the brackets 202 | 203 | 204 | # In[64]: 205 | 206 | 207 | # Another example type 208 | 209 | B = np.array([1, 2, 3]) 210 | B[:, np.newaxis] 211 | 212 | # We have changed a row vector into a column vector 213 | 214 | 215 | # In[65]: 216 | 217 | 218 | # Broadcasting in a different way (by changing the vector shape) 219 | 220 | A * B [:, np.newaxis] 221 | 222 | 223 | # In[66]: 224 | 225 | 226 | # This example should be self explanatory by now 227 | 228 | A = np.array([10, 20, 30]) 229 | B = np.array([1, 2, 3]) 230 | A[:, np.newaxis] 231 | 232 | 233 | # In[67]: 234 | 235 | 236 | A[:, np.newaxis] * B 237 | 238 | 239 | # ### Other operations 240 | # 241 | # - Comparison operators: Comparing arrays and the elements of two similar shaped arrays 242 | # - Logical operators: AND/OR operants 243 | 244 | # In[68]: 245 | 246 | 247 | import numpy as np 248 | 249 | A = np.array([ [11, 12, 13], [21, 22, 23], [31, 32, 33] ]) 250 | B = np.array([ [11, 102, 13], [201, 22, 203], [31, 32, 303] ]) 251 | 252 | print (A) 253 | print (B) 254 | 255 | 256 | # In[69]: 257 | 258 | 259 | # It will compare all the elements of the array with each other 260 | 261 | A == B 262 | 263 | 264 | # In[70]: 265 | 266 | 267 | # Will return 'True' only if each and every element is same in both the arrays 268 | 269 | print(np.array_equal(A, B)) 270 | 271 | print(np.array_equal(A, A)) 272 | 273 | 274 | # ### Logical Operators 275 | 276 | # In[71]: 277 | 278 | 279 | # This should be self explanatory by now 280 | 281 | a = np.array([ [True, True], [False, False]]) 282 | b = np.array([ [True, False], [True, False]]) 283 | 284 | print(np.logical_or(a, b)) 285 | 286 | 287 | # In[72]: 288 | 289 | 290 | print(np.logical_and(a, b)) 291 | 292 | 293 | # This is where we will end our iPython notebooks on Numpy. 294 | # 295 | # ### Happy Learning! 296 | -------------------------------------------------------------------------------- /Section-2/My First Python code.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Let us begin 11 | # 12 | # Now that you have learned how to setup Anaconda, it is time to understand how to code programs in Python. Python uses a simple syntax which makes it very easy for someone learning to program for the first time. This notebook is comprehensively designed to help you get familiarized with programming and learn basics of Python. 13 | 14 | # ## What is programming? 15 | # 16 | # Programming is the way of telling a machine what to do. This machine might be your computer, smartphone, or tablet. 
The task might be something as simple as noting down today’s date or capturing information about the Earth’s atmosphere on a satellite. Programming has a lot of alias names and they’re used interchangeably. It goes by programming, developing, or coding all of which involves creating software that gets a machine to do what you want it to do. 17 | 18 | # ### Hello World Program 19 | # 20 | # How would you make Python print "Hello World" for you? Well, it's never been this easy, just use the print command. 21 | 22 | # In[1]: 23 | 24 | 25 | print ("Hello World!") 26 | 27 | 28 | # In[2]: 29 | 30 | 31 | # You may try other variations 32 | print ("I am new to programming!") 33 | print ("Python is cool!") 34 | 35 | 36 | # ## Introduction to Python programming 37 | # Python design places more weight on coding productivity and code readability. Python 38 | # makes use of simple syntax which looks like written English. It talks with words and 39 | # sentences, rather than characters. Python is a portable language. Python can be installed 40 | # and run on any computer. 41 | # 42 | # Python coding is a lot fun and is easy. Take this python code for an example: 43 | # 44 | 45 | # In[3]: 46 | 47 | 48 | x = 2 49 | y = 3 50 | sum = x + y 51 | print (sum) 52 | 53 | 54 | # Even without any coding background, you can easily make out that the code adds up two numbers and prints it. You may modify the code above and try different mathematical operations on different variables. 55 | 56 | # ## Variables, Data Types and Objects 57 | # 58 | # We have studied how to use a variable in python in the previous video unit. 59 | 60 | # In[ ]: 61 | 62 | 63 | x = 100 64 | 65 | 66 | # One thing to keep in mind, the equal '=' sign used while assigning a value to a variable. It should not be read as 'equal to'. It should be read or interpreted as "is set to". 67 | # 68 | # In the previous example, we will read that the value of variable 'x' is set to '100'. 69 | 70 | # In[ ]: 71 | 72 | 73 | y = 50 # Initialising a new variable 'y' whose value is set to 50 74 | 75 | 76 | # ### ID of an object 77 | # The keyword id () specifies the object's address in memory. Look at the code below for seeing the addresses of different objects. 78 | 79 | # In[ ]: 80 | 81 | 82 | id (x) 83 | 84 | 85 | # You may change the variable name inside the function id() to print the id's of other variables. 86 | 87 | # In[ ]: 88 | 89 | 90 | id (y) 91 | 92 | 93 | # Note : The IDs of 'x' and 'y' are different. 94 | 95 | # ### Data Type of an Object 96 | # 97 | # The type of an object cannot change. It specifies two things, the operations that are allowed and the set of values that the object can hold. The keyword type() is used to check the type of an object. 98 | 99 | # In[ ]: 100 | 101 | 102 | type (x) 103 | 104 | 105 | # In[ ]: 106 | 107 | 108 | type (y) 109 | 110 | 111 | # Now, let us try something more. 112 | 113 | # In[ ]: 114 | 115 | 116 | x = x + 1.11 117 | print (x) # This will print the new value of 'x' variable 118 | type(x) # This will print the most updated data type of 'x' 119 | 120 | 121 | # Now you may check the ID of the new 'x' object which is now a float and not a integer. 122 | 123 | # In[ ]: 124 | 125 | 126 | id (x) 127 | 128 | 129 | # Note this is different form the 'int x' ID. 130 | # 131 | # Python automatically takes care of the physical representation of different data types i.e. an integer value will be stored in a different memory location than a float or string. 
132 | 133 | # In[ ]: 134 | 135 | 136 | # let us now convert variable 'x' to a string data type and observe the changes 137 | 138 | x = "hundred" 139 | print (x) 140 | type (x) 141 | 142 | 143 | # In[ ]: 144 | 145 | 146 | id (x) 147 | 148 | 149 | # ### Object References 150 | # 151 | # Let us observe the following code. 152 | 153 | # In[ ]: 154 | 155 | 156 | a = 123 157 | b = a 158 | 159 | 160 | # Where will the object point? Will it be to the same object ID? 161 | 162 | # In[ ]: 163 | 164 | 165 | id (a) 166 | 167 | 168 | # In[ ]: 169 | 170 | 171 | id (b) 172 | 173 | 174 | # Yes, Since same value is stored in both the variables 'a' and 'b', they will point to the same memory location or in other words, they will have the same object ID. 175 | 176 | # ## Multi-Line Statements 177 | # 178 | # There is no semicolon to indicate an end of statement and therefore Python interprets the end of line as the end of statement. 179 | # 180 | # For example, a code to calculate total marks. 181 | 182 | # In[ ]: 183 | 184 | 185 | biology_marks = 82 186 | physics_marks = 91 187 | maths_marks = 96 188 | chemistry_marks = 88 189 | total_marks = biology_marks + physics_marks + maths_marks + chemistry_marks 190 | print (total_marks) 191 | 192 | 193 | # However, if a line is too long, code can be made readable by adding a split, to a single line of code and convert them into multiple lines. In such scenarios, use backward slash as line continuation character to specify that the line should continue. 194 | # 195 | 196 | # In[ ]: 197 | 198 | 199 | total_marks = biology_marks + physics_marks + maths_marks + chemistry_marks 200 | print (total_marks) 201 | 202 | 203 | # ## Indentation 204 | # 205 | # Python forces you to follow proper indentation. The number of spaces in indentation can be different, but all lines of code within the same block should have the same number of spaces in the indentation. 206 | # 207 | # For example, the 3rd line of the code in the cell below shows incorrect indentation. Try running the code to see the error that it throws. 208 | 209 | # In[ ]: 210 | 211 | 212 | # Python Program to calculate the square of number 213 | num = 8 214 | num_sq = num ** 2 215 | print (num_sq) 216 | 217 | 218 | # In[ ]: 219 | 220 | 221 | # On removing the indent 222 | num = 8 223 | num_sq = num ** 2 224 | print (num_sq) 225 | 226 | 227 | # ## Further Resources 228 | # 229 | # As you begin your journey of learning Python programming, we would recommend you to extensively use freely available resources online to understand simple syntax and application of available Python libraries. You can use these following resources in addition to others available online: 230 | # 1. http://docs.python.org/reference/introduction.html 231 | # Reference manual 232 | # 2. http://wiki.python.org/moin/BeginnersGuide 233 | # A guide for writing and running Python programs 234 | -------------------------------------------------------------------------------- /Section-8/Pandas - Dataframe & Basic Functionality.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. 
Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Notebook Contents 11 | # 12 | # ##### 1. Creating Data Frames 13 | # ##### 2. Customizing index of a Data Frame 14 | # ##### 3. Rearranging the order of columns in a Data Frame 15 | # ##### 4. Existing Column as the index of the Data Frame 16 | # ##### 5. Accessing column from a Data Frame 17 | # ##### 6. Loading and viewing Data in a Data Frame 18 | # ##### 7. Dropping Rows and Columns from a Data Frame 19 | # ##### 8. Renaming columns of a Data Frame 20 | # ##### 9. Sorting a Data Frame using a column 21 | # ##### 10. Just for Fun 22 | 23 | # ## Creating Data Frames 24 | # 25 | # The underlying idea of a Data Frame is based on 'spreadsheets'. In other words, data frames stores data in discrete Rows and Columns where each column can be named (something that is not possible in Arrays but is possible in Series). There are also multiple columns in a Data Frame (as opposed to Series, where there can be only one discrete indexed column).
26 | #
27 | # The constructor for a Data Frame is pandas.DataFrame(data=None, index=None) or, if you are using 'pd' as an alias, then it would be pd.DataFrame()
28 | #
29 | # Let us have a look at the following example 30 | 31 | # In[18]: 32 | 33 | 34 | import pandas as pd 35 | import numpy as np 36 | 37 | # A DataFrame has a row and column index; it's like a dict of Series with a common index. 38 | 39 | my_portfolio = { 40 | "stock_name": ["Alphabet", "Facebook", "Apple", "Tesla", "Infosys"], 41 | "quantity_owned": [1564, 6546, 5464, 6513, 4155], 42 | "average_buy_price": ["$950", "$160", "$120", "$270", "$15"] 43 | } 44 | 45 | my_portfolio_frame = pd.DataFrame (my_portfolio) # We have passed the 'data' argument in the Data Frame constructor 46 | 47 | my_portfolio_frame 48 | 49 | 50 | # ## Customizing index of a Data Frame 51 | # 52 | # In the above output, you can see that the 'index' is the default one which starts from 0,1,...4. One can even customize the index for a better understanding of the Data Frame, while working with it. 53 | 54 | # In[19]: 55 | 56 | 57 | ordinals = ["first", "second", "third", "fourth", "fifth"] # list 58 | 59 | my_portfolio_frame = pd.DataFrame (my_portfolio, index=ordinals) #Please notice that we have not kept index as default i.e.'none' 60 | 61 | my_portfolio_frame 62 | 63 | 64 | # ## Rearranging the order of columns in a Data Frame 65 | # 66 | # We can also define or rearrange the order of columns. 67 | 68 | # In[20]: 69 | 70 | 71 | # please observe the 'columns' names parameter while constructing the Data Frame 72 | 73 | my_portfolio_frame = pd.DataFrame(my_portfolio, columns=["stock_name", "quantity_owned", "average_buy_price"], index=ordinals) 74 | 75 | my_portfolio_frame 76 | 77 | 78 | # ## Existing Column as the index of dataframe 79 | # 80 | # If we want to create a more useful index of our existing Data Frame, we can do that using the column 'stock name' as our index. It will make more sense than the 'ordinals' index. 81 | 82 | # In[21]: 83 | 84 | 85 | my_portfolio_frame = pd.DataFrame (my_portfolio, 86 | columns = ["quantity_owned","average_buy_price"], 87 | index = my_portfolio ["stock_name"]) 88 | 89 | my_portfolio_frame 90 | 91 | 92 | # ## Accessing column from a data frame 93 | # 94 | # It is even possible to just veiw one single or selective columns of the entire data frame. 95 | 96 | # In[22]: 97 | 98 | 99 | # The index at present is the 'stock_name'. Refer to above code. 100 | 101 | # This makes sense if we just want to know the quantity of stock that we own for each stock (which is our index, currently) 102 | 103 | print (my_portfolio_frame["quantity_owned"]) 104 | 105 | 106 | # ## Loading and viewing data in a Data Frame 107 | # 108 | # This is something that we have seen in the 'Data Visualisation' section of this course. We can even import data from online sources and view them as data frames or we can take a local 'csv' file of a stock data and view them as data frame. 
109 | # 110 | # 111 | 112 | # In[23]: 113 | 114 | 115 | # Loading and viewing data 116 | 117 | # We have stored a 'infy_data.csv' on our desktop 118 | 119 | import numpy as np 120 | import pandas as pd 121 | 122 | infy = pd.read_csv ('C:/Users/academy/Desktop/infy_data.csv') 123 | 124 | 125 | # In[24]: 126 | 127 | 128 | infy # this is our entire "Infosys" stock data frame 129 | 130 | 131 | # In[25]: 132 | 133 | 134 | infy.shape 135 | 136 | 137 | # In[26]: 138 | 139 | 140 | infy.head () # You will see the top 5 rows 141 | 142 | 143 | # In[27]: 144 | 145 | 146 | infy.tail () # You will see the bottom 5 rows 147 | 148 | 149 | # ## Dropping Rows and Columns from a Data Frame 150 | # 151 | # In the above Infosys stock data, it is not necessary that you need all the columns which are present in the .csv file. Hence, to make your data frame more understandable, you may drop the columns that you do not need using drop function. 152 | 153 | # In[28]: 154 | 155 | 156 | # The axis=1 represents that we are considering columns while dropping. 157 | 158 | infy_new = infy.drop (['Prev Close', 'Last Price', 'Average Price', 'Total Traded Quantity', 159 | 'Turnover', 'No. of Trades', 'Symbol','Series'], axis = 1) 160 | 161 | infy_new.head () 162 | 163 | 164 | # In[29]: 165 | 166 | 167 | #V15 video (I have to delete this) 168 | 169 | # Sorting a data frame 170 | 171 | infy_new = infy_new.sort_values(by="Close Price", ascending=False) 172 | 173 | print(infy_new) 174 | 175 | 176 | # In[30]: 177 | 178 | 179 | # Dropping rows: 31 March2016, 01 April 2016 180 | 181 | infy_new.drop (infy_new.index [[3,4]] ) 182 | 183 | 184 | # ## Renaming Columns of a Data Frame 185 | # 186 | # If we want to rename the column names according to our wish, for better understanding while dealing with the data frame, we can also in python. 187 | 188 | # In[31]: 189 | 190 | 191 | # Renaming Columns: Have a quick look at the code, It should be self-explanatory by now 192 | 193 | infy_new=infy_new.rename(columns={'Date':'Date','Open Price':'Open','High Price':'High','Low Price':'Low','Close Price':'Close'}) 194 | 195 | infy_new.head() 196 | 197 | 198 | # ## Sorting a Data Frame using a column 199 | # 200 | # Sometimes it becomes necessary to sort a stock price data frame, based on the 'Closing Price'. 201 | 202 | # In[32]: 203 | 204 | 205 | # Sorting Dataframe 206 | 207 | infy_new = infy_new.sort_values(by="Close", ascending=False) 208 | 209 | print(infy_new) 210 | 211 | 212 | # ## Just for Fun 213 | 214 | # In[33]: 215 | 216 | 217 | # If at all you want to practice, on a customised data frame, just fill it with random values and go ahead 218 | 219 | import numpy as np 220 | names = ['Jay', 'Varun', 'Devang', 'Ishan', 'Vibhu'] 221 | 222 | months = ["January", "February", "March", 223 | "April", "May", "June", 224 | "July", "August", "September", 225 | "October", "November", "December"] 226 | 227 | df = pd.DataFrame(np.random.randn (12, 5)*10000, columns = names, index = months) 228 | 229 | df 230 | 231 | 232 | # ### In the upcoming iPython Notebook:
233 | # We will understand Statistics and Statistical Functions on a Data Frame. 234 | # 235 | -------------------------------------------------------------------------------- /Section-4/Tuples and Sets.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # Tuples 11 | # 12 | # Tuple is an immutable list. Similar to lists, a tuple can contain heterogeneous sequence of elements, but it is not possible to append, edit or remove any individual elements within a tuple. 13 | # 14 | # ## Creating Tuples 15 | # Tuples are enclosed in parenthesis and the items within them are separated by commas. 16 | 17 | # In[1]: 18 | 19 | 20 | new_tup = () # Empty Tuple 21 | type (new_tup) 22 | 23 | 24 | # In[2]: 25 | 26 | 27 | new_tup = (10, 20, 30, 40) # A tuple of integers 28 | type (new_tup) 29 | 30 | 31 | # In[3]: 32 | 33 | 34 | new_tup = (10, 20.2, 'thirty', 40) # A tuple of mixed data type 35 | type (new_tup) 36 | 37 | 38 | # In[4]: 39 | 40 | 41 | new_tup = ((10,20,30), (10.1, 20.2, 30.3),("ten", "twenty", "thirty")) # A nested tuple 42 | type (new_tup) 43 | 44 | 45 | # In[5]: 46 | 47 | 48 | new_tup = (10,(20.2,("thirty",(40)))) # A deeply nested tuple 49 | type (new_tup) 50 | 51 | 52 | # ## Can we manipulate a tuple? 53 | # 54 | # There are no methods supported by tuples that can help us manipulate a tuple once formed. Tuple does not even support assigning a new item at any particular index. 55 | 56 | # In[6]: 57 | 58 | 59 | my_tup = (10, 20, 30, 40) # This is the 'original' tuple which you have created 60 | 61 | print (my_tup) 62 | 63 | 64 | # In[827]: 65 | 66 | 67 | my_tup [0] # Returning the item at the 0th index 68 | 69 | 70 | # In[865]: 71 | 72 | 73 | my_tup [0] = "40" # Assigning a new item to the 0th index 74 | 75 | 76 | # In[866]: 77 | 78 | 79 | my_tup.append (50) # Trying to Append '50' at the 4th index of the created tuple. 80 | 81 | 82 | # But we can certainly find the length of a tuple. 83 | # 84 | # len (x)
85 | # It returns the length of the tuple. 86 | 87 | # In[830]: 88 | 89 | 90 | len (my_tup) 91 | 92 | 93 | # # Sets 94 | # 95 | # A set is an unordered collection with no duplicate elements. They are useful to create lists that hold only unique values and are also mutable. The elements of a set can be anything like numbers, strings or characters. 96 | # 97 | # ## Creating & Printing Sets 98 | # Curly braces or the set () function can be used to create sets and the items within them are separated by commas. 99 | 100 | # In[831]: 101 | 102 | 103 | new_set = { } # Empty Set ---> An empty set cannot be created 104 | type (new_set) 105 | 106 | 107 | # In[832]: 108 | 109 | 110 | new_set = {'Neo', 'Morphius', 'Trinity', 'Agent Smith', 'Oracle'} # A new set 111 | type (new_set) 112 | 113 | 114 | # In[833]: 115 | 116 | 117 | print (new_set) 118 | 119 | 120 | # In[834]: 121 | 122 | 123 | # Now there are 5 'Agent Smiths' in our set. What will happen if we print this set? 124 | 125 | new_set = {'Neo', 'Morphius', 'Trinity', 'Agent Smith', 'Agent Smith', 'Agent Smith', 'Agent Smith', 'Oracle'} 126 | 127 | print (new_set) # The set will only print unique values 128 | 129 | 130 | # In[835]: 131 | 132 | 133 | # Using the set () function to create sets 134 | 135 | x_set = set ('THEMATRIX') 136 | 137 | type (x_set) 138 | 139 | 140 | # In[836]: 141 | 142 | 143 | print (x_set) # 'THE MATRIX' has two 'T's. Only unique values will be printed. 144 | 145 | 146 | # In[837]: 147 | 148 | 149 | # An additional example 150 | 151 | y_set = set ('THETERMINATOR') 152 | 153 | print (y_set) 154 | 155 | 156 | # ## Set Operations 157 | # 158 | # You can even perform mathematical operations like set union, set intersection, set difference and symmetric difference amongst different datasets. 159 | 160 | # In[1]: 161 | 162 | 163 | # We will create 2 new sets. The 'x_set' and the 'y_set'. 164 | 165 | x_set = set ('ABCDE') 166 | y_set = set ('CDEFG') 167 | 168 | print (x_set) 169 | print (y_set) 170 | 171 | 172 | # x.union(y)
173 | # This method returns all the unique items that are present in the two sets, as a new set. 174 | 175 | # In[2]: 176 | 177 | 178 | x_set.union(y_set) 179 | 180 | 181 | # In[3]: 182 | 183 | 184 | x_set | y_set # Union can be performed by using the pipe '|' operator also 185 | 186 | 187 | # x.intersection(y)
188 | # This method returns the common items that are present in two sets, as a new set. 189 | 190 | # In[5]: 191 | 192 | 193 | x_set.intersection(y_set) 194 | 195 | 196 | # In[6]: 197 | 198 | 199 | x_set & y_set # Intersection can be performed by using the ampersand '&' operator 200 | 201 | 202 | # x.difference(y)
203 | # This method returns the items of 'set 1' that are not present in 'set 2', as a new set. 204 | 205 | # In[843]: 206 | 207 | 208 | x_set.difference(y_set) 209 | 210 | 211 | # In[844]: 212 | 213 | 214 | x_set - y_set # Difference can be performed using the minus '-' operator 215 | 216 | 217 | # difference_update ()
218 | # This method removes from 'set 1' all the elements that are also present in 'set 2'. It updates 'set 1' in place. 219 | 220 | # In[845]: 221 | 222 | 223 | x_set.difference_update(y_set) 224 | 225 | print (x_set) 226 | print (y_set) 227 | 228 | 229 | # In[846]: 230 | 231 | 232 | x_set = set ('ABCDE') 233 | y_set = set ('CDEFG') 234 | 235 | x_set = x_set - y_set # Difference update can be abbreviated in the shown manner i.e. 'x = x-y' 236 | 237 | print (x_set) 238 | print (y_set) 239 | 240 | 241 | # x.isdisjoint(y)
242 | # This method returns True if two sets have null intersection. 243 | 244 | # In[847]: 245 | 246 | 247 | x_set = set ('ABCDE') 248 | y_set = set ('CDEFG') 249 | 250 | x_set.isdisjoint(y_set) 251 | 252 | 253 | # In[848]: 254 | 255 | 256 | x_set = set ('ABC') 257 | y_set = set ('EFG') 258 | 259 | x_set.isdisjoint(y_set) 260 | 261 | 262 | # y.issubset(x)
263 | # This method returns True for 'Set 2', if all the elements of 'Set 2' are present in 'Set 1' 264 | 265 | # In[849]: 266 | 267 | 268 | x_set = set ('ABCDE') 269 | y_set = set ('CDEFG') 270 | 271 | y_set.issubset(x_set) 272 | 273 | 274 | # In[850]: 275 | 276 | 277 | x_set = set ('ABCDE') 278 | y_set = set ('CDE') 279 | 280 | y_set.issubset(x_set) 281 | 282 | 283 | # In[851]: 284 | 285 | 286 | y_set < x_set # One can check a subset using a less than '<' operator. 287 | 288 | 289 | # x.issuperset(y)
290 | # This method returns True for 'Set 1' if all the elements of Set 2 are present in 'Set 1'. 291 | 292 | # In[852]: 293 | 294 | 295 | x_set = set ('ABCDE') 296 | y_set = set ('CDEFG') 297 | 298 | x_set.issuperset(y_set) 299 | 300 | 301 | # In[853]: 302 | 303 | 304 | x_set = set ('ABCDE') 305 | y_set = set ('CDE') 306 | 307 | x_set.issuperset(y_set) 308 | 309 | 310 | # In[854]: 311 | 312 | 313 | x_set > y_set # One can check a superset using a greater than '>' operator. 314 | 315 | 316 | # x.add(e)
317 | # It adds a single item to the set and updates the set. 318 | 319 | # In[855]: 320 | 321 | 322 | x_set = set ('ABCDE') 323 | 324 | print (x_set) 325 | 326 | 327 | # In[856]: 328 | 329 | 330 | x_set.add('FGH') 331 | 332 | print (x_set) 333 | 334 | 335 | # x.discard(e)
336 | # It removes a single item from the set and updates it. 337 | 338 | # In[857]: 339 | 340 | 341 | print (x_set) 342 | 343 | 344 | # In[858]: 345 | 346 | 347 | x_set.discard('FGH') 348 | 349 | print (x_set) 350 | 351 | 352 | # x.pop ()
353 | # It pops and returns an arbitrary item from the set. 354 | 355 | # In[859]: 356 | 357 | 358 | print (x_set) 359 | 360 | 361 | # In[860]: 362 | 363 | 364 | x_set.pop() 365 | 366 | 367 | # x.copy ()
368 | # It creates a shallow copy of any set. 369 | 370 | # In[861]: 371 | 372 | 373 | print (x_set) # There are only 4 items in the set, since one just got popped in the above cell execution. 374 | 375 | 376 | # In[862]: 377 | 378 | 379 | x_set.copy() 380 | 381 | 382 | # x.clear()
383 | # It clears all the items of the set. 384 | 385 | # In[863]: 386 | 387 | 388 | print (x_set) 389 | 390 | 391 | # In[864]: 392 | 393 | 394 | x_set.clear() 395 | 396 | print (x_set) 397 | 398 | 399 | # ### This is where we will end this section on Data Structures 400 | # 401 | # Stay tuned for the next Section. 402 | -------------------------------------------------------------------------------- /Section-7/Indexing and Slicing.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # We will continue from where we left in the previous notebook. 11 | # 12 | # # Notebook Contents 13 | # 14 | # ##### 1. Indexing 15 | # ##### 2. Slicing 16 | # ##### 3. Arrays of 1s and 0s 17 | # ##### 4. Identity function 18 | 19 | # ## Indexing 20 | 21 | # We can access the elements of an array using its **index**. Index gives the location of an element of an array. 22 | # 23 | # - The first index is '0'. 24 | # - The second index is '1' and so on. 25 | # - The second last index is '-2'. 26 | # - The last index is '-1'. 27 | # 28 | # ### Indexing in a one-dimensional array 29 | # 30 | # A one-dimensional array is indexed just like a list. 31 | 32 | # In[62]: 33 | 34 | 35 | import numpy as np 36 | 37 | # One dimensional array 38 | 39 | A = np.array([10, 21, 32, 43, 54, 65, 76, 87]) 40 | 41 | # Print the first element of A 42 | print(A [0]) 43 | 44 | # Remember, in pyhton, counting starts from 0 and not from 1 45 | 46 | 47 | # In[63]: 48 | 49 | 50 | # Print the last element of A 51 | print(A [-1]) 52 | 53 | 54 | # In[64]: 55 | 56 | 57 | # Print the third element of A 58 | print (A [2]) 59 | 60 | 61 | # In[65]: 62 | 63 | 64 | # Print the second last element 65 | print (A [-2]) 66 | 67 | 68 | # ### Indexing in a two-dimensional array 69 | # 70 | # A 2-Dimensional Array consists of rows and columns, so you need to specify both rows and columns, to locate an element. 71 | 72 | # In[66]: 73 | 74 | 75 | # Create a 2-Dimensional Array 76 | 77 | A = np.array ([ [1,2,3], [4,5,6], [7,8,9], [10,11,12] ]) 78 | 79 | print (A) 80 | 81 | # The shape of the array is : 4 rows and 3 columns 82 | 83 | 84 | # In[67]: 85 | 86 | 87 | # Print the element of Row 1, column 1 88 | print (A [0] [0]) 89 | 90 | 91 | # In[68]: 92 | 93 | 94 | # Print the element of row 2, column 1 95 | print (A [1] [0]) 96 | 97 | 98 | # In[69]: 99 | 100 | 101 | # Print the element of row 4, column 3 102 | print (A [3] [2]) 103 | 104 | 105 | # In[70]: 106 | 107 | 108 | # Another way to print the element of row 3, column 2 109 | print (A [2,1]) 110 | 111 | 112 | # #### Try on your own 113 | 114 | # In[71]: 115 | 116 | 117 | # Can you guess what will be the output of these print statement? 118 | 119 | print (A [4,3]) 120 | 121 | 122 | # ## Slicing 123 | # 124 | # When you want to select a certain section of an array, then you slice it. It could be a bunch of elements in a one-dimensional array and/or entire rows and columns in a two-dimensional array. 
125 | # 126 | # ### Slicing a one-dimensional array 127 | # 128 | # You can slice a one-dimensional array in various ways: 129 | # - Print first few elements 130 | # - Print last few elements 131 | # - Print middle elements 132 | # - Print elements after certain step. 133 | # 134 | # Syntax: 135 | # #### array_name [start: stop: step] 136 | # 137 | 138 | # In[72]: 139 | 140 | 141 | # Consider a one-dimensional array A 142 | 143 | A = np.array([1, 2, 3, 4, 5, 6, 7, 8]) 144 | 145 | # By default, the step = 1 146 | 147 | # To print the first 4 elements (i.e. indices 0, 1, 2, 3, those before index 4) 148 | print(A [:4]) 149 | 150 | # To print the elements from the index = 6 till the end 151 | print(A [6:]) 152 | 153 | # To print the elements starting from index=2 and it will stop BEFORE index=5 154 | 155 | print(A [2:5]) 156 | 157 | # To print all the elements of the array 158 | print(A [:]) 159 | 160 | 161 | # In[73]: 162 | 163 | 164 | # Introducing step = 2 165 | 166 | # This will print alternate index elements of the entire array, starting from index = 0 167 | 168 | print (A [::2]) 169 | 170 | 171 | # #### Try on your own 172 | 173 | # In[74]: 174 | 175 | 176 | # Can you guess what will be the output of these print statement? 177 | 178 | print (A [::3]) 179 | 180 | 181 | # ### Slicing a two-dimensional array 182 | # 183 | # You can slice a two-dimensional array in various ways: 184 | # - Print a row or a column 185 | # - Print multiple rows or columns 186 | # - Print a section of table for given rows and columns 187 | # - Print first and/or last rows and/or columns. 188 | # - Print rows and columns after certain step. 189 | # 190 | # Syntax: 191 | # #### array_name [row start: row stop: row step], [col start, col stop, col step] 192 | 193 | # In[138]: 194 | 195 | 196 | # A two-dimensional Array 197 | 198 | A = np.array([ 199 | ["00", "01", "02", "03", "04"], 200 | [10, 11, 12, 13, 14], 201 | [20, 21, 22, 23, 24], 202 | [30, 31, 32, 33, 34], 203 | [40, 41, 42, 43, 44] 204 | ]) 205 | 206 | print (A) 207 | 208 | 209 | # In[139]: 210 | 211 | 212 | # Print a row or a column 213 | 214 | print(A[1,]) # Printing Row 2 215 | 216 | 217 | # In[140]: 218 | 219 | 220 | print(A[:,1]) # Column 2 221 | 222 | 223 | # In[141]: 224 | 225 | 226 | # Print multiple rows or columns 227 | 228 | print(A[:2,]) #Rows 1 & 2 229 | 230 | print(A[:,1:3]) #Columns 2 & 3 231 | 232 | 233 | # In[142]: 234 | 235 | 236 | # Print first or last rows and columns 237 | 238 | print(A[:3,]) # Printing first three rows 239 | 240 | print(A[:,3:]) # Printing 4th column and onwards 241 | 242 | 243 | # In[143]: 244 | 245 | 246 | # Print selected rows and columns 247 | 248 | print(A[:2,2]) # Rows Rows 1 & 2 for column3 249 | 250 | 251 | # In[144]: 252 | 253 | 254 | print(A[:3,2:]) # 1st three rows for the last three columns 255 | 256 | 257 | # In[145]: 258 | 259 | 260 | print(A[:,:-2]) # Array without last three columns 261 | 262 | 263 | # In[146]: 264 | 265 | 266 | print(A[:-3,:]) # Array without last 3 rows 267 | 268 | 269 | # #### Using step 270 | 271 | # In[147]: 272 | 273 | 274 | # Let us create a new array using the arange method for this exercise 275 | 276 | A2 = np.arange(50).reshape(5,10) #Create an array with 5 rows, 10 columns that has values from 1 to 50. 
277 | 278 | print(A2) 279 | 280 | 281 | # In[148]: 282 | 283 | 284 | # Using step in slicing 285 | 286 | print(A[::2,]) # Print Rows 1, 3, and 5 287 | 288 | 289 | # In[149]: 290 | 291 | 292 | print(A[:, 1::2]) # Print Columns 2 & 4 293 | 294 | 295 | # In[150]: 296 | 297 | 298 | print(A[:, 1:10:2]) # Print Columns 2,4, 6, 8, 10 299 | 300 | 301 | # In[151]: 302 | 303 | 304 | # This will print an intersection of elements of rows 0, 2, 4 and columns 0, 3, 9, 6 305 | 306 | print(A2 [::2, ::3]) 307 | 308 | 309 | # In[152]: 310 | 311 | 312 | # Let us print all the rows and columns 313 | 314 | print (A2 [::,::]) 315 | 316 | 317 | # #### Try on your own 318 | 319 | # In[153]: 320 | 321 | 322 | # If the following line of code is self explanatory to you, then you have understood the entire concept of 2D slicing 323 | 324 | print (A2 [2:4:1, 2:7:4]) 325 | 326 | 327 | # In[154]: 328 | 329 | 330 | # This should be self explanatory 331 | 332 | A = np.arange(12) 333 | B = A.reshape(3, 4) 334 | 335 | A[0] = 42 336 | print(B) 337 | 338 | 339 | # ## Array of Ones and Zeros 340 | 341 | # We will be initialising arrays which have all the elements either as zeros or one. Such arrays help us while performing arithmentic operations 342 | 343 | # In[155]: 344 | 345 | 346 | O = np.ones((4,4)) 347 | print(O) 348 | 349 | # This is defaulty datatype 'float' 350 | 351 | 352 | # In[156]: 353 | 354 | 355 | O = np.ones((4,4), dtype=int) # Changing data type to integers 356 | print(O) 357 | 358 | 359 | # In[157]: 360 | 361 | 362 | Z = np.zeros((3,3)) 363 | print(Z) 364 | 365 | 366 | # In[158]: 367 | 368 | 369 | Z = np.zeros((3,3), dtype = int) 370 | print(Z) 371 | 372 | 373 | # ## Identity Function 374 | # 375 | # An Identity Array has equal number of rows and columns. It is a square array so that the diagonal elements are all 'ones'. 376 | 377 | # In[159]: 378 | 379 | 380 | I = np.identity(4) 381 | 382 | print (I) 383 | 384 | 385 | # In[160]: 386 | 387 | 388 | I = np.identity (3, dtype = int) 389 | 390 | print (I) 391 | 392 | 393 | # ### In the upcoming iPython Notebook: 394 | # 395 | # We will continue understanding about arrays and learn about Vectorization, Arithmetic Operation, Broadcasting and Array Comparisons. 396 | -------------------------------------------------------------------------------- /Pandas- Grouping and Reshaping.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Pandas 'Groupby' 11 | # 12 | # Any groupby operation involves one of the following operations on the original dataframe/object. They are: 13 | #
14 | # 1. Splitting the data into groups based on some criteria.
15 | #
16 | # 2. Applying a function to each group separately.
17 | #
18 | # 3. Combining the results into a single data frame.
19 | #
20 | # Splitting the data is pretty straightforward. What adds value to this split is the 'Apply' step, and this is what makes the 'Groupby' function interesting. In the apply step, you may wish to do one of the following (a short sketch follows after this list):
21 | #
22 | # a. Aggregation − Computing a summary statistic. Eg: Compute group sums or means.
23 | #
24 | # b. Transformation − perform some group-specific operation. Eg: Standardizing data (computing the z-score) within the group.
25 | #
26 | # c. Filtration − discarding groups of data based on some condition.
27 | #
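# Before building the full portfolio example below, here is a minimal sketch of the split-apply-combine idea on a small, made-up frame (the column names and values here are ours, purely for illustration):

import pandas as pd

demo = pd.DataFrame({'Sector': ['IT', 'IT', 'Pharma'], 'Amount': [100, 300, 200]})

# split on 'Sector', apply a sum to each group, combine the results into one Series
print(demo.groupby('Sector')['Amount'].sum())   # IT -> 400, Pharma -> 200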
28 | # Let us now create a DataFrame object and perform all the operations on it 29 | 30 | # In[1]: 31 | 32 | 33 | # Creating a data frame 34 | 35 | import pandas as pd 36 | 37 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 38 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 39 | 40 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 41 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 42 | 43 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 44 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 45 | 46 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 47 | 48 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 49 | 50 | mp = pd.DataFrame(my_portfolio) 51 | 52 | mp 53 | 54 | 55 | # ### View Groups 56 | 57 | # In[2]: 58 | 59 | 60 | print (mp.groupby('MarketCap').groups) 61 | 62 | 63 | # There are 3 Groups formed, if we group it by 'Market Cap'. They are:
64 | #
65 | # Group 1: 'Large Cap' (3 companies at index 0,1,8)
66 | # Group 2: 'Mid Cap' (5 companies at index 2,3,4,6,9)
67 | # Group 3: 'Small Cap' (2 companies at index 5,7)
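# As a small aside (ours, not from the lecture): the .groups attribute is a dictionary, so the row labels of any one group can be looked up by its key:

print(mp.groupby('MarketCap').groups['Mid Cap'])   # the index labels 2, 3, 4, 6 and 9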
68 | 69 | # In[3]: 70 | 71 | 72 | # Understand this Grouping 73 | 74 | print (mp.groupby('Sector').groups) 75 | 76 | 77 | # There are 5 Groups formed, if we group it by 'Sector'. They are:
78 | #
79 | # Group 1: 'FMCG' (3 companies at index 1,5,6)
80 | # Group 2: 'IT' (2 companies at index 0,7)
81 | # Group 3: 'Pharma' (2 companies at index 3,4)
82 | # Group 4: 'Finance' (2 companies at index 2,8)
83 | # Group 5: 'Real Estate' (1 company at index 9)
84 | 85 | # In[4]: 86 | 87 | 88 | # Group by with multiple columns 89 | 90 | print (mp.groupby(['MarketCap','Sector']).groups) 91 | 92 | 93 | # There are 9 Groups formed, if we group it by 'Sector' and 'MarketCap'. They are:
94 | #
95 | # Group 1: 'Large Cap, FMCG' (1 company at index 1)
96 | # Group 2: 'Mid Cap, FMCG' (1 company at index 6)
97 | # Group 3: 'Large Cap, IT' (1 company at index 0)
98 | # Group 4: 'Small Cap, FMCG' (1 company at index 5)
99 | # Group 5: 'Mid Cap, Real Estate' (1 company at index 9)
100 | # Group 6: 'Small Cap, IT' (1 company at index 7)
101 | # Group 7: 'Mid Cap, Pharma' (2 companies at index 3,4)
102 | # Group 8: 'Mid Cap, Finance' (1 company at index 2)
 Group 9: 'Large Cap, Finance' (1 company at index 8)
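# The notebook shows get_group() with a single key further below; as an extra illustration (the variable name is ours), a group from a multi-column groupby is selected with a tuple key:

grouped_two_keys = mp.groupby(['MarketCap', 'Sector'])

print(grouped_two_keys.get_group(('Mid Cap', 'Pharma')))   # the Divis Lab and Lupin rows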
103 | 104 | # ### Iterating through groups 105 | 106 | # In[6]: 107 | 108 | 109 | # A better way to visualise 110 | 111 | grouped = mp.groupby('Sector') 112 | 113 | for name,group in grouped: 114 | print (name) 115 | print (group) 116 | 117 | 118 | # In[7]: 119 | 120 | 121 | # Just so that you feel comfortable, go through this line of code too 122 | 123 | grouped = mp.groupby('MarketCap') 124 | 125 | for name,group in grouped: # We will learn 'for' loop in further sections. It is usually used for iterations 126 | print (name) 127 | print (group) 128 | 129 | 130 | # ### Select a group 131 | 132 | # In[9]: 133 | 134 | 135 | import pandas as pd 136 | 137 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 138 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 139 | 140 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 141 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 142 | 143 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 144 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 145 | 146 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 147 | 148 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 149 | 150 | mp = pd.DataFrame(my_portfolio) 151 | 152 | grouped = mp.groupby('MarketCap') 153 | 154 | print (grouped.get_group('Mid Cap')) 155 | 156 | 157 | # ### Aggregations 158 | 159 | # In[10]: 160 | 161 | 162 | import numpy as np 163 | 164 | grouped = mp.groupby('MarketCap') 165 | 166 | print (grouped['Amount Invested'].agg(np.mean)) 167 | 168 | 169 | # What does this mean?
170 | #
171 | # This means that, on average, we have invested Rs. 15000 per script in Large Cap, Rs. 34700 per script in Mid Cap and Rs. 15000 per script in Small Cap. 172 | 173 | # In[11]: 174 | 175 | 176 | grouped = mp.groupby('MarketCap') 177 | 178 | print (grouped.agg(np.size)) 179 | 180 | 181 | # What does this mean?
182 | # 183 | # This just shows the size of the group. 184 | 185 | # In[12]: 186 | 187 | 188 | # Applying multiple aggregation functions at once 189 | 190 | grouped = mp.groupby('MarketCap') 191 | 192 | print (grouped['Amount Invested'].agg([np.sum, np.mean])) 193 | 194 | 195 | # What does this mean?
196 | #
197 | # This means that the 'total amount' invested in a particular sector is the 'sum' and 'average amount per script' invested in that sector is the 'mean' value. 198 | 199 | # ### Transformations 200 | 201 | # In[13]: 202 | 203 | 204 | import pandas as pd 205 | 206 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 207 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 208 | 209 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 210 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 211 | 212 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 213 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 214 | 215 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 216 | 217 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 218 | 219 | mp = pd.DataFrame(my_portfolio) 220 | 221 | print (mp) 222 | 223 | grouped = mp.groupby('MarketCap') 224 | 225 | z_score = lambda x: (x - x.mean()) / x.std() 226 | 227 | print (grouped.transform(z_score)) 228 | 229 | 230 | # ### Filteration 231 | 232 | # In[14]: 233 | 234 | 235 | print (mp.groupby('MarketCap').filter(lambda x: len(x)>= 3)) 236 | 237 | 238 | # What does this mean?
239 | #
240 | # It will not filter the Groups that has 3 or less than 3 companies in that particular group. 241 | 242 | # ### Merging/Joining 243 | 244 | # In[15]: 245 | 246 | 247 | import pandas as pd 248 | 249 | 250 | left_df = pd.DataFrame({ 251 | 'id':[1,2,3,4,5], 252 | 'Company': ['Infosys', 'SBI', 'Asian Paints', 'Maruti', 'Sun Pharma'], 253 | 'Sector':['IT','Banks','Paints and Varnishes','Auto','Pharma']}) 254 | 255 | right_df = pd.DataFrame( 256 | {'id':[1,2,3,4,5], 257 | 'Company': ['NTPC', 'TCS', 'Lupin', 'ICICI', 'M&M'], 258 | 'Sector':['Power','IT','Pharma','Banks','Auto']}) 259 | 260 | 261 | # In[16]: 262 | 263 | 264 | left_df 265 | 266 | 267 | # In[17]: 268 | 269 | 270 | right_df 271 | 272 | 273 | # In[18]: 274 | 275 | 276 | # Merge 2 DF on a key 277 | 278 | print (pd.merge(left_df,right_df, on='id')) 279 | 280 | 281 | # In[19]: 282 | 283 | 284 | print (pd.merge(left_df,right_df, on='Sector')) 285 | 286 | 287 | # In[20]: 288 | 289 | 290 | # Merge 2 DFs on multiple keys 291 | 292 | print (pd.merge(left_df,right_df,on=['Sector','Company'])) 293 | 294 | 295 | # In[21]: 296 | 297 | 298 | # Merge using 'how' argument 299 | 300 | # Left join 301 | 302 | print (pd.merge(left_df, right_df, on='Sector', how='left')) 303 | 304 | 305 | # In[22]: 306 | 307 | 308 | # Right join 309 | 310 | print (pd.merge(left_df, right_df, how='outer', on='Sector')) 311 | 312 | 313 | # In[23]: 314 | 315 | 316 | # Outer Join 317 | 318 | print (pd.merge(left_df, right_df, how='outer', on='Sector')) 319 | 320 | 321 | # In[24]: 322 | 323 | 324 | # Inner Join 325 | 326 | print (pd.merge(left_df, right_df, on='Sector', how='inner')) 327 | 328 | 329 | # ### Concatenation 330 | 331 | # In[25]: 332 | 333 | 334 | print (pd.concat([left_df,right_df])) 335 | 336 | 337 | # In[26]: 338 | 339 | 340 | print (pd.concat([left_df, right_df],keys=['x','y'])) 341 | 342 | 343 | # In[27]: 344 | 345 | 346 | print (pd.concat([left_df,right_df],keys=['x','y'],ignore_index=True)) 347 | 348 | 349 | # In[28]: 350 | 351 | 352 | print (pd.concat([left_df,right_df],axis=1)) 353 | 354 | 355 | # In[29]: 356 | 357 | 358 | # Concatenating using append 359 | 360 | print (left_df.append(right_df)) 361 | 362 | 363 | # In[30]: 364 | 365 | 366 | print (left_df.append([right_df,left_df, right_df])) 367 | 368 | -------------------------------------------------------------------------------- /Section-8/Pandas- Grouping and Reshaping.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # ## Pandas 'Groupby' 11 | # 12 | # Any groupby operation involves one of the following operations on the original dataframe/object. They are: 13 | #
14 | # 1. Splitting the data into groups based on some criteria.
15 | #
16 | # 2. Applying a function to each group separately.<br>
17 | #
18 | # 3. Combining the results into a single data frame.
19 | #
20 | # Splitting the data is pretty straightforward. What adds value to this split is the 'Apply' step; this is what makes the 'Groupby' function interesting. In the apply step, you may wish to do one of the following (a compact sketch of all three follows this list):<br>
21 | #
22 | # a. Aggregation − computing a summary statistic, e.g. group sums or means.<br>
23 | #
24 | # b. Transformation − performing some group-specific operation, e.g. standardizing data (computing the z-score) within each group.<br>
25 | #
26 | # c. Filtration − discarding some groups, based on a group-level condition.<br>
27 | #
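# As a compact preview of the split-apply-combine pattern described above (a minimal sketch using a small hypothetical frame, not part of the original notebook; the full portfolio example is built in the cells below):

import pandas as pd

toy = pd.DataFrame({'Group': ['A', 'A', 'B'], 'Value': [10, 20, 30]})

print (toy.groupby('Group')['Value'].sum())                               # a. Aggregation: one number per group
print (toy.groupby('Group')['Value'].transform(lambda x: x - x.mean()))  # b. Transformation: same shape as the input
print (toy.groupby('Group').filter(lambda x: len(x) >= 2))               # c. Filtration: keeps only group 'A' (2 rows)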
28 | # Let us now create a DataFrame object and perform all the operations on it 29 | 30 | # In[1]: 31 | 32 | 33 | # Creating a data frame 34 | 35 | import pandas as pd 36 | 37 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 38 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 39 | 40 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 41 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 42 | 43 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 44 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 45 | 46 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 47 | 48 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 49 | 50 | mp = pd.DataFrame(my_portfolio) 51 | 52 | mp 53 | 54 | 55 | # ### View Groups 56 | 57 | # In[2]: 58 | 59 | 60 | print (mp.groupby('MarketCap').groups) 61 | 62 | 63 | # There are 3 Groups formed, if we group it by 'Market Cap'. They are:
64 | #
65 | # Group 1: 'Large Cap' (3 companies at index 0,1,8)
66 | # Group 2: 'Mid Cap' (5 companies at index 2,3,4,6,9)
67 | # Group 3: 'Small Cap' (2 companies at index 5,7)
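# A quick way to confirm these group sizes (a small addition, assuming the same 'mp' frame defined above) is the size() method of the grouped object:

print (mp.groupby('MarketCap').size())   # Large Cap: 3, Mid Cap: 5, Small Cap: 2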
68 | 69 | # In[3]: 70 | 71 | 72 | # Understand this Grouping 73 | 74 | print (mp.groupby('Sector').groups) 75 | 76 | 77 | # There are 5 Groups formed, if we group it by 'Sector'. They are:
78 | #
79 | # Group 1: 'FMCG' (3 companies at index 1,5,6)
80 | # Group 2: 'IT' (2 companies at index 0,7)
81 | # Group 3: 'Pharma' (2 companies at index 3,4)
82 | # Group 4: 'Finance' (2 companies at index 2,8)
83 | # Group 5: 'Real Estate' (1 company at index 9)
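# If you only need the group labels rather than the full index lists (again a small, hedged addition using the same 'mp' frame), the keys of the groups dictionary can be listed directly:

print (list(mp.groupby('Sector').groups.keys()))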
84 | 85 | # In[4]: 86 | 87 | 88 | # Group by with multiple columns 89 | 90 | print (mp.groupby(['MarketCap','Sector']).groups) 91 | 92 | 93 | # There are 9 groups formed if we group by 'MarketCap' and 'Sector'. They are:<br>
94 | #
95 | # Group 1: 'Large Cap, FMCG' (1 company at index 1)
96 | # Group 2: 'Mid Cap, FMCG' (1 company at index 6)
97 | # Group 3: 'Large Cap, IT' (1 company at index 0)
98 | # Group 4: 'Small Cap, FMCG' (1 company at index 5)
99 | # Group 5: 'Mid Cap, Real Estate' (1 company at index 9)
100 | # Group 6: 'Small Cap, IT' (1 company at index 7)
101 | # Group 7: 'Mid Cap, Pharma' (2 companies at index 3,4)
102 | # Group 8: 'Mid Cap, Finance' (1 company at index 2)<br>
# Group 9: 'Large Cap, Finance' (1 company at index 8)<br>
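# Grouping on multiple keys can be combined directly with an aggregation. As a hedged sketch using the same 'mp' frame, the total amount invested per ('MarketCap', 'Sector') pair is:

print (mp.groupby(['MarketCap', 'Sector'])['Amount Invested'].sum())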
103 | 104 | # ### Iterating through groups 105 | 106 | # In[6]: 107 | 108 | 109 | # A better way to visualise 110 | 111 | grouped = mp.groupby('Sector') 112 | 113 | for name,group in grouped: 114 | print (name) 115 | print (group) 116 | 117 | 118 | # In[7]: 119 | 120 | 121 | # Just so that you feel comfortable, go through this line of code too 122 | 123 | grouped = mp.groupby('MarketCap') 124 | 125 | for name,group in grouped: # We will learn 'for' loop in further sections. It is usually used for iterations 126 | print (name) 127 | print (group) 128 | 129 | 130 | # ### Select a group 131 | 132 | # In[9]: 133 | 134 | 135 | import pandas as pd 136 | 137 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 138 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 139 | 140 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 141 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 142 | 143 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 144 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 145 | 146 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 147 | 148 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 149 | 150 | mp = pd.DataFrame(my_portfolio) 151 | 152 | grouped = mp.groupby('MarketCap') 153 | 154 | print (grouped.get_group('Mid Cap')) 155 | 156 | 157 | # ### Aggregations 158 | 159 | # In[10]: 160 | 161 | 162 | import numpy as np 163 | 164 | grouped = mp.groupby('MarketCap') 165 | 166 | print (grouped['Amount Invested'].agg(np.mean)) 167 | 168 | 169 | # What does this mean?
170 | #
171 | # This means that, on average, we have invested Rs. 15000 per script in Large Cap, Rs. 34700 per script in Mid Cap and Rs. 15000 per script in Small Cap. 172 | 173 | # In[11]: 174 | 175 | 176 | grouped = mp.groupby('MarketCap') 177 | 178 | print (grouped.agg(np.size)) 179 | 180 | 181 | # What does this mean?<br>
182 | # 183 | # This simply shows the size of each group, i.e. how many entries each market-cap group contains, reported for every column. 184 | 185 | # In[12]: 186 | 187 | 188 | # Applying multiple aggregation functions at once 189 | 190 | grouped = mp.groupby('MarketCap') 191 | 192 | print (grouped['Amount Invested'].agg([np.sum, np.mean])) 193 | 194 | 195 | # What does this mean?<br>
196 | #
197 | # This means that the 'total amount' invested in a particular sector is the 'sum' and 'average amount per script' invested in that sector is the 'mean' value. 198 | 199 | # ### Transformations 200 | 201 | # In[13]: 202 | 203 | 204 | import pandas as pd 205 | 206 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma', 207 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'], 208 | 209 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin', 210 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'], 211 | 212 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap', 213 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'], 214 | 215 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217], 216 | 217 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]} 218 | 219 | mp = pd.DataFrame(my_portfolio) 220 | 221 | print (mp) 222 | 223 | grouped = mp.groupby('MarketCap') 224 | 225 | z_score = lambda x: (x - x.mean()) / x.std() 226 | 227 | print (grouped.transform(z_score)) 228 | 229 | 230 | # ### Filteration 231 | 232 | # In[14]: 233 | 234 | 235 | print (mp.groupby('MarketCap').filter(lambda x: len(x)>= 3)) 236 | 237 | 238 | # What does this mean?
239 | #
240 | # It will not filter the Groups that has 3 or less than 3 companies in that particular group. 241 | 242 | # ### Merging/Joining 243 | 244 | # In[15]: 245 | 246 | 247 | import pandas as pd 248 | 249 | 250 | left_df = pd.DataFrame({ 251 | 'id':[1,2,3,4,5], 252 | 'Company': ['Infosys', 'SBI', 'Asian Paints', 'Maruti', 'Sun Pharma'], 253 | 'Sector':['IT','Banks','Paints and Varnishes','Auto','Pharma']}) 254 | 255 | right_df = pd.DataFrame( 256 | {'id':[1,2,3,4,5], 257 | 'Company': ['NTPC', 'TCS', 'Lupin', 'ICICI', 'M&M'], 258 | 'Sector':['Power','IT','Pharma','Banks','Auto']}) 259 | 260 | 261 | # In[16]: 262 | 263 | 264 | left_df 265 | 266 | 267 | # In[17]: 268 | 269 | 270 | right_df 271 | 272 | 273 | # In[18]: 274 | 275 | 276 | # Merge 2 DF on a key 277 | 278 | print (pd.merge(left_df,right_df, on='id')) 279 | 280 | 281 | # In[19]: 282 | 283 | 284 | print (pd.merge(left_df,right_df, on='Sector')) 285 | 286 | 287 | # In[20]: 288 | 289 | 290 | # Merge 2 DFs on multiple keys 291 | 292 | print (pd.merge(left_df,right_df,on=['Sector','Company'])) 293 | 294 | 295 | # In[21]: 296 | 297 | 298 | # Merge using 'how' argument 299 | 300 | # Left join 301 | 302 | print (pd.merge(left_df, right_df, on='Sector', how='left')) 303 | 304 | 305 | # In[22]: 306 | 307 | 308 | # Right join 309 | 310 | print (pd.merge(left_df, right_df, how='outer', on='Sector')) 311 | 312 | 313 | # In[23]: 314 | 315 | 316 | # Outer Join 317 | 318 | print (pd.merge(left_df, right_df, how='outer', on='Sector')) 319 | 320 | 321 | # In[24]: 322 | 323 | 324 | # Inner Join 325 | 326 | print (pd.merge(left_df, right_df, on='Sector', how='inner')) 327 | 328 | 329 | # ### Concatenation 330 | 331 | # In[25]: 332 | 333 | 334 | print (pd.concat([left_df,right_df])) 335 | 336 | 337 | # In[26]: 338 | 339 | 340 | print (pd.concat([left_df, right_df],keys=['x','y'])) 341 | 342 | 343 | # In[27]: 344 | 345 | 346 | print (pd.concat([left_df,right_df],keys=['x','y'],ignore_index=True)) 347 | 348 | 349 | # In[28]: 350 | 351 | 352 | print (pd.concat([left_df,right_df],axis=1)) 353 | 354 | 355 | # In[29]: 356 | 357 | 358 | # Concatenating using append 359 | 360 | print (left_df.append(right_df)) 361 | 362 | 363 | # In[30]: 364 | 365 | 366 | print (left_df.append([right_df,left_df, right_df])) 367 | 368 | -------------------------------------------------------------------------------- /Section-8/Pandas - Descriptive Statistical Functions .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # In this particular notebook, we will have a look at the different descripive statistical functions available in Python. 11 | # 12 | # # Notebook Contents 13 | # 14 | # ##### 1. DataFrame.count() 15 | # ##### 2. DataFrame.min() 16 | # ##### 3. DataFrame.max() 17 | # ##### 4. DataFrame.mean() 18 | # ##### 5. DataFrame.median 19 | # ##### 6. DataFrame.mode() 20 | # ##### 7. DataFrame.sum() 21 | # ##### 8. DataFrame.diff() 22 | # ##### 9. DataFrame.pct_change() 23 | # ##### 10. 
DataFrame.var() 24 | # ##### 11. DataFrame.std() 25 | # ##### 12. DataFrame.rolling(window=).mean() 26 | # ##### 13. DataFrame.expanding(min_periods=).mean() 27 | # ##### 14. DataFrame.cov() 28 | # ##### 15. DataFrame.corr() 29 | # ##### 16. DataFrame.kurt() 30 | # ##### 17. DataFrame.skew() 31 | 32 | # In[619]: 33 | 34 | 35 | # Loading and viewing data 36 | 37 | # We have stored an 'infy.csv' file on our desktop 38 | 39 | import numpy as np 40 | import pandas as pd 41 | 42 | infy = pd.read_csv ('C:/Users/academy/Desktop/infy.csv') 43 | 44 | 45 | # Once you import or load your OHLC data in a data frame, it is a good habit to print the 'head' and 'tail' of that data frame.<br>
46 | #
47 | # This helps you to be sure, whether the 'dates' of your data frame, is correct or not. Further, the 'column names' are also displayed, which helps you in easy manipulation of your data frame. 48 | # 49 | 50 | # In[620]: 51 | 52 | 53 | infy.head() # Printing the first five rows of your data frame 54 | 55 | 56 | # In[621]: 57 | 58 | 59 | infy.tail() # Printing the last five rows of your data frame 60 | 61 | 62 | # ### DataFrame.count() 63 | # 64 | # This method returns the number of non-null observations over the requested observations. 65 | 66 | # In[622]: 67 | 68 | 69 | print (infy.count()) 70 | 71 | 72 | # If you want to know, the number of non-null observations in a particular column then below is how you do it. 73 | 74 | # In[623]: 75 | 76 | 77 | print (infy["Close Price"].count()) 78 | 79 | 80 | # ### DataFrame.min() 81 | # 82 | # This method returns the minimum value over the requested observations. 83 | 84 | # In[624]: 85 | 86 | 87 | print(infy["Close Price"].min()) 88 | 89 | 90 | # ### DataFrame.max() 91 | # 92 | # This method returns the maximum value over the requested observations. 93 | 94 | # In[625]: 95 | 96 | 97 | print(infy["Close Price"].max()) 98 | 99 | 100 | # ### DataFrame.mean() 101 | # 102 | # This method returns the mean of the requested observations. 103 | 104 | # In[626]: 105 | 106 | 107 | print(infy["Close Price"].mean()) 108 | 109 | 110 | # ### DataFrame.median() 111 | # 112 | # This method returns the median of the requested observations. 113 | 114 | # In[627]: 115 | 116 | 117 | print(infy["Close Price"].median()) 118 | 119 | 120 | # ### DataFrame.mode() 121 | # 122 | # This method returns the mode of the requested observations. 123 | 124 | # In[628]: 125 | 126 | 127 | print(infy["Close Price"].mode()) # The "Close Price" series of infosys stock is multi-modal 128 | 129 | 130 | # ### DataFrame.sum() 131 | # 132 | # This method returns the sum of all the values of the requested observations. 133 | 134 | # In[629]: 135 | 136 | 137 | print (infy["Total Traded Quantity"].sum())# If someone just wants to know the sheer amount of Infosys stocks traded over 2 years 138 | 139 | 140 | # ### DataFrame.diff() 141 | # 142 | # This method returns the 'difference' between the current observation and the previous observation. 143 | 144 | # In[630]: 145 | 146 | 147 | print (infy["Close Price"].diff()) 148 | 149 | 150 | # ### DataFrame.pct_change()] 151 | # 152 | # This method returns the percentage change of the current observation with the previous observation. 153 | 154 | # In[631]: 155 | 156 | 157 | print (infy["Close Price"].pct_change()) 158 | 159 | 160 | # Visualising this, give us a generic inference about the daily price fluctuation in the closing price of Infosys stock. 161 | 162 | # In[632]: 163 | 164 | 165 | import matplotlib.pyplot as plt 166 | get_ipython().magic(u'matplotlib inline') 167 | 168 | plt.figure(figsize = (20,10)) 169 | plt.ylabel('Daily returns of Infosys') 170 | infy["Close Price"].pct_change().plot() 171 | plt.show() 172 | 173 | 174 | # ### DataFrame.var() 175 | # 176 | # This method returns of the variance of the requested observations. 177 | 178 | # In[633]: 179 | 180 | 181 | print (infy["Close Price"].var()) 182 | 183 | 184 | # ### DataFrame.std() 185 | # 186 | # This method returns the standard deviation of the requested observations. 
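# The cell below applies .std() to raw closing prices. In trading work, the standard deviation of daily returns (volatility) is often more informative; a minimal sketch, assuming the same 'infy' frame and roughly 252 trading days per year:

import numpy as np

daily_returns = infy["Close Price"].pct_change()            # daily percentage returns
annualised_volatility = daily_returns.std() * np.sqrt(252)  # scale the daily standard deviation to an annual figure

print (annualised_volatility)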
187 | 188 | # In[634]: 189 | 190 | 191 | print (infy["Close Price"].std()) 192 | 193 | 194 | # ### DataFrame.rolling(window=).mean() 195 | # 196 | # This method helps us to calculate the moving average of the observations. 197 | 198 | # In[635]: 199 | 200 | 201 | print (infy["Close Price"].rolling(window = 20).mean()) # The moving average window is 20 in this case 202 | 203 | 204 | # A moving average of the Close price with window = 20, smoothens the closing price data. You may have a look at it. We have plotted the daily Closing Price of Infosys and Moving Average (window = 20) of the daily Closing Price of Infosys against time. 205 | 206 | # In[636]: 207 | 208 | 209 | import matplotlib.pyplot as plt 210 | get_ipython().magic(u'matplotlib inline') 211 | 212 | plt.figure(figsize = (20,10)) 213 | plt.ylabel('Closing Price') 214 | 215 | infy["Close Price"].rolling(window = 20).mean().plot() 216 | infy["Close Price"].plot() 217 | plt.show() 218 | 219 | 220 | # ### DataFrame.expanding(min_periods=).mean() 221 | # 222 | # This method returns the 'expanding' mean of the requested observations. 223 | # 224 | # A common alternative to rolling mean is to use an expanding window mean, which returns the value of the mean with all the observations avaliable up to that point in time. 225 | 226 | # In[637]: 227 | 228 | 229 | print (infy["Close Price"].expanding(min_periods = 20).mean()) 230 | 231 | 232 | # You may visualise expanding mean with the below code. 233 | 234 | # In[638]: 235 | 236 | 237 | import matplotlib.pyplot as plt 238 | get_ipython().magic(u'matplotlib inline') 239 | 240 | plt.figure(figsize = (20,10)) 241 | plt.ylabel('Daily returns of Infosys') 242 | 243 | infy["Close Price"].expanding(min_periods = 20).mean().plot() 244 | infy["Close Price"].plot() 245 | plt.show() 246 | 247 | 248 | # Let us import another stock's data. We have saved the TCS (Tata Consultancy Services) data in our local machine as 'tcs.csv'. 249 | 250 | # In[639]: 251 | 252 | 253 | import numpy as np 254 | import pandas as pd 255 | 256 | tcs = pd.read_csv ('C:/Users/academy/Desktop/tcs.csv') 257 | 258 | tcs.head() 259 | 260 | 261 | # In[640]: 262 | 263 | 264 | tcs.tail() 265 | 266 | 267 | # In[641]: 268 | 269 | 270 | tcs["Close Price"].count() 271 | 272 | 273 | # ### DataFrame.cov() 274 | # 275 | # This method returns the covariance between the closing price of the Infosys stock with the closing price of the TCS stock. 276 | 277 | # In[642]: 278 | 279 | 280 | print (infy["Close Price"].cov(tcs["Close Price"])) 281 | 282 | 283 | # ### DataFrame.corr() 284 | # 285 | # This method returns the correlation between the closing price of the infosys stock with the closing price of the TCS stock. 286 | 287 | # In[643]: 288 | 289 | 290 | print (infy["Close Price"].corr(tcs["Close Price"])) 291 | 292 | 293 | # A correlation of 0.53 indicates a quite strong correlation between these two stocks. 294 | 295 | # ### DataFrame.kurt() 296 | # 297 | # This method returns unbiased kurtosis over the requested data set using the Fisher's definition of kurtosis (where kurtosis of normal distribution = 0). 298 | 299 | # In[644]: 300 | 301 | 302 | print (tcs["Close Price"].kurt()) 303 | 304 | 305 | # A positive kurtosis value indicates a leptokurtic distribution. 306 | 307 | # In[645]: 308 | 309 | 310 | print (infy["Close Price"].kurt()) 311 | 312 | 313 | # A negative kurtosis value indicates a platykurtic distribution. 314 | 315 | # ### DataFrame.skew() 316 | # 317 | # This method unbiased skew over the requested data set. 
318 | 319 | # In[646]: 320 | 321 | 322 | print (tcs["Close Price"].skew()) 323 | 324 | 325 | # The distribution is positively skewed. 326 | 327 | # In[647]: 328 | 329 | 330 | print (infy["Close Price"].skew()) 331 | 332 | 333 | # The distribution is positively skewed. However, TCS' distribution is more positively skewed than Infosys' distribution. 334 | 335 | # Let us visualise both distributions and see whether the statements above make sense. 336 | 337 | # In[648]: 338 | 339 | 340 | # Infosys Distribution 341 | 342 | import seaborn as sns 343 | 344 | sns.set(color_codes = True) 345 | 346 | sns.distplot(infy["Close Price"]); 347 | 348 | 349 | # In the above diagram, you can see why the Infosys close price distribution is platykurtic and positively skewed. 350 | 351 | # In[649]: 352 | 353 | 354 | # TCS Distribution 355 | 356 | import seaborn as sns 357 | 358 | sns.set(color_codes = True) 359 | 360 | sns.distplot(tcs["Close Price"]); 361 | 362 | 363 | # In the above diagram, you can see why the TCS close price distribution is leptokurtic and positively skewed.<br>
364 | #
365 | # A trained eye is statistics will also be able to see that the TCS stock closing prices are more positively skewed than the Infosys stock closing prices. 366 | # 367 | 368 | # ### In the upcoming iPython Notebook: 369 | # 370 | # We will continue understanding about Pandas: Grouping and Reshaping. 371 | -------------------------------------------------------------------------------- /Section-8/Pandas - Indexing, Reindexing & Missing values .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # In this notebook, we will have a look at the different descripive statistical functions available in Python. 11 | # 12 | # ## Notebook Contents 13 | # 14 | # ##### 1. Indexing using .loc() 15 | # ##### 2. Indexing using .iloc() 16 | # ##### 3. Indexing using .ix() 17 | # ##### 4. Missing Values 18 | # ##### 5. Data Frame.isnull() 19 | # ##### 6. Data Frame.notnull() 20 | # ##### 7. DataFrame.fillna() 21 | # ##### 8. DataFrame.dropna() 22 | # ##### 9. Replacing values 23 | # ##### 10. Reindexing 24 | 25 | # # Loading and Viewing Data 26 | 27 | # Before we start, let us import OHLC time series data of Infosys stock for only 'two weeks'. With a smaller data frame, understanding 'Indexing' would be more intutive. 28 | 29 | # In[31]: 30 | 31 | 32 | # Loading and Viewing data 33 | 34 | import numpy as np 35 | import pandas as pd 36 | 37 | infy = pd.read_csv ('infy_twoweeks.csv') 38 | 39 | 40 | # In[32]: 41 | 42 | 43 | infy # This is the entire 'Infosys two weeks' time series data frame. 44 | 45 | 46 | # In[33]: 47 | 48 | 49 | infy.shape # This data frame has 10 rows and 12 columns 50 | 51 | 52 | # ## Indexing 53 | # 54 | # Indexing provides us with the axis labelling information in pandas. Further, it helps us to identify the exact position of data, which is important while analysing data.
55 | #
56 | # While sudying indexing, we will also focus on how to slice and dice the data according to our needs in a Data Frame. 57 | 58 | # ## Indexing using .loc() 59 | # 60 | # It is a 'label-location' based indexer for selection of data points. 61 | 62 | # In[34]: 63 | 64 | 65 | # Using .loc() 66 | 67 | #import the pandas library and aliasing as pd 68 | 69 | import pandas as pd 70 | import numpy as np 71 | 72 | #select all rows for a specific column 73 | 74 | print (infy.loc[:,'Close Price']) 75 | 76 | 77 | # In[35]: 78 | 79 | 80 | # Select all the rows of these specific columns 81 | 82 | print (infy.loc[:, ['Close Price','Open Price']]) 83 | 84 | 85 | # In[36]: 86 | 87 | 88 | # Select the first five rows of the specific columns 89 | 90 | # Remember that the '.loc()' method INCLUDES the rows and columns in its stop argument. 91 | 92 | # Observe that '0:4' will include 5 rows from index 0 to 4 93 | 94 | # The loc indexer takes the row arguments first and the column arguments second. 95 | 96 | print (infy.loc[:4,['Close Price','Open Price']]) 97 | 98 | 99 | # In[37]: 100 | 101 | 102 | # Select the rows 2 to 7 of all the columns from the data frame 103 | 104 | print (infy.loc[2:7]) 105 | 106 | 107 | # In[38]: 108 | 109 | 110 | # Select the rows and columns specified 111 | 112 | print (infy.loc[[0,1,2,3,4,5],['Open Price', 'High Price', 'Low Price', 'Close Price']]) 113 | 114 | 115 | # In[39]: 116 | 117 | 118 | # To check if the fifth row's values are greater than 1130. 119 | 120 | print (infy.loc[4]>1130) 121 | 122 | 123 | # ## Indexing using .iloc() 124 | # 125 | # Another way to perform indexing is using the 'iloc()' method. 126 | 127 | # In[40]: 128 | 129 | 130 | # Using .iloc() 131 | 132 | # Select the first four rows of all the columns 133 | 134 | # Remember that the '.loc()' method DOES NOT include the rows and columns in its stop argument 135 | 136 | # Observe that '0:4' will include 4 rows from index 0 to 3 137 | 138 | print (infy.iloc[:4]) 139 | 140 | 141 | # In[41]: 142 | 143 | 144 | # Let us play more with the indexes of both rows and columns 145 | 146 | # Select the rows from index 1 to index 4 (4 rows in total) and Columns with index from 2 to 3 (2 columns) 147 | 148 | # .iloc() is similar to numpy array indexing 149 | 150 | # iloc is extremely useful when your data is not labelled and you need to refer to columns using their integer location instead 151 | 152 | print (infy.iloc[1:5, 2:4]) 153 | 154 | 155 | # In[42]: 156 | 157 | 158 | # Selecting the exact requested columns 159 | 160 | print (infy.iloc[[1, 3, 5,7], [1, 3, 5, 7, 9]]) 161 | 162 | 163 | # In[43]: 164 | 165 | 166 | # Selecting the first two rows and all the columns 167 | 168 | print (infy.iloc[1:3, :]) 169 | 170 | 171 | # In[44]: 172 | 173 | 174 | print (infy.iloc[:,1:3]) 175 | 176 | 177 | # ## Indexing using .ix() 178 | # 179 | # Another way to perform indexing is using the 'ix()' method. 180 | # 181 | # ##### ix indexer has been depricated in the latest version of pandas, but we having discussed it just for your information 182 | # 183 | 184 | # In[45]: 185 | 186 | 187 | # Using .ix() 188 | 189 | # Remember that the '.ix()' method INCLUDES the rows and columns in its stop argument. 
190 | 191 | # Observe that '0:4' will include 5 rows from index 0 to 4 192 | 193 | # We are selecting the first five rows and all the columns of our data frame 194 | 195 | print (infy.ix[:4]) 196 | 197 | 198 | # In[46]: 199 | 200 | 201 | # Select rows from index 2 to index 5, only of the 'Close Price' Column 202 | 203 | print (infy.ix[2:5,'Close Price']) 204 | 205 | 206 | # In[47]: 207 | 208 | 209 | # You will be able to understand this by now! 210 | 211 | print (infy.ix[2:5, 4:9]) 212 | 213 | 214 | # In[48]: 215 | 216 | 217 | # Just some revision for choosing columns in a data frame, since it is important 218 | 219 | # Choosing a specific column from a data frame 220 | 221 | print (infy['Close Price']) 222 | 223 | 224 | # In[49]: 225 | 226 | 227 | # Choosing multiple columns from a data Frame 228 | 229 | print (infy[['Open Price', 'High Price', 'Low Price', 'Close Price']]) 230 | 231 | 232 | # ## Missing Values 233 | # 234 | # Missing values are values that are absent from the data frame. Usually, all the data frames that you would work on, would be large and there will be a case of 'missing values' in most of them.
235 | #
236 | # Hence, it becomes important for you to learn how to handle these missing values. 237 | 238 | # In[51]: 239 | 240 | 241 | # We have deliberately created 'missing values' in the same 'Infosys two weeks' data which you have used above. 242 | 243 | # Have a look at the entire data frame 244 | 245 | import numpy as np 246 | import pandas as pd 247 | 248 | infy = pd.read_csv ('infy_twoweeks_nan.csv') 249 | 250 | infy 251 | 252 | 253 | # ## DataFrame.isnull() 254 | # 255 | # This method returns a Boolean result.
256 | #
257 | # It will return 'True' if the data point has a 'NaN' (Not a Number) value. Missing data is represented by a NaN value. 258 | 259 | # In[52]: 260 | 261 | 262 | # Understanding the 'NaN' values of the 'Close Price' column in the infy data frame 263 | 264 | print (infy['Close Price'].isnull()) 265 | 266 | 267 | # In[53]: 268 | 269 | 270 | # Understanding the 'NaN' values of the entire data frame 271 | 272 | print (infy.isnull()) 273 | 274 | 275 | # ## DataFrame.notnull() 276 | # 277 | # This method returns a Boolean result.
278 | #
279 | # It will return 'Flase' if the data point is not a 'NaN' (Not a Number) value. Missing data is represented by a NaN value. 280 | 281 | # In[54]: 282 | 283 | 284 | print (infy['Close Price'].notnull()) 285 | 286 | 287 | # ## DataFrame.fillna() 288 | # 289 | # The .fillna() method will fill all the 'NaN' Values of the entire data frame or of the requested columns with a scalar value of your choice. 290 | 291 | # In[55]: 292 | 293 | 294 | # Replace NaN with a Scalar Value of 1000 295 | 296 | print (infy.fillna(1000)) 297 | 298 | 299 | # In[56]: 300 | 301 | 302 | # This will fill the 'Close Price' column with the scalar value of 5 303 | 304 | print (infy['Close Price'].fillna(5)) 305 | 306 | 307 | # In[57]: 308 | 309 | 310 | # If we want to do 'fillna()' using the 'backfill' method, then backfill will the take the value from the next row 311 | # and fill the NaN value with that same value 312 | 313 | print (infy['Close Price']) 314 | 315 | print (infy['Close Price'].fillna(method='backfill')) 316 | 317 | 318 | # In[58]: 319 | 320 | 321 | # It is even possible to do it for the entire data frame with the 'backfill' values 322 | 323 | print (infy.fillna(method='backfill')) 324 | 325 | 326 | # In[59]: 327 | 328 | 329 | # 'bfill' does the same thing as 'backfill' 330 | 331 | print (infy['Close Price']) 332 | 333 | print (infy['Close Price'].fillna(method='bfill')) 334 | 335 | 336 | # In[60]: 337 | 338 | 339 | # If we want to do 'fillna()' using the 'ffill' method, then ffill will the take the value from the previous row.. 340 | # ..and fill the NaN value with that same value 341 | 342 | print (infy['Close Price']) 343 | 344 | print (infy['Close Price'].fillna(method='ffill')) 345 | 346 | 347 | # In[61]: 348 | 349 | 350 | # 'pad' does the same thing as 'ffill' 351 | 352 | print (infy['Close Price']) 353 | 354 | print (infy['Close Price'].fillna(method='pad')) 355 | 356 | 357 | # ## DataFrame.dropna() 358 | # 359 | # This method will drop the entire 'row' or 'column' which has even a single 'NaN' value present, as per the request. 360 | 361 | # In[62]: 362 | 363 | 364 | # By default, dropna() will exclude or drop all the rows which has even one NaN value in it 365 | 366 | print (infy.dropna()) 367 | 368 | 369 | # In[63]: 370 | 371 | 372 | # If we specify the axis = 1, it will exclude or drop all the columns which has even one NaN value in it 373 | 374 | print (infy.dropna(axis=1)) 375 | 376 | 377 | # ## Replacing values 378 | # 379 | # Replacing helps us to select any data point in the entire data frame and replace it with the value of our choice. 380 | 381 | # In[64]: 382 | 383 | 384 | # Replace Missing (or) Generic Values 385 | 386 | import pandas as pd 387 | import numpy as np 388 | 389 | # Let us do this a bit differently. We will create a Data Frame using the 'pd.DataFrame' constructor 390 | 391 | df = pd.DataFrame({'one':[10,20,30,40,50,2000],'two':[1000,0,30,40,50,60]}) 392 | 393 | print (df) 394 | 395 | 396 | # In[65]: 397 | 398 | 399 | # .replace() will first find the value which you want to replace and replace it the value you have given. 
400 | 401 | # e.g.: in the call below, '1000' is the value it will find and '10' is the value it will be replaced with 402 | 403 | print (df.replace({1000:10,2000:60})) 404 | 405 | 406 | # In[66]: 407 | 408 | 409 | print (infy['Close Price']) 410 | 411 | 412 | # In[67]: 413 | 414 | 415 | # This should be self-explanatory 416 | 417 | print (infy['Close Price'].replace({1147.55:3000})) 418 | 419 | 420 | # In[68]: 421 | 422 | 423 | print (infy['Close Price'].replace({np.nan:1000000})) # A bare 'NaN' name is not defined in Python; NaN values must be referred to as np.nan (or handled with fillna()) 424 | 425 | 426 | # ## Reindexing 427 | 428 | # Reindexing changes the row labels and column labels of a DataFrame.<br>
429 | #
430 | # To reindex means to conform the data to match a given set of labels along a particular axis. 431 | 432 | # In[69]: 433 | 434 | 435 | import pandas as pd 436 | import numpy as np 437 | 438 | print (infy) 439 | 440 | 441 | # In[70]: 442 | 443 | 444 | # Here we have changed the shape of data frame by using reindexing 445 | 446 | infy_reindexed = infy.reindex(index = [0,2,4,6,8], columns = ['Open Price', 'High Price', 'Low Price','Close Price']) 447 | 448 | print (infy_reindexed) 449 | 450 | -------------------------------------------------------------------------------- /Section-7/Numpy Introduction to arrays.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### Notebook Instructions 5 | # 6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]: 7 | # 8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area. 9 | 10 | # # NumPy 11 | # 12 | # NumPy is an acronym for "Numeric Python" or "Numerical Python". 13 | # 14 | # NumPy is the fundamental package for scientific computing with Python. It is an open source extension module for Python. 15 | # 16 | # 1. A powerful N-dimensional array object 17 | # 2. Sophisticated (broadcasting) functions 18 | # 3. Useful linear algebra, Fourier transform, and random number capabilities 19 | # 4. Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data 20 | # 5. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of database 21 | # 22 | # Source : numpy.org 23 | 24 | # # Notebook Contents 25 | # 26 | # ##### 1. A simple numpy array example 27 | # ##### 2. Functions to create an array 28 | # ##### 3. Dimensionality of an array 29 | # ##### 4. Shape of an array 30 | # ##### 5. Just for fun 31 | 32 | # ## A simple numpy array example 33 | # 34 | # We will create two arrays SV and S_V 35 | # - Using Lists 36 | # - Using Tuples 37 | 38 | # In[4]: 39 | 40 | 41 | # We will first import the 'numpy' module 42 | 43 | import numpy as np 44 | 45 | 46 | # In[5]: 47 | 48 | 49 | stock_values = [20.3, 25.3, 22.7, 19.0, 18.5, 21.2, 24.5, 26.6, 23.2, 21.2] # This is a list 50 | 51 | 52 | # In[6]: 53 | 54 | 55 | # Converting list into array 56 | 57 | SV = np.array(stock_values) 58 | 59 | print (SV) 60 | 61 | 62 | # In[7]: 63 | 64 | 65 | type (SV) # Understanding the data type of 'SV' 66 | 67 | 68 | # In[8]: 69 | 70 | 71 | stockvalues = (20.3, 25.3, 22.7, 19.0, 18.5, 21.2, 24.5, 26.6, 23.2, 21.2) # This is a tuple 72 | 73 | # Converting tuple into array 74 | 75 | S_V = np.array(stockvalues) 76 | 77 | print (S_V) 78 | 79 | 80 | # In[9]: 81 | 82 | 83 | type(S_V) # Understanding the data type of 'S_V' 84 | 85 | 86 | # ## Functions to create arrays quickly 87 | # 88 | # The above discussed methods to create arrays require us to manually input the data points. To automatically create data points for an array we use these functions: 89 | # - **arange** 90 | # - **linspace** 91 | # 92 | # Both these functions create data points lying between two end points, starting and ending, so that they are evenly distributed. 
For example, we can create 50 data points lying between 1 and 10. 93 | # 94 | 95 | # ### arange 96 | # 97 | # Numpy.arange returns evenly spaced arrays by using a 'given' step or interval by the user. 98 | 99 | # Syntax: 100 | # #### arange ([start], [stop], [step], [dtype=None]) 101 | # 102 | # The 'start and the 'stop' determines the range of the array. 'Step' determines the spacing between two adjacent values. The datatype of the output array can be determined by setting the parameter 'dtype'. 103 | 104 | # In[10]: 105 | 106 | 107 | # If the start parameter is not given, it will be set to 0 108 | 109 | # '10' is the stop parameter 110 | 111 | # The default interval for a step is '1' 112 | 113 | # If the 'dtype' is not given, then it will be automatically inferred from the other input arguments 114 | 115 | a = np.arange (10) # Syntax a = np.arange (0,10,1,None) 116 | print (a) 117 | 118 | 119 | # In[11]: 120 | 121 | 122 | # Here the range is '1 to 15'. It will include 1 and exclude 15 123 | 124 | b = np.arange (1,15) 125 | print (b) 126 | 127 | 128 | # In[12]: 129 | 130 | 131 | # We have changed the 'step' or spacing between two adjacent values, from a default 1, to a user given value of 2 132 | 133 | c = np.arange (0,21,2) 134 | print (c) 135 | 136 | 137 | # In[13]: 138 | 139 | 140 | # Even though our input arguments are of the datatype 'float', it will return an 'int' array 141 | # Since we have set the 'dtype' parameter as 'int' 142 | 143 | d = np.arange (1.3,23.3,2.1,int) 144 | print (d) 145 | 146 | 147 | # #### Try on your own 148 | 149 | # In[14]: 150 | 151 | 152 | # You may now be able to understand this example, all by yourself 153 | 154 | e = np.arange (1.4, 23.6, 1, float) 155 | print (e) 156 | 157 | 158 | # ### linspace 159 | 160 | # Numpy.linspace also returns an evenly spaced array but needs the 'number of array elements' as an input from the user and creates the distance automatically. 161 | 162 | # Syntax: 163 | # #### linspace(start, stop, num=50, endpoint=True, retstep=False) 164 | # 165 | # The 'start and the 'stop' determines the range of the array. 'num' determines the number of elements in the array. If the 'endpoint' is True, it will include the stop value and if it is false, the array will exclude the stop value. 166 | # 167 | # If the optional parameter 'retstep' is set, the function will return the value of the spacing between adjacent values. 
168 | 169 | # In[15]: 170 | 171 | 172 | # By default, since the 'num' is not given, it will divide the range into 50 individual array elements 173 | 174 | # By default, it even includes the 'endpoint' of the range, since it is set to True by default 175 | 176 | a = np.linspace (1,10) 177 | print (a) 178 | 179 | 180 | # In[16]: 181 | 182 | 183 | # This time around, we have specified that we want the range of 1 - 10 to be divided into 8 individual array elements 184 | 185 | b = np.linspace (1,10,8) 186 | print (b) 187 | 188 | 189 | # In[17]: 190 | 191 | 192 | # In this line, we have specified not to include the end point of the range 193 | 194 | c = np.linspace (1,10,8,False) 195 | print (c) 196 | 197 | 198 | # In[18]: 199 | 200 | 201 | # In this line, we have specified 'retstep' as true, the function will return the value of the spacing between adjacent values 202 | 203 | d = np.linspace (1,10,8,True,True) 204 | print (d) 205 | 206 | 207 | # #### Try on your own 208 | 209 | # In[19]: 210 | 211 | 212 | # This line should be self-explanatory 213 | 214 | e = np.linspace(1,10,10,True,True) 215 | print (e) 216 | 217 | 218 | # ## Dimensionality of Arrays 219 | 220 | # ### Zero Dimensional Arrays or Scalars 221 | 222 | # What we encountered in the above examples are all 'one dimensional arrays', also known as 'vectors'. "Scalars' are zero-dimensional arrays, with a maximum of one element in it. 223 | 224 | # In[20]: 225 | 226 | 227 | # Creating a 'scalar' 228 | 229 | a = np.array (50) #Should have only 1 element, at the maximum! 230 | 231 | print ("a:", a) 232 | 233 | 234 | # In[21]: 235 | 236 | 237 | # To print the dimension of any array, we use 'np.dim' method 238 | 239 | print ("The dimension of array 'a' is", np.ndim (a)) 240 | 241 | 242 | # In[22]: 243 | 244 | 245 | # To know the datatype of the array 246 | 247 | print ("The datatype of array 'a' is", a.dtype) 248 | 249 | 250 | # In[23]: 251 | 252 | 253 | # Combining it all together 254 | 255 | scalar_array = np.array("one_element") 256 | print (scalar_array, np.ndim (scalar_array), scalar_array.dtype) 257 | 258 | 259 | # ## One Dimensional Arrays 260 | # 261 | # One dimensional arrays, are arrays with minimum of two elements in it in a single row. 262 | 263 | # In[43]: 264 | 265 | 266 | one_d_array = np.array(["one_element", "second_element"]) 267 | 268 | print (one_d_array, np.ndim(one_d_array), one_d_array.dtype) 269 | 270 | 271 | # In[44]: 272 | 273 | 274 | # We have already worked with one-dimensional arrays. Let us revise what we did so far! 275 | 276 | a = np.array([1, 1, 2, 3, 5, 8, 13, 21]) # Fibonnacci series 277 | b = np.array([4.4, 6.6, 8.8, 10.1, 12.12]) 278 | 279 | print("a: ", a) 280 | print("b: ", b) 281 | 282 | print("Type of 'a': ", a.dtype) 283 | print("Type of 'b': ", b.dtype) 284 | 285 | print("Dimension of 'a':", np.ndim(a)) 286 | print("Dimension of 'b':", np.ndim(b)) 287 | 288 | 289 | # ## Two Dimensional 290 | # 291 | # Two-dimensional arrays have more than one row and more than one column. 292 | 293 | # In[45]: 294 | 295 | 296 | # The elements of the 2D arrays are stored as 'rows' and 'columns' 297 | 298 | two_d_array = np.array([ ["row1col1", "row1col2", "row1col3"], 299 | ["row2col1", "row2col2", "row2col3"]]) 300 | 301 | print(two_d_array) 302 | 303 | print("Dimension of 'two_d_array' :", np.ndim (two_d_array)) 304 | 305 | 306 | # In[46]: 307 | 308 | 309 | # Another example of a data table! 310 | # You can see how working with numpy arrays will help us working with dataframes further on! 
311 | 312 | studentdata = np.array([ ["Name", "Year", "Marks"], 313 | ["Bela", 2014, 78.2], 314 | ["Joe", 1987, 59.1], 315 | ["Sugar", 1990, 70]]) 316 | 317 | print(studentdata) 318 | 319 | print("Dimension of 'studentdata' :", np.ndim (studentdata)) 320 | 321 | 322 | # Even though Year and Marks are non-strings, they are by default ... so I can't perform any operations on these values. 323 | # 324 | # That is where dataframe, which we will study in next section, becomes powerful 2-d data structures to be used. 325 | # 326 | # For example: 327 | 328 | # In[41]: 329 | 330 | 331 | # Example when we save this data as a dataframe and not as a numpy array. 332 | 333 | import numpy as np 334 | import pandas as pd 335 | 336 | studentdata1 = { 337 | "Name": ["Bela", "Joe", "Sugar"], 338 | "Year": [2014, 1987, 1990], 339 | "Marks": [78.2, 59.1, 70] 340 | } 341 | 342 | studentdata1_df = pd.DataFrame (studentdata1) 343 | print (studentdata1_df) 344 | print(np.mean(studentdata1_df.Marks)) 345 | 346 | # Now we are able to find average of Marks of these three students. 347 | 348 | 349 | # In[29]: 350 | 351 | 352 | # The elements of the 2D arrays are stored as 'rows' and 'columns' 353 | 354 | a = np.array([ [1.8, 2.4, 5.3, 8.2], 355 | [7.8, 5.1, 9.2, 17.13], 356 | [6.1, -2.13, -6.3, -9.1]]) 357 | print(a) 358 | print("Dimension of 'a' :", np.ndim (a)) 359 | 360 | # In this array we have 3 rows and 4 columns 361 | 362 | 363 | # In[30]: 364 | 365 | 366 | # A 3D array is an 'array of arrays'. Have a quick look at it 367 | 368 | b = np.array([ [[111, 222], [333, 444]], 369 | [[121, 212], [221, 222]], 370 | [[555, 560], [565, 570]] ]) 371 | 372 | print(b) 373 | print("Dimension of 'b' :", np.ndim (b)) 374 | 375 | # In this array, there are three, 2-D arrays 376 | 377 | 378 | # ## Shape of an array 379 | # 380 | # **What it is:** Ths shape of an array returns the number of rows (axis = 0) and the number of columns (axis = 1) 381 | # 382 | # **Why is it important to understand:** It helps you to understand the number of rows and columns in an array 383 | # 384 | # **How is it different from Dimensions:** It is not that different from dimensions, just that functions called are different. 385 | 386 | # In[31]: 387 | 388 | 389 | a = np.array([ [11, 22, 33], 390 | [12, 24, 36], 391 | [13, 26, 39], 392 | [14, 28, 42], 393 | [15, 30, 45], 394 | [16, 32, 48]]) 395 | 396 | print (a) 397 | 398 | 399 | # In[32]: 400 | 401 | 402 | print(a.shape) 403 | 404 | 405 | # We can even change the shape of the array. 406 | 407 | # In[33]: 408 | 409 | 410 | a.shape = (9,2) 411 | print (a) 412 | 413 | 414 | # You might have guessed by now that the new shape must correspond to the number of elements of the array, i.e. the total size of the new array must be the same as the old one. We will raise an exception, if this is not the case. 415 | 416 | # In[37]: 417 | 418 | 419 | # Shape of a 1 dimension array or scalar 420 | 421 | a = np.array(165416113) 422 | print(np.shape(a)) 423 | 424 | 425 | # ### In the upcoming iPython Notebook: 426 | # 427 | # We will continue understanding arrays and learning about Array indexing, Array Slicing and Arrays of Zeros and Ones, but before that let us solve some Quiz quesions and Exercises. 428 | --------------------------------------------------------------------------------