├── README.md
├── Section-5
├── Importing data from local machine.py
├── 3 D Plotting.py
├── Importing data from web sources.py
├── Candle Sticks .py
└── 2 D plotting.py
├── Section-2
├── Modules.py
└── My First Python code.py
├── Section-4
├── Dictionary.py
├── Lists.py
├── Stacks, Queues, Graphs, Trees.py
└── Tuples and Sets.py
├── Section-6
├── Lambda.py
└── Function.py
├── Section-3
└── Expressions .py
├── Section-9
├── Conditional Statement .py
└── Loops.py
├── Section-8
├── Pandas - Series .py
├── Pandas - Dataframe & Basic Functionality.py
├── Pandas- Grouping and Reshaping.py
├── Pandas - Descriptive Statistical Functions .py
└── Pandas - Indexing, Reindexing & Missing values .py
├── Section-7
├── Vectorization and Broadcasting in Arrays.py
├── Indexing and Slicing.py
└── Numpy Introduction to arrays.py
└── Pandas- Grouping and Reshaping.py
/README.md:
--------------------------------------------------------------------------------
1 | # python-for-trading-basic
--------------------------------------------------------------------------------
/Section-5/Importing data from local machine.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # # Notebook Instructions
5 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter. While a cell is running, a [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook [8].
6 | #
7 | # Enter edit mode by pressing `Enter` or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
8 |
9 | # ## Pandas.read_csv
10 | #
11 | # The pandas.read_csv() function helps you read Comma Separated Values (CSV) files in Python and converts them into a DataFrame.
12 |
13 | # In[1]:
14 |
15 |
16 | # You have to download the 'Infosys' company's CSV file from www.nseindia.com.
17 |
18 | import numpy as np
19 | import pandas as pd
20 |
21 | infy = pd.read_csv ('infy_data.csv')
22 |
23 | # This code will work only if you have stored 'infy_data.csv' in the same folder where this notebook is saved.
24 |
25 | # If you store it at some other location, then the line of code would have to specify the location.
26 |
27 | # infy = pd.read_csv ('C:/Users/academy/Desktop/infy_data.csv')
28 |
29 |
30 | # In[2]:
31 |
32 |
33 | infy # this is our entire "Infosys" stock data frame
34 |
35 |
36 | # In[3]:
37 |
38 |
39 | infy.head () # You will see the top 5 rows
40 |
41 |
42 | # In[4]:
43 |
44 |
45 | infy.tail () # You will see the bottom 5 rows
46 |
47 |
48 | # The reason why we are studying this separately is that it is important to understand this function. You will be using it the most while building financial trading strategies.
49 | #
50 | # Another reason is, once you download a CSV file, it becomes a stable data source. This is unlike the one you fetch from web data sources.
51 | #
52 | # We will see more of this in the Pandas section of our course.
53 |
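# Before moving on, here is a small optional sketch (not part of the original exercise): read_csv
# can also parse dates and set the index in a single call. The column name 'Date' is an assumption;
# adjust it to match the headers of your downloaded CSV file.

# In[ ]:


infy_indexed = pd.read_csv ('infy_data.csv', index_col = 'Date', parse_dates = True)

infy_indexed.head ()
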
54 | # ### In the upcoming iPython notebook:
55 | #
56 | # We will learn about 2D plotting of financial market data, but before that let us solve an exercise on this.
57 | #
58 | # Happy Learning!
59 |
--------------------------------------------------------------------------------
/Section-5/3 D Plotting.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## 3D plotting
11 | #
12 | # (Optional Read)
13 | #
14 | # We are going to plot a 3 dimensional figure using 3 datasets.
15 | #
16 | # Not many financial data visualisations benefit from 3-D plotting, but one application is the volatility surface, which shows implied volatilities across strikes and maturities simultaneously.
17 | #
18 | # You may just go through the code. It is not a problem if you do not understand it; the motive of this notebook is mainly to demonstrate the power of data visualisation in Python by plotting even 3D figures.
19 |
20 | # In[99]:
21 |
22 |
23 | # Random data creation using the numpy library
24 |
25 | import numpy as np
26 |
27 | strike_price = np.linspace (50,150,25) # Strike values between 50 and 150
28 | time = np.linspace (0.5, 2, 25) # Time to maturity between 0.5 and 2 years
29 |
30 | # The numpy's meshgrid() function helps us to create a rectangular grid out of an array of x values and y values
31 |
32 | strike_price, time = np.meshgrid (strike_price, time)
33 |
34 |
35 | # In[100]:
36 |
37 |
38 | strike_price, time # Printing the meshgrid arrays
39 |
40 |
41 | # In[101]:
42 |
43 |
44 | # generate fake implied volatilities
45 |
46 | implied_volatility = (strike_price - 100) ** 2/ (100 * strike_price)/ time
47 |
48 |
49 | # In[102]:
50 |
51 |
52 | # Plotting a 3D figure
53 |
54 | import matplotlib.pyplot as plt
55 |
56 | # Importing the required packages for 3D plotting
57 | from mpl_toolkits.mplot3d import Axes3D
58 |
59 | fig = plt.figure (figsize = (9,6))
60 |
61 | # If 'fig' is a variable holding a figure, fig.gca() returns the axes associated with the figure.
62 | # With this 3 dimensional axes is enabled
63 | axis = fig.gca (projection = '3d')
64 |
65 | # To plot the surface and passing the required arguments
66 | surface = axis.plot_surface (strike_price, time, implied_volatility, rstride = 1, cstride = 1, cmap = plt.cm.coolwarm, linewidth = 0.5, antialiased = False)
67 |
68 | axis.set_xlabel ('strike')
69 | axis.set_ylabel ('time-to-maturity')
70 | axis.set_zlabel ('implied volatility')
71 |
72 | # Adding a colorbar which maps values to colors
73 | fig.colorbar (surface, shrink = 0.5, aspect=5)
74 |
75 | plt.show()
76 |
77 |
78 | # ### In the upcoming iPython notebook:
79 | #
80 | # We will learn about Candlesticks in Python. Even that is an optional read.
81 | #
82 | # Happy Learning!
83 |
--------------------------------------------------------------------------------
/Section-2/Modules.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Modules
11 | #
12 | # Any file in python which has a .py extension can be a module. A module can consist of arbitrary objects, classes, attributes or functions which can be imported by users.
13 |
14 | # ### Importing Modules
15 | #
16 | # There are different ways to import modules. Let us begin by importing the 'math' module.
17 |
18 | # In[67]:
19 |
20 |
21 | import math
22 |
23 |
24 | # The math module consists of mathematical constants and functions like math.pi, math.sin, math.cos, etc.
25 |
26 | # In[68]:
27 |
28 |
29 | math.pi # The value of pi
30 |
31 |
32 | # In[69]:
33 |
34 |
35 | math.cos (1) # The cosine value of 1
36 |
37 |
38 | # In[70]:
39 |
40 |
41 | math.sin (1) # The sine value of 1
42 |
43 |
44 | # ### The dir () function
45 | #
46 | # The built-in function dir() is used to find out which names (functions, constants, classes, etc.) a module defines. It returns a sorted list of strings.
47 |
48 | # In[71]:
49 |
50 |
51 | dir (math)
52 |
53 |
54 | # If you require only certain objects from the module then:
55 |
56 | # In[72]:
57 |
58 |
59 | from scipy import mean # We will import only the 'mean' object from the 'scipy' package (note: recent SciPy versions have removed this alias; numpy.mean is the equivalent)
60 |
61 |
62 | # In[73]:
63 |
64 |
65 | mean ([1,2,3,4,5]) # This will give arithmetic mean of the numbers
66 |
67 |
68 | # But if we want to find the harmonic mean, we need the 'stats' sub-module. The following cells show the code.
69 |
70 | # In[74]:
71 |
72 |
73 | from scipy import stats
74 |
75 |
76 | # In[75]:
77 |
78 |
79 | stats.hmean ([1,2,3,4,5])
80 |
81 |
82 | # If you need to import all the objects from a module, you may use *
83 |
84 | # In[76]:
85 |
86 |
87 | from numpy import *
88 |
89 |
90 | # In[77]:
91 |
92 |
93 | sin (1)
94 |
95 |
96 | # In[78]:
97 |
98 |
99 | diag([1,5,9,6])
100 |
101 |
102 | # One can even import a module/package under an alias and prefix the alias when using its objects.
103 |
104 | # In[79]:
105 |
106 |
107 | import numpy as np
108 |
109 |
110 | # In[80]:
111 |
112 |
113 | dir (np)
114 |
115 |
116 | # In[81]:
117 |
118 |
119 | np.median([4,5,6,3,4,5,9,8,7,12]) # Will return the median of the number set
120 |
121 |
122 | # In[82]:
123 |
124 |
125 | np.min([4,5,6,3,4,5,9,8,7,12]) # Will return the minimum number of the number set
126 |
127 |
128 | # In[83]:
129 |
130 |
131 | np.max([4,5,6,3,4,5,9,8,7,12]) # Will return the maximum number of the number set
132 |
133 |
134 | # ### Stay tuned for more on python.
135 |
--------------------------------------------------------------------------------
/Section-4/Dictionary.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Dictionaries
11 | #
12 | # A dictionary is generally used for mapping. A dictionary in Python maps 'key' and 'value' pairs. You can access the dictionary using its 'keys' to get the information, or 'values', stored against those 'keys'.
13 | #
14 | #
15 | # ## Creating & Printing Dictionaries
16 | #
17 | # Dictionaries are enclosed in curly braces and the key:value pairs should be separated by commas.
18 |
19 | # In[165]:
20 |
21 |
22 | new_dict = { } # Empty Dictionary
23 |
24 | type (new_dict)
25 |
26 |
27 | # In[166]:
28 |
29 |
30 | # Creating a new dictionary
31 |
32 | new_dict = {'Jack': 2563, 'Rose': 8965, 'Hockley': 7412, 'Fabrizo':9632, 'Molly Brown': 4563}
33 |
34 | type (new_dict)
35 |
36 |
37 | # In[167]:
38 |
39 |
40 | # Printing the dictionary
41 |
42 | print (new_dict)
43 |
44 |
45 | # In[168]:
46 |
47 |
48 | # Printing the value for a particular key
49 |
50 | new_dict ['Jack']
51 |
52 |
53 | # In[169]:
54 |
55 |
56 | # Printing multiple values of various keys
57 |
58 | new_dict ['Rose'], new_dict ['Hockley']
59 |
60 |
61 | # ## Dictionary Manipulations
62 | #
63 | # Let us have a look at the few functions for accessing or manipulating dictionaries.
64 |
65 | # len (x_dict)
66 | # To know the number of key:value pairs in the dictionary.
67 |
68 | # In[170]:
69 |
70 |
71 | print (new_dict)
72 |
73 |
74 | # In[171]:
75 |
76 |
77 | len (new_dict)
78 |
79 |
80 | # x_dict.keys ( )
81 | # Returns all the 'keys' of dictionaries
82 |
83 | # In[172]:
84 |
85 |
86 | new_dict.keys ()
87 |
88 |
89 | # x_dict.values ( )
90 | # Returns all the 'values' of dictionaries
91 |
92 | # In[173]:
93 |
94 |
95 | new_dict.values ()
96 |
97 |
98 | # The del statement
99 | # It is used for deleting a key (and its value) from the dictionary.
100 |
101 | # In[174]:
102 |
103 |
104 | del new_dict ['Hockley']
105 |
106 | print (new_dict)
107 |
108 |
109 | # x_dict.pop (key)
110 | # It removes the required key from the dictionary and returns its 'value'.
111 | #
112 |
113 | # In[175]:
114 |
115 |
116 | new_dict.pop ('Fabrizo')
117 |
118 |
119 | # In[176]:
120 |
121 |
122 | print (new_dict) # Our latest dictionary
123 |
124 |
125 | # sorted (x_dict)
126 | #
127 | # Returns a list of the dictionary's keys in sorted order (the dictionary itself is not modified).
128 |
129 | # In[177]:
130 |
131 |
132 | print (new_dict)
133 |
134 |
135 | # In[178]:
136 |
137 |
138 | sorted (new_dict) # Returns the keys in sorted order
139 |
140 |
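# If you instead want the keys ordered by their values, you can pass the dictionary's get method
# as the sorting key. This is a small optional sketch, not from the video unit.

# In[ ]:


sorted (new_dict, key = new_dict.get) # Keys ordered by their corresponding values
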
141 | # x_dict.clear ()
142 | # Clears all the content of the dictionary
143 |
144 | # In[179]:
145 |
146 |
147 | new_dict.clear ()
148 |
149 | print (new_dict)
150 |
151 |
152 | # ### In the upcoming iPython Notebook
153 | #
154 | # We will see, how 'Tuples' and 'Sets' are used.
155 |
--------------------------------------------------------------------------------
/Section-6/Lambda.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## What is lambda?
11 | #
12 | # The lambda operator is a way to create small anonymous functions i.e. functions without a name.
13 | #
14 | # They are temporary functions i.e. they are needed only where they have been created.
15 | #
16 | # The lambda feature was added in Python due to a high demand from the Lisp programmers (Lisp is a programming language).
17 |
18 | # ## A Simple Lambda Example
19 | #
20 | # The general syntax for Lambda is as follows:
21 | #
21 | # lambda argument_list: expression
22 | # Let us have a look at some of the examples.
23 |
24 | # In[101]:
25 |
26 |
27 | sum = lambda x,y : x + y # Note: this name shadows Python's built-in sum() function
28 |
29 |
30 | # In[102]:
31 |
32 |
33 | sum (2,3)
34 |
35 |
36 | # It is similar to defining a function where x and y are the parameters and x + y is the operation performed in the function body.
37 | #
38 | # You can also observe that calling a lambda is the same as calling a function.
39 |
40 | # In[103]:
41 |
42 |
43 | # Another example
44 |
45 | product = lambda x,y : x * y
46 |
47 |
48 | # In[104]:
49 |
50 |
51 | product (2,3)
52 |
53 |
54 | # In[105]:
55 |
56 |
57 | # One more example
58 |
59 | my_operation = lambda x,y,z : x + y - z
60 |
61 |
62 | # In[106]:
63 |
64 |
65 | my_operation (10,20,30)
66 |
67 |
68 | # ### map ()
69 | #
70 | # One of the most common uses of a lambda is with the map() function.
71 | # map (lambda, sequence of lists)
72 | # map() applies the lambda function to all the elements within the sequence. These elements are generally lists.
73 |
74 | # In[107]:
75 |
76 |
77 | # The lists have to be of the same length to apply the map() function.
78 |
79 | list_1 = [1,2,3,4]
80 |
81 | list_2 = [10,20,30,40]
82 |
83 | list_3 = [100,200,300,400]
84 |
85 |
86 | # In[108]:
87 |
88 |
89 | map (lambda x,y : x + y, list_1, list_2 )
90 |
91 |
92 | # In[109]:
93 |
94 |
95 | map (lambda x,y,z : x + y + z, list_1, list_2, list_3 )
96 |
97 |
98 | # In[110]:
99 |
100 |
101 | map (lambda y,z : y + z, list_2, list_3 )
102 |
103 |
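# Note: under Python 3, map() returns a lazy iterator rather than a list, so the cells above display
# a map object. Wrap the call in list() to see the actual values.

# In[ ]:


list (map (lambda x,y : x + y, list_1, list_2)) # [11, 22, 33, 44]
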
104 | # ### filter ()
105 | # Another common use of a lambda is with the filter() function.
106 | # filter (lambda, list)
107 | # It is an elegant way to filter out the required elements from a list.
108 |
109 | # In[111]:
110 |
111 |
112 | fib = [0,1,1,2,3,5,8,13,21,34,55] # This is a list
113 |
114 |
115 | # In[112]:
116 |
117 |
118 | filter (lambda x: x > 8, fib)
119 |
120 |
121 | # In[113]:
122 |
123 |
124 | filter (lambda x: x < 8, fib)
125 |
126 |
127 | # In[114]:
128 |
129 |
130 | signals = ['Buy','Sell','Sell','Buy','Buy','Sell','Buy'] # This is a list
131 |
132 |
133 | # In[115]:
134 |
135 |
136 | filter (lambda x: x == 'Buy', signals)
137 |
138 |
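# Similarly, under Python 3, filter() returns an iterator; wrap it in list() to view the filtered values.

# In[ ]:


list (filter (lambda x: x == 'Buy', signals)) # ['Buy', 'Buy', 'Buy', 'Buy']
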
139 | # ### In the upcoming iPython Notebooks:
140 | #
141 | # We will understand about the Numpy library, in python.
142 |
--------------------------------------------------------------------------------
/Section-6/Function.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## A Simple User-Defined Function
11 | #
12 | # Let us create a simple mathematical function.
13 | #
14 | # The syntax for constructing a function is:
15 | #
16 | # def function_name (parameter-list):
17 | # Statements, i.e function body
18 | # return a value, if required
19 | #
20 | # Let us create ‘my_function’.
21 | #
22 |
23 | # In[28]:
24 |
25 |
26 | def my_function(x, n):
27 | output = x ** n
28 | return output
29 |
30 |
31 | # This is a simple function which we have created to raise any number 'x' to the power 'n'. Now, whenever we need to perform this particular calculation, all we need to do is call this function and insert the values for ‘x’ and ‘n’. You may have a look at it.
32 |
33 | # In[29]:
34 |
35 |
36 | my_function (10, 2) ## 10 raised to the power 2 = 100
37 |
38 |
39 | # In[30]:
40 |
41 |
42 | my_function (5,3) ## 5 raised to the power 3 = 125
43 |
44 |
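# As a small optional extension (not from the video unit), a parameter can also be given a default
# value, so that the function can be called with or without it.

# In[ ]:


def my_function_default (x, n = 2): # 'n' defaults to 2 when it is not supplied
    return x ** n

my_function_default (10), my_function_default (5, 3) # (100, 125)
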
45 | # ## Bollinger Band Function
46 | #
47 | # This is the function which we discussed in the video unit.
48 |
49 | # In[31]:
50 |
51 |
52 | def Bollinger_Bands (data, n):
53 |
54 |     MA = data['Close'].rolling(window=n).mean() # Calculating the moving average
55 |     # MA = pd.rolling_mean(data['Close'],n) # Equivalent call in older pandas versions (pd.rolling_mean has since been removed)
56 | 
57 |     SD = data['Close'].rolling(window=n).std() # Calculating the standard deviation
58 |     # SD = pd.rolling_std(data['Close'],n) # Equivalent call in older pandas versions (pd.rolling_std has since been removed)
59 |
60 | data['Lower_BB'] = MA - (2 * SD) # Lower Bollinger Band
61 | data['Upper_BB'] = MA + (2 * SD) # Upper Bollinger Band
62 |
63 | return data
64 |
65 |
66 | # In[32]:
67 |
68 |
69 | ## Load and view Nifty data
70 |
71 | import pandas as pd
72 |
73 | nifty = pd.read_csv('nifty_data.csv')
74 | nifty.head()
75 |
76 |
77 |
78 | # In[33]:
79 |
80 |
81 | # Calling Bollinger Bands for 'Nifty' index price data
82 |
83 | n = 21 # We have kept the window of the moving average as 21 days
84 |
85 | nifty_bb = Bollinger_Bands(nifty, n) # Calling the Bollinger Bands function created by us
86 |
87 | nifty_bb.tail()
88 |
89 |
90 | # In[34]:
91 |
92 |
93 | # Plotting the Bollinger Bands for "Nifty' index
94 |
95 | import matplotlib.pyplot as plt
96 | get_ipython().magic(u'matplotlib inline')
97 |
98 | plt.figure(figsize=(20,10))
99 |
100 | plt.plot(nifty_bb.Close)
101 | plt.plot(nifty_bb.Lower_BB)
102 | plt.plot(nifty_bb.Upper_BB)
103 | plt.grid(True)
104 |
105 | plt.show()
106 |
107 |
108 | # In[35]:
109 |
110 |
111 | # Calling Bollinger Bands for 'Infosys' price data
112 |
113 | import pandas as pd
114 |
115 | infy = pd.read_csv ('infy_data_bb.csv') # Loading 'Infosys' stock data
116 |
117 | n = 21 # We have kept the window of the moving average as 21 days
118 |
119 | infy_bb = Bollinger_Bands(infy, n) # Calling the Bollinger Bands function created by us
120 |
121 | infy_bb.tail()
122 |
123 |
124 | # In[36]:
125 |
126 |
127 | # Plotting the Bollinger Bands for "Infosys" stock
128 |
129 | import matplotlib.pyplot as plt
130 | get_ipython().magic(u'matplotlib inline')
131 |
132 | plt.figure(figsize=(20,10))
133 |
134 | plt.plot(infy_bb.Close)
135 | plt.plot(infy_bb.Lower_BB)
136 | plt.plot(infy_bb.Upper_BB)
137 | plt.grid(True)
138 |
139 | plt.show()
140 |
141 |
142 | # ### In the upcoming iPython Notebook:
143 | #
144 | # We will understand the Lambda operator and its relation with functions.
145 |
--------------------------------------------------------------------------------
/Section-3/Expressions .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Expressions
11 | #
12 | # 'Expressions' are generally a combination of numbers, variables and operators.
13 | #
14 | # In this iPython notebook, we will make use of Expressions to understand the TVM concepts.
15 |
16 | # ### Future Value (FV)
17 | #
18 | # What would be the FV, if I have $1000 with me now and I will be investing it for 1 year, at an annual return of 5%?
19 |
20 | # In[52]:
21 |
22 |
23 | PV = 1000
24 | r = 0.05
25 | n = 1
26 |
27 | FV = PV * ((1+r) ** n) # Formula for calculating Future Value
28 |
29 | print (FV)
30 |
31 |
32 | # ### Present Value
33 | #
34 | # What would be the PV, if I have to discount $1050 at 5% annual rate for a period of 1 year?
35 |
36 | # In[53]:
37 |
38 |
39 | FV = 1050
40 | r = 0.05
41 | n = 1
42 |
43 | PV = FV / ((1 + r) ** n) # Formula for calculating Present Value
44 |
45 | print (PV)
46 |
47 |
48 | # ### Compounding
49 | #
50 | # Assume that the 5% annual interest rate bond makes semiannual payments. That is, for an investment of $1000, you will get 25 dollars after the first 6 months and another 25 dollars after 1 year. The annual rate of interest is 5%. What would be the FV, if I hold the bond for 1 year?
51 |
52 | # In[54]:
53 |
54 |
55 | PV = 1000
56 | r = 0.05
57 | n = 2 # number of periods = 2 since bond makes semiannual payments
58 | t = 1 # number of years
59 |
60 | FV = PV * ((1+(r/n)) ** (n*t)) # Formula for compounding
61 |
62 | print (FV)
63 |
64 |
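# A quick optional variation (not part of the original example): the same $1000 at a 5% annual rate,
# but compounded monthly for 1 year. More frequent compounding gives a slightly higher FV.

# In[ ]:


PV = 1000
r = 0.05
n = 12 # number of compounding periods per year
t = 1 # number of years

FV = PV * ((1 + (r/n)) ** (n*t)) # Formula for compounding

print (FV)
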
65 | # ### Annuity Payments
66 | #
67 | # What would be the annual periodic saving amount, if you want a lump sum of $9476.96 at the end of 3 years? The rate of return is 10%.
68 | #
69 | # (This is one of the required calculation from 'PDF : TVM Applications' unit)
70 |
71 | # In[55]:
72 |
73 |
74 | r = 0.1
75 | n = 3
76 | PV = 0
77 | FV = 9476.96
78 |
79 | AP = (FV * r) / (((1 + r) ** n) - 1) # Formula for Annuity payments, given Future Value
80 |
81 | print (AP)
82 |
83 |
84 | # What would be the PV, given a cash outflow of $2500 for a period of 5 years and the rate of return being 10%?
85 | #
86 | # (This is one of the required calculation from 'PDF : TVM Applications' unit)
87 |
88 | # In[56]:
89 |
90 |
91 | r = 0.1
92 | n = 5
93 | AP = 2500
94 |
95 | PV = (AP * (1 - ((1 + r) ** -n))) / r # Formula for PV, given Annuity payments
96 |
97 | print (PV)
98 |
99 |
100 | # What would be the PV, given a cash outflow of $30,000 for a period of 45 years and rate of return being 8%?
101 | #
102 | # (This is one of the required calculation from 'PDF : TVM Applications' unit)
103 |
104 | # In[57]:
105 |
106 |
107 | r = 0.08
108 | n = 45
109 | AP1 = 30000
110 |
111 | PV = (AP1 * (1 - ((1 + r) ** -n))) / r # Formula for PV, given Annuity payments
112 |
113 | print (PV)
114 |
115 |
116 | # What would be the annual saving amount (AP), if you want to save a lump sum of $363252.045 in 25 years and the rate of return being 15%?
117 | #
118 | # (This is one of the required calculation from 'PDF : TVM Applications' unit)
119 |
120 | # In[58]:
121 |
122 |
123 | r = 0.15
124 | n = 25
125 | PV = 0
126 | FV = 363252.045095
127 |
128 | AP = (FV * r) / (((1 + r) ** n) - 1) # Formula to calculate Annuity Payments, given FV
129 |
130 | #AP = (r * PV) / (1 - ((1 + r) ** -n)) # Formula to calculate Annuity Payments, given PV
131 |
132 | print (AP)
133 |
134 |
135 | # These are some of the ways one can use expressions.
136 | #
137 | # ### Stay tuned for more on python.
138 |
--------------------------------------------------------------------------------
/Section-9/Conditional Statement .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## If and elif
11 | #
12 | # We have seen the working of an 'if' statement in the previous video unit. Let us go through it once again.
13 | #
14 | # In python, the syntax for an ‘if' conditional statement is as follows:
15 | #
16 | # if (condition_1):
17 | # statement_block_1
18 | # elif (condition_2):
19 | # statement_block_2
20 | # elif (condition_3):
21 | # statement_block_3
22 | #
23 | # Let us consider an example to understand the working of an 'if' statement.
24 | #
25 |
26 | # In[57]:
27 |
28 |
29 | stock_price_ABC = 299 # Variable value
30 |
31 | if (stock_price_ABC < 300): # if condition_1 is true then...
32 | print ("We will buy 500 shares of ABC") # statement_block_1 will get executed
33 |
34 | elif (stock_price_ABC == 300):
35 | print ("We will buy 200 shares of ABC")
36 |
37 | elif (stock_price_ABC > 300):
38 | print ("We will buy 150 shares of ABC")
39 |
40 |
41 | # If you change the value of the variable 'stock_price_ABC' to...
42 |
43 | # In[58]:
44 |
45 |
46 | stock_price_ABC = 300 # then...
47 |
48 | if (stock_price_ABC < 300):
49 | print ("We will buy 500 shares of ABC")
50 |
51 | elif (stock_price_ABC == 300): # if condition_2 is true then...
52 | print ("We will buy 200 shares of ABC") # statement_block_2 will get executed
53 |
54 | elif (stock_price_ABC > 300):
55 | print ("We will buy 150 shares of ABC")
56 |
57 |
58 | # If you change the value of the variable 'stock_price_ABC' to...
59 |
60 | # In[59]:
61 |
62 |
63 | stock_price_ABC = 301 # then...
64 |
65 | if (stock_price_ABC < 300):
66 | print ("We will buy 500 shares of ABC")
67 |
68 | elif (stock_price_ABC == 300):
69 | print ("We will buy 200 shares of ABC")
70 |
71 | elif (stock_price_ABC > 300): # if condition_3 is true then...
72 | print ("We will buy 150 shares of ABC") # statement_block_3 will get executed
73 |
74 |
75 | # ## If and else
76 | #
77 | # The if-else block of conditional statements is similar to the working of 'if' statements. If the 'if' condition is true, then the statements inside the 'if' block will be executed. If the 'if' condition is false, then the statements inside the 'else' block will be executed.
78 | #
79 | # In python, the syntax for an ‘if else' conditional statement is as follows:
80 | #
81 | # if (condition_1):
82 | # statement_block_1
83 | # else:
84 | # statement_block_2
85 | #
86 | # Let us consider an example to understand the working of an 'if else' statement.
87 | #
88 |
89 | # In[60]:
90 |
91 |
92 | stock_price_ABC = 300
93 |
94 | if (stock_price_ABC > 250): # if condition 1 is true then....
95 | print ("We will sell the stock and book the profit") # this block of code will be executed
96 |
97 | else:
98 | print ("We will keep buying the stock")
99 |
100 |
101 |
102 | # If you change the value of the variable 'stock_price_ABC' to...
103 |
104 | # In[61]:
105 |
106 |
107 | stock_price_ABC = 200 # then...
108 |
109 | if (stock_price_ABC > 250): # if condition 1 is false then....
110 | print ("We will sell the stock and book the profit")
111 |
112 | else:
113 |     print ("We will keep buying the stock") # this block of code will be executed
114 |
115 |
116 | # ### In the upcoming iPython Notebook:
117 | #
118 | # We will understand about Loops.
119 |
--------------------------------------------------------------------------------
/Section-9/Loops.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## For Loop
11 | #
12 | # In programming, there are many situations when you need to execute a block of code several times. A loop statement allows us to execute a statement or a group of statements multiple times.
13 | #
14 | # The general syntax for a ‘for’ loop is as follows:
15 | #
16 | #
17 | # for variable in sequence:
18 | # block of statements
19 | #
20 | #
21 | # Here, the block of statements within the loop will get executed until all the 'sequence' elements get exhausted. Once all sequence elements are exhausted, the program will come out of the loop.
22 | #
23 | #
24 |
25 | # In[6]:
26 |
27 |
28 | # Closing Prices of the ABC Stock over 10 days
29 |
30 | Close_Price_ABC = [300,305,287,298,335,300,297,300,295,310] # Our sequence
31 |
32 | for i in Close_Price_ABC:
33 |
34 | if i < 300:
35 | print ("We Buy")
36 |
37 | if i == 300:
38 | print ("No new positions")
39 |
40 | if i > 300:
41 | print ("We Sell")
42 |
43 | print ("We are now out of the loop")
44 |
45 |
46 | # Here, the output is what was discussed in the previous video unit.
47 | #
48 | # The variable ‘i’ first stores the value ‘300’ in it and runs it through the loop to execute the statements. Here, we have placed a condition that if ‘i == 300’ we will print “No new positions”. Hence, as you can see, this is the first statement in our output.
49 | #
50 | # Now, ‘i’ will run through the sequence and pick the second element of the sequence, which is ‘305’. It will run it through the statements of the loop. When i = 305, it will execute the block 'if i > 300: print ("We Sell")'. Check the second output.
51 | #
52 | # Similarly, it will keep executing all the elements of the loop. Observe the output.
53 | #
54 |
55 | # Let us take another example...
56 |
57 | # In[7]:
58 |
59 |
60 | import numpy as np
61 | import pandas as pd
62 |
63 | infy = pd.read_csv ('infy_twoweeks.csv')
64 | infy
65 |
66 | # We have deliberately taken a smaller dataframe to understand the output.
67 | # You may experiment using bigger data frames to understand the power of the 'for' loop
68 |
69 |
70 | # In[8]:
71 |
72 |
73 | # We will just take the 'Close Price' Column to run the 'for' loop
74 |
75 | for i in range (len(infy)):
76 |
77 | if (infy.iloc[i]["Close Price"] < 1120):
78 | print ("We buy")
79 |
80 |     elif ((infy.iloc[i]["Close Price"] > 1120) and (infy.iloc[i]["Close Price"] < 1150)):
81 | print ("We do nothing")
82 |
83 | elif (infy.iloc[i]["Close Price"] > 1150):
84 | print ("We Sell")
85 |
86 |
87 | # ## While Loop
88 | #
89 | # (Optional Read)
90 | #
91 | # The while construct consists of a condition and block of code.
92 | #
93 | # The general syntax for a ‘while’ loop is as follows:
94 | #
95 | #
96 | # while condition:
97 | # block of statements
98 | #
99 | #
100 | # To begin, the condition is evaluated.
101 | #
102 | # If the condition is true, the 'block of statements' is executed. Every time, the condition is checked before executing the block of statements.
103 | #
104 | # This keeps on repeating until the condition becomes false. Once the condition is false, the program comes out of the loop to execute the other statements.
105 |
106 | # In[9]:
107 |
108 |
109 | a = 0 # variable
110 |
111 | while a <= 10: # this is the condition...the loop will execute until the condition becomes 'false'
112 | a = a + 1
113 |     print (a)
114 | print ("We are now out of the loop")
115 |
116 |
--------------------------------------------------------------------------------
/Section-5/Importing data from web sources.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Importing data from Investors Exchange (IEX)
11 |
12 | # To fetch data from IEX, you need to first pip install iexfinance. The 'pip' command is a tool for installing and managing Python packages.
13 | # !pip install iexfinance
14 | # Using iexfinance to access data from IEX is quite easy. First, you need to import the get_historical_data function from the iexfinance library.
15 | #
16 | # This will return the daily time series of the requested stock ticker over the desired date range. You can specify the date range using datetime objects. The output format (a pandas DataFrame, if 'pandas' is passed) is selected using the output_format parameter.
17 | #
18 | # The resulting DataFrame is indexed by date, with a column for each OHLCV datapoint as you can see in the below example.
19 | #
20 |
21 | # In[8]:
22 |
23 |
24 | from iexfinance import get_historical_data
25 | from datetime import datetime
26 |
27 | start = datetime(2017, 1, 1) # starting date: year-month-date
28 | end = datetime(2018, 1, 1) # ending date: year-month-date
29 |
30 | data = get_historical_data('AAPL', start=start, end=end, output_format='pandas')
31 | data.head()
32 |
33 |
34 | # In[9]:
35 |
36 |
37 | data.tail()
38 |
39 |
40 | # ## Importing data from NSEpy
41 |
42 | # Similar to IEX, you need to first pip install the nsepy module to fetch the data.
43 | # !pip install nsepy
44 | # To fetch historical stock data from nsepy, you have to use the get_history function, which returns daily data for the requested stock ticker over the desired timeframe as a pandas DataFrame.
45 | #
46 | # Note: Only price data of Indian stocks/indices/derivatives can be fetched from nsepy.
47 |
48 | # In[10]:
49 |
50 |
51 | from nsepy import get_history
52 | from datetime import datetime
53 |
54 | start = datetime(2017, 1, 1)
55 | end = datetime(2018, 1, 1)
56 |
57 | data = get_history(symbol='SBIN',start=start,end=end)
58 |
59 | data.head()
60 |
61 |
62 | # In[11]:
63 |
64 |
65 | data.tail()
66 |
67 |
68 | # ## Importing data from Quandl
69 |
70 | # To fetch data from Quandl, first import quandl. Here, quandl.get function is used to fetch data for a security over a specific time period.
71 |
72 | # In[12]:
73 |
74 |
75 | import quandl
76 | from datetime import datetime
77 |
78 | # quantrautil is a module specific to Quantra to fetch stock data
79 | from quantrautil import get_quantinsti_api_key
80 |
81 | api_key = get_quantinsti_api_key()
82 | data = quandl.get('EOD/AAPL', start_date='2017-1-1', end_date='2018-1-1', api_key=api_key)
83 |
84 | # Note that you need to know the "Quandl code" of each dataset you download. In the above example, it is 'EOD/AAPL'.
85 | # To get your personal API key, sign up for a free Quandl account. Then, you can find your API key on Quandl account settings page.
86 |
87 | data.head()
88 |
89 |
90 | # ## Importing data from Yahoo
91 |
92 | # First you need to import data from the pandas_datareader module. Here, the data.get_data_yahoo function is used to return the historical prices of a stock ticker over a specific time range.
93 |
94 | # In[13]:
95 |
96 |
97 | ## Yahoo recently has become an unstable data source.
98 |
99 | ## If it gives an error, you may run the cell again, or try again sometime later
100 |
101 | import pandas as pd
102 | from pandas_datareader import data
103 | data = data.get_data_yahoo('AAPL', '2017-01-01', '2018-01-01')
104 | data.head()
105 |
106 |
107 | # ### In the upcoming iPython notebook:
108 | #
109 | # We will learn about Importing Data from our local machine. Till then, get ready to solve some exercises.
110 |
--------------------------------------------------------------------------------
/Section-5/Candle Sticks .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Plotting Candle sticks
11 | #
12 | # The following code will help you to plot an interactive graph of the S&P 500 index using candlesticks.
13 |
14 | # In[ ]:
15 |
16 |
17 | from iexfinance import get_historical_data
18 | from datetime import datetime
19 |
20 | start = datetime(2017, 1, 1) # starting date: year-month-date
21 | end = datetime(2018, 5, 13) # ending date: year-month-date
22 |
23 | df = get_historical_data('SPY', start=start, end=end, output_format='pandas')
24 | df.head()
25 |
26 |
27 | # In[ ]:
28 |
29 |
30 | # Importing the necessary packages
31 |
32 | import matplotlib.pyplot as plt
33 | # import matplotlib.finance as mpf # Not used below; matplotlib.finance has been removed from recent matplotlib versions
34 | # from matplotlib.finance import candlestick_ohlc # (its candlestick utilities now live in the separate mpl_finance / mplfinance packages)
35 | from bokeh.plotting import figure, show, output_file
36 |
37 |
38 | # In[ ]:
39 |
40 |
41 | # Indexing
42 | import pandas as pd
43 | w = 12*60*60*1000 # half day in ms
44 | df.index = pd.to_datetime(df.index)
45 |
46 |
47 | # ## Remember:
48 | #
49 | # 1. If the closing price is greater than the opening price, then a green candlestick is created to represent the day.
50 | # 2. If the closing price is less than the opening price, then a red candlestick is created to represent the day.
51 | # 3. We will use 'inc' and 'dec' as the variables to capture this fact further in the code.
52 |
53 | # In[ ]:
54 |
55 |
56 | inc = df.close > df.open
57 | dec = df.open > df.close
58 |
59 |
60 | # In[ ]:
61 |
62 |
63 | # The various 'interactions' we want in our candlestick graph. This is an argument to be passed in figure () from bokeh.plotting
64 |
65 | TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
66 |
67 | # Pan: It helps you pan/move the plot
68 |
69 | # Wheel Zoom: You can zoom in using the wheel of your mouse
70 |
71 | # Box Zoom: You can zoom in by creating a box on the specific area of the plot. Use the mouse, click and drag to create the box
72 |
73 | # Reset: If you want to reset the visualisation of the plot
74 |
75 | # Save: Saving the plot (entire or the part which you want) as an image file
76 |
77 |
78 | # In[ ]:
79 |
80 |
81 | # Passing the arguments of our bokeh plot
82 |
83 | p = figure(x_axis_type="datetime", tools= TOOLS, plot_width=1000, title="SPY Candlestick")
84 |
85 |
86 | # In[ ]:
87 |
88 |
89 | from math import pi
90 |
91 | # The orientation of major tick labels can be controlled with the major_label_orientation property.
92 | # This property accepts the values "horizontal" or "vertical" or a floating point number that gives
93 | # the angle (in radians) to rotate from the horizontal.
94 |
95 | p.xaxis.major_label_orientation = pi/4
96 |
97 |
98 | # In[ ]:
99 |
100 |
101 | # Alpha signifies the floating point between 0 (transparent) and 1 (opaque).
102 | # The line specifies the alpha for the grid lines in the plot.
103 |
104 | p.grid.grid_line_alpha = 0.3
105 |
106 |
107 | # In[ ]:
108 |
109 |
110 | # Configure and add segment glyphs to the figure
111 |
112 | p.segment(df.index,df.high,df.index,df.low,color="red")
113 |
114 |
115 | # In[ ]:
116 |
117 |
118 | # Adds vbar glyphs to the Figure
119 |
120 | p.vbar(df.index[inc],w,df.open[inc],df.close[inc], fill_color="#1ED837",line_color="black")
121 | p.vbar(df.index[dec],w,df.open[dec],df.close[dec], fill_color="#F2583E",line_color="black")
122 |
123 |
124 | # In[ ]:
125 |
126 |
127 | # Generates simple standalone HTML documents for Bokeh visualization
128 |
129 | output_file("candlestick.html", title="candlestick.py example")
130 |
131 |
132 | # In[ ]:
133 |
134 |
135 | # The graph will open in another tab of the browser
136 |
137 | show(p)
138 |
139 | # The code ends here
140 |
141 |
142 | # ### In the upcoming iPython notebook:
143 | #
144 | # We will learn about Functions in Python
145 | #
146 | # Happy Learning!
147 |
--------------------------------------------------------------------------------
/Section-4/Lists.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Lists
11 | #
12 | # Lists in Python are used to store heterogeneous types of data. Lists are mutable, i.e. one can change the content within a list without changing its identity.
13 | #
14 | # ## Creating Lists
15 | # Lists are enclosed in square brackets and elements should be separated by commas.
16 |
17 | # In[59]:
18 |
19 |
20 | new_list = [ ] # Empty List
21 | type (new_list)
22 |
23 |
24 | # In[60]:
25 |
26 |
27 | new_list = [10, 20, 30, 40] # A list of integers
28 | type (new_list)
29 |
30 |
31 | # In[61]:
32 |
33 |
34 | new_list = [10, 20.2, "thirty", 40] # A list of mixed data types
35 | type (new_list)
36 |
37 |
38 | # In[62]:
39 |
40 |
41 | new_list = [[10,20,30], [10.1, 20.2, 30.3],["ten", "twenty", "thirty"]] # A nested list
42 | type (new_list)
43 |
44 |
45 | # In[63]:
46 |
47 |
48 | new_list = [10,[20.2,["thirty",[40]]]] # A deeply nested list
49 | type (new_list)
50 |
51 |
52 | # ## Different Methods for List Manipulation
53 | # Let us have a look at few of the methods, with which we can manipulate lists.
54 | #
55 | # Please Note: A function or a method is a block of code which is used to perform a single task or a set of tasks repeatedly.
56 |
57 | # In[64]:
58 |
59 |
60 | my_list = [10,20,30,40] # This is the 'original' list which you have created
61 |
62 | print (my_list)
63 |
64 |
65 | # list.append (x)
66 | # Add an item to the end of the list.
67 |
68 | # In[65]:
69 |
70 |
71 | my_list.append (50)
72 |
73 | print (my_list)
74 |
75 |
76 | # list.extend (x)
77 | # Extend the list by appending all the items at the end of the list.
78 |
79 | # In[66]:
80 |
81 |
82 | my_list.extend ([60,70,80,90])
83 |
84 | print (my_list)
85 |
86 |
87 | # list.insert (i,x)
88 | # Insert an item at any given position within the list. The first argument 'i', is the index of the item before which you want to insert something. To insert something at the beginning of the list, you may type list.insert (0,x)
89 |
90 | # In[67]:
91 |
92 |
93 | my_list.insert (0,0) # Inserting an item in the beginning
94 |
95 | print (my_list)
96 |
97 |
98 | # In[68]:
99 |
100 |
101 | my_list.insert (10,100) # Inserting an item at the end or at the integer location of 10 in this case
102 |
103 | print (my_list)
104 |
105 |
106 | # In[69]:
107 |
108 |
109 | my_list.insert (6,55) # Inserting an item at index 6 of the list
110 |
111 | print (my_list)
112 |
113 |
114 | # list.remove (x)
115 | # Remove the first item from the list whose value is 'x'. It is an error if there is no such item.
116 |
117 | # In[70]:
118 |
119 |
120 | my_list.remove(0)
121 |
122 | print (my_list)
123 |
124 |
125 | # list.pop (i)
126 | # Remove any item from any given position (index) in the list. If no index is specified, it removes and returns the last element from the list.
127 |
128 | # In[71]:
129 |
130 |
131 | my_list.pop (5) # Removes and returns the element at index 5 of the list
132 |
133 |
134 | # In[72]:
135 |
136 |
137 | print (my_list)
138 |
139 |
140 | # In[73]:
141 |
142 |
143 | my_list.pop () # Removes and returns the last element from the list
144 |
145 |
146 | # In[74]:
147 |
148 |
149 | print (my_list)
150 |
151 |
152 | # list.index (x)
153 | # It returns a zero-based index in the list of the first item whose value is x. Raises an error if there is no such item 'x'.
154 |
155 | # In[75]:
156 |
157 |
158 | my_list.index (50)
159 |
160 |
161 | # In[76]:
162 |
163 |
164 | my_list.index(10)
165 |
166 |
167 | # In[77]:
168 |
169 |
170 | print (my_list)
171 |
172 |
173 | # list.count (x)
174 | # Returns the number of times 'x' appears in the list
175 |
176 | # In[78]:
177 |
178 |
179 | new_list = [10,10,10,20,30,40,50] # This is a new list
180 |
181 | new_list.count(10)
182 |
183 |
184 | # list.reverse ()
185 | # It reverses the items of the list.
186 |
187 | # In[79]:
188 |
189 |
190 | print (my_list)
191 |
192 |
193 | # In[80]:
194 |
195 |
196 | my_list.reverse ()
197 |
198 | print (my_list)
199 |
200 |
201 | # list.sort ()
202 | # It sorts the items in the list.
203 |
204 | # In[81]:
205 |
206 |
207 | new_list = [12, 35, 76, 20, 56, 34, 65]
208 | print (new_list)
209 |
210 |
211 | # In[82]:
212 |
213 |
214 | new_list.sort()
215 |
216 | print (new_list)
217 |
218 |
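# Note: list.sort() sorts the list in place and returns None, while the built-in sorted() returns a
# new sorted list and leaves the original list unchanged. Passing reverse=True to either of them
# sorts in descending order.

# In[ ]:


sorted (new_list, reverse = True) # A new list in descending order; new_list itself is unchanged
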
219 | # ### In the upcoming iPython Notebook
220 | #
221 | # We will see, how Lists are used as:
222 | #
223 | # 1. 'Stacks'
224 | # 2. 'Queues'
225 | # 3. 'Graphs'
226 | # 4. 'Trees'
227 | #
228 | # So, Stay Tuned!
229 |
--------------------------------------------------------------------------------
/Section-5/2 D plotting.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # # Notebook Instructions
5 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter. While a cell is running, a [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook [8].
6 | #
7 | # Enter edit mode by pressing `Enter` or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
8 |
9 | # ## Data Visualization
10 | #
11 | # This Python notebook is for understanding the capabilities of the 'matplotlib' library. Matplotlib is a reliable, robust and easy-to-use library for standard plots and is flexible when it comes to complex plots and customisations.
12 |
13 | # In[74]:
14 |
15 |
16 | # Loading and viewing the dataframe
17 |
18 | import pandas as pd
19 |
20 | infy = pd.read_csv ('infy_dv.csv')
21 |
22 | # infy = pd.read_csv ('C:/Users/academy/Desktop/infy_dv.csv')
23 |
24 | infy.head ()
25 |
26 |
27 | # In[75]:
28 |
29 |
30 | # Preparing Data to visualise
31 |
32 | infy_close = infy [['Date','Close Price']] # The columns which we require
33 |
34 | infy_close.set_index('Date', inplace=True) # Setting index as date
35 |
36 | # More on this in the upcoming section on 'Pandas'
37 |
38 | infy_close
39 |
40 |
41 | # ### Importing libraries
42 | #
43 | # To begin with, we will import the required libraries. The main plotting functions are found in the sublibrary matplotlib.pyplot.
44 |
45 | # In[76]:
46 |
47 |
48 | import matplotlib.pyplot as plt
49 | get_ipython().magic(u'matplotlib inline')
50 |
51 | plt.plot(infy_close)
52 | plt.show ()
53 |
54 |
55 | # ### A better plot representation
56 | #
57 | # There are always different requirements and plotting style for presenting graphs/reports. Let us try out a few functions and customize it.
58 |
59 | # In[77]:
60 |
61 |
62 | import matplotlib.pyplot as plt
63 | get_ipython().magic(u'matplotlib inline')
64 |
65 | # This customises the size of the plot as per the inputs. Here 14, 5 represent the width and the height of the plot in inches.
66 | plt.figure(figsize = (14,5))
67 |
68 | # This plots the ‘infy_close’ series as a blue line ('b' stands for blue).
69 | plt.plot(infy_close, 'b')
70 | # plt.plot (infy_close, 'g') # to plot green color
71 |
72 | # This helps in plotting the discrete red data points of the closing prices of ‘infy_close’ series.
73 | plt.plot(infy_close,'ro')
74 | # Here ‘r’ stands for ‘red’ and ‘o’ stands for circles while plotting our discrete data points.
75 | # That is why the points are colored red and default line color is blue.
76 |
77 | # This gives a grid layout to the plot.
78 | plt.grid(True)
79 |
80 | # This gives the title to the plot.
81 | plt.title ('Infosys Close Price Representation')
82 |
83 | # This labels the x axis
84 | plt.xlabel ('Trading Days')
85 |
86 | # This labels the y axis
87 | plt.ylabel ('Infosys Close Price')
88 |
89 |
90 | # To plot and visualise the data
91 | plt.show ()
92 |
93 |
94 | # ### Plot with labelled datasets
95 | #
96 | # Something that is different in this cell is the fact that we are plotting two datasets or columns in this case.
97 |
98 | # In[78]:
99 |
100 |
101 | # Preparing data
102 |
103 | import pandas as pd
104 |
105 | infy2 = pd.read_csv ('infy_dv.csv')
106 |
107 | #infy2 = pd.read_csv ('C:/Users/academy/Desktop/infy_dv.csv')
108 |
109 | infy2 = infy2 [['Date','Close Price', 'Open Price']] # Choosing more columns
110 |
111 | infy2.set_index('Date', inplace=True) # Setting 'Date' column as an index
112 |
113 | infy2
114 |
115 |
116 | # To read the plot better, we use the plt.legend() function. plt.legend() accepts different location (loc) parameters, where 0 stands for the 'best' location of the legend, in the sense that as little data as possible is hidden by the legend.
117 |
118 | # In[79]:
119 |
120 |
121 | # Plotting data
122 |
123 | plt.figure(figsize=(20,7))
124 |
125 | plt.plot(infy2["Close Price"], lw=1.5, label = 'Close Price')
126 | plt.plot(infy2["Open Price"], lw=1.5, label = 'Open Price')
127 |
128 | plt.plot(infy2,'ro')
129 |
130 | plt.grid(True)
131 |
132 | plt.legend(loc=0)
133 |
134 | #This helps us tighten the figure margins
135 | plt.axis ('tight')
136 |
137 | plt.xlabel('Time')
138 | plt.ylabel('Index')
139 | plt.title ('Representative plot with two datasets')
140 |
141 | plt.show()
142 |
143 |
144 | # ### Scatter Plots
145 | #
146 | # (Optional Read)
147 | #
148 | # In a scatter plot, the values of one dataset serve as the x values and the values of the other dataset as the y values. Such plots are often used when comparing financial time series. Matplotlib provides a specific function to generate scatter plots, known as the plt.scatter() function.
149 |
150 | # In[80]:
151 |
152 |
153 | import numpy as np
154 |
155 | y = np.random.standard_normal((100,2)) # Random data created
156 |
157 | plt.figure (figsize = (7,5))
158 |
159 | # The function 'scatter' is called to our 'plt' object
160 | plt.scatter(y[:,0], y[:,1], marker='o')
161 |
162 | plt.grid(True)
163 | plt.xlabel ('1st dataset')
164 | plt.ylabel ('2nd dataset')
165 | plt.title('Scatter Plot')
166 | plt.show()
167 |
168 |
169 | # ### Plotting a histogram
170 | #
171 | # (Optional Read)
172 | #
173 | # Another type of plot apart from line graphs is the histogram. Histograms are often used in the context of financial returns. The code below puts the frequency values of two datasets next to each other in the same plot. We use the plt.hist() function to plot the diagram.
174 | #
175 |
176 | # In[81]:
177 |
178 |
179 | # Random data created
180 |
181 | np.random.seed(100)
182 | y = np.random.standard_normal((25,2)).cumsum(axis=0)
183 |
184 | plt.figure(figsize=(10,5))
185 |
186 | # The function 'hist' is called to our 'plt' object
187 | plt.hist(y, label = ['1st','2nd'], bins=25)
188 |
189 | plt.grid(True)
190 | plt.legend(loc=0)
191 | plt.xlabel('Index Returns')
192 | plt.ylabel ('Stock Returns')
193 | plt.title ('Histogram')
194 | plt.show()
195 |
196 |
197 | # ### In the upcoming iPython notebook:
198 | #
199 | # We will learn about 3-D plotting in Python. 3-D plotting is an optional read.
200 | #
201 | # Happy Learning!
202 |
--------------------------------------------------------------------------------
/Section-4/Stacks, Queues, Graphs, Trees.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Lists as Stacks
11 | #
12 | # A stack is a collection of objects which work as per the LIFO - Last in First Out principle. Consider a simple example: You are throwing a dinner party at your place. You will place a stack of plates at the buffet table. Whenever you are adding new plates to the stack, you will place the plates at the top. Similarly, whenever a guest takes a plate, he/she will take it from the top of the stack. This is the Last in, First Out principle.
13 | #
14 | # In the world of programming, understanding how LIFO works for data is very handy. We will do the same in the upcoming code. When you add items to the stack, it is known as a push operation. You will do this using the append() method. When you remove items from a stack, it is known as a pop operation. You actually have a pop() method in Python.
15 | #
16 | # This is not something new which you have learnt. But understanding how data works in a stack (LIFO principle) is important, since this concept is used for evaluating expressions and syntax parsing, scheduling algorithms, routines, writing effective functions and so on.
17 |
18 | # In[54]:
19 |
20 |
21 | # Bottom --> 10, 20, 30, 40, 50 --> Top
22 |
23 | my_stack = [10, 20, 30, 40, 50] # List
24 |
25 | my_stack.append (60) # The PUSH OPERATION
26 |
27 | print (my_stack)
28 |
29 |
30 | # In[55]:
31 |
32 |
33 | # New Stack: Bottom ---> 10, 20, 30, 40, 50, 60 ---> Top
34 |
35 | my_stack.pop () # The POP OPERATION
36 |
37 | my_stack.pop () # The same operation 'twice'
38 |
39 | print (my_stack) # From the 'top', 50 and 60 will be removed.
40 |
41 |
42 | # ## Lists as Queues
43 | #
44 | # A queue is a collection of objects which works as per the FIFO - First in First Out principle. Consider a simple example: You are at the concert to listen to your favourite artist. The tickets for this concert are in great demand. Hence, all the fans form a queue outside the ticket collection centre. The fan to arrive first, will be the first one to get the ticket while the one to arrive last may or may not get the ticket. This is the First in, First Out principle.
45 |
46 | # In[56]:
47 |
48 |
49 | # 'collections' is a package which contains high performance container data types
50 | 
51 | # 'deque' is a list-like container with fast appends and pops on either end
52 |
53 | from collections import deque
54 |
55 | # This is your queue. "Roger Federer" is the first to arrive while "Novak Djokovic" is the last.
56 |
57 | my_queue = deque(["Roger Federer", "Rafael Nadal", "Novak Djokovic"])
58 |
59 | my_queue.append ("Andre Agassi") # Now Andre Agassi arrives
60 |
61 | my_queue.append ("Pete Sampras") # Now Pete Sampras arrives
62 |
63 | print (my_queue) # You may have a look at the queue below
64 |
65 |
66 | # In[57]:
67 |
68 |
69 | my_queue.popleft() # The first to arrive leaves first
70 |
71 |
72 | # In[58]:
73 |
74 |
75 | my_queue.popleft() # The second to arrive leaves now
76 |
77 |
78 | # In[59]:
79 |
80 |
81 | print (my_queue) # This is your present queue in the order of arrival
82 |
83 |
84 | # Using deque from the collection module is one way of doing it.
85 | #
86 | # Another way of doing this is to use an ordinary list with methods such as insert(), remove(), append() and pop(), as sketched below. However, lists as queues are not that efficient: adding or removing at the beginning of the list is slow, since all the other elements have to be shifted by one.
87 |
88 | # ## Graphs
89 | #
90 | # (Optional Read)
91 | #
92 | # A graph in computer science is a network consisting of different nodes or vertices. These nodes may or may not be connected to each other. The line that joins the nodes is called an edge. If the edge has a particular direction it is a directed graph. If not, it is an undirected graph.
93 |
94 | # This is an example of an Undirected Graph, where A, B, C, D and E are the various nodes. The following list shows which other nodes each of these five nodes is connected to. For the diagram, you may refer to the graph shown in the video lecture.
95 | #
96 | # A <---> B,C
97 | # B <---> A,C,D
98 | # C <---> A,B,D,E
99 | # D <---> B,C,E
100 | # E <---> D,C
101 |
102 | # In[61]:
103 |
104 |
105 | # Please Note: At present we are using dictionaries, functions and loops which have not been taught.
106 |
107 | # We will take up all of these concepts in the upcoming units or sections of this course.
108 |
109 |
110 | # The following code is just to display all the different edges of the graph, as shown in the video lecture.
111 |
112 | my_graph = {'A' : ['B', 'C'], 'B': ['A','C','D'], 'C' : ['A','B','D','E'], 'D': ['B','C','E'], 'E': ['D','C']}
113 |
114 |
115 | # In[62]:
116 |
117 |
118 | def define_edges(my_graph):
119 | edges = []
120 | for nodes in my_graph:
121 | for adjacent_nodes in my_graph [nodes]:
122 | edges.append((nodes, adjacent_nodes))
123 | return edges
124 |
125 | print(define_edges(my_graph))
126 |
127 |
128 | # ## Trees
129 | #
130 | # (Optional Read)
131 | #
132 | # A 'tree' in real world has roots below the ground, a trunk, and the branches that are spread all across the trunk in an organised way. These branches have leaves on them.
133 | #
134 | # In the programming world, a tree is upside down compared to what you see in the real world. At the top is the root node. The nodes that follow the root node are called branch nodes. The final nodes of these branches are called leaf nodes.
135 |
136 | # In[63]:
137 |
138 |
139 | # In the code below, we have shown how to 'travel' through a tree. The tree is the same as the one shown in the video lecture.
140 |
141 | # We have done this with the help of 'classes'. We have not covered classes in this course.
142 |
143 |
144 | # In[64]:
145 |
146 |
147 | class Tree:
148 | def __init__(self, info, left=None, right=None):
149 | self.info = info
150 | self.left = left
151 | self.right = right
152 |
153 | def __str__(self):
154 | return (str(self.info) + ', Left node: ' + str(self.left) + ', Right node: ' + str(self.right))
155 |
156 | tree = Tree("Root Node", Tree("Branch_1", "Leave_1", "Leave_2"), Tree("Branch_2", "Leave_3", "Leave_4"))
157 | print(tree)
158 |
159 |
160 | # ### In the upcoming iPython Notebook
161 | #
162 | # We will see a new data structure called 'Dictionary'.
163 | #
164 | # #### So, Stay Tuned!
165 |
--------------------------------------------------------------------------------
/Section-8/Pandas - Series .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Notebook Contents
11 | #
12 | # ##### 1. Why are we studying series?
13 | # ##### 2. Series datastructure
14 | # ##### 3. Methods or Functions
15 | # ##### 4. pandas.Series.apply()
16 |
17 | # # Why are we studying Series?
18 |
19 | # In Python, understanding Series is a natural prerequisite to understanding dataframes.
20 | #
21 | # A Series is an indexed data structure with only one data column. It is easier to understand Series first before moving on to study the more complex data frames.
22 | #
23 |
24 | # # Series
25 | #
26 | # A series is a one-dimensional labelled 'array-like' object. The labels are nothing but the index of the data.
27 | # Or
28 | # A Series can be thought of as a special case of a two-dimensional array which has only two columns: one column for the index and the other for the data.
29 |
30 | # In[1]:
31 |
32 |
33 | import pandas as pd
34 |
35 | My_Series_int = pd.Series([10, 20, 30, 40, 50, 60]) # Series created using a list
36 |
37 | print (My_Series_int)
38 |
39 |
40 | # The constructor for the Series data structure is pandas.Series (data=None, index=None, dtype=None, name=None). If you are using 'pd' as an alias, then it would be pd.Series()
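#
# As a small illustrative example (not part of the original notebook), the optional 'name' and 'dtype' arguments from the constructor above can be passed explicitly; the variable name 'prices' is ours:

# In[ ]:


prices = pd.Series ([1120, 341, 610], name='share_price', dtype='float64')   # 'name' labels the Series, 'dtype' forces float values

print (prices)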
41 |
42 | # In[2]:
43 |
44 |
45 | import pandas
46 |
47 | My_Series_flt = pandas.Series ([10.1, 20.2, 30.4, 40.4, 50.5, 60.6]) # Series created using a list
48 |
49 | print (My_Series_flt)
50 |
51 |
52 | # You can see that each Series returns an indexed column along with the data type of that column: 'int64' for the first Series and 'float64' for this one.
53 |
54 | # A Series is capable of holding any data type, e.g. integers, floats, strings and so on. A Series can contain multiple data types too.
55 |
56 | # In[3]:
57 |
58 |
59 | My_Series_mixed = pd.Series ([10.1, 20, 'jay' , 40.4]) # Series created using a list
60 |
61 | print (My_Series_mixed)
62 |
63 |
64 | # The above Series returns an 'object' dtype, since the mixed values are stored as generic Python objects.
65 |
66 | # Let us have a look at few other ways of creating series objects.
67 |
68 | # In[4]:
69 |
70 |
71 | # Defining series objects with individual indices
72 |
73 | countries = ['India', 'USA', 'Japan', 'Russia', 'China']
74 | leaders = ['Narendra Modi', 'Donald Trump', 'Shinzo Abe', 'Vladimir Putin', 'Xi Jinpin']
75 |
76 | S = pd.Series (leaders, index=countries) # Index is explicitly defined here
77 | S
78 |
79 |
80 | # In[5]:
81 |
82 |
83 | # Have a look at the series S1
84 |
85 | stocks_set1 = ['Alphabet', 'IBM', 'Tesla', 'Infosys']
86 |
87 | # Here, the data is passed as a list to the Series constructor, while its index argument is passed as a pre-defined list
88 | S1 = pd.Series([100, 250, 300, 500], index = stocks_set1)
89 |
90 | print (S1)
91 | print ("\n")
92 |
93 | # Now, have a look at the series S2
94 |
95 | stocks_set2 = ['Alphabet', 'IBM', 'Tesla', 'Infosys']
96 |
97 | # Here too, the data is passed as a list to the Series constructor, while its index argument is passed as a pre-defined list
98 |
99 | S2 = pd.Series([500, 400, 110, 700], index = stocks_set2)
100 |
101 | print (S2)
102 | print ("\n")
103 |
104 | # We will add Series S1 and S2
105 |
106 | print (S1 + S2)
107 |
108 |
109 | # In[6]:
110 |
111 |
112 | # Adding Series that have different indexes will create 'NaN' values
113 |
114 | stocks_set1 = ['Alphabet', 'IBM', 'Tesla', 'Infosys']
115 | stocks_set2 = ['Alphabet', 'Facebook', 'Tesla', 'Infosys']
116 |
117 | S3 = pd.Series([100, 250, 300, 500], index = stocks_set1)
118 | S4 = pd.Series([500, 700, 110, 700], index = stocks_set2)
119 |
120 |
121 | print (S3)
122 | print("\n")
123 |
124 | print (S4)
125 | print("\n")
126 |
127 | print(S3+S4)
128 |
129 |
130 | # 'NaN' is short for 'Not a Number'. It fills the space for missing or corrupt data.
131 | # It is important to understand how to deal with NaN values, because when you import actual time series data, you are bound to find some missing or corrupted data.
132 |
133 | # ## Methods or Functions
134 | #
135 | # We will have a look at few important methods or functions that can be applied on Series.
136 |
137 | # ##### Series.index
138 | # It is useful to know the range of the index when the series is large.
139 |
140 | # In[7]:
141 |
142 |
143 | My_Series = pd.Series ([10,20,30,40,50]) # A simple Series; for imported market data, the index range is much larger and more useful to inspect
144 |
145 | print (My_Series.index)
146 |
147 |
148 | # ##### Series.values
149 | # It returns the values of the series.
150 |
151 | # In[8]:
152 |
153 |
154 | My_Series = pd.Series ([10,20,30,40,50])
155 |
156 | print (My_Series.values)
157 |
158 |
159 | # ##### Series.isnull()
160 | # We can check for missing values with this method.
161 |
162 | # In[9]:
163 |
164 |
165 | # Remember the (S3 + S4) series? You may have a look at it
166 |
167 | print (S3 + S4)
168 |
169 |
170 | # In[10]:
171 |
172 |
173 | # Returns whether the values are null or not. If it is 'True', then the value at that index is a 'NaN' value
174 |
175 | (S3 + S4).isnull()
176 |
177 |
178 | # ##### Series.dropna()
179 | # One way to deal with the 'NaN' values is to drop them completely from the Series. This method filters out missing data.
180 |
181 | # In[11]:
182 |
183 |
184 | print ((S3 + S4).dropna())
185 |
186 |
187 | # In the above output, we have computed the (S3 + S4) addition and, at the same time, dropped the 'NaN' values from the result.
188 |
189 | # ##### Series.fillna(1)
190 | # Another way to deal with the 'NaN' values is to fill a custom value of your choice. Here, we are filling the 'NaN' values with the value '1'.
191 |
192 | # In[12]:
193 |
194 |
195 | print ((S3 + S4).fillna(1)) # The output is self-explanatory in this case
196 |
197 |
198 | # ## pandas.Series.apply()
199 | #
200 | # If one wants to 'apply' a function to a particular Series, for example to take the 'sine' of each value in the Series, pandas makes that possible.
201 | #
202 | # Series.apply (func)
203 | #
204 | # func = A python function that will be applied to every single value of the series.
205 |
206 | # In[13]:
207 |
208 |
209 | import numpy as np #Create a new series as My_Series
210 |
211 | My_Series = pd.Series([10, 20, 30, 40, 50, 60])
212 |
213 | print (My_Series)
214 |
215 |
216 | # In[14]:
217 |
218 |
219 | My_Series.apply(np.sin) # Find 'sine' of each value in the series
220 |
221 |
222 | # In[15]:
223 |
224 |
225 | My_Series.apply(np.tan) # Finding 'tan' of each value in the series
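

# Series.apply() also accepts your own functions, including lambda functions. A small illustrative sketch (not from the lecture), scaling every value down by 100:

# In[ ]:


My_Series.apply (lambda value: value / 100)   # Apply a user-defined (lambda) function to every value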
226 |
227 |
--------------------------------------------------------------------------------
/Section-7/Vectorization and Broadcasting in Arrays.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Vectorization
11 | #
12 | # Vectorization of code helps us write complex codes in a compact way and execute them faster.
13 | #
14 | # It allows us to **operate** on, or apply a function to, a complex object like an array "at once", rather than iterating over the individual elements. NumPy supports vectorization in an efficient way.
15 |
16 | # # Notebook Contents
17 | #
18 | # ##### 1) 1D or 2D Array operations with a scalar
19 | # ##### 2) 2D Array operations with another 2D array
20 | # ##### 3) 2D Array operations with a 1D array or vector
21 | # ##### 4) Other operators: Compare & Logical
22 | # ##### 5) Just for fun
23 |
24 | # ### Array operations with a scalar
25 | #
26 | # Every element of the array is added/multiplied/operated with the given scalar. We will discuss:
27 | # - Addition
28 | # - Subtraction
29 | # - Multiplication
30 |
31 | # In[49]:
32 |
33 |
34 | import numpy as np # Start the notebook by importing the package
35 |
36 | my_list = [1, 2, 3, 4, 5.5, 6.6, 7.123, 8.456]
37 |
38 | V = np.array(my_list) # Creating a 1D array or vector
39 |
40 | print (V)
41 |
42 |
43 | # #### Vectorization Using Scalars - Addition
44 |
45 | # In[50]:
46 |
47 |
48 | V_a = V + 2 #Every element is increased by 2.
49 |
50 | print(V_a)
51 |
52 |
53 | # #### Vectorization Using Scalars - Subtraction
54 |
55 | # In[51]:
56 |
57 |
58 | V_s = V - 2.4 #Every element is reduced by 2.4.
59 |
60 | print(V_s)
61 |
62 |
63 | # #### Vectorization Using Scalars - Multiplication
64 |
65 | # In[52]:
66 |
67 |
68 | V2 = np.array([ [1, 2, 3], [4,5,6], [7, 8, 9] ]) #Array of shape 3,3
69 |
70 | V_m = V2 * 10 #Every element is multiplied by 10.
71 |
72 | print(V2)
73 | print(V_m)
74 |
75 |
76 | # #### Try on your own
77 |
78 | # In[53]:
79 |
80 |
81 | V_e = V2 ** 2 #See the output and suggest what this operation is?
82 |
83 | print(V_e)
84 |
85 |
86 | # ### 2D Array operations with another 2D array
87 | #
88 | # This is only possible when the shapes of the two arrays are the same. For example, a (2,2) array can be operated on element-wise with another (2,2) array.
89 | #
90 |
91 | # In[54]:
92 |
93 |
94 | A = np.array([ [1, 2, 3], [11, 22, 33], [111, 222, 333] ]) #Array of shape 3,3
95 | B = np.ones ((3,3)) #Array of shape 3,3
96 | C= np.ones ((4,4)) #Array of shape 4,4
97 | print (A)
98 | print (B)
99 | print (C)
100 |
101 |
102 | # In[55]:
103 |
104 |
105 | # Addition of 2 arrays of same dimensions (3, 3)
106 |
107 | print("Adding the arrays is element wise: ")
108 |
109 | print(A + B)
110 |
111 |
112 | # In[56]:
113 |
114 |
115 | # Addition of 2 arrays of different shapes or dimensions is NOT allowed
116 |
117 | print("Addition of 2 arrays of different shapes or dimensions will throw a ValueError.")
118 |
119 | print(A + C)
120 |
121 |
122 | # In[57]:
123 |
124 |
125 | # Subtraction of 2 arrays
126 |
127 | print("Subtracting array B from A is element wise: ")
128 |
129 | print(A - B)
130 |
131 |
132 | # In[58]:
133 |
134 |
135 | # Multiplication of 2 arrays
136 |
137 | A1 = np.array([ [1, 2, 3], [4, 5, 6] ]) # Array of shape 2,3
138 | A2 = np.array([ [1, 0, -1], [0, 1, -1] ]) # Array of shape 2,3
139 |
140 | print("Array 1", A1)
141 | print("Array 2", A2)
142 | print("Multiplying two arrays: ", A1 * A2)
143 | print("As you can see above, the multiplication happens element by element.")
144 |
145 |
146 | # You can further try out various combinations yourself, combining scalars, arrays and arithmetic operators, to get the hang of vectorization. For example:
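#
# The sketch below (the variable name 'D' is ours, not from the lecture) chains an element-wise subtraction with scalar operations on the A1 and A2 arrays defined earlier:

# In[ ]:


D = (A1 - A2) * 2 + 1   # Element-wise subtraction first, then scalar multiplication and addition

print (D)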
147 |
148 | # ### Broadcasting allows 2D Array operations with a 1D array or vector
149 | #
150 | # Numpy also supports broadcasting. Broadcasting allows us to combine objects of different shapes within a single operation.
151 | #
152 | # But do remember that to perform this operation, one of the matrices needs to be a vector whose length is equal to one of the dimensions of the other matrix.
153 |
154 | # #### Try changing the shape of B and observe the results
155 |
156 | # In[59]:
157 |
158 |
159 | import numpy as np
160 |
161 | A = np.array([ [1, 2, 3], [11, 22, 33], [111, 222, 333] ])
162 | B = np.array ([1,2,3])
163 |
164 | print (A)
165 | print (B)
166 |
167 |
168 | # In[60]:
169 |
170 |
171 | print( "Multiplication with broadcasting: " )
172 |
173 | print (A * B)
174 |
175 |
176 | # In[61]:
177 |
178 |
179 | print( "... and now addition with broadcasting: " )
180 |
181 | print (A + B)
182 |
183 |
184 | # In[62]:
185 |
186 |
187 | # Try to understand the difference between the two 'B' arrays
188 |
189 | B = np.array ([[1, 2, 3] * 3])
190 |
191 | print (B)
192 |
193 |
194 | # In[63]:
195 |
196 |
197 | B = np.array([[1, 2, 3],] * 3)
198 |
199 | print(B)
200 |
201 | # Hint: look at the brackets
202 |
203 |
204 | # In[64]:
205 |
206 |
207 | # Another example type
208 |
209 | B = np.array([1, 2, 3])
210 | B[:, np.newaxis]
211 |
212 | # We have changed a row vector into a column vector
213 |
214 |
215 | # In[65]:
216 |
217 |
218 | # Broadcasting in a different way (by changing the vector shape)
219 |
220 | A * B [:, np.newaxis]
221 |
222 |
223 | # In[66]:
224 |
225 |
226 | # This example should be self explanatory by now
227 |
228 | A = np.array([10, 20, 30])
229 | B = np.array([1, 2, 3])
230 | A[:, np.newaxis]
231 |
232 |
233 | # In[67]:
234 |
235 |
236 | A[:, np.newaxis] * B
237 |
238 |
239 | # ### Other operations
240 | #
241 | # - Comparison operators: Comparing arrays and the elements of two similar shaped arrays
242 | # - Logical operators: element-wise AND/OR operations
243 |
244 | # In[68]:
245 |
246 |
247 | import numpy as np
248 |
249 | A = np.array([ [11, 12, 13], [21, 22, 23], [31, 32, 33] ])
250 | B = np.array([ [11, 102, 13], [201, 22, 203], [31, 32, 303] ])
251 |
252 | print (A)
253 | print (B)
254 |
255 |
256 | # In[69]:
257 |
258 |
259 | # It will compare the two arrays element by element
260 |
261 | A == B
262 |
263 |
264 | # In[70]:
265 |
266 |
267 | # Will return 'True' only if each and every element is the same in both arrays
268 |
269 | print(np.array_equal(A, B))
270 |
271 | print(np.array_equal(A, A))
272 |
273 |
274 | # ### Logical Operators
275 |
276 | # In[71]:
277 |
278 |
279 | # This should be self explanatory by now
280 |
281 | a = np.array([ [True, True], [False, False]])
282 | b = np.array([ [True, False], [True, False]])
283 |
284 | print(np.logical_or(a, b))
285 |
286 |
287 | # In[72]:
288 |
289 |
290 | print(np.logical_and(a, b))
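

# As a small extra illustration (not covered in the lecture), NumPy also provides element-wise NOT and XOR:

# In[ ]:


print (np.logical_not (a))      # Element-wise NOT

print (np.logical_xor (a, b))   # Element-wise XOR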
291 |
292 |
293 | # This is where we will end our iPython notebooks on Numpy.
294 | #
295 | # ### Happy Learning!
296 |
--------------------------------------------------------------------------------
/Section-2/My First Python code.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Let us begin
11 | #
12 | # Now that you have learned how to setup Anaconda, it is time to understand how to code programs in Python. Python uses a simple syntax which makes it very easy for someone learning to program for the first time. This notebook is comprehensively designed to help you get familiarized with programming and learn basics of Python.
13 |
14 | # ## What is programming?
15 | #
16 | # Programming is the way of telling a machine what to do. This machine might be your computer, smartphone, or tablet. The task might be something as simple as noting down today’s date or capturing information about the Earth’s atmosphere on a satellite. Programming goes by many interchangeable names: programming, developing, or coding, all of which involve creating software that gets a machine to do what you want it to do.
17 |
18 | # ### Hello World Program
19 | #
20 | # How would you make Python print "Hello World" for you? Well, it's never been this easy, just use the print command.
21 |
22 | # In[1]:
23 |
24 |
25 | print ("Hello World!")
26 |
27 |
28 | # In[2]:
29 |
30 |
31 | # You may try other variations
32 | print ("I am new to programming!")
33 | print ("Python is cool!")
34 |
35 |
36 | # ## Introduction to Python programming
37 | # Python's design places more weight on coding productivity and code readability. Python
38 | # makes use of a simple syntax which reads like written English. It talks with words and
39 | # sentences, rather than cryptic characters. Python is a portable language: it can be installed
40 | # and run on virtually any computer.
41 | #
42 | # Python coding is a lot of fun and is easy. Take this Python code for an example:
43 | #
44 |
45 | # In[3]:
46 |
47 |
48 | x = 2
49 | y = 3
50 | sum = x + y
51 | print (sum)
52 |
53 |
54 | # Even without any coding background, you can easily make out that the code adds up two numbers and prints it. You may modify the code above and try different mathematical operations on different variables.
55 |
56 | # ## Variables, Data Types and Objects
57 | #
58 | # We have studied how to use a variable in python in the previous video unit.
59 |
60 | # In[ ]:
61 |
62 |
63 | x = 100
64 |
65 |
66 | # One thing to keep in mind is the equals '=' sign used while assigning a value to a variable. It should not be read as 'equal to'. It should be read or interpreted as "is set to".
67 | #
68 | # In the previous example, we will read that the value of variable 'x' is set to '100'.
69 |
70 | # In[ ]:
71 |
72 |
73 | y = 50 # Initialising a new variable 'y' whose value is set to 50
74 |
75 |
76 | # ### ID of an object
77 | # The built-in function id() returns the object's address in memory. Look at the code below to see the addresses of different objects.
78 |
79 | # In[ ]:
80 |
81 |
82 | id (x)
83 |
84 |
85 | # You may change the variable name inside the function id() to print the id's of other variables.
86 |
87 | # In[ ]:
88 |
89 |
90 | id (y)
91 |
92 |
93 | # Note : The IDs of 'x' and 'y' are different.
94 |
95 | # ### Data Type of an Object
96 | #
97 | # The type of an object cannot change. It specifies two things: the operations that are allowed and the set of values that the object can hold. The built-in function type() is used to check the type of an object.
98 |
99 | # In[ ]:
100 |
101 |
102 | type (x)
103 |
104 |
105 | # In[ ]:
106 |
107 |
108 | type (y)
109 |
110 |
111 | # Now, let us try something more.
112 |
113 | # In[ ]:
114 |
115 |
116 | x = x + 1.11
117 | print (x) # This will print the new value of 'x' variable
118 | type(x) # This will print the most updated data type of 'x'
119 |
120 |
121 | # Now you may check the ID of the new 'x' object, which is now a float and not an integer.
122 |
123 | # In[ ]:
124 |
125 |
126 | id (x)
127 |
128 |
129 | # Note that this is different from the ID of the earlier integer 'x'.
130 | #
131 | # Python automatically takes care of the physical representation of different data types i.e. an integer value will be stored in a different memory location than a float or string.
132 |
133 | # In[ ]:
134 |
135 |
136 | # let us now convert variable 'x' to a string data type and observe the changes
137 |
138 | x = "hundred"
139 | print (x)
140 | type (x)
141 |
142 |
143 | # In[ ]:
144 |
145 |
146 | id (x)
147 |
148 |
149 | # ### Object References
150 | #
151 | # Let us observe the following code.
152 |
153 | # In[ ]:
154 |
155 |
156 | a = 123
157 | b = a
158 |
159 |
160 | # Where will the object point? Will it be to the same object ID?
161 |
162 | # In[ ]:
163 |
164 |
165 | id (a)
166 |
167 |
168 | # In[ ]:
169 |
170 |
171 | id (b)
172 |
173 |
174 | # Yes. Since the same value is assigned to both variables 'a' and 'b', they point to the same memory location or, in other words, they have the same object ID.
175 |
176 | # ## Multi-Line Statements
177 | #
178 | # There is no semicolon to indicate the end of a statement; Python therefore interprets the end of a line as the end of a statement.
179 | #
180 | # For example, a code to calculate total marks.
181 |
182 | # In[ ]:
183 |
184 |
185 | biology_marks = 82
186 | physics_marks = 91
187 | maths_marks = 96
188 | chemistry_marks = 88
189 | total_marks = biology_marks + physics_marks + maths_marks + chemistry_marks
190 | print (total_marks)
191 |
192 |
193 | # However, if a line is too long, the code can be made more readable by splitting that single line of code into multiple lines. In such scenarios, use the backslash '\' as a line continuation character to specify that the line should continue.
194 | #
195 |
196 | # In[ ]:
197 |
198 |
199 | total_marks = biology_marks + physics_marks + \
                    maths_marks + chemistry_marks
200 | print (total_marks)
201 |
202 |
203 | # ## Indentation
204 | #
205 | # Python forces you to follow proper indentation. The number of spaces in indentation can be different, but all lines of code within the same block should have the same number of spaces in the indentation.
206 | #
207 | # For example, the 3rd line of the code in the cell below shows incorrect indentation. Try running the code to see the error that it throws.
208 |
209 | # In[ ]:
210 |
211 |
212 | # Python Program to calculate the square of number
213 | num = 8
214 |     num_sq = num ** 2 # This line is incorrectly indented on purpose; running the cell raises an IndentationError
215 | print (num_sq)
216 |
217 |
218 | # In[ ]:
219 |
220 |
221 | # On removing the indent
222 | num = 8
223 | num_sq = num ** 2
224 | print (num_sq)
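

# Indentation really matters once you write blocks of code, for example inside an 'if' statement (conditional statements are covered in detail in a later section). A minimal sketch, just to show that every line inside the same block shares the same indentation:

# In[ ]:


num = 8
if num > 0:
    num_sq = num ** 2    # Both lines inside the 'if' block are indented by the same four spaces
    print (num_sq)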
225 |
226 |
227 | # ## Further Resources
228 | #
229 | # As you begin your journey of learning Python programming, we would recommend that you extensively use freely available online resources to understand the simple syntax and the application of available Python libraries. You can use the following resources in addition to others available online:
230 | # 1. http://docs.python.org/reference/introduction.html
231 | # Reference manual
232 | # 2. http://wiki.python.org/moin/BeginnersGuide
233 | # A guide for writing and running Python programs
234 |
--------------------------------------------------------------------------------
/Section-8/Pandas - Dataframe & Basic Functionality.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Notebook Contents
11 | #
12 | # ##### 1. Creating Data Frames
13 | # ##### 2. Customizing index of a Data Frame
14 | # ##### 3. Rearranging the order of columns in a Data Frame
15 | # ##### 4. Existing Column as the index of the Data Frame
16 | # ##### 5. Accessing column from a Data Frame
17 | # ##### 6. Loading and viewing Data in a Data Frame
18 | # ##### 7. Dropping Rows and Columns from a Data Frame
19 | # ##### 8. Renaming columns of a Data Frame
20 | # ##### 9. Sorting a Data Frame using a column
21 | # ##### 10. Just for Fun
22 |
23 | # ## Creating Data Frames
24 | #
25 | # The underlying idea of a Data Frame is based on 'spreadsheets'. In other words, a data frame stores data in discrete rows and columns, where each column can be named (something that is not possible in arrays but is possible in Series). A Data Frame can also hold multiple columns (as opposed to a Series, where there is only one indexed data column).
26 | #
27 | # The constructor for a Data Frame is pandas.DataFrame(data=None, index=None) or, if you are using 'pd' as an alias, pd.DataFrame()
28 | #
29 | # Let us have a look at the following example
30 |
31 | # In[18]:
32 |
33 |
34 | import pandas as pd
35 | import numpy as np
36 |
37 | # A DataFrame has a row and column index; it's like a dict of Series with a common index.
38 |
39 | my_portfolio = {
40 | "stock_name": ["Alphabet", "Facebook", "Apple", "Tesla", "Infosys"],
41 | "quantity_owned": [1564, 6546, 5464, 6513, 4155],
42 | "average_buy_price": ["$950", "$160", "$120", "$270", "$15"]
43 | }
44 |
45 | my_portfolio_frame = pd.DataFrame (my_portfolio) # We have passed the 'data' argument in the Data Frame constructor
46 |
47 | my_portfolio_frame
48 |
49 |
50 | # ## Customizing index of a Data Frame
51 | #
52 | # In the above output, you can see that the 'index' is the default one which starts from 0,1,...4. One can even customize the index for a better understanding of the Data Frame, while working with it.
53 |
54 | # In[19]:
55 |
56 |
57 | ordinals = ["first", "second", "third", "fourth", "fifth"] # list
58 |
59 | my_portfolio_frame = pd.DataFrame (my_portfolio, index=ordinals) #Please notice that we have not kept index as default i.e.'none'
60 |
61 | my_portfolio_frame
62 |
63 |
64 | # ## Rearranging the order of columns in a Data Frame
65 | #
66 | # We can also define or rearrange the order of columns.
67 |
68 | # In[20]:
69 |
70 |
71 | # Please observe the 'columns' parameter while constructing the Data Frame
72 |
73 | my_portfolio_frame = pd.DataFrame(my_portfolio, columns=["stock_name", "quantity_owned", "average_buy_price"], index=ordinals)
74 |
75 | my_portfolio_frame
76 |
77 |
78 | # ## Existing Column as the index of dataframe
79 | #
80 | # If we want to create a more useful index of our existing Data Frame, we can do that using the column 'stock name' as our index. It will make more sense than the 'ordinals' index.
81 |
82 | # In[21]:
83 |
84 |
85 | my_portfolio_frame = pd.DataFrame (my_portfolio,
86 | columns = ["quantity_owned","average_buy_price"],
87 | index = my_portfolio ["stock_name"])
88 |
89 | my_portfolio_frame
90 |
91 |
92 | # ## Accessing column from a data frame
93 | #
94 | # It is even possible to view just a single column, or a selection of columns, of the entire data frame.
95 |
96 | # In[22]:
97 |
98 |
99 | # The index at present is the 'stock_name'. Refer to above code.
100 |
101 | # This makes sense if we just want to know the quantity of stock that we own for each stock (which is our index, currently)
102 |
103 | print (my_portfolio_frame["quantity_owned"])
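

# You can also view more than one column at a time by passing a list of column names (a small illustrative example, not from the lecture):

# In[ ]:


print (my_portfolio_frame [["quantity_owned", "average_buy_price"]])   # Double brackets select a list of columns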
104 |
105 |
106 | # ## Loading and viewing data in a Data Frame
107 | #
108 | # This is something that we have seen in the 'Data Visualisation' section of this course. We can import data from online sources and view it as a data frame, or we can take a local 'csv' file of stock data and view it as a data frame.
109 | #
110 | #
111 |
112 | # In[23]:
113 |
114 |
115 | # Loading and viewing data
116 |
117 | # We have stored an 'infy_data.csv' file on our desktop
118 |
119 | import numpy as np
120 | import pandas as pd
121 |
122 | infy = pd.read_csv ('C:/Users/academy/Desktop/infy_data.csv')
123 |
124 |
125 | # In[24]:
126 |
127 |
128 | infy # this is our entire "Infosys" stock data frame
129 |
130 |
131 | # In[25]:
132 |
133 |
134 | infy.shape
135 |
136 |
137 | # In[26]:
138 |
139 |
140 | infy.head () # You will see the top 5 rows
141 |
142 |
143 | # In[27]:
144 |
145 |
146 | infy.tail () # You will see the bottom 5 rows
147 |
148 |
149 | # ## Dropping Rows and Columns from a Data Frame
150 | #
151 | # In the above Infosys stock data, you may not need all the columns which are present in the .csv file. Hence, to make your data frame more understandable, you may drop the columns that you do not need using the drop() function.
152 |
153 | # In[28]:
154 |
155 |
156 | # The axis=1 represents that we are considering columns while dropping.
157 |
158 | infy_new = infy.drop (['Prev Close', 'Last Price', 'Average Price', 'Total Traded Quantity',
159 | 'Turnover', 'No. of Trades', 'Symbol','Series'], axis = 1)
160 |
161 | infy_new.head ()
162 |
163 |
164 | # In[29]:
165 |
166 |
168 |
169 | # Sorting a data frame
170 |
171 | infy_new = infy_new.sort_values(by="Close Price", ascending=False)
172 |
173 | print(infy_new)
174 |
175 |
176 | # In[30]:
177 |
178 |
179 | # Dropping rows: 31 March 2016, 01 April 2016 (note: drop() returns a new data frame; infy_new itself is unchanged unless you assign the result)
180 |
181 | infy_new.drop (infy_new.index [[3,4]] )
182 |
183 |
184 | # ## Renaming Columns of a Data Frame
185 | #
186 | # If we want to rename the columns for a better understanding while dealing with the data frame, we can do that too in Python.
187 |
188 | # In[31]:
189 |
190 |
191 | # Renaming Columns: Have a quick look at the code, It should be self-explanatory by now
192 |
193 | infy_new=infy_new.rename(columns={'Date':'Date','Open Price':'Open','High Price':'High','Low Price':'Low','Close Price':'Close'})
194 |
195 | infy_new.head()
196 |
197 |
198 | # ## Sorting a Data Frame using a column
199 | #
200 | # Sometimes it becomes necessary to sort a stock price data frame, based on the 'Closing Price'.
201 |
202 | # In[32]:
203 |
204 |
205 | # Sorting Dataframe
206 |
207 | infy_new = infy_new.sort_values(by="Close", ascending=False)
208 |
209 | print(infy_new)
210 |
211 |
212 | # ## Just for Fun
213 |
214 | # In[33]:
215 |
216 |
217 | # If you want to practice on a customised data frame, just fill it with random values and go ahead
218 |
219 | import numpy as np
220 | names = ['Jay', 'Varun', 'Devang', 'Ishan', 'Vibhu']
221 |
222 | months = ["January", "February", "March",
223 | "April", "May", "June",
224 | "July", "August", "September",
225 | "October", "November", "December"]
226 |
227 | df = pd.DataFrame(np.random.randn (12, 5)*10000, columns = names, index = months)
228 |
229 | df
230 |
231 |
232 | # ### In the upcoming iPython Notebook:
233 | # We will understand Statistics and Statistical Functions on a Data Frame.
234 | #
235 |
--------------------------------------------------------------------------------
/Section-4/Tuples and Sets.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # Tuples
11 | #
12 | # A tuple is an immutable list. Similar to lists, a tuple can contain a heterogeneous sequence of elements, but it is not possible to append, edit or remove any individual element within a tuple.
13 | #
14 | # ## Creating Tuples
15 | # Tuples are enclosed in parentheses and the items within them are separated by commas.
16 |
17 | # In[1]:
18 |
19 |
20 | new_tup = () # Empty Tuple
21 | type (new_tup)
22 |
23 |
24 | # In[2]:
25 |
26 |
27 | new_tup = (10, 20, 30, 40) # A tuple of integers
28 | type (new_tup)
29 |
30 |
31 | # In[3]:
32 |
33 |
34 | new_tup = (10, 20.2, 'thirty', 40) # A tuple of mixed data type
35 | type (new_tup)
36 |
37 |
38 | # In[4]:
39 |
40 |
41 | new_tup = ((10,20,30), (10.1, 20.2, 30.3),("ten", "twenty", "thirty")) # A nested tuple
42 | type (new_tup)
43 |
44 |
45 | # In[5]:
46 |
47 |
48 | new_tup = (10,(20.2,("thirty",(40)))) # A deeply nested tuple. Note: (40) on its own is just the integer 40; a one-element tuple needs a trailing comma, e.g. (40,)
49 | type (new_tup)
50 |
51 |
52 | # ## Can we manipulate a tuple?
53 | #
54 | # Tuples do not support any methods that would let us manipulate them once formed. A tuple does not even support assigning a new item at a particular index, so both of the cells below will raise errors.
55 |
56 | # In[6]:
57 |
58 |
59 | my_tup = (10, 20, 30, 40) # This is the 'original' tuple which you have created
60 |
61 | print (my_tup)
62 |
63 |
64 | # In[827]:
65 |
66 |
67 | my_tup [0] # Returning the item at the 0th index
68 |
69 |
70 | # In[865]:
71 |
72 |
73 | my_tup [0] = "40" # Assigning a new item to the 0th index raises a TypeError
74 |
75 |
76 | # In[866]:
77 |
78 |
79 | my_tup.append (50) # Trying to append '50' to the created tuple raises an AttributeError, since tuples have no append() method
80 |
81 |
82 | # But we can certainly find the length of a tuple.
83 | #
84 | # len (x)
85 | # It returns the length of the tuple.
86 |
87 | # In[830]:
88 |
89 |
90 | len (my_tup)
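

# Although a tuple cannot be changed in place, you can always build a new tuple from existing ones. A small illustrative example (the variable name 'bigger_tup' is ours, not from the lecture):

# In[ ]:


bigger_tup = my_tup + (50,)   # Concatenation creates a brand new tuple; note the trailing comma in the one-element tuple (50,)

print (bigger_tup)

print (my_tup)   # The original tuple is left untouched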
91 |
92 |
93 | # # Sets
94 | #
95 | # A set is an unordered, mutable collection with no duplicate elements. Sets are useful when you need to hold only unique values. The elements of a set can be things like numbers, strings or characters.
96 | #
97 | # ## Creating & Printing Sets
98 | # Curly braces or the set () function can be used to create sets and the items within them are separated by commas.
99 |
100 | # In[831]:
101 |
102 |
103 | new_set = { } # Note: empty curly braces create an empty dictionary, not an empty set
104 | type (new_set)
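

# To actually create an empty set, call set() with no arguments (a small clarifying example; the variable name 'empty_set' is ours):

# In[ ]:


empty_set = set ()   # This really is an empty set

type (empty_set)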
105 |
106 |
107 | # In[832]:
108 |
109 |
110 | new_set = {'Neo', 'Morphius', 'Trinity', 'Agent Smith', 'Oracle'} # A new set
111 | type (new_set)
112 |
113 |
114 | # In[833]:
115 |
116 |
117 | print (new_set)
118 |
119 |
120 | # In[834]:
121 |
122 |
123 | # Now there are 4 'Agent Smiths' in our set literal. What will happen if we print this set?
124 |
125 | new_set = {'Neo', 'Morphius', 'Trinity', 'Agent Smith', 'Agent Smith', 'Agent Smith', 'Agent Smith', 'Oracle'}
126 |
127 | print (new_set) # The set will only print unique values
128 |
129 |
130 | # In[835]:
131 |
132 |
133 | # Using the set () function to create sets
134 |
135 | x_set = set ('THEMATRIX')
136 |
137 | type (x_set)
138 |
139 |
140 | # In[836]:
141 |
142 |
143 | print (x_set) # 'THE MATRIX' has two 'T's. Only unique values will be printed.
144 |
145 |
146 | # In[837]:
147 |
148 |
149 | # An additional example
150 |
151 | y_set = set ('THETERMINATOR')
152 |
153 | print (y_set)
154 |
155 |
156 | # ## Set Operations
157 | #
158 | # You can even perform mathematical operations like set union, set intersection, set difference and symmetric difference amongst different datasets.
159 |
160 | # In[1]:
161 |
162 |
163 | # We will create 2 new sets. The 'x_set' and the 'y_set'.
164 |
165 | x_set = set ('ABCDE')
166 | y_set = set ('CDEFG')
167 |
168 | print (x_set)
169 | print (y_set)
170 |
171 |
172 | # x.union(y)
173 | # This method returns all the unique items that are present in the two sets, as a new set.
174 |
175 | # In[2]:
176 |
177 |
178 | x_set.union(y_set)
179 |
180 |
181 | # In[3]:
182 |
183 |
184 | x_set | y_set # Union can be performed by using the pipe '|' operator also
185 |
186 |
187 | # x.intersection(y)
188 | # This method returns the common items that are present in two sets, as a new set.
189 |
190 | # In[5]:
191 |
192 |
193 | x_set.intersection(y_set)
194 |
195 |
196 | # In[6]:
197 |
198 |
199 | x_set & y_set # Intersection can be performed by using the ampersand '&' operator
200 |
201 |
202 | # x.difference(y)
203 | # This method returns the items of 'set 1' which are not present in 'set 2', as a new set.
204 |
205 | # In[843]:
206 |
207 |
208 | x_set.difference(y_set)
209 |
210 |
211 | # In[844]:
212 |
213 |
214 | x_set - y_set # Difference can be performed using the minus '-' operator
215 |
216 |
217 | # difference_update ()
218 | # This method removes from 'set 1' all the elements that are common to 'set 1' and 'set 2'. It updates 'set 1' in place.
219 |
220 | # In[845]:
221 |
222 |
223 | x_set.difference_update(y_set)
224 |
225 | print (x_set)
226 | print (y_set)
227 |
228 |
229 | # In[846]:
230 |
231 |
232 | x_set = set ('ABCDE')
233 | y_set = set ('CDEFG')
234 |
235 | x_set = x_set - y_set # Difference update can be abbreviated in the shown manner i.e. 'x = x-y'
236 |
237 | print (x_set)
238 | print (y_set)
239 |
240 |
241 | # x.isdisjoint(y)
242 | # This method returns True if two sets have null intersection.
243 |
244 | # In[847]:
245 |
246 |
247 | x_set = set ('ABCDE')
248 | y_set = set ('CDEFG')
249 |
250 | x_set.isdisjoint(y_set)
251 |
252 |
253 | # In[848]:
254 |
255 |
256 | x_set = set ('ABC')
257 | y_set = set ('EFG')
258 |
259 | x_set.isdisjoint(y_set)
260 |
261 |
262 | # y.issubset(x)
263 | # This method returns True for 'Set 2', if all the elements of 'Set 2' are present in 'Set 1'
264 |
265 | # In[849]:
266 |
267 |
268 | x_set = set ('ABCDE')
269 | y_set = set ('CDEFG')
270 |
271 | y_set.issubset(x_set)
272 |
273 |
274 | # In[850]:
275 |
276 |
277 | x_set = set ('ABCDE')
278 | y_set = set ('CDE')
279 |
280 | y_set.issubset(x_set)
281 |
282 |
283 | # In[851]:
284 |
285 |
286 | y_set < x_set # One can check a subset using a less than '<' operator.
287 |
288 |
289 | # x.issuperset(y)
290 | # This method returns True for 'Set 1' if all the elements of Set 2 are present in 'Set 1'.
291 |
292 | # In[852]:
293 |
294 |
295 | x_set = set ('ABCDE')
296 | y_set = set ('CDEFG')
297 |
298 | x_set.issuperset(y_set)
299 |
300 |
301 | # In[853]:
302 |
303 |
304 | x_set = set ('ABCDE')
305 | y_set = set ('CDE')
306 |
307 | x_set.issuperset(y_set)
308 |
309 |
310 | # In[854]:
311 |
312 |
313 | x_set > y_set # One can check a superset using a greater than '>' operator.
314 |
315 |
316 | # x.add(e)
317 | # It adds a single item to the set and updates the set.
318 |
319 | # In[855]:
320 |
321 |
322 | x_set = set ('ABCDE')
323 |
324 | print (x_set)
325 |
326 |
327 | # In[856]:
328 |
329 |
330 | x_set.add('FGH') # 'FGH' is added as one single item (a single string), not as three separate characters
331 |
332 | print (x_set)
333 |
334 |
335 | # x.discard(e)
336 | # It removes a single item from the set and updates it.
337 |
338 | # In[857]:
339 |
340 |
341 | print (x_set)
342 |
343 |
344 | # In[858]:
345 |
346 |
347 | x_set.discard('FGH')
348 |
349 | print (x_set)
350 |
351 |
352 | # x.pop ()
353 | # It pops and returns an arbitrary item from the set.
354 |
355 | # In[859]:
356 |
357 |
358 | print (x_set)
359 |
360 |
361 | # In[860]:
362 |
363 |
364 | x_set.pop()
365 |
366 |
367 | # x.copy ()
368 | # It creates a shallow copy of any set.
369 |
370 | # In[861]:
371 |
372 |
373 | print (x_set) # There are only 4 items in the set, since one just got popped in the above cell execution.
374 |
375 |
376 | # In[862]:
377 |
378 |
379 | x_set.copy()
380 |
381 |
382 | # x.clear()
383 | # It clears all the items of the set.
384 |
385 | # In[863]:
386 |
387 |
388 | print (x_set)
389 |
390 |
391 | # In[864]:
392 |
393 |
394 | x_set.clear()
395 |
396 | print (x_set)
397 |
398 |
399 | # ### This is where we will end this section on Data Structures
400 | #
401 | # Stay tuned for the next Section.
402 |
--------------------------------------------------------------------------------
/Section-7/Indexing and Slicing.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # We will continue from where we left in the previous notebook.
11 | #
12 | # # Notebook Contents
13 | #
14 | # ##### 1. Indexing
15 | # ##### 2. Slicing
16 | # ##### 3. Arrays of 1s and 0s
17 | # ##### 4. Identity function
18 |
19 | # ## Indexing
20 |
21 | # We can access the elements of an array using its **index**. Index gives the location of an element of an array.
22 | #
23 | # - The first index is '0'.
24 | # - The second index is '1' and so on.
25 | # - The second last index is '-2'.
26 | # - The last index is '-1'.
27 | #
28 | # ### Indexing in a one-dimensional array
29 | #
30 | # A one-dimensional array is indexed just like a list.
31 |
32 | # In[62]:
33 |
34 |
35 | import numpy as np
36 |
37 | # One dimensional array
38 |
39 | A = np.array([10, 21, 32, 43, 54, 65, 76, 87])
40 |
41 | # Print the first element of A
42 | print(A [0])
43 |
44 | # Remember, in Python, counting starts from 0 and not from 1
45 |
46 |
47 | # In[63]:
48 |
49 |
50 | # Print the last element of A
51 | print(A [-1])
52 |
53 |
54 | # In[64]:
55 |
56 |
57 | # Print the third element of A
58 | print (A [2])
59 |
60 |
61 | # In[65]:
62 |
63 |
64 | # Print the second last element
65 | print (A [-2])
66 |
67 |
68 | # ### Indexing in a two-dimensional array
69 | #
70 | # A 2-Dimensional Array consists of rows and columns, so you need to specify both rows and columns, to locate an element.
71 |
72 | # In[66]:
73 |
74 |
75 | # Create a 2-Dimensional Array
76 |
77 | A = np.array ([ [1,2,3], [4,5,6], [7,8,9], [10,11,12] ])
78 |
79 | print (A)
80 |
81 | # The shape of the array is : 4 rows and 3 columns
82 |
83 |
84 | # In[67]:
85 |
86 |
87 | # Print the element of Row 1, column 1
88 | print (A [0] [0])
89 |
90 |
91 | # In[68]:
92 |
93 |
94 | # Print the element of row 2, column 1
95 | print (A [1] [0])
96 |
97 |
98 | # In[69]:
99 |
100 |
101 | # Print the element of row 4, column 3
102 | print (A [3] [2])
103 |
104 |
105 | # In[70]:
106 |
107 |
108 | # Another way to print the element of row 3, column 2
109 | print (A [2,1])
110 |
111 |
112 | # #### Try on your own
113 |
114 | # In[71]:
115 |
116 |
117 | # Can you guess what will be the output of this print statement?
118 |
119 | print (A [4,3])
120 |
121 |
122 | # ## Slicing
123 | #
124 | # When you want to select a certain section of an array, then you slice it. It could be a bunch of elements in a one-dimensional array and/or entire rows and columns in a two-dimensional array.
125 | #
126 | # ### Slicing a one-dimensional array
127 | #
128 | # You can slice a one-dimensional array in various ways:
129 | # - Print first few elements
130 | # - Print last few elements
131 | # - Print middle elements
132 | # - Print elements after certain step.
133 | #
134 | # Syntax:
135 | # #### array_name [start: stop: step]
136 | #
137 |
138 | # In[72]:
139 |
140 |
141 | # Consider a one-dimensional array A
142 |
143 | A = np.array([1, 2, 3, 4, 5, 6, 7, 8])
144 |
145 | # By default, the step = 1
146 |
147 | # To print the first 4 elements (i.e. indices 0, 1, 2, 3, those before index 4)
148 | print(A [:4])
149 |
150 | # To print the elements from the index = 6 till the end
151 | print(A [6:])
152 |
153 | # To print the elements starting from index=2 and it will stop BEFORE index=5
154 |
155 | print(A [2:5])
156 |
157 | # To print all the elements of the array
158 | print(A [:])
159 |
160 |
161 | # In[73]:
162 |
163 |
164 | # Introducing step = 2
165 |
166 | # This will print alternate index elements of the entire array, starting from index = 0
167 |
168 | print (A [::2])
169 |
170 |
171 | # #### Try on your own
172 |
173 | # In[74]:
174 |
175 |
176 | # Can you guess what will be the output of this print statement?
177 |
178 | print (A [::3])
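

# The step can also be negative, which walks through the array backwards (a small extra illustration, not from the lecture):

# In[ ]:


print (A [::-1])   # The whole array in reverse order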
179 |
180 |
181 | # ### Slicing a two-dimensional array
182 | #
183 | # You can slice a two-dimensional array in various ways:
184 | # - Print a row or a column
185 | # - Print multiple rows or columns
186 | # - Print a section of table for given rows and columns
187 | # - Print first and/or last rows and/or columns.
188 | # - Print rows and columns after certain step.
189 | #
190 | # Syntax:
191 | # #### array_name [row_start : row_stop : row_step, col_start : col_stop : col_step]
192 |
193 | # In[138]:
194 |
195 |
196 | # A two-dimensional Array
197 |
198 | A = np.array([
199 | ["00", "01", "02", "03", "04"],
200 | [10, 11, 12, 13, 14],
201 | [20, 21, 22, 23, 24],
202 | [30, 31, 32, 33, 34],
203 | [40, 41, 42, 43, 44]
204 | ])
205 |
206 | print (A)
207 |
208 |
209 | # In[139]:
210 |
211 |
212 | # Print a row or a column
213 |
214 | print(A[1,]) # Printing Row 2
215 |
216 |
217 | # In[140]:
218 |
219 |
220 | print(A[:,1]) # Column 2
221 |
222 |
223 | # In[141]:
224 |
225 |
226 | # Print multiple rows or columns
227 |
228 | print(A[:2,]) #Rows 1 & 2
229 |
230 | print(A[:,1:3]) #Columns 2 & 3
231 |
232 |
233 | # In[142]:
234 |
235 |
236 | # Print first or last rows and columns
237 |
238 | print(A[:3,]) # Printing first three rows
239 |
240 | print(A[:,3:]) # Printing 4th column and onwards
241 |
242 |
243 | # In[143]:
244 |
245 |
246 | # Print selected rows and columns
247 |
248 | print(A[:2,2]) # Rows 1 & 2 for column 3
249 |
250 |
251 | # In[144]:
252 |
253 |
254 | print(A[:3,2:]) # 1st three rows for the last three columns
255 |
256 |
257 | # In[145]:
258 |
259 |
260 | print(A[:,:-2]) # Array without the last two columns
261 |
262 |
263 | # In[146]:
264 |
265 |
266 | print(A[:-3,:]) # Array without last 3 rows
267 |
268 |
269 | # #### Using step
270 |
271 | # In[147]:
272 |
273 |
274 | # Let us create a new array using the arange method for this exercise
275 |
276 | A2 = np.arange(50).reshape(5,10) # Create an array with 5 rows and 10 columns that holds the values 0 to 49
277 |
278 | print(A2)
279 |
280 |
281 | # In[148]:
282 |
283 |
284 | # Using step in slicing
285 |
286 | print(A[::2,]) # Print Rows 1, 3, and 5
287 |
288 |
289 | # In[149]:
290 |
291 |
292 | print(A[:, 1::2]) # Print Columns 2 & 4
293 |
294 |
295 | # In[150]:
296 |
297 |
298 | print(A2[:, 1:10:2]) # Print Columns 2, 4, 6, 8, 10 of A2
299 |
300 |
301 | # In[151]:
302 |
303 |
304 | # This will print the intersection of the elements of rows 0, 2, 4 and columns 0, 3, 6, 9
305 |
306 | print(A2 [::2, ::3])
307 |
308 |
309 | # In[152]:
310 |
311 |
312 | # Let us print all the rows and columns
313 |
314 | print (A2 [::,::])
315 |
316 |
317 | # #### Try on your own
318 |
319 | # In[153]:
320 |
321 |
322 | # If the following line of code is self explanatory to you, then you have understood the entire concept of 2D slicing
323 |
324 | print (A2 [2:4:1, 2:7:4])
325 |
326 |
327 | # In[154]:
328 |
329 |
330 | # reshape() returns a view on the same data: modifying A below also changes B
331 |
332 | A = np.arange(12)
333 | B = A.reshape(3, 4)
334 |
335 | A[0] = 42
336 | print(B)
337 |
338 |
339 | # ## Array of Ones and Zeros
340 |
341 | # We will be initialising arrays which have all their elements as either zeros or ones. Such arrays help us while performing arithmetic operations
342 |
343 | # In[155]:
344 |
345 |
346 | O = np.ones((4,4))
347 | print(O)
348 |
349 | # The default datatype is 'float'
350 |
351 |
352 | # In[156]:
353 |
354 |
355 | O = np.ones((4,4), dtype=int) # Changing data type to integers
356 | print(O)
357 |
358 |
359 | # In[157]:
360 |
361 |
362 | Z = np.zeros((3,3))
363 | print(Z)
364 |
365 |
366 | # In[158]:
367 |
368 |
369 | Z = np.zeros((3,3), dtype = int)
370 | print(Z)
371 |
372 |
373 | # ## Identity Function
374 | #
375 | # An identity array has an equal number of rows and columns. It is a square array whose diagonal elements are all ones (and every other element is zero).
376 |
377 | # In[159]:
378 |
379 |
380 | I = np.identity(4)
381 |
382 | print (I)
383 |
384 |
385 | # In[160]:
386 |
387 |
388 | I = np.identity (3, dtype = int)
389 |
390 | print (I)
391 |
392 |
393 | # ### In the upcoming iPython Notebook:
394 | #
395 | # We will continue understanding about arrays and learn about Vectorization, Arithmetic Operation, Broadcasting and Array Comparisons.
396 |
--------------------------------------------------------------------------------
/Pandas- Grouping and Reshaping.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell at a time) by pressing shift + enter. While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Pandas 'Groupby'
11 | #
12 | # Any groupby operation involves one of the following operations on the original dataframe/object. They are:
13 | #
14 | # 1. Splitting the data into groups based on some criteria.
15 | #
16 | # 2. Applying a function to each group separately.
17 | #
18 | # 3. Combining the results into a single data frame.
19 | #
20 | # Splitting the data is pretty straightforward. What adds value to this split is the 'Apply' step; this is what makes the 'groupby' function interesting. In the apply step, you may wish to do one of the following:
21 | #
22 | # a. Aggregation − Computing a summary statistic. Eg: Compute group sums or means.
23 | #
24 | # b. Transformation − perform some group-specific operation. Eg: Standardizing data (computing the z-score) within the group.
25 | #
26 | # c. Filtration − discarding the data with some condition.
27 | #
28 | # Let us now create a DataFrame object and perform all the operations on it
29 |
30 | # In[1]:
31 |
32 |
33 | # Creating a data frame
34 |
35 | import pandas as pd
36 |
37 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
38 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
39 |
40 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
41 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
42 |
43 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
44 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
45 |
46 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
47 |
48 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
49 |
50 | mp = pd.DataFrame(my_portfolio)
51 |
52 | mp
53 |
54 |
55 | # ### View Groups
56 |
57 | # In[2]:
58 |
59 |
60 | print (mp.groupby('MarketCap').groups)
61 |
62 |
63 | # There are 3 Groups formed, if we group it by 'Market Cap'. They are:
64 | #
65 | # Group 1: 'Large Cap' (3 companies at index 0,1,8)
66 | # Group 2: 'Mid Cap' (5 companies at index 2,3,4,6,9)
67 | # Group 3: 'Small Cap' (2 companies at index 5,7)
68 |
69 | # In[3]:
70 |
71 |
72 | # Understand this Grouping
73 |
74 | print (mp.groupby('Sector').groups)
75 |
76 |
77 | # There are 5 Groups formed, if we group it by 'Sector'. They are:
78 | #
79 | # Group 1: 'FMCG' (3 companies at index 1,5,6)
80 | # Group 2: 'IT' (2 companies at index 0,7)
81 | # Group 3: 'Pharma' (2 companies at index 3,4)
82 | # Group 4: 'Finance' (2 companies at index 2,8)
83 | # Group 5: 'Real Estate' (1 company at index 9)
84 |
85 | # In[4]:
86 |
87 |
88 | # Group by with multiple columns
89 |
90 | print (mp.groupby(['MarketCap','Sector']).groups)
91 |
92 |
93 | # There are 9 Groups formed, if we group it by 'MarketCap' and 'Sector'. They are:
94 | #
95 | # Group 1: 'Large Cap, FMCG' (1 company at index 1)
96 | # Group 2: 'Mid Cap, FMCG' (1 company at index 6)
97 | # Group 3: 'Large Cap, IT' (1 company at index 0)
98 | # Group 4: 'Small Cap, FMCG' (1 company at index 5)
99 | # Group 5: 'Mid Cap, Real Estate' (1 company at index 9)
100 | # Group 6: 'Small Cap, IT' (1 company at index 7)
101 | # Group 7: 'Mid Cap, Pharma' (2 companies at index 3,4)
102 | # Group 8: 'Mid Cap, Finance' (1 company at index 2) and Group 9: 'Large Cap, Finance' (1 company at index 8)
103 |
104 | # ### Iterating through groups
105 |
106 | # In[6]:
107 |
108 |
109 | # A better way to visualise
110 |
111 | grouped = mp.groupby('Sector')
112 |
113 | for name,group in grouped:
114 | print (name)
115 | print (group)
116 |
117 |
118 | # In[7]:
119 |
120 |
121 | # Just so that you feel comfortable, go through this line of code too
122 |
123 | grouped = mp.groupby('MarketCap')
124 |
125 | for name,group in grouped: # We will learn 'for' loop in further sections. It is usually used for iterations
126 | print (name)
127 | print (group)
128 |
129 |
130 | # ### Select a group
131 |
132 | # In[9]:
133 |
134 |
135 | import pandas as pd
136 |
137 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
138 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
139 |
140 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
141 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
142 |
143 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
144 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
145 |
146 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
147 |
148 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
149 |
150 | mp = pd.DataFrame(my_portfolio)
151 |
152 | grouped = mp.groupby('MarketCap')
153 |
154 | print (grouped.get_group('Mid Cap'))
155 |
156 |
157 | # ### Aggregations
158 |
159 | # In[10]:
160 |
161 |
162 | import numpy as np
163 |
164 | grouped = mp.groupby('MarketCap')
165 |
166 | print (grouped['Amount Invested'].agg(np.mean))
167 |
168 |
169 | # What does this mean?
170 | #
171 | # This means that, on average, we have invested Rs. 15000 per scrip in Large Cap, Rs. 34700 per scrip in Mid Cap and Rs. 15000 per scrip in Small Cap
172 |
173 | # In[11]:
174 |
175 |
176 | grouped = mp.groupby('MarketCap')
177 |
178 | print (grouped.agg(np.size))
179 |
180 |
181 | # What does this mean?
182 | #
183 | # This just shows the size (number of rows) of each group.
184 |
185 | # In[12]:
186 |
187 |
188 | # Applying multiple aggregation functions at once
189 |
190 | grouped = mp.groupby('MarketCap')
191 |
192 | print (grouped['Amount Invested'].agg([np.sum, np.mean]))
193 |
194 |
195 | # What does this mean?
196 | #
197 | # This means that the 'sum' is the total amount invested in a particular market-cap group and the 'mean' is the average amount invested per scrip in that group.
198 |
199 | # ### Transformations
200 |
201 | # In[13]:
202 |
203 |
204 | import pandas as pd
205 |
206 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
207 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
208 |
209 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
210 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
211 |
212 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
213 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
214 |
215 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
216 |
217 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
218 |
219 | mp = pd.DataFrame(my_portfolio)
220 |
221 | print (mp)
222 |
223 | grouped = mp.groupby('MarketCap')
224 |
225 | z_score = lambda x: (x - x.mean()) / x.std()
226 |
227 | print (grouped.transform(z_score))
228 |
229 |
230 | # ### Filteration
231 |
232 | # In[14]:
233 |
234 |
235 | print (mp.groupby('MarketCap').filter(lambda x: len(x)>= 3))
236 |
237 |
238 | # What does this mean?
239 | #
240 | # It will not filter the Groups that has 3 or less than 3 companies in that particular group.
241 |
242 | # ### Merging/Joining
243 |
244 | # In[15]:
245 |
246 |
247 | import pandas as pd
248 |
249 |
250 | left_df = pd.DataFrame({
251 | 'id':[1,2,3,4,5],
252 | 'Company': ['Infosys', 'SBI', 'Asian Paints', 'Maruti', 'Sun Pharma'],
253 | 'Sector':['IT','Banks','Paints and Varnishes','Auto','Pharma']})
254 |
255 | right_df = pd.DataFrame(
256 | {'id':[1,2,3,4,5],
257 | 'Company': ['NTPC', 'TCS', 'Lupin', 'ICICI', 'M&M'],
258 | 'Sector':['Power','IT','Pharma','Banks','Auto']})
259 |
260 |
261 | # In[16]:
262 |
263 |
264 | left_df
265 |
266 |
267 | # In[17]:
268 |
269 |
270 | right_df
271 |
272 |
273 | # In[18]:
274 |
275 |
276 | # Merge 2 DF on a key
277 |
278 | print (pd.merge(left_df,right_df, on='id'))
279 |
280 |
281 | # In[19]:
282 |
283 |
284 | print (pd.merge(left_df,right_df, on='Sector'))
285 |
286 |
287 | # In[20]:
288 |
289 |
290 | # Merge 2 DFs on multiple keys
291 |
292 | print (pd.merge(left_df,right_df,on=['Sector','Company']))
293 |
294 |
295 | # In[21]:
296 |
297 |
298 | # Merge using 'how' argument
299 |
300 | # Left join
301 |
302 | print (pd.merge(left_df, right_df, on='Sector', how='left'))
303 |
304 |
305 | # In[22]:
306 |
307 |
308 | # Right join
309 |
310 | print (pd.merge(left_df, right_df, how='outer', on='Sector'))
311 |
312 |
313 | # In[23]:
314 |
315 |
316 | # Outer Join
317 |
318 | print (pd.merge(left_df, right_df, how='outer', on='Sector'))
319 |
320 |
321 | # In[24]:
322 |
323 |
324 | # Inner Join
325 |
326 | print (pd.merge(left_df, right_df, on='Sector', how='inner'))
327 |
328 |
329 | # ### Concatenation
330 |
331 | # In[25]:
332 |
333 |
334 | print (pd.concat([left_df,right_df]))
335 |
336 |
337 | # In[26]:
338 |
339 |
340 | print (pd.concat([left_df, right_df],keys=['x','y']))
341 |
342 |
343 | # In[27]:
344 |
345 |
346 | print (pd.concat([left_df,right_df],keys=['x','y'],ignore_index=True))
347 |
348 |
349 | # In[28]:
350 |
351 |
352 | print (pd.concat([left_df,right_df],axis=1))
353 |
354 |
355 | # In[29]:
356 |
357 |
358 | # Concatenating using append
359 |
360 | print (left_df.append(right_df))
361 |
362 |
363 | # In[30]:
364 |
365 |
366 | print (left_df.append([right_df,left_df, right_df]))
367 |
368 |
--------------------------------------------------------------------------------
/Section-8/Pandas- Grouping and Reshaping.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # ## Pandas 'Groupby'
11 | #
12 | # Any groupby operation involves one of the following operations on the original dataframe/object. They are:
13 | #
14 | # 1. Splitting the data into groups based on some criteria.
15 | #
16 | # 2. Applying a function to each group separately.
17 | #
18 | # 3. Combining the results into a single data frame.
19 | #
20 | # Splitting the data is pretty straightforward. What adds value to this split is the 'Apply' step, which is what makes the 'Groupby' function interesting. In the apply step, you may wish to do one of the following:
21 | #
22 | # a. Aggregation − Computing a summary statistic. Eg: Compute group sums or means.
23 | #
24 | # b. Transformation − perform some group-specific operation. Eg: Standardizing data (computing the z-score) within the group.
25 | #
26 | # c. Filtration − discarding the data with some condition.
27 | #
28 | # Let us now create a DataFrame object and perform all these operations on it.
29 |
30 | # In[1]:
31 |
32 |
33 | # Creating a data frame
34 |
35 | import pandas as pd
36 |
37 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
38 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
39 |
40 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
41 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
42 |
43 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
44 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
45 |
46 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
47 |
48 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
49 |
50 | mp = pd.DataFrame(my_portfolio)
51 |
52 | mp
53 |
54 |
55 | # ### View Groups
56 |
57 | # In[2]:
58 |
59 |
60 | print (mp.groupby('MarketCap').groups)
61 |
62 |
63 | # There are 3 Groups formed, if we group it by 'Market Cap'. They are:
64 | #
65 | # Group 1: 'Large Cap' (3 companies at index 0,1,8)
66 | # Group 2: 'Mid Cap' (5 companies at index 2,3,4,6,9)
67 | # Group 3: 'Small Cap' (2 companies at index 5,7)
68 |
69 | # In[3]:
70 |
71 |
72 | # Understand this Grouping
73 |
74 | print (mp.groupby('Sector').groups)
75 |
76 |
77 | # There are 5 Groups formed, if we group it by 'Sector'. They are:
78 | #
79 | # Group 1: 'FMCG' (3 companies at index 1,5,6)
80 | # Group 2: 'IT' (2 companies at index 0,7)
81 | # Group 3: 'Pharma' (2 companies at index 3,4)
82 | # Group 4: 'Finance' (2 companies at index 2,8)
83 | # Group 5: 'Real Estate' (1 company at index 9)
84 |
85 | # In[4]:
86 |
87 |
88 | # Group by with multiple columns
89 |
90 | print (mp.groupby(['MarketCap','Sector']).groups)
91 |
92 |
93 | # There are 9 Groups formed, if we group it by 'MarketCap' and 'Sector'. They are:
94 | #
95 | # Group 1: 'Large Cap, FMCG' (1 company at index 1)
96 | # Group 2: 'Large Cap, Finance' (1 company at index 8)
97 | # Group 3: 'Large Cap, IT' (1 company at index 0)
98 | # Group 4: 'Mid Cap, FMCG' (1 company at index 6)
99 | # Group 5: 'Mid Cap, Finance' (1 company at index 2)
100 | # Group 6: 'Mid Cap, Pharma' (2 companies at index 3,4)
101 | # Group 7: 'Mid Cap, Real Estate' (1 company at index 9)
102 | # Group 8: 'Small Cap, FMCG' (1 company at index 5); Group 9: 'Small Cap, IT' (1 company at index 7)
103 |
104 | # ### Iterating through groups
105 |
106 | # In[6]:
107 |
108 |
109 | # A better way to visualise
110 |
111 | grouped = mp.groupby('Sector')
112 |
113 | for name,group in grouped:
114 | print (name)
115 | print (group)
116 |
117 |
118 | # In[7]:
119 |
120 |
121 | # Just so that you feel comfortable, go through this line of code too
122 |
123 | grouped = mp.groupby('MarketCap')
124 |
125 | for name,group in grouped: # We will learn 'for' loop in further sections. It is usually used for iterations
126 | print (name)
127 | print (group)
128 |
129 |
130 | # ### Select a group
131 |
132 | # In[9]:
133 |
134 |
135 | import pandas as pd
136 |
137 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
138 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
139 |
140 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
141 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
142 |
143 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
144 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
145 |
146 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
147 |
148 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
149 |
150 | mp = pd.DataFrame(my_portfolio)
151 |
152 | grouped = mp.groupby('MarketCap')
153 |
154 | print (grouped.get_group('Mid Cap'))
155 |
156 |
157 | # ### Aggregations
158 |
159 | # In[10]:
160 |
161 |
162 | import numpy as np
163 |
164 | grouped = mp.groupby('MarketCap')
165 |
166 | print (grouped['Amount Invested'].agg(np.mean))
167 |
168 |
169 | # What does this mean?
170 | #
171 | # This means that, on average, we have invested Rs. 15000 per scrip in Large Cap, Rs. 34700 per scrip in Mid Cap and Rs. 15000 per scrip in Small Cap.
172 |
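# As a quick sanity check, the 'Large Cap' figure can be reproduced by hand from the
# 'Amount Invested' column (indices 0, 1 and 8 in the data frame above).

# In[ ]:


print ((24000 + 16000 + 5000) / 3)   # 15000.0, matching the grouped mean above
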
173 | # In[11]:
174 |
175 |
176 | grouped = mp.groupby('MarketCap')
177 |
178 | print (grouped.agg(np.size))
179 |
180 |
181 | # What does this mean?
182 | #
183 | # This shows the number of rows in each group, reported once for every column of the data frame.
184 |
185 | # In[12]:
186 |
187 |
188 | # Applying multiple aggregation functions at once
189 |
190 | grouped = mp.groupby('MarketCap')
191 |
192 | print (grouped['Amount Invested'].agg([np.sum, np.mean]))
193 |
194 |
195 | # What does this mean?
196 | #
197 | # Here, the 'sum' column is the total amount invested in each market-cap category and the 'mean' column is the average amount invested per scrip in that category.
198 |
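# If you are on a newer pandas version (0.25 or later), the same result can also be
# produced with 'named aggregation', which labels the output columns directly.

# In[ ]:


print (grouped['Amount Invested'].agg(total='sum', average='mean'))
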
199 | # ### Transformations
200 |
201 | # In[13]:
202 |
203 |
204 | import pandas as pd
205 |
206 | my_portfolio = {'Sector': ['IT', 'FMCG', 'Finance', 'Pharma', 'Pharma',
207 | 'FMCG', 'FMCG', 'IT', 'Finance', 'Real Estate'],
208 |
209 | 'Company': ['Infosys', 'Dabur', 'DHFL', 'Divis Lab', 'Lupin',
210 | 'Ruchira Papers', 'Britianna','Persistent Systems','Bajaj Finance', 'DLF'],
211 |
212 | 'MarketCap': ['Large Cap','Large Cap','Mid Cap','Mid Cap','Mid Cap',
213 | 'Small Cap','Mid Cap','Small Cap','Large Cap','Mid Cap'],
214 |
215 | 'Share Price': [1120,341,610,1123,741,185,5351,720,1937,217],
216 |
217 | 'Amount Invested': [24000,16000,50000,23000,45000,12000,52000,18000,5000,3500]}
218 |
219 | mp = pd.DataFrame(my_portfolio)
220 |
221 | print (mp)
222 |
223 | grouped = mp.groupby('MarketCap')
224 |
225 | z_score = lambda x: (x - x.mean()) / x.std()
226 |
227 | print (grouped.transform(z_score))
228 |
229 |
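# Note: in recent pandas versions, applying a numeric transformation to the whole
# grouped data frame raises an error because of the text columns ('Company' and
# 'Sector'). Selecting the numeric columns first keeps the example working everywhere.

# In[ ]:


print (grouped[['Share Price', 'Amount Invested']].transform(z_score))
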
230 | # ### Filtration
231 |
232 | # In[14]:
233 |
234 |
235 | print (mp.groupby('MarketCap').filter(lambda x: len(x)>= 3))
236 |
237 |
238 | # What does this mean?
239 | #
240 | # The filter keeps only the groups that contain 3 or more companies ('Large Cap' and 'Mid Cap') and drops the 'Small Cap' group, which has only 2.
241 |
242 | # ### Merging/Joining
243 |
244 | # In[15]:
245 |
246 |
247 | import pandas as pd
248 |
249 |
250 | left_df = pd.DataFrame({
251 | 'id':[1,2,3,4,5],
252 | 'Company': ['Infosys', 'SBI', 'Asian Paints', 'Maruti', 'Sun Pharma'],
253 | 'Sector':['IT','Banks','Paints and Varnishes','Auto','Pharma']})
254 |
255 | right_df = pd.DataFrame(
256 | {'id':[1,2,3,4,5],
257 | 'Company': ['NTPC', 'TCS', 'Lupin', 'ICICI', 'M&M'],
258 | 'Sector':['Power','IT','Pharma','Banks','Auto']})
259 |
260 |
261 | # In[16]:
262 |
263 |
264 | left_df
265 |
266 |
267 | # In[17]:
268 |
269 |
270 | right_df
271 |
272 |
273 | # In[18]:
274 |
275 |
276 | # Merge 2 DF on a key
277 |
278 | print (pd.merge(left_df,right_df, on='id'))
279 |
280 |
281 | # In[19]:
282 |
283 |
284 | print (pd.merge(left_df,right_df, on='Sector'))
285 |
286 |
287 | # In[20]:
288 |
289 |
290 | # Merge 2 DFs on multiple keys
291 |
292 | print (pd.merge(left_df,right_df,on=['Sector','Company']))
293 |
294 |
295 | # In[21]:
296 |
297 |
298 | # Merge using 'how' argument
299 |
300 | # Left join
301 |
302 | print (pd.merge(left_df, right_df, on='Sector', how='left'))
303 |
304 |
305 | # In[22]:
306 |
307 |
308 | # Right join
309 |
310 | print (pd.merge(left_df, right_df, how='right', on='Sector'))
311 |
312 |
313 | # In[23]:
314 |
315 |
316 | # Outer Join
317 |
318 | print (pd.merge(left_df, right_df, how='outer', on='Sector'))
319 |
320 |
321 | # In[24]:
322 |
323 |
324 | # Inner Join
325 |
326 | print (pd.merge(left_df, right_df, on='Sector', how='inner'))
327 |
328 |
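# A quick note on the 'how' argument: 'left' keeps every key of the left data frame,
# 'right' keeps every key of the right data frame, 'outer' keeps the union of the keys
# from both, and 'inner' (the default) keeps only the keys present in both data frames.
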
329 | # ### Concatenation
330 |
331 | # In[25]:
332 |
333 |
334 | print (pd.concat([left_df,right_df]))
335 |
336 |
337 | # In[26]:
338 |
339 |
340 | print (pd.concat([left_df, right_df],keys=['x','y']))
341 |
342 |
343 | # In[27]:
344 |
345 |
346 | print (pd.concat([left_df,right_df],keys=['x','y'],ignore_index=True))
347 |
348 |
349 | # In[28]:
350 |
351 |
352 | print (pd.concat([left_df,right_df],axis=1))
353 |
354 |
355 | # In[29]:
356 |
357 |
358 | # Concatenating using append
359 |
360 | print (left_df.append(right_df))
361 |
362 |
363 | # In[30]:
364 |
365 |
366 | print (left_df.append([right_df,left_df, right_df]))
367 |
368 |
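# Note: DataFrame.append() was deprecated and has been removed in pandas 2.0.
# pd.concat() produces the same result in current versions.

# In[ ]:


print (pd.concat([left_df, right_df]))                      # same as left_df.append(right_df)
print (pd.concat([left_df, right_df, left_df, right_df]))   # same as the list form of append above
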
--------------------------------------------------------------------------------
/Section-8/Pandas - Descriptive Statistical Functions .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # In this particular notebook, we will have a look at the different descriptive statistical functions available in pandas.
11 | #
12 | # # Notebook Contents
13 | #
14 | # ##### 1. DataFrame.count()
15 | # ##### 2. DataFrame.min()
16 | # ##### 3. DataFrame.max()
17 | # ##### 4. DataFrame.mean()
18 | # ##### 5. DataFrame.median()
19 | # ##### 6. DataFrame.mode()
20 | # ##### 7. DataFrame.sum()
21 | # ##### 8. DataFrame.diff()
22 | # ##### 9. DataFrame.pct_change()
23 | # ##### 10. DataFrame.var()
24 | # ##### 11. DataFrame.std()
25 | # ##### 12. DataFrame.rolling(window=).mean()
26 | # ##### 13. DataFrame.expanding(min_periods=).mean()
27 | # ##### 14. DataFrame.cov()
28 | # ##### 15. DataFrame.corr()
29 | # ##### 16. DataFrame.kurt()
30 | # ##### 17. DataFrame.skew()
31 |
32 | # In[619]:
33 |
34 |
35 | # Loading and viewing data
36 |
37 | # We have stored an 'infy.csv' file on our desktop
38 |
39 | import numpy as np
40 | import pandas as pd
41 |
42 | infy = pd.read_csv ('C:/Users/academy/Desktop/infy.csv')
43 |
44 |
45 | # Once you import or load your OHLC data in a data frame, it is a good habit to print the 'head' and 'tail' of that data frame.
46 | #
47 | # This helps you confirm whether the 'dates' of your data frame are correct. Further, the 'column names' are also displayed, which makes it easier to manipulate your data frame.
48 | #
49 |
50 | # In[620]:
51 |
52 |
53 | infy.head() # Printing the first five rows of your data frame
54 |
55 |
56 | # In[621]:
57 |
58 |
59 | infy.tail() # Printing the last five rows of your data frame
60 |
61 |
62 | # ### DataFrame.count()
63 | #
64 | # This method returns the number of non-null observations over the requested axis (by default, for each column).
65 |
66 | # In[622]:
67 |
68 |
69 | print (infy.count())
70 |
71 |
72 | # If you want to know, the number of non-null observations in a particular column then below is how you do it.
73 |
74 | # In[623]:
75 |
76 |
77 | print (infy["Close Price"].count())
78 |
79 |
80 | # ### DataFrame.min()
81 | #
82 | # This method returns the minimum value over the requested observations.
83 |
84 | # In[624]:
85 |
86 |
87 | print(infy["Close Price"].min())
88 |
89 |
90 | # ### DataFrame.max()
91 | #
92 | # This method returns the maximum value over the requested observations.
93 |
94 | # In[625]:
95 |
96 |
97 | print(infy["Close Price"].max())
98 |
99 |
100 | # ### DataFrame.mean()
101 | #
102 | # This method returns the mean of the requested observations.
103 |
104 | # In[626]:
105 |
106 |
107 | print(infy["Close Price"].mean())
108 |
109 |
110 | # ### DataFrame.median()
111 | #
112 | # This method returns the median of the requested observations.
113 |
114 | # In[627]:
115 |
116 |
117 | print(infy["Close Price"].median())
118 |
119 |
120 | # ### DataFrame.mode()
121 | #
122 | # This method returns the mode of the requested observations.
123 |
124 | # In[628]:
125 |
126 |
127 | print(infy["Close Price"].mode()) # The "Close Price" series of infosys stock is multi-modal
128 |
129 |
130 | # ### DataFrame.sum()
131 | #
132 | # This method returns the sum of all the values of the requested observations.
133 |
134 | # In[629]:
135 |
136 |
137 | print (infy["Total Traded Quantity"].sum())  # The total number of Infosys shares traded over the two-year period
138 |
139 |
140 | # ### DataFrame.diff()
141 | #
142 | # This method returns the 'difference' between the current observation and the previous observation.
143 |
144 | # In[630]:
145 |
146 |
147 | print (infy["Close Price"].diff())
148 |
149 |
150 | # ### DataFrame.pct_change()
151 | #
152 | # This method returns the percentage change of the current observation with the previous observation.
153 |
154 | # In[631]:
155 |
156 |
157 | print (infy["Close Price"].pct_change())
158 |
159 |
160 | # Visualising this gives us a general sense of the daily fluctuation in the closing price of the Infosys stock.
161 |
162 | # In[632]:
163 |
164 |
165 | import matplotlib.pyplot as plt
166 | get_ipython().magic(u'matplotlib inline')
167 |
168 | plt.figure(figsize = (20,10))
169 | plt.ylabel('Daily returns of Infosys')
170 | infy["Close Price"].pct_change().plot()
171 | plt.show()
172 |
173 |
174 | # ### DataFrame.var()
175 | #
176 | # This method returns the variance of the requested observations.
177 |
178 | # In[633]:
179 |
180 |
181 | print (infy["Close Price"].var())
182 |
183 |
184 | # ### DataFrame.std()
185 | #
186 | # This method returns the standard deviation of the requested observations.
187 |
188 | # In[634]:
189 |
190 |
191 | print (infy["Close Price"].std())
192 |
193 |
194 | # ### DataFrame.rolling(window=).mean()
195 | #
196 | # This method helps us to calculate the moving average of the observations.
197 |
198 | # In[635]:
199 |
200 |
201 | print (infy["Close Price"].rolling(window = 20).mean()) # The moving average window is 20 in this case
202 |
203 |
204 | # A moving average of the Close Price with window = 20 smooths the closing price data. Have a look: below, we have plotted the daily Closing Price of Infosys and its 20-day Moving Average against time.
205 |
206 | # In[636]:
207 |
208 |
209 | import matplotlib.pyplot as plt
210 | get_ipython().magic(u'matplotlib inline')
211 |
212 | plt.figure(figsize = (20,10))
213 | plt.ylabel('Closing Price')
214 |
215 | infy["Close Price"].rolling(window = 20).mean().plot()
216 | infy["Close Price"].plot()
217 | plt.show()
218 |
219 |
220 | # ### DataFrame.expanding(min_periods=).mean()
221 | #
222 | # This method returns the 'expanding' mean of the requested observations.
223 | #
224 | # A common alternative to the rolling mean is the expanding window mean, which returns the mean of all the observations available up to that point in time.
225 |
226 | # In[637]:
227 |
228 |
229 | print (infy["Close Price"].expanding(min_periods = 20).mean())
230 |
231 |
232 | # You may visualise expanding mean with the below code.
233 |
234 | # In[638]:
235 |
236 |
237 | import matplotlib.pyplot as plt
238 | get_ipython().magic(u'matplotlib inline')
239 |
240 | plt.figure(figsize = (20,10))
241 | plt.ylabel('Daily returns of Infosys')
242 |
243 | infy["Close Price"].expanding(min_periods = 20).mean().plot()
244 | infy["Close Price"].plot()
245 | plt.show()
246 |
247 |
248 | # Let us import another stock's data. We have saved the TCS (Tata Consultancy Services) data in our local machine as 'tcs.csv'.
249 |
250 | # In[639]:
251 |
252 |
253 | import numpy as np
254 | import pandas as pd
255 |
256 | tcs = pd.read_csv ('C:/Users/academy/Desktop/tcs.csv')
257 |
258 | tcs.head()
259 |
260 |
261 | # In[640]:
262 |
263 |
264 | tcs.tail()
265 |
266 |
267 | # In[641]:
268 |
269 |
270 | tcs["Close Price"].count()
271 |
272 |
273 | # ### DataFrame.cov()
274 | #
275 | # This method returns the covariance between the closing price of the Infosys stock with the closing price of the TCS stock.
276 |
277 | # In[642]:
278 |
279 |
280 | print (infy["Close Price"].cov(tcs["Close Price"]))
281 |
282 |
283 | # ### DataFrame.corr()
284 | #
285 | # This method returns the correlation between the closing price of the infosys stock with the closing price of the TCS stock.
286 |
287 | # In[643]:
288 |
289 |
290 | print (infy["Close Price"].corr(tcs["Close Price"]))
291 |
292 |
293 | # A correlation of 0.53 indicates a moderate positive correlation between these two stocks.
294 |
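# Correlation is simply the covariance scaled by the two standard deviations,
# so the two numbers printed above are consistent with each other.

# In[ ]:


print (infy["Close Price"].cov(tcs["Close Price"]) /
       (infy["Close Price"].std() * tcs["Close Price"].std()))   # equals the correlation above
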
295 | # ### DataFrame.kurt()
296 | #
297 | # This method returns the unbiased kurtosis over the requested data set using Fisher's definition of kurtosis (where the kurtosis of a normal distribution = 0).
298 |
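# Under Fisher's definition, 3 is subtracted from the raw (Pearson) kurtosis, so a normal
# distribution scores 0: positive values indicate fatter tails than normal (leptokurtic),
# negative values indicate thinner tails than normal (platykurtic).
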
299 | # In[644]:
300 |
301 |
302 | print (tcs["Close Price"].kurt())
303 |
304 |
305 | # A positive kurtosis value indicates a leptokurtic distribution.
306 |
307 | # In[645]:
308 |
309 |
310 | print (infy["Close Price"].kurt())
311 |
312 |
313 | # A negative kurtosis value indicates a platykurtic distribution.
314 |
315 | # ### DataFrame.skew()
316 | #
317 | # This method returns the unbiased skew of the requested data set.
318 |
319 | # In[646]:
320 |
321 |
322 | print (tcs["Close Price"].skew())
323 |
324 |
325 | # The distribution is positively skewed.
326 |
327 | # In[647]:
328 |
329 |
330 | print (infy["Close Price"].skew())
331 |
332 |
333 | # The distribution is positively skewed. However, TCS' distribution is more positively skewed than Infosys' distribution.
334 |
335 | # Let us visualise both the distributions and see whether the statements above make sense.
336 |
337 | # In[648]:
338 |
339 |
340 | # Infosys Distribution
341 |
342 | import seaborn as sns
343 |
344 | sns.set(color_codes = True)
345 |
346 | sns.distplot(infy["Close Price"]);
347 |
348 |
349 | # In the above diagram, you can see why the Infosys close price distribution is platykurtic and positively skewed.
350 |
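# Note: distplot() is deprecated in seaborn 0.11 and later; in newer versions,
# histplot() with a KDE overlay produces a similar chart.

# In[ ]:


sns.histplot(infy["Close Price"], kde=True);
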
351 | # In[649]:
352 |
353 |
354 | # TCS Distribution
355 |
356 | import seaborn as sns
357 |
358 | sns.set(color_codes = True)
359 |
360 | sns.distplot(tcs["Close Price"]);
361 |
362 |
363 | # In the above diagram, you can see why the TCS close price distribution is leptokurtic and positively skewed.
364 | #
365 | # A trained eye in statistics will also be able to see that the TCS stock closing prices are more positively skewed than the Infosys stock closing prices.
366 | #
367 |
368 | # ### In the upcoming iPython Notebook:
369 | #
370 | # We will continue understanding about Pandas: Grouping and Reshaping.
371 |
--------------------------------------------------------------------------------
/Section-8/Pandas - Indexing, Reindexing & Missing values .py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ## Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # In this notebook, we will have a look at indexing, reindexing and the handling of missing values in a pandas data frame.
11 | #
12 | # ## Notebook Contents
13 | #
14 | # ##### 1. Indexing using .loc()
15 | # ##### 2. Indexing using .iloc()
16 | # ##### 3. Indexing using .ix()
17 | # ##### 4. Missing Values
18 | # ##### 5. Data Frame.isnull()
19 | # ##### 6. Data Frame.notnull()
20 | # ##### 7. DataFrame.fillna()
21 | # ##### 8. DataFrame.dropna()
22 | # ##### 9. Replacing values
23 | # ##### 10. Reindexing
24 |
25 | # # Loading and Viewing Data
26 |
27 | # Before we start, let us import OHLC time series data of the Infosys stock for only 'two weeks'. With a smaller data frame, understanding 'Indexing' is more intuitive.
28 |
29 | # In[31]:
30 |
31 |
32 | # Loading and Viewing data
33 |
34 | import numpy as np
35 | import pandas as pd
36 |
37 | infy = pd.read_csv ('infy_twoweeks.csv')
38 |
39 |
40 | # In[32]:
41 |
42 |
43 | infy # This is the entire 'Infosys two weeks' time series data frame.
44 |
45 |
46 | # In[33]:
47 |
48 |
49 | infy.shape # This data frame has 10 rows and 12 columns
50 |
51 |
52 | # ## Indexing
53 | #
54 | # Indexing provides us with the axis labelling information in pandas. Further, it helps us to identify the exact position of data, which is important while analysing data.
55 | #
56 | # While studying indexing, we will also focus on how to slice and dice the data in a Data Frame according to our needs.
57 |
58 | # ## Indexing using .loc()
59 | #
60 | # It is a 'label-location' based indexer for selection of data points.
61 |
62 | # In[34]:
63 |
64 |
65 | # Using .loc()
66 |
67 | #import the pandas library and aliasing as pd
68 |
69 | import pandas as pd
70 | import numpy as np
71 |
72 | #select all rows for a specific column
73 |
74 | print (infy.loc[:,'Close Price'])
75 |
76 |
77 | # In[35]:
78 |
79 |
80 | # Select all the rows of these specific columns
81 |
82 | print (infy.loc[:, ['Close Price','Open Price']])
83 |
84 |
85 | # In[36]:
86 |
87 |
88 | # Select the first five rows of the specific columns
89 |
90 | # Remember that the '.loc()' method INCLUDES the rows and columns in its stop argument.
91 |
92 | # Observe that '0:4' will include 5 rows from index 0 to 4
93 |
94 | # The loc indexer takes the row arguments first and the column arguments second.
95 |
96 | print (infy.loc[:4,['Close Price','Open Price']])
97 |
98 |
99 | # In[37]:
100 |
101 |
102 | # Select the rows 2 to 7 of all the columns from the data frame
103 |
104 | print (infy.loc[2:7])
105 |
106 |
107 | # In[38]:
108 |
109 |
110 | # Select the rows and columns specified
111 |
112 | print (infy.loc[[0,1,2,3,4,5],['Open Price', 'High Price', 'Low Price', 'Close Price']])
113 |
114 |
115 | # In[39]:
116 |
117 |
118 | # To check if the fifth row's values are greater than 1130.
119 |
120 | print (infy.loc[4]>1130)
121 |
122 |
123 | # ## Indexing using .iloc()
124 | #
125 | # Another way to perform indexing is using the 'iloc()' method.
126 |
127 | # In[40]:
128 |
129 |
130 | # Using .iloc()
131 |
132 | # Select the first four rows of all the columns
133 |
134 | # Remember that the '.iloc()' method DOES NOT include the rows and columns in its stop argument
135 |
136 | # Observe that '0:4' will include 4 rows from index 0 to 3
137 |
138 | print (infy.iloc[:4])
139 |
140 |
141 | # In[41]:
142 |
143 |
144 | # Let us play more with the indexes of both rows and columns
145 |
146 | # Select the rows from index 1 to index 4 (4 rows in total) and Columns with index from 2 to 3 (2 columns)
147 |
148 | # .iloc() is similar to numpy array indexing
149 |
150 | # iloc is extremely useful when your data is not labelled and you need to refer to columns using their integer location instead
151 |
152 | print (infy.iloc[1:5, 2:4])
153 |
154 |
155 | # In[42]:
156 |
157 |
158 | # Selecting the exact requested columns
159 |
160 | print (infy.iloc[[1, 3, 5,7], [1, 3, 5, 7, 9]])
161 |
162 |
163 | # In[43]:
164 |
165 |
166 | # Selecting the second and third rows (positions 1 and 2) and all the columns
167 |
168 | print (infy.iloc[1:3, :])
169 |
170 |
171 | # In[44]:
172 |
173 |
174 | print (infy.iloc[:,1:3])
175 |
176 |
177 | # ## Indexing using .ix()
178 | #
179 | # Another way to perform indexing is using the 'ix()' method.
180 | #
181 | # ##### The ix indexer has been deprecated (and removed in pandas 1.0), but we have discussed it here just for your information
182 | #
183 |
184 | # In[45]:
185 |
186 |
187 | # Using .ix()
188 |
189 | # Remember that the '.ix()' method INCLUDES the rows and columns in its stop argument.
190 |
191 | # Observe that '0:4' will include 5 rows from index 0 to 4
192 |
193 | # We are selecting the first five rows and all the columns of our data frame
194 |
195 | print (infy.ix[:4])
196 |
197 |
198 | # In[46]:
199 |
200 |
201 | # Select rows from index 2 to index 5, only of the 'Close Price' Column
202 |
203 | print (infy.ix[2:5,'Close Price'])
204 |
205 |
206 | # In[47]:
207 |
208 |
209 | # You will be able to understand this by now!
210 |
211 | print (infy.ix[2:5, 4:9])
212 |
213 |
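# In current pandas versions, where .ix() no longer exists, the same selections are
# written with .loc / .iloc. Since the rows here carry the default integer labels:

# In[ ]:


print (infy.loc[2:5, 'Close Price'])   # same as infy.ix[2:5, 'Close Price'] (label based, stop included)
print (infy.iloc[2:6, 4:9])            # same as infy.ix[2:5, 4:9] (position based, stop excluded)
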
214 | # In[48]:
215 |
216 |
217 | # Just some revision for choosing columns in a data frame, since it is important
218 |
219 | # Choosing a specific column from a data frame
220 |
221 | print (infy['Close Price'])
222 |
223 |
224 | # In[49]:
225 |
226 |
227 | # Choosing multiple columns from a data Frame
228 |
229 | print (infy[['Open Price', 'High Price', 'Low Price', 'Close Price']])
230 |
231 |
232 | # ## Missing Values
233 | #
234 | # Missing values are values that are absent from the data frame. Usually, the data frames you work on will be large, and most of them will contain some 'missing values'.
235 | #
236 | # Hence, it becomes important for you to learn how to handle these missing values.
237 |
238 | # In[51]:
239 |
240 |
241 | # We have deliberately created 'missing values' in the same 'Infosys two weeks' data which you have used above.
242 |
243 | # Have a look at the entire data frame
244 |
245 | import numpy as np
246 | import pandas as pd
247 |
248 | infy = pd.read_csv ('infy_twoweeks_nan.csv')
249 |
250 | infy
251 |
252 |
253 | # ## DataFrame.isnull()
254 | #
255 | # This method returns a Boolean result.
256 | #
257 | # It will return 'True' if the data point has a 'NaN' (Not a Number) value. Missing data is represented by a NaN value.
258 |
259 | # In[52]:
260 |
261 |
262 | # Understanding the 'NaN' values of the 'Close Price' column in the infy data frame
263 |
264 | print (infy['Close Price'].isnull())
265 |
266 |
267 | # In[53]:
268 |
269 |
270 | # Understanding the 'NaN' values of the entire data frame
271 |
272 | print (infy.isnull())
273 |
274 |
275 | # ## DataFrame.notnull()
276 | #
277 | # This method returns a Boolean result.
278 | #
279 | # It will return 'True' if the data point is not a 'NaN' (Not a Number) value, and 'False' where it is. Missing data is represented by a NaN value.
280 |
281 | # In[54]:
282 |
283 |
284 | print (infy['Close Price'].notnull())
285 |
286 |
287 | # ## DataFrame.fillna()
288 | #
289 | # The .fillna() method will fill all the 'NaN' Values of the entire data frame or of the requested columns with a scalar value of your choice.
290 |
291 | # In[55]:
292 |
293 |
294 | # Replace NaN with a Scalar Value of 1000
295 |
296 | print (infy.fillna(1000))
297 |
298 |
299 | # In[56]:
300 |
301 |
302 | # This will fill the 'Close Price' column with the scalar value of 5
303 |
304 | print (infy['Close Price'].fillna(5))
305 |
306 |
307 | # In[57]:
308 |
309 |
310 | # If we want to do 'fillna()' using the 'backfill' method, then backfill will take the value from the next row
311 | # and fill the NaN value with that same value
312 |
313 | print (infy['Close Price'])
314 |
315 | print (infy['Close Price'].fillna(method='backfill'))
316 |
317 |
318 | # In[58]:
319 |
320 |
321 | # It is even possible to do it for the entire data frame with the 'backfill' values
322 |
323 | print (infy.fillna(method='backfill'))
324 |
325 |
326 | # In[59]:
327 |
328 |
329 | # 'bfill' does the same thing as 'backfill'
330 |
331 | print (infy['Close Price'])
332 |
333 | print (infy['Close Price'].fillna(method='bfill'))
334 |
335 |
336 | # In[60]:
337 |
338 |
339 | # If we want to do 'fillna()' using the 'ffill' method, then ffill will take the value from the previous row..
340 | # ..and fill the NaN value with that same value
341 |
342 | print (infy['Close Price'])
343 |
344 | print (infy['Close Price'].fillna(method='ffill'))
345 |
346 |
347 | # In[61]:
348 |
349 |
350 | # 'pad' does the same thing as 'ffill'
351 |
352 | print (infy['Close Price'])
353 |
354 | print (infy['Close Price'].fillna(method='pad'))
355 |
356 |
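# Note: in newer pandas versions (2.1+), fillna(method=...) is deprecated in favour of
# the dedicated .ffill() and .bfill() methods, which do exactly the same thing.

# In[ ]:


print (infy['Close Price'].ffill())   # same as fillna(method='ffill') / 'pad'
print (infy['Close Price'].bfill())   # same as fillna(method='bfill') / 'backfill'
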
357 | # ## DataFrame.dropna()
358 | #
359 | # This method will drop the entire 'row' or 'column' which has even a single 'NaN' value present, as per the request.
360 |
361 | # In[62]:
362 |
363 |
364 | # By default, dropna() will exclude or drop all the rows which have even one NaN value in them
365 |
366 | print (infy.dropna())
367 |
368 |
369 | # In[63]:
370 |
371 |
372 | # If we specify axis = 1, it will exclude or drop all the columns which have even one NaN value in them
373 |
374 | print (infy.dropna(axis=1))
375 |
376 |
377 | # ## Replacing values
378 | #
379 | # Replacing helps us to select any data point in the entire data frame and replace it with the value of our choice.
380 |
381 | # In[64]:
382 |
383 |
384 | # Replace Missing (or) Generic Values
385 |
386 | import pandas as pd
387 | import numpy as np
388 |
389 | # Let us do this a bit differently. We will create a Data Frame using the 'pd.DataFrame' constructor
390 |
391 | df = pd.DataFrame({'one':[10,20,30,40,50,2000],'two':[1000,0,30,40,50,60]})
392 |
393 | print (df)
394 |
395 |
396 | # In[65]:
397 |
398 |
399 | # .replace() will first find the value which you want to replace and replace it with the value you have given.
400 |
401 | # e.g.: Below, '1000' is replaced with '10' and '2000' is replaced with '60'
402 |
403 | print (df.replace({1000:10,2000:60}))
404 |
405 |
406 | # In[66]:
407 |
408 |
409 | print (infy['Close Price'])
410 |
411 |
412 | # In[67]:
413 |
414 |
415 | # This should be self explanatory
416 |
417 | print (infy['Close Price'].replace({1147.55:3000}))
418 |
419 |
420 | # In[68]:
421 |
422 |
423 | print (infy['Close Price'].replace({np.nan:1000000})) # The bare name 'NaN' is not defined in Python; use np.nan to refer to (and replace) missing values
424 |
425 |
426 | # ## Reindexing
427 | #
428 | # Reindexing changes the row labels and column labels of a DataFrame.
429 | #
430 | # To reindex means to conform the data to match a given set of labels along a particular axis.
431 |
432 | # In[69]:
433 |
434 |
435 | import pandas as pd
436 | import numpy as np
437 |
438 | print (infy)
439 |
440 |
441 | # In[70]:
442 |
443 |
444 | # Here we have changed the shape of data frame by using reindexing
445 |
446 | infy_reindexed = infy.reindex(index = [0,2,4,6,8], columns = ['Open Price', 'High Price', 'Low Price','Close Price'])
447 |
448 | print (infy_reindexed)
449 |
450 |
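# If we reindex with a label that is not present in the original data frame, the new
# row (or column) is filled with NaN values.

# In[ ]:


print (infy.reindex(index = [0, 2, 100], columns = ['Open Price', 'Close Price']))   # row 100 is all NaN
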
--------------------------------------------------------------------------------
/Section-7/Numpy Introduction to arrays.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### Notebook Instructions
5 | #
6 | # You can run the notebook document sequentially (one cell a time) by pressing shift + enter . While a cell is running, In [*] will display on the left. When it has been run, a number will display indicating the order in which it was run in the notebook. Example: In [8]:
7 | #
8 | # Enter edit mode by pressing Enter or using the mouse to click on a cell's editor area. Edit mode is indicated by a green cell border and a prompt showing in the editor area.
9 |
10 | # # NumPy
11 | #
12 | # NumPy is an acronym for "Numeric Python" or "Numerical Python".
13 | #
14 | # NumPy is the fundamental package for scientific computing with Python. It is an open source extension module for Python.
15 | #
16 | # 1. A powerful N-dimensional array object
17 | # 2. Sophisticated (broadcasting) functions
18 | # 3. Useful linear algebra, Fourier transform, and random number capabilities
19 | # 4. Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data
20 | # 5. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases
21 | #
22 | # Source : numpy.org
23 |
24 | # # Notebook Contents
25 | #
26 | # ##### 1. A simple numpy array example
27 | # ##### 2. Functions to create an array
28 | # ##### 3. Dimensionality of an array
29 | # ##### 4. Shape of an array
30 | # ##### 5. Just for fun
31 |
32 | # ## A simple numpy array example
33 | #
34 | # We will create two arrays SV and S_V
35 | # - Using Lists
36 | # - Using Tuples
37 |
38 | # In[4]:
39 |
40 |
41 | # We will first import the 'numpy' module
42 |
43 | import numpy as np
44 |
45 |
46 | # In[5]:
47 |
48 |
49 | stock_values = [20.3, 25.3, 22.7, 19.0, 18.5, 21.2, 24.5, 26.6, 23.2, 21.2] # This is a list
50 |
51 |
52 | # In[6]:
53 |
54 |
55 | # Converting list into array
56 |
57 | SV = np.array(stock_values)
58 |
59 | print (SV)
60 |
61 |
62 | # In[7]:
63 |
64 |
65 | type (SV) # Understanding the data type of 'SV'
66 |
67 |
68 | # In[8]:
69 |
70 |
71 | stockvalues = (20.3, 25.3, 22.7, 19.0, 18.5, 21.2, 24.5, 26.6, 23.2, 21.2) # This is a tuple
72 |
73 | # Converting tuple into array
74 |
75 | S_V = np.array(stockvalues)
76 |
77 | print (S_V)
78 |
79 |
80 | # In[9]:
81 |
82 |
83 | type(S_V) # Understanding the data type of 'S_V'
84 |
85 |
86 | # ## Functions to create arrays quickly
87 | #
88 | # The above discussed methods to create arrays require us to manually input the data points. To automatically create data points for an array we use these functions:
89 | # - **arange**
90 | # - **linspace**
91 | #
92 | # Both these functions create data points lying between two end points (a start and a stop) so that they are evenly distributed. For example, we can create 50 data points lying between 1 and 10.
93 | #
94 |
95 | # ### arange
96 | #
97 | # Numpy.arange returns an evenly spaced array, using a step (interval) given by the user.
98 |
99 | # Syntax:
100 | # #### arange ([start], [stop], [step], [dtype=None])
101 | #
102 | # The 'start' and the 'stop' determine the range of the array. The 'step' determines the spacing between two adjacent values. The datatype of the output array can be set with the parameter 'dtype'.
103 |
104 | # In[10]:
105 |
106 |
107 | # If the start parameter is not given, it will be set to 0
108 |
109 | # '10' is the stop parameter
110 |
111 | # The default interval for a step is '1'
112 |
113 | # If the 'dtype' is not given, then it will be automatically inferred from the other input arguments
114 |
115 | a = np.arange (10) # Syntax a = np.arange (0,10,1,None)
116 | print (a)
117 |
118 |
119 | # In[11]:
120 |
121 |
122 | # Here the range is '1 to 15'. It will include 1 and exclude 15
123 |
124 | b = np.arange (1,15)
125 | print (b)
126 |
127 |
128 | # In[12]:
129 |
130 |
131 | # We have changed the 'step' or spacing between two adjacent values, from a default 1, to a user given value of 2
132 |
133 | c = np.arange (0,21,2)
134 | print (c)
135 |
136 |
137 | # In[13]:
138 |
139 |
140 | # Even though our input arguments are of the datatype 'float', it will return an 'int' array
141 | # Since we have set the 'dtype' parameter as 'int'
142 |
143 | d = np.arange (1.3,23.3,2.1,int)
144 | print (d)
145 |
146 |
147 | # #### Try on your own
148 |
149 | # In[14]:
150 |
151 |
152 | # You may now be able to understand this example, all by yourself
153 |
154 | e = np.arange (1.4, 23.6, 1, float)
155 | print (e)
156 |
157 |
158 | # ### linspace
159 |
160 | # Numpy.linspace also returns an evenly spaced array, but takes the 'number of array elements' as an input from the user and computes the spacing automatically.
161 |
162 | # Syntax:
163 | # #### linspace(start, stop, num=50, endpoint=True, retstep=False)
164 | #
165 | # The 'start' and the 'stop' determine the range of the array. 'num' determines the number of elements in the array. If 'endpoint' is True, the array will include the stop value; if it is False, the array will exclude it.
166 | #
167 | # If the optional parameter 'retstep' is set, the function will return the value of the spacing between adjacent values.
168 |
169 | # In[15]:
170 |
171 |
172 | # By default, since the 'num' is not given, it will divide the range into 50 individual array elements
173 |
174 | # By default, it even includes the 'endpoint' of the range, since it is set to True by default
175 |
176 | a = np.linspace (1,10)
177 | print (a)
178 |
179 |
180 | # In[16]:
181 |
182 |
183 | # This time around, we have specified that we want the range of 1 - 10 to be divided into 8 individual array elements
184 |
185 | b = np.linspace (1,10,8)
186 | print (b)
187 |
188 |
189 | # In[17]:
190 |
191 |
192 | # In this line, we have specified not to include the end point of the range
193 |
194 | c = np.linspace (1,10,8,False)
195 | print (c)
196 |
197 |
198 | # In[18]:
199 |
200 |
201 | # In this line, we have specified 'retstep' as True, so the function will also return the value of the spacing between adjacent values
202 |
203 | d = np.linspace (1,10,8,True,True)
204 | print (d)
205 |
206 |
207 | # #### Try on your own
208 |
209 | # In[19]:
210 |
211 |
212 | # This line should be self-explanatory
213 |
214 | e = np.linspace(1,10,10,True,True)
215 | print (e)
216 |
217 |
218 | # ## Dimensionality of Arrays
219 |
220 | # ### Zero Dimensional Arrays or Scalars
221 |
222 | # What we encountered in the above examples are all 'one dimensional arrays', also known as 'vectors'. 'Scalars' are zero-dimensional arrays, which hold exactly one element.
223 |
224 | # In[20]:
225 |
226 |
227 | # Creating a 'scalar'
228 |
229 | a = np.array (50) # A scalar array holds exactly one element!
230 |
231 | print ("a:", a)
232 |
233 |
234 | # In[21]:
235 |
236 |
237 | # To print the dimension of any array, we use the 'np.ndim' method
238 |
239 | print ("The dimension of array 'a' is", np.ndim (a))
240 |
241 |
242 | # In[22]:
243 |
244 |
245 | # To know the datatype of the array
246 |
247 | print ("The datatype of array 'a' is", a.dtype)
248 |
249 |
250 | # In[23]:
251 |
252 |
253 | # Combining it all together
254 |
255 | scalar_array = np.array("one_element")
256 | print (scalar_array, np.ndim (scalar_array), scalar_array.dtype)
257 |
258 |
259 | # ## One Dimensional Arrays
260 | #
261 | # One dimensional arrays are arrays whose elements are arranged along a single axis, i.e. in a single row.
262 |
263 | # In[43]:
264 |
265 |
266 | one_d_array = np.array(["one_element", "second_element"])
267 |
268 | print (one_d_array, np.ndim(one_d_array), one_d_array.dtype)
269 |
270 |
271 | # In[44]:
272 |
273 |
274 | # We have already worked with one-dimensional arrays. Let us revise what we did so far!
275 |
276 | a = np.array([1, 1, 2, 3, 5, 8, 13, 21]) # Fibonnacci series
277 | b = np.array([4.4, 6.6, 8.8, 10.1, 12.12])
278 |
279 | print("a: ", a)
280 | print("b: ", b)
281 |
282 | print("Type of 'a': ", a.dtype)
283 | print("Type of 'b': ", b.dtype)
284 |
285 | print("Dimension of 'a':", np.ndim(a))
286 | print("Dimension of 'b':", np.ndim(b))
287 |
288 |
289 | # ## Two Dimensional Arrays
290 | #
291 | # Two-dimensional arrays arrange their elements along two axes, i.e. in rows and columns.
292 |
293 | # In[45]:
294 |
295 |
296 | # The elements of the 2D arrays are stored as 'rows' and 'columns'
297 |
298 | two_d_array = np.array([ ["row1col1", "row1col2", "row1col3"],
299 | ["row2col1", "row2col2", "row2col3"]])
300 |
301 | print(two_d_array)
302 |
303 | print("Dimension of 'two_d_array' :", np.ndim (two_d_array))
304 |
305 |
306 | # In[46]:
307 |
308 |
309 | # Another example of a data table!
310 | # You can see how working with numpy arrays will help us working with dataframes further on!
311 |
312 | studentdata = np.array([ ["Name", "Year", "Marks"],
313 | ["Bela", 2014, 78.2],
314 | ["Joe", 1987, 59.1],
315 | ["Sugar", 1990, 70]])
316 |
317 | print(studentdata)
318 |
319 | print("Dimension of 'studentdata' :", np.ndim (studentdata))
320 |
321 |
322 | # Even though Year and Marks are numeric, they are stored as strings by default (a NumPy array holds a single data type), so I can't perform any numerical operations on these values.
323 | #
324 | # That is where dataframe, which we will study in next section, becomes powerful 2-d data structures to be used.
325 | #
326 | # For example:
327 |
328 | # In[41]:
329 |
330 |
331 | # Example when we save this data as a dataframe and not as a numpy array.
332 |
333 | import numpy as np
334 | import pandas as pd
335 |
336 | studentdata1 = {
337 | "Name": ["Bela", "Joe", "Sugar"],
338 | "Year": [2014, 1987, 1990],
339 | "Marks": [78.2, 59.1, 70]
340 | }
341 |
342 | studentdata1_df = pd.DataFrame (studentdata1)
343 | print (studentdata1_df)
344 | print(np.mean(studentdata1_df.Marks))
345 |
346 | # Now we are able to find average of Marks of these three students.
347 |
348 |
349 | # In[29]:
350 |
351 |
352 | # The elements of the 2D arrays are stored as 'rows' and 'columns'
353 |
354 | a = np.array([ [1.8, 2.4, 5.3, 8.2],
355 | [7.8, 5.1, 9.2, 17.13],
356 | [6.1, -2.13, -6.3, -9.1]])
357 | print(a)
358 | print("Dimension of 'a' :", np.ndim (a))
359 |
360 | # In this array we have 3 rows and 4 columns
361 |
362 |
363 | # In[30]:
364 |
365 |
366 | # A 3D array is an 'array of arrays'. Have a quick look at it
367 |
368 | b = np.array([ [[111, 222], [333, 444]],
369 | [[121, 212], [221, 222]],
370 | [[555, 560], [565, 570]] ])
371 |
372 | print(b)
373 | print("Dimension of 'b' :", np.ndim (b))
374 |
375 | # In this array, there are three, 2-D arrays
376 |
377 |
378 | # ## Shape of an array
379 | #
380 | # **What it is:** The shape of an array returns the number of rows (axis = 0) and the number of columns (axis = 1)
381 | #
382 | # **Why is it important to understand:** It helps you to understand the number of rows and columns in an array
383 | #
384 | # **How is it different from Dimensions:** The dimension (ndim) tells you how many axes an array has, while the shape tells you how many elements lie along each axis.
385 |
386 | # In[31]:
387 |
388 |
389 | a = np.array([ [11, 22, 33],
390 | [12, 24, 36],
391 | [13, 26, 39],
392 | [14, 28, 42],
393 | [15, 30, 45],
394 | [16, 32, 48]])
395 |
396 | print (a)
397 |
398 |
399 | # In[32]:
400 |
401 |
402 | print(a.shape)
403 |
404 |
405 | # We can even change the shape of the array.
406 |
407 | # In[33]:
408 |
409 |
410 | a.shape = (9,2)
411 | print (a)
412 |
413 |
414 | # You might have guessed by now that the new shape must correspond to the number of elements of the array, i.e. the total size of the new array must be the same as the old one. NumPy will raise an exception if this is not the case, as shown below.
415 |
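# For example, 'a' currently holds 18 elements, so a 4 x 5 shape (20 elements) is invalid:

# In[ ]:


try:
    a.shape = (4, 5)
except ValueError as error:
    print ("Exception raised:", error)
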
416 | # In[37]:
417 |
418 |
419 | # Shape of a zero-dimensional array (a scalar): its shape is an empty tuple
420 |
421 | a = np.array(165416113)
422 | print(np.shape(a))
423 |
424 |
425 | # ### In the upcoming iPython Notebook:
426 | #
427 | # We will continue understanding arrays and learning about Array indexing, Array Slicing and Arrays of Zeros and Ones, but before that let us solve some Quiz questions and Exercises.
428 |
--------------------------------------------------------------------------------