├── .gitattributes ├── 9781484241080.jpg ├── Ch01 ├── Embarak _Ch01_Introduction- String Processing.pdf ├── Embarak _Ch01_Introduction- String Processing.py ├── Embarak _Ch01_Introduction_ Part 1.pdf ├── Embarak _Ch01_Introduction_ Part 1.py ├── Embarak _Ch01_Introduction_ Part 2.pdf ├── Embarak _Ch01_Introduction_ Part 2.py ├── Embarak _Ch01_Introduction_Functions and Modules.pdf └── Embarak _Ch01_Introduction_Functions and Modules.py ├── Ch02 ├── Embarak _Ch02_The importance of data visualization in business.pdf └── Embarak _Ch02_The importance of data visualization in business.py ├── Ch03 ├── Embarak _Ch03_Data Collections Structure .pdf └── Embarak _Ch03_Data Collections Structure .py ├── Ch04 ├── Embarak _Ch04_File IO Processing _ Regular Expressions .pdf ├── Embarak _Ch04_File IO Processing _ Regular Expressions .py ├── MailsData.txt └── Wild-Card.txt ├── Ch05 ├── 1. Export1_Columns.csv ├── 1. Export1_Columns.xlsx ├── 1. Export2_Columns.csv ├── 1. Export2_Columns.xlsx ├── Embarak _Ch05_Data Gathering and Cleaning.pdf ├── Embarak _Ch05_Data Gathering and Cleaning.py ├── Import_1.xlsx ├── Import_2.xlsx ├── Sales.csv └── Sales.xlsx ├── Ch06 ├── Embarak _Ch06_Data Exploring and Analysis.pdf └── Embarak _Ch06_Data Exploring and Analysis.py ├── Ch07 ├── Embarak _Ch07_Data Visualization.pdf ├── Embarak _Ch07_Data Visualization.py └── Salaries.csv ├── Ch08 ├── Death data.csv ├── Embarak Ch08 NCHS Case Study.pdf ├── Embarak Ch08 NCHS Case Study.py └── NCHS.csv ├── Contributing.md ├── LICENSE.txt ├── README.md └── errata.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484241080.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/9781484241080.jpg -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction- String Processing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction- String Processing.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction- String Processing.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[3]: 5 | 6 | 7 | var1 = 'Welcome to Dubai' 8 | var2 = "Python Programming" 9 | 10 | print ("var1[0]:", var1[0]) 11 | print ("var2[1:5]:", var2[1:5]) 12 | 13 | 14 | # In[5]: 15 | 16 | 17 | st1="Hello" 18 | st2=' World' 19 | fullst=st1 + st2 20 | print (fullst) 21 | 22 | 23 | # In[11]: 24 | 25 | 26 | # looking inside strings 27 | fruit = 'banana' 28 | letter= fruit[1] 29 | print (letter) 30 | 31 | index=3 32 | w = fruit[index-1] 33 | 34 | print (w) 35 | print (len(fruit)) 36 | 37 | 38 | # In[14]: 39 | 40 | 41 | # Convert string to int 42 | str3 = '123' 43 | str3= int (str3)+1 44 | print (str3) 45 | 46 | 47 | # In[15]: 48 | 49 | 50 | # Read and convert data 51 | name=input('Enter your name: ') 52 | age=input('Enter your age: ') 53 | age= int(age) + 1 54 | 55 | print ("Name:%s"% name ,"\t Age:%d"% age) 56 | 57 | 58 | # In[30]: 59 | 60 | 61 | # Looking through string 62 | fruit ='banana' 63 | index=0 64 | while index< len(fruit): 65 | letter = fruit [index] 66 | print (index, letter ) 67 | index=index+1 68 | 69 | 70 | # In[31]: 71 | 72 | 73 | print ("\n Implementing iteration with continue") 74 | while True: 75 | line = input('Enter your data>') 76 | if line[0]=='#': 77 | continue 78 | 
if line =='done': 79 | break 80 | print (line ) 81 | print ('End!') 82 | 83 | 84 | # In[32]: 85 | 86 | 87 | print ("\nPrinting in reverse order") 88 | index=len(fruit)-1 89 | while index>=0 : 90 | letter = fruit [index] 91 | print (index, letter ) 92 | index=index-1 93 | 94 | 95 | # In[33]: 96 | 97 | 98 | Country='Egypt' 99 | for letter in Country: 100 | print (letter) 101 | 102 | 103 | # In[2]: 104 | 105 | 106 | # Looking and counting 107 | word='banana' 108 | count=0 109 | for letter in word: 110 | if letter =='a': 111 | count +=1 112 | print ("Number of a in ", word, "is :", count ) 113 | 114 | 115 | # In[3]: 116 | 117 | 118 | # Slicing Strings 119 | s="Welcome to Higher Colleges of Technology" 120 | print (s[0:4]) 121 | print (s[6:7]) 122 | print (s[6:20]) 123 | print (s[:12]) 124 | print (s[2:]) 125 | print (s [:]) 126 | print (s) 127 | 128 | 129 | # In[43]: 130 | 131 | 132 | var1 =' Higher Colleges of Technology ' 133 | var2='College' 134 | var3='g' 135 | 136 | print ( var2 in var1) 137 | print ( var2 not in var1) 138 | 139 | 140 | # In[29]: 141 | 142 | 143 | var1 =' Higher Colleges of Technology ' 144 | var2='College' 145 | var3='g' 146 | 147 | print (var1.upper()) 148 | print (var1.lower()) 149 | print ('WELCOME TO'.lower()) 150 | print (len(var1)) 151 | print (var1.count(var3, 2, 29) ) # find how many g letters in var1 152 | print ( var2.count(var3) ) 153 | 154 | 155 | # In[33]: 156 | 157 | 158 | print (var1.endswith('r')) 159 | print (var1.startswith('O')) 160 | print (var1.find('h', 0, 29)) 161 | 162 | print (var1.lstrip()) # It removes all leading whitespace of a string in var1 163 | print (var1.rstrip()) # It removes all trailing whitespace of a string in var1 164 | print (var1.strip()) 165 | print ('\n') 166 | print (var1.replace('Colleges', 'University')) 167 | 168 | 169 | # In[39]: 170 | 171 | 172 | # Parsing and Extracting strings 173 | Maindata = 'From ossama.embarak@hct.ac.ae Sunday Jan 4 09:30:50 2017' 174 | atpost = Maindata.find('@') 175 | 
print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 176 | print (atpost) 177 | print (Maindata[ :atpost]) 178 | data = Maindata[ :atpost] 179 | name=data.split(' ') 180 | print (name) 181 | print (name[1].replace('.', ' ').upper()) 182 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 183 | 184 | 185 | # In[41]: 186 | 187 | 188 | # Another way to split strings 189 | Maindata = 'From ossama.embarak@hct.ac.ae Sunday Jan 4 09:30:50 2017' 190 | name= Maindata[ :atpost].replace('From','').upper() 191 | print (name.replace('.',' ').upper().lstrip()) 192 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 193 | sppos=Maindata.find(' ', atpost) 194 | print (sppos) 195 | print (Maindata[ :sppos]) 196 | host = Maindata [atpost + 1 : sppos ] 197 | print (host) 198 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 199 | 200 | 201 | # # EXERCISES AND ANSWERS 202 | 203 | # In[47]: 204 | 205 | 206 | var1 ='HCT' 207 | index=0 208 | while index< len(var1): 209 | letter = var1[index] 210 | print (letter) 211 | index+=1 212 | 213 | 214 | # In[48]: 215 | 216 | 217 | var1 ='HCT' 218 | index=0 219 | while len(var1)> index: 220 | letter = var1[index] 221 | print (letter) 222 | index+=1 223 | 224 | 225 | # In[54]: 226 | 227 | 228 | strvar1 = 'X-DSPAM-Confidence: 0.8475' 229 | post = strvar1.find(':') 230 | numer=float(strvar1[post+1:]) 231 | print (numer ) 232 | 233 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_ Part 1.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 1.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 1 Getting Started with Python 5 | 6 | # 
# Chapter 1 warm-up cells (notebook export): interactive help, assignment,
# line continuation, string quoting, console input, expressions, comments.

# `get_ipython` exists only inside IPython/Jupyter.  When this file is run as
# a plain script, fall back to printing the builtin help object's description.
try:
    get_ipython().run_line_magic("pinfo", "help")
except NameError:
    print(type(help).__doc__)

# Tuple unpacking assigns one value per name, left to right.
age, mark, code = 10, 75, "CIS2403"
print(age)
print(mark)
print(code)

# A statement may span several physical lines when wrapped in parentheses.
# (The sum was previously split across lines with no continuation at all,
# which is a SyntaxError.)
TV = 15
Mobile = 20
Tablet = 30
total = (TV +
         Mobile +
         Tablet)
print(total)

# The same sum written on a single line.
total = TV + Mobile + Tablet
print(total)

# Brackets continue a statement implicitly.
days = ['Monday', 'Tuesday', 'Wednesday',
        'Thursday', 'Friday']
print(days)

# Single, double and triple quotes all build str objects; only the
# triple-quoted form may contain a literal line break.
sms1 = 'Hellow World'
sms2 = "Hellow World"
sms3 = """ Hellow World"""
sms4 = """ Hellow
World"""

print(sms1)
print(sms2)
print(sms3)
print(sms4)

# Semicolons separate several statements on one line (shown on purpose).
TV = 15; name = "Nour"; print(name); print("Welcome to\nDubai Festivale 2018")

# input() always returns a string; int() raises ValueError on non-numeric text.
name = input("Enter your name ")
age = int(input("Enter your age"))

print("\nName=", name); print("\nAge=", age)


# ### 1.2 Declaring Variable and Assigning Values

age = 11
name = "Nour"
tall = 100.50

print(age)
print(name)
print(tall)

# Chained assignment binds every name to the same value.
age = mark = code = 25
print(age)
print(mark)
print(code)

age, mark, code = 10, 75, "CIS2403"
print(age)
print(mark)
print(code)

# Expressions
x = 0.6
x = 3.9 * x * (1 - x)
print(round(x, 2))

# Python single line comment

# A bare triple-quoted string is an expression statement with no effect, so
# it is often used as a multi-line comment.
''' This
 Is
 Multipline comment'''

# "%s" substitutes str() of the right-hand operand.
print("pi=%s" % "3.14159")
# printf-style and str.format examples, time/calendar helpers, and operators.

# "%02f" only sets a minimum field width of 2 with zero padding; the default
# six decimal places are still printed.
print("The value of %s is = %02f" % ("pi", 3.14159))

# "%.2f" keeps two decimals; "%.2d" converts the float weight to an integer
# and prints at least two digits.
print("Your name is %s, and your height is %.2f while your weight is %.2d"
      % ('Ossama', 172.156783, 75.56647))

# Named placeholders are looked up in the mapping on the right.
print("Hi %(Name)s, your height is %(height).2f"
      % {'Name': "Ossama", 'height': 172.156783})

# str.format: positional fields may be reordered freely.
x = "price is"
print("{1} {0} {2}".format(x, "The", 1920.345))


class A():
    """Minimal holder used to demonstrate attribute access in a format field."""

    x = 9  # class attribute read by the "{3.x}" field below


w = A()
# Format fields may index into sequences ({1[2]}), mappings ({2[test]}) and
# attributes ({3.x}) of the positional arguments.
print("{0} {1[2]} {2[test]} {3.x}".format(
    "This", ["a", "or", "is"], {"test": "another"}, w))
print("{1[1]} {0} {1[2]} {2[test]} {3.x}".format(
    "This", ["a", "or", "is"], {"test": "another"}, w))


import time

# Seconds since the epoch -> struct_time in the local zone -> readable text.
localtime = time.asctime(time.localtime(time.time()))
print("Formatted time :", localtime)
print(time.localtime())
print(time.time())


import calendar

# Print the whole 2018 calendar to standard output.
calendar.prcal(2018)


########### End

# Floor division discards the remainder.
print(13 // 5)

# Comparison operators evaluate to booleans.
print(13 < 5)
print(13 > 5)
print(13 <= 5)
print(2 >= 5)
print(13 == 5)
print(13 != 5)

# Augmented assignment; x is reset before each operator so the results are
# independent of one another.
x = 10
print(x)
x = 10
x /= 2
print(x)
x = 10
x += 7
print(x)
x = 10
x -= 5
print(x)
x = 10
x *= 5
print(x)
x = 13
x %= 5
print(x)
x = 10
x **= 3
print(x)
x = 10
x //= 2
print(x)

# Logical operators
x = 10 > 5 and 4 > 20
print(x)

x = 10 > 5 or 4 > 20
print(x)

x = not (10 < 4)
print(x)

# True division, remainder and exponentiation.
print(13 / 5)
print(13 % 5)
print(2 ** 3)
# Comments, a running-maximum loop, then pandas/NumPy basics and lambdas.

#single line comment

'''This is
multiline comment'''

# Expressions
x = 0.6
x = 3.9 * x * (1 - x)
print(round(x, 2))

# Running maximum: None marks "nothing seen yet".
largest = None
print('Before:', largest)
for val in [30, 45, 12, 90, 74, 15]:
    if largest is None or val > largest:
        largest = val
    print('Loop:', val, largest)
print('Largest:', largest)


# # Pandas and other libraries

#Create series from array using pandas and numpy
import pandas as pd
import numpy as np
data = np.array([90, 75, 50, 66])
s = pd.Series(data, index=['A', 'B', 'C', 'D'])
print(s)

# Positional access must go through .iloc: plain s[1] on a label index relies
# on a deprecated integer fallback.
print(s.iloc[1])

#Create series from dictionary using pandas and numpy
data = {'Ahmed': 92, 'Ali': 55, 'Omar': 83}
# The explicit index reorders the dictionary entries.
s = pd.Series(data, index=['Ali', 'Ahmed', 'Omar'])
print(s)

print(s.iloc[1:])


# # DataFrame

data = [['Ahmed', 35], ['Ali', 17], ['Omar', 25]]
DataFrame1 = pd.DataFrame(data, columns=['Name', 'Age'])
print(DataFrame1)

# Results are printed explicitly: a bare expression shows nothing when the
# file runs as a script rather than as a notebook cell.
print(DataFrame1.iloc[1:])

data = {'Name': ['Ahmed', 'Ali', 'Omar', 'Salwa'], 'Age': [35, 17, 25, 30]}
dataframe2 = pd.DataFrame(data, index=[100, 101, 102, 103])
print(dataframe2)

print(dataframe2.iloc[:2])

print(dataframe2['Name'])


# # Panel

# creating a panel
data = {'Temprature Day1': pd.DataFrame(np.random.randn(4, 3)),
        'Temprature Day2': pd.DataFrame(np.random.randn(4, 2))}
# pandas.Panel no longer exists in current pandas; fall back to the plain
# dict of DataFrames, which supports the same item lookup used below.
try:
    p = pd.Panel(data)
except (AttributeError, TypeError):
    p = data
print(p['Temprature Day1'])


# # 1.6.3 PYTHON LAMBDAS, AND THE NUMPY LIBRARY.

# A lambda is an anonymous single-expression function.
result = lambda x, y: x * y
print(result(2, 5))

print(result(4, 10))


def fahrenheit(T):
    """Convert a temperature *T* from degrees Celsius to Fahrenheit."""
    return ((float(9) / 5) * T + 32)


def celsius(T):
    """Convert a temperature *T* from degrees Fahrenheit to Celsius."""
    return (float(5) / 9) * (T - 32)


Temp = (15.8, 25, 30.5, 25)

# map() is lazy on Python 3; list() materialises the converted values.
F = list(map(fahrenheit, Temp))
C = list(map(celsius, F))
print(F)
print(C)

Celsius = [39.2, 36.5, 37.3, 37.8]
Fahrenheit = map(lambda x: (float(9) / 5) * x + 32, Celsius)
for x in Fahrenheit:
    print(x)

# filter() keeps the items for which the predicate is true (even numbers).
fib = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
result = filter(lambda x: x % 2 == 0, fib)
for x in result:
    print(x)

# reduce() is not a builtin on Python 3; it lives in functools.
from functools import reduce

# Fold the list down to its maximum ...
f = lambda a, b: a if (a > b) else b
print(reduce(f, [47, 11, 42, 102, 13]))

# ... and to its sum.
print(reduce(lambda x, y: x + y, [47, 11, 42, 13]))

a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([[7, 8, 9], [10, 11, 12]])
print(np.add(a, b))

print(np.subtract(a, b))  #Same as a-b


# # Series

animals = ["Lion", "Tiger", "Bear"]
print(pd.Series(animals))

marks = [95, 84, 55, 75]
print(pd.Series(marks))

# Create series from dictionary where indices are the dictionary keys
quiz1 = {"Ahmed": 75, "Omar": 84, "Salwa": 70}
q = pd.Series(quiz1)
print(q)
# query series
# NOTE(review): `q` (the quiz-mark Series) and `pd` are expected to come from
# the preceding cells of this script - confirm when running this part alone.
print(q.loc['Ahmed'])   # lookup by label
print(q['Ahmed'])       # plain indexing with a label does the same
print(q.iloc[2])        # lookup by position
# The notebook also showed q[2]; an integer key on a string index relies on
# a deprecated positional fallback, so .iloc above is the supported spelling.

# implement numpy operation on a series
s = pd.Series([70, 90, 65, 25, 99])
print(s)

# Explicit Python loop over the values ...
total = 0
for val in s:
    total += val
print(total)

# ... versus the vectorised NumPy reduction giving the same sum.
import numpy as np
total = np.sum(s)
print(total)

# add new values to series
s = pd.Series([99, 55, 66, 88])
s.loc['Ahmed'] = 85   # assigning to a missing label enlarges the Series
print(s)

# Append Series
test = [95, 84, 55, 75]
marks = pd.Series(test)
s = pd.Series([99, 55, 66, 88])
s.loc['Ahmed'] = 85
print(s)
# Series.append was removed in pandas 2.0; pd.concat is the replacement and
# likewise returns a new Series without modifying its inputs.
NewSeries = pd.concat([s, marks])
print(NewSeries)


# # 1.6.6 RUN BASIC INFERENTIAL STATISTICAL ANALYSES.
# Basic inferential statistics: simulation, regression, chi-square curves,
# pair plots, a binomial sample, descriptive statistics, and an exercise.

import numpy as np

# Estimate P(X >= 15) for X ~ Binomial(20, 0.5) from 10,000 simulated draws.
x = np.random.binomial(20, .5, 10000)
print((x >= 15).mean())

# NOTE: a never-executed cell used to call sb.regplot here, before `sb` and
# `df` were defined and with column names that the dataset does not contain.
# The working version follows.


# # Regression

import seaborn as sb
from matplotlib import pyplot as plt
df = sb.load_dataset('tips')   # seaborn's load_dataset fetches the sample data
sb.regplot(x="total_bill", y="tip", data=df)
plt.xlabel('Total Bill')
plt.ylabel('Bill Tips')

plt.show()

print(df)


# # Python - Chi-Square Test

from scipy import stats

x = np.linspace(0, 10, 100)
fig, ax = plt.subplots(1, 1)

linestyles = [':', '--', '-.', '-']
deg_of_freedom = [1, 4, 7, 6]
# The loop variable is `dof` so it does not overwrite the `df` DataFrame;
# each curve is labelled so that plt.legend() has entries to display.
for dof, ls in zip(deg_of_freedom, linestyles):
    ax.plot(x, stats.chi2.pdf(x, dof), linestyle=ls,
            label='df = {0}'.format(dof))

plt.xlim(0, 10)
plt.ylim(0, 0.4)

plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Chi-Square Distribution')

plt.legend()
plt.show()


# # correlation

import seaborn as sns
df = sns.load_dataset('iris')

#without regression
sns.pairplot(df, kind="scatter")
plt.show()


from scipy.stats import binom

# 1000 draws from Binomial(n=20, p=0.8).
data_binom = binom.rvs(n=20, p=0.8, loc=0, size=1000)
# NOTE(review): distplot is deprecated in recent seaborn releases in favour of
# histplot/displot - confirm the installed seaborn version before switching.
ax = sb.distplot(data_binom,
                 kde=True,
                 color='blue',
                 hist_kws={"linewidth": 25, 'alpha': 1})
ax.set(xlabel='Binomial', ylabel='Frequency')


import pandas as pd

d = {'Name': pd.Series(['Ahmed', 'Omar', 'Ali', 'Salwa', 'Majid', 'Othman',
                        'Gameel', 'Ziad', 'Ahlam', 'Zahrah', 'Ayman', 'Alaa']),
     'Age': pd.Series([34, 26, 25, 27, 30, 54, 23, 43, 40, 30, 28, 46]),
     'Height': pd.Series([114.23, 173.24, 153.98, 172.0, 153.20, 164.6, 183.8,
                          163.78, 172.0, 164.80, 174.10, 183.65])}

#Create a DataFrame
df = pd.DataFrame(d)

# Calculate the standard deviation
# numeric_only=True skips the text column 'Name', which has no std/mean/median.
print(df.std(numeric_only=True))

print(df.describe())

print("Mean Values in the Distribution")
print(df.mean(numeric_only=True))
print("*******************************")
print("Median Values in the Distribution")
print(df.median(numeric_only=True))
print("*******************************")
print("Mode Values in the Distribution")
print(df['Height'].mode())


# ### 1.5 EXERCISES

# Store input numbers:
num1 = input('Enter first number: ')
num2 = input('Enter second number: ')

# Convert once; float() raises ValueError on non-numeric text.
first = float(num1)
second = float(num2)

sumval = first + second  # Add two numbers
minval = first - second  # Subtract two numbers
mulval = first * second  # Multiply two numbers
# Divide two numbers; None marks an undefined quotient (zero divisor).
divval = first / second if second != 0 else None

# Display the sum
print('The sum of {0} and {1} is {2}'.format(num1, num2, sumval))
# Display the subtraction
print('The subtraction of {0} and {1} is {2}'.format(num1, num2, minval))
# Display the multiplication
print('The multiplication of {0} and {1} is {2}'.format(num1, num2, mulval))
# Display the division
if divval is None:
    print('The division of {0} and {1} is undefined (division by zero)'.format(num1, num2))
else:
    print('The division of {0} and {1} is {2}'.format(num1, num2, divval))


# A. Write a Python script that prompts the user for the lengths of a
# triangle's first side (a), second side (b) and third side (c), calculates
# the semi-perimeter (s), then calculates the triangle's area and displays it.
# Area of a triangle = (s*(s-a)*(s-b)*(s-c)) ** (1/2).
737 | a = float(input('Enter first side: ')) 738 | b = float(input('Enter second side: ')) 739 | c = float(input('Enter third side: ')) 740 | s = (a + b + c) / 2 # calculate the semi-perimeter 741 | 742 | # calculate the area 743 | area = (s*(s-a)*(s-b)*(s-c)) ** 0.5 744 | print('The area of the triangle is %0.2f' %area) 745 | 746 | 747 | # In[7]: 748 | 749 | 750 | import random 751 | a = int(input('Enter the starting value : ')) 752 | b = int(input('Enter the end value : ')) 753 | print(random.randint(a,b)) 754 | random.sample(range(a, b), 3) 755 | 756 | 757 | # In[9]: 758 | 759 | 760 | # convert kilometers to miles 761 | kilometers = float(input('Enter the distance in kilometers: ')) 762 | # conversion factor 763 | Miles = kilometers * 0.62137 764 | print('%0.2f kilometers is equal to %0.2f miles' %(kilometers,Miles)) 765 | 766 | 767 | # In[11]: 768 | 769 | 770 | # convert convert Celsius to Fahrenheit 771 | Celsius = float(input('Enter temperature in Celsius: ')) 772 | # conversion factor 773 | Fahrenheit = (Celsius * 1.8) + 32 774 | print('%0.2f Celsius is equal to %0.2f Fahrenheit' %(Celsius,Fahrenheit)) 775 | 776 | 777 | # ## End Chapter 1 778 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_ Part 2.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 2.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 1 Control Statements 5 | 6 | # ## Control Statements 7 | 8 | # ## 1) If Statements 9 | 10 | # In[13]: 11 | 12 | 13 | # Comparison operators 14 | x=5 15 | if x==5: 16 | print ('Equal 5') 17 | elif 
x>5: 18 | print ('Greater than 5') 19 | elif x<5: 20 | print ('Less than 5') 21 | 22 | 23 | # In[12]: 24 | 25 | 26 | # Identation 27 | x=5 28 | if x<2: 29 | print ("Bigger than 2") 30 | print (" X Value bigger than 2") 31 | 32 | print ("Now we are out of if block\n") 33 | 34 | 35 | # In[14]: 36 | 37 | 38 | year=2000 39 | if year%4==0: 40 | print("Year(", year ,")is Leap") 41 | else: 42 | print (year , "Year is not Leap" ) 43 | 44 | 45 | 46 | # In[2]: 47 | 48 | 49 | a=10 50 | if a>=20: 51 | print ("Condition is True" ) 52 | else: 53 | if a>=15: 54 | print ("Checking second value" ) 55 | else: 56 | print ("All Conditions are false" ) 57 | 58 | 59 | # In[23]: 60 | 61 | 62 | # use the range statement 63 | for a in range (1,4): 64 | print ( a ) 65 | 66 | 67 | # In[24]: 68 | 69 | 70 | # use the range statement 71 | for a in range (4): 72 | print ( a ) 73 | 74 | 75 | # In[32]: 76 | 77 | 78 | ticket=4 79 | while ticket>0: 80 | print ("Your ticket number is ",ticket) 81 | ticket -=1 82 | 83 | 84 | # ### use break, continue and pass statements 85 | 86 | # In[44]: 87 | 88 | 89 | for letter in 'Python3': 90 | if letter == 'o': 91 | break 92 | print (letter) 93 | 94 | 95 | # In[45]: 96 | 97 | 98 | a=0 99 | while a<=5: 100 | a=a+1 101 | if a%2==0: 102 | continue 103 | print (a) 104 | print ("End of Loop" ) 105 | 106 | 107 | # In[46]: 108 | 109 | 110 | for i in [1,2,3,4,5]: 111 | if i==3: 112 | pass 113 | print ("Pass when value is",i ) 114 | print (i), 115 | 116 | 117 | # ## Excercise , using try and except 118 | # Write a program to prompt the user for hours and 119 | # rate per hour to compute gross pay, the program 120 | # should gives employee 1.5 time the hourse worked 121 | # above 30 hours 122 | # Enter Hours: 50 123 | # Enter Rate: 10 124 | # Pay: 550.0 125 | # 126 | 127 | # In[6]: 128 | 129 | 130 | Hflage=True 131 | Rflage=True 132 | while Hflage & Rflage : 133 | hours = input ('Enter Hours:') 134 | try: 135 | hours = int(hours) 136 | Hflage=False 137 | except: 138 | 
print ("Incorrect hours number !!!!") 139 | try: 140 | rate = input ('Enter Rate:') 141 | rate=float(rate) 142 | Rflage=False 143 | except: 144 | print ("Incorrect rate !!") 145 | 146 | if hours>40: 147 | pay= 40 * rate + (rate*1.5) * (hours-40) 148 | else: 149 | pay= hours * rate 150 | 151 | print ('Pay:',pay) 152 | 153 | 154 | # In[14]: 155 | 156 | 157 | # Try and Except 158 | astr='Fujairah' 159 | errosms='' 160 | try: 161 | istr=int(astr) # error 162 | except: 163 | istr=-1 164 | errosms="\nIncorrect entery" 165 | 166 | print ("Firs Try:", istr , errosms) 167 | 168 | 169 | # In[15]: 170 | 171 | 172 | # Try and Except 173 | astr='12' 174 | errosms='' 175 | try: 176 | istr=int(astr) # error 177 | except: 178 | istr=-1 179 | errosms="\nIncorrect entery" 180 | 181 | print ("Firs Try:", istr , errosms) 182 | 183 | 184 | # ### Python Program to check if a Number is Positive, Negative or Zero 185 | 186 | # In[1]: 187 | 188 | 189 | Val = float(input("Enter a number: ")) 190 | 191 | if Val > 0: 192 | print("{0} is a positive number".format(Val)) 193 | elif Val == 0: 194 | print("{0} is zero".format(Val)) 195 | else: 196 | print("{0} is negative number".format(Val)) 197 | 198 | 199 | # In[4]: 200 | 201 | 202 | # Check if a Number is Odd or Even 203 | val = int(input("Enter a number: ")) 204 | if (val % 2) == 0: 205 | print("{0} is an Even number".format(val)) 206 | else: 207 | print("{0} is an Odd number".format(val)) 208 | 209 | 210 | # In[5]: 211 | 212 | 213 | # Write a python program that displays specific messages using the IF Statement: 214 | #It should ask the user to enter the age of a person, and then using a conditional statement, 215 | #it should print one of the following messages: 216 | 217 | 218 | # In[6]: 219 | 220 | 221 | age = int(input("Enter age of a person")) 222 | if(age < 13): 223 | print("This is a child") 224 | elif(age >= 13 and age <=17): 225 | print("This is a teenager") 226 | elif(age >= 18 and age <=59): 227 | print("This is an adult") 228 | 
else: 229 | print("This is a senior") 230 | 231 | 232 | # In[7]: 233 | 234 | 235 | Speed = int(input("Enter your car speed")) 236 | if(Speed < 80): 237 | print("No Fines") 238 | elif(Speed >= 81 and Speed <=99): 239 | print("200 AE Fine ") 240 | elif(Speed >= 100 and Speed <=109): 241 | print("350 AE Fine ") 242 | else: 243 | print("500 AE Fine ") 244 | 245 | 246 | # In[11]: 247 | 248 | 249 | year = int(input("Enter a year: ")) 250 | if (year % 4) == 0: 251 | if (year % 100) == 0: 252 | if (year % 400) == 0: 253 | print("{0} is a leap year".format(year)) 254 | else: 255 | print("{0} is not a leap year".format(year)) 256 | else: 257 | print("{0} is a leap year".format(year)) 258 | else: 259 | print("{0} is not a leap year".format(year)) 260 | 261 | 262 | # ## Print the Fibonacci sequence 263 | 264 | # In[14]: 265 | 266 | 267 | nterms = int(input("How many terms you want? ")) 268 | # first two terms 269 | n1 = 0 270 | n2 = 1 271 | count = 2 272 | # check if the number of terms is valid 273 | if nterms <= 0: 274 | print("Plese enter a positive integer") 275 | elif nterms == 1: 276 | print("Fibonacci sequence:") 277 | print(n1) 278 | else: 279 | print("Fibonacci sequence:") 280 | print(n1,",",n2,end=', ') 281 | while count < nterms: 282 | nth = n1 + n2 283 | print(nth,end=' , ') 284 | # update values 285 | n1 = n2 286 | n2 = nth 287 | count += 1 288 | 289 | 290 | # In[2]: 291 | 292 | 293 | largest = None 294 | print ('Before:', largest) 295 | for val in [30, 45, 12, 90, 74, 15]: 296 | if largest is None or val>largest: 297 | largest = val 298 | print ("Loop", val, largest) 299 | print ("Largest", largest) 300 | 301 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_Functions and Modules.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_Functions and Modules.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_Functions and Modules.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Functions 5 | 6 | # In[20]: 7 | 8 | 9 | def thing(): # function header {def, function name, function argumets } 10 | print ('Hello ', end='') # function body 11 | print ('World') 12 | 13 | 14 | 15 | thing() # function calling 16 | 17 | 18 | # In[13]: 19 | 20 | 21 | def print_Sms(): 22 | print ("Welcome to Python PD session") 23 | print ("Jan 2017\n") 24 | 25 | print ("\nPD Session on HCT Dubai") 26 | print_Sms() 27 | 28 | 29 | # In[18]: 30 | 31 | 32 | def sumvalues(x,y): 33 | print ('The summation of ',x,'+',y,'= ', end='') 34 | return x+y 35 | 36 | a=5 37 | b=a+2 38 | print (sumvalues(a,b) ) # Function calling 39 | 40 | 41 | # In[3]: 42 | 43 | 44 | def Details(name, mark): 45 | if mark>60: 46 | print ("Congratulation ",name," you pass the course") 47 | else: 48 | print ("Unfortunately ",name," you didn’t pass the course") 49 | 50 | 51 | # In[4]: 52 | 53 | 54 | Details("Ossama", 90) 55 | 56 | 57 | # In[5]: 58 | 59 | 60 | Details( 90,"Ossama") 61 | 62 | 63 | # In[6]: 64 | 65 | 66 | Details( mark=90, name="Ossama") 67 | 68 | 69 | # In[7]: 70 | 71 | 72 | 73 | 74 | 75 | # In[9]: 76 | 77 | 78 | def Details(name, mark): 79 | if mark>60: 80 | print ("Congratulation ",name," you pass the course") 81 | else: 82 | print ("Unfortunately ",name," you didn’t pass the course") 83 | 84 | 85 | # In[10]: 86 | 87 | 88 | Details( "Ossama") 89 | 90 | 91 | # In[11]: 92 | 93 | 94 | def Details(name, mark=0): 95 | if mark>60: 96 | print ("Congratulation ",name," you pass the course") 97 | else: 98 | print ("Unfortunately ",name," you didn’t pass the 
course") 99 | 100 | 101 | # In[12]: 102 | 103 | 104 | Details( "Ossama") 105 | 106 | 107 | # In[1]: 108 | 109 | 110 | max('Welcome to Egypt') 111 | 112 | 113 | # In[2]: 114 | 115 | 116 | min(3,5,8,9,100,2) 117 | 118 | 119 | # In[3]: 120 | 121 | 122 | len('Welcome to Egypt') 123 | 124 | 125 | # In[8]: 126 | 127 | 128 | mark=input("Enter your exam mark: ") 129 | mark=float(mark) 130 | if (mark>59.5): 131 | print ("Pass") 132 | else: 133 | print ("Fail") 134 | 135 | 136 | # # Convert Celsius to Fahrenheit 137 | # ## F = 1.8 C + 32 138 | 139 | # In[9]: 140 | 141 | 142 | value = input("Enter the Celsius value: ") 143 | c = int(value) 144 | f = 1.8 * (c) + 32 145 | print (c , " Celsius = ", f , "Fahrenheit") 146 | 147 | 148 | # In[2]: 149 | 150 | 151 | import random 152 | for i in range(5): 153 | x = random.random() 154 | print (x) 155 | 156 | 157 | # In[4]: 158 | 159 | 160 | import random 161 | for i in range(5): 162 | x = random.random() 163 | print (round(x,3)) 164 | 165 | 166 | # In[5]: 167 | 168 | 169 | random.randint(5, 10) 170 | 171 | 172 | # In[9]: 173 | 174 | 175 | random.randint(5, 10) 176 | 177 | 178 | # In[7]: 179 | 180 | 181 | random.randint(5, 10) 182 | 183 | 184 | # In[12]: 185 | 186 | 187 | random.randint(5, 10) 188 | 189 | 190 | # In[16]: 191 | 192 | 193 | t = [30, "Omar", 7, 10] 194 | random.choice(t) 195 | 196 | 197 | # In[17]: 198 | 199 | 200 | random.choice(t) 201 | 202 | 203 | # In[18]: 204 | 205 | 206 | random.choice(t) 207 | 208 | 209 | # In[23]: 210 | 211 | 212 | import math 213 | value = 120 214 | decibels = 10 * math.log10(value) 215 | print (decibels) 216 | 217 | 218 | # In[24]: 219 | 220 | 221 | degrees = 45 222 | radians = degrees / 360.0 * 2 * math.pi 223 | val= math.sin(radians) 224 | print (val) 225 | 226 | 227 | # In[30]: 228 | 229 | 230 | print (math.sqrt(16)) 231 | 232 | 233 | # In[34]: 234 | 235 | 236 | # Anonymous Function Definiton 237 | summation=lambda val1, val2: val1 + val2 238 | 239 | #Calling summation as a function 240 | 
print("The summation of 7 + 10 = ", summation(7, 10))


# In[35]:


quiz = 50


def readgrade():
    """Read a quiz mark from the user and print it.

    The `quiz` assigned here is local to the function; the module-level
    `quiz` is not modified, which the print after the call demonstrates.
    """
    quiz = input("Enter your quiz mark: ")
    quiz = int(quiz)
    print("Your quiz score is ", quiz)


readgrade()
print("Your quiz score is ", quiz)  # module-level value, still 50


# In[ ]:


print("\n******** Greeting ***********")


def greeting(lang):
    """Print a greeting for language code 'es' or 'fr'; anything else gets 'Hello'."""
    if lang == 'es':
        print('Hola')
    elif lang == 'fr':
        print('Bonjour')
    else:
        print('Hello')


greeting('en')
greeting('es')
greeting('fr')


# In[1]:


def computepay(hours, rate):
    """Return gross pay, paying hours above 40 at 1.5 times the rate.

    Args:
        hours: number of hours worked.
        rate: pay per hour.

    Returns:
        hours * rate for up to 40 hours; otherwise 40 hours at the normal
        rate plus the remaining hours at rate * 1.5.
    """
    if hours > 40:
        return 40 * rate + (rate * 1.5) * (hours - 40)
    return hours * rate


hours = input('Enter Hours: ')
try:
    hours = int(hours)
except ValueError:
    # Remember the failure so the pay computation is skipped instead of
    # being attempted with the unconverted string.
    print("Incorrect hours number !!!!")
    hours = None

rate = input("Enter Rate: ")
try:
    rate = float(rate)
except ValueError:
    print("Incorrect rate !!")
    rate = None

# Only compute the pay when both entries were valid numbers.
if hours is not None and rate is not None:
    fullpay = computepay(hours, rate)
    print("Gross Pay: ", fullpay)


# ## Exercises
# ### find the Highest Common Factor of two values.
303 | 304 | # In[5]: 305 | 306 | 307 | def HCF(x, y): 308 | if x > y: 309 | smaller = y 310 | else: 311 | smaller = x 312 | for i in range(1,smaller + 1): 313 | if((x % i == 0) and (y % i == 0)): 314 | HCF = i 315 | return HCF 316 | 317 | Number1 = int(input("Enter first number: ")) 318 | Number2 = int(input("Enter second number: ")) 319 | print("The Highest Common Factor of", Number1,"and", Number2,"is", HCF(Number1, Number2)) 320 | 321 | 322 | # In[6]: 323 | 324 | 325 | #Find Factorial of Number Using Recursion 326 | 327 | 328 | # In[9]: 329 | 330 | 331 | def RecurFactorial(n): 332 | if n == 1: 333 | return n 334 | else: 335 | return n*RecurFactorial(n-1) 336 | 337 | # read the value from the user 338 | Number = int(input("Enter a number: ")) 339 | 340 | # check is the number is negative 341 | if Number < 0: 342 | print("Sorry, factorial does not exist for negative numbers") 343 | elif Number == 0: 344 | print("The factorial of 0 is 1") 345 | else: 346 | print("The factorial of",Number,"is",RecurFactorial(Number)) 347 | 348 | 349 | # In[12]: 350 | 351 | 352 | def RecurFibo(n): 353 | if n <= 1: 354 | return n 355 | else: 356 | return(RecurFibo(n-1) + RecurFibo(n-2)) 357 | 358 | # read input from the user 359 | nlength = int(input("Enter your length? 
")) 360 | # check if the number of terms is valid 361 | if nlength <= 0: 362 | print("Plese enter a positive integer") 363 | else: 364 | print("Fibonacci sequence:") 365 | for i in range(nlength): 366 | print(RecurFibo(i), end=' , ') 367 | 368 | 369 | # ## 4.6 CREATE PYTHON MODULES 370 | 371 | # In[6]: 372 | 373 | 374 | import addition 375 | addition.add(10,20) 376 | addition.add(30,40) 377 | 378 | 379 | # In[7]: 380 | 381 | 382 | "{1} {0}".format(x, "The") 383 | "{first} {second}".format(first="The", second=x) 384 | 385 | -------------------------------------------------------------------------------- /Ch02/Embarak _Ch02_The importance of data visualization in business.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch02/Embarak _Ch02_The importance of data visualization in business.pdf -------------------------------------------------------------------------------- /Ch02/Embarak _Ch02_The importance of data visualization in business.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import pandas as pd 10 | import seaborn as sns 11 | import pygal 12 | from mayavi import mlab 13 | 14 | 15 | # In[5]: 16 | 17 | 18 | try: 19 | import matplotlib 20 | except: 21 | import pip 22 | pip.main(['install', 'matplotlib']) 23 | import matplotlib 24 | 25 | 26 | # # Matplotlib 27 | 28 | # In[23]: 29 | 30 | 31 | import numpy as np 32 | import matplotlib.pyplot as plt 33 | get_ipython().magic(u'matplotlib inline') 34 | plt.style.use('seaborn-whitegrid') 35 | 36 | X = [590,540,740,130,810,300,320,230,470,620,770,250] 37 | Y = [32,36,39,52,61,72,77,75,68,57,48,48] 38 | 39 | plt.scatter(X,Y) 40 | plt.xlim(0,1000) 41 | plt.ylim(0,100) 42 | 43 | #scatter plot color 44 | 
plt.scatter(X, Y, s=800, c='red', marker='+') 45 | 46 | #change axes ranges 47 | plt.xlim(0,1000) 48 | plt.ylim(0,100) 49 | 50 | #add title 51 | plt.title('Relationship Between Temperature and Iced Coffee Sales') 52 | 53 | #add x and y labels 54 | plt.xlabel('Sold Coffee') 55 | plt.ylabel('Temperature in Fahrenheit') 56 | 57 | #show plot 58 | plt.show() 59 | 60 | 61 | # In[20]: 62 | 63 | 64 | get_ipython().magic(u'matplotlib inline') 65 | import matplotlib.pyplot as plt 66 | plt.style.use('seaborn-whitegrid') 67 | import numpy as np 68 | 69 | # Create empty figure 70 | fig = plt.figure() 71 | ax = plt.axes() 72 | 73 | x = np.linspace(0, 10, 1000) 74 | ax.plot(x, np.sin(x)); 75 | 76 | plt.plot(x, np.sin(x)) 77 | plt.plot(x, np.cos(x)) 78 | plt.xlim(0, 11) 79 | plt.ylim(-2, 2) 80 | plt.axis('tight') 81 | #add title 82 | plt.title('Plotting data using sin and cos') 83 | 84 | 85 | # In[18]: 86 | 87 | 88 | plt.plot(x, np.sin(x - 0), color='blue') # specify color by name 89 | plt.plot(x, np.sin(x - 1), color='g') # short color code (rgbcmyk) 90 | plt.plot(x, np.sin(x - 2), color='0.75') # Grayscale between 0 and 1 91 | plt.plot(x, np.sin(x - 3), color='#FFDD44') # Hex code (RRGGBB from 00 to FF) 92 | plt.plot(x, np.sin(x - 4), color=(1.0,0.2,0.3)) # RGB tuple, values 0 to 1 93 | plt.plot(x, np.sin(x - 5), color='chartreuse'); # all HTML color names supported 94 | 95 | 96 | # # Seaborn 97 | 98 | # In[34]: 99 | 100 | 101 | import matplotlib.pyplot as plt 102 | get_ipython().magic(u'matplotlib inline') 103 | import numpy as np 104 | import pandas as pd 105 | import seaborn as sns 106 | plt.style.use('classic') 107 | plt.style.use('seaborn-whitegrid') 108 | 109 | # Create some data 110 | data = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000) 111 | data = pd.DataFrame(data, columns=['x', 'y']) 112 | 113 | # Plot the data with seaborn 114 | sns.distplot(data['x']) 115 | sns.distplot(data['y']); 116 | 117 | 118 | # In[35]: 119 | 120 | 121 | for col in 'xy': 
122 | sns.kdeplot(data[col], shade=True) 123 | 124 | 125 | # In[36]: 126 | 127 | 128 | sns.kdeplot(data); 129 | 130 | 131 | # In[37]: 132 | 133 | 134 | with sns.axes_style('white'): 135 | sns.jointplot("x", "y", data, kind='kde'); 136 | 137 | 138 | # In[38]: 139 | 140 | 141 | with sns.axes_style('white'): 142 | sns.jointplot("x", "y", data, kind='hex') 143 | 144 | 145 | # In[41]: 146 | 147 | 148 | sns.pairplot(data); 149 | 150 | 151 | # In[45]: 152 | 153 | 154 | sns.stripplot( x = data['x']) 155 | sns.stripplot( x = data['y']) 156 | 157 | 158 | # In[47]: 159 | 160 | 161 | # box plot per rank 162 | sns.boxplot(x = 'x', y = 'y', data=data) 163 | 164 | 165 | # In[50]: 166 | 167 | 168 | # box plot salaries 169 | sns.boxplot(x = data['y'], whis=2) 170 | 171 | 172 | # # Plotly 173 | 174 | # In[64]: 175 | 176 | 177 | from plotly import __version__ 178 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 179 | init_notebook_mode(connected=True) 180 | print (__version__) 181 | 182 | 183 | # In[91]: 184 | 185 | 186 | import plotly.graph_objs as go 187 | 188 | plot([go.Scatter(x=[95, 77, 84], y=[75, 67, 56])]) 189 | 190 | 191 | # In[67]: 192 | 193 | 194 | import plotly.graph_objs as go 195 | import numpy as np 196 | 197 | x = np.random.randn(2000) 198 | y = np.random.randn(2000) 199 | iplot([go.Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')), 200 | go.Scatter(x=x, y=y, mode='markers', marker=dict(color='white', size=3, opacity=0.3))], show_link=False) 201 | 202 | 203 | # In[90]: 204 | 205 | 206 | import plotly.offline as offline 207 | import plotly.graph_objs as go 208 | 209 | offline.plot({'data': [{'y': [14, 22, 30, 44]}], 210 | 'layout': {'title': 'Offline Plotly', 'font': dict(size=16)}}, image='png') 211 | 212 | 213 | # In[88]: 214 | 215 | 216 | import plotly.plotly as py 217 | import plotly.graph_objs as go 218 | import plotly 219 | import plotly.offline as offline 220 | 221 | 222 | df = 
pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv") 223 | 224 | 225 | schools = df.School 226 | 227 | 228 | data = [go.Bar(x=df.School,y=df.Gap)] 229 | 230 | py.iplot(data, filename='jupyter-basic_bar') 231 | 232 | 233 | # # geoplotlib 234 | 235 | # In[ ]: 236 | 237 | 238 | import geoplotlib 239 | from geoplotlib.utils import read_csv 240 | 241 | data = read_csv('bus.csv') 242 | geoplotlib.dot(data) 243 | geoplotlib.show() 244 | 245 | 246 | # # Direct plotting 247 | 248 | # In[116]: 249 | 250 | 251 | import pandas as pd 252 | import numpy as np 253 | 254 | df = pd.DataFrame(np.random.randn(200,6),index=pd.date_range('1/9/2009', 255 | periods=200), columns=list('ABCDEF')) 256 | 257 | 258 | df.plot(figsize=(20, 10)).legend(bbox_to_anchor=(1, 1)) 259 | #Shape of passed values is (10, 200), indices imply (4, 10) 260 | 261 | 262 | # In[123]: 263 | 264 | 265 | import pandas as pd 266 | import numpy as np 267 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 268 | df.plot.bar(figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 269 | 270 | 271 | # In[124]: 272 | 273 | 274 | import pandas as pd 275 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 276 | df.plot.bar(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 277 | 278 | 279 | # In[126]: 280 | 281 | 282 | import pandas as pd 283 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 284 | df.plot.barh(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 285 | 286 | 287 | # In[131]: 288 | 289 | 290 | import pandas as pd 291 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 292 | df.plot.hist(bins= 20, figsize=(10, 8)).legend(bbox_to_anchor=(1.2, 1)) 293 | 294 | 295 | # In[139]: 296 | 297 | 298 | import pandas as pd 299 | import numpy as np 300 | 301 | 
df=pd.DataFrame({'April':np.random.randn(1000)+1,'May':np.random.randn(1000),'June': 302 | np.random.randn(1000) - 1}, columns=['April', 'May', 'June']) 303 | 304 | df.hist(bins=20) 305 | 306 | 307 | # In[140]: 308 | 309 | 310 | import pandas as pd 311 | import numpy as np 312 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 313 | df.plot.box() 314 | 315 | 316 | # In[145]: 317 | 318 | 319 | import pandas as pd 320 | import numpy as np 321 | 322 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 323 | df.plot.area(figsize=(6, 4)).legend(bbox_to_anchor=(1.3, 1)) 324 | 325 | 326 | # In[150]: 327 | 328 | 329 | import pandas as pd 330 | import numpy as np 331 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 332 | df.plot.scatter(x='Feb', y='Jan', title='Temprature over two months ') 333 | 334 | 335 | # In[155]: 336 | 337 | 338 | import pandas as pd 339 | import numpy as np 340 | 341 | df = pd.DataFrame(10 * np.random.rand(5), index=['Jan','Feb','March','April', 'May'], columns=['Month']) 342 | df.plot.pie(subplots=True) 343 | 344 | 345 | # # Exercise 346 | 347 | # In[14]: 348 | 349 | 350 | import pandas as pd 351 | import numpy as np 352 | import matplotlib.pyplot as plt 353 | 354 | salesMen = ['Ahmed', 'Omar', 'Ali', 'Ziad', 'Salwa', 'Lila'] 355 | Mobile_Sales = [2540, 1370, 1320, 2000, 2100, 2150] 356 | TV_Sales = [2200, 1900, 2150, 1850, 1770, 2000] 357 | 358 | df = pd.DataFrame() 359 | df ['Name'] =salesMen 360 | df ['Mobile_Sales'] = Mobile_Sales 361 | df ['TV_Sales'] = TV_Sales 362 | df.set_index("Name",drop=True,inplace=True) 363 | 364 | 365 | # In[15]: 366 | 367 | 368 | df 369 | 370 | 371 | # In[16]: 372 | 373 | 374 | df.plot.bar( figsize=(20, 10), rot=0).legend(bbox_to_anchor=(1.1, 1)) 375 | plt.xlabel('Salesmen') 376 | plt.ylabel('Sales') 377 | plt.title('Sales Volume for two salesmen in \nJanuray and April 2017') 378 | plt.show() 379 | 380 | 381 | # 
In[17]: 382 | 383 | 384 | import pandas as pd 385 | import numpy as np 386 | import matplotlib.pyplot as plt 387 | 388 | salesMen = ['Ahmed', 'Omar', 'Ali', 'Ziad', 'Salwa', 'Lila'] 389 | Mobile_Sales = [2540, 1370, 1320, 2000, 2100, 2150] 390 | TV_Sales = [2200, 1900, 2150, 1850, 1770, 2000] 391 | 392 | df = pd.DataFrame() 393 | df ['Name'] =salesMen 394 | df ['Mobile_Sales'] = Mobile_Sales 395 | df ['TV_Sales'] = TV_Sales 396 | df.set_index("Name",drop=True,inplace=True) 397 | 398 | 399 | df.plot.pie(subplots=True) 400 | 401 | 402 | # In[18]: 403 | 404 | 405 | df.plot.box() 406 | 407 | 408 | # In[19]: 409 | 410 | 411 | df.plot.area(figsize=(6, 4)).legend(bbox_to_anchor=(1.3, 1)) 412 | 413 | 414 | # In[20]: 415 | 416 | 417 | df.plot.bar(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 418 | 419 | -------------------------------------------------------------------------------- /Ch03/Embarak _Ch03_Data Collections Structure .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch03/Embarak _Ch03_Data Collections Structure .pdf -------------------------------------------------------------------------------- /Ch03/Embarak _Ch03_Data Collections Structure .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter : Collections 5 | 6 | # In[1]: 7 | 8 | 9 | # Create List 10 | List1 = [1, 24, 76] 11 | print (List1) 12 | 13 | colors=['red', 'yellow', 'blue'] 14 | print (colors) 15 | 16 | mix=['red', 24, 98.6] 17 | print (mix) 18 | 19 | nested= [ 1, [5, 6], 7] 20 | print (nested) 21 | 22 | print ([]) 23 | 24 | 25 | # In[9]: 26 | 27 | 28 | list1 = ['Egypt', 'chemistry', 2017, 2018]; 29 | list2 = [1, 2, 3, [4, 5] ]; 30 | list3 = ["a", 3.7, '330', "Omar"] 31 | 32 | print (list1[2]) 33 | print (list2 [3:]) 34 | print (list3 [-3:-1]) 35 
| print (list3[-3]) 36 | 37 | 38 | # In[50]: 39 | 40 | 41 | courses=["OOP","Networking","MIS","Project"] 42 | students=["Ahmed", "Ali", "Salim", "Abdullah", "Salwa"] 43 | OOP_marks = [65, 85, 92] 44 | 45 | OOP_marks.append(50) # Add new element 46 | OOP_marks.append(77)# Add new element 47 | print (OOP_marks[ : ]) # Print list before updateing 48 | 49 | OOP_marks[0]=70 # update new element 50 | OOP_marks[1]=45 # update new element 51 | list1 = [88, 93] 52 | OOP_marks.extend(list1) # extend list with another list 53 | print (OOP_marks[ : ]) # Print list after updateing 54 | 55 | 56 | # In[28]: 57 | 58 | 59 | 60 | 61 | 62 | # In[48]: 63 | 64 | 65 | OOP_marks = [70, 45, 92, 50, 77, 45] 66 | print (OOP_marks) 67 | 68 | del OOP_marks[0] # delete an element using del 69 | print (OOP_marks) 70 | 71 | OOP_marks.remove (45) # remove an element using remove() method 72 | print (OOP_marks) 73 | 74 | 75 | OOP_marks.pop (2) # remove an element using remove() method 76 | print (OOP_marks) 77 | 78 | 79 | # In[42]: 80 | 81 | 82 | len([5, "Omar", 3]) # find the list length. 83 | [3, 4, 1] + ["0", 5, 6] # concatenate lists. 84 | ['Hi!'] * 4 # repeate an element in a list. 85 | 3 in [1, 2, 3] # check if element in a list 86 | for x in [1, 2, 3]: print (x) # traverse list elements 87 | 88 | 89 | # In[46]: 90 | 91 | 92 | print (len([5, "Omar", 3])) # find the list length. 93 | print ([3, 4, 1] + ["Omar", 5, 6]) # concatenate lists. 94 | print (['Eg!'] * 4) # repeate an element in a list. 
95 | print (3 in [1, 2, 3]) # check if element in a list 96 | for x in [1, 2, 3]: print (x, end=' ') # traverse list elements 97 | 98 | 99 | # In[51]: 100 | 101 | 102 | #Built-in Functions and Lists 103 | tickets = [3, 41, 12, 9, 74, 15] 104 | print (tickets) 105 | print (len(tickets)) 106 | print (max(tickets)) 107 | print (min(tickets)) 108 | print (sum(tickets)) 109 | print (sum(tickets)/len(tickets)) 110 | 111 | 112 | # In[58]: 113 | 114 | 115 | #List sorting and Traversing 116 | seq=(41, 12, 9, 74, 3, 15) # use sequence for creating a list 117 | tickets=list(seq) 118 | 119 | print (tickets) 120 | tickets.sort() 121 | print (tickets) 122 | 123 | print ("\nSorted list elements ") 124 | for ticket in tickets: 125 | print (ticket) 126 | 127 | 128 | # ## LISTS AND STRINGS 129 | 130 | # In[63]: 131 | 132 | 133 | # convert string to a list of characters 134 | Word = 'Egypt' 135 | List1 = list(Word) 136 | print (List1) 137 | 138 | 139 | # In[70]: 140 | 141 | 142 | # we can break a string into words using the split method 143 | Greeting= 'Welcome to Egypt' 144 | List2 =Greeting.split() 145 | print (List2) 146 | print (List2[2]) 147 | 148 | 149 | # In[69]: 150 | 151 | 152 | # use the delimiter 153 | Greeting= 'Welcome-to-Egypt' 154 | List2 =Greeting.split("-") 155 | print (List2) 156 | 157 | Greeting= 'Welcome-to-Egypt' 158 | delimiter='-' 159 | List2 =Greeting.split(delimiter) 160 | print (List2) 161 | 162 | 163 | # In[73]: 164 | 165 | 166 | List1 = ['Welcome', 'to', 'Egypt'] 167 | delimiter = ' ' 168 | delimiter.join(List1) 169 | 170 | 171 | # In[74]: 172 | 173 | 174 | List1 = ['Welcome', 'to', 'Egypt'] 175 | delimiter = '-' 176 | delimiter.join(List1) 177 | 178 | 179 | # In[105]: 180 | 181 | 182 | filesdata="From oembarak@hct.ac.ae Sat Jan 5 09:14:16 2016 mak.jon@ec.ac.ae Sat Jan 5 09:14:16 2011 From ossama.embarak@ar.ac.eg Sat Jan 5 09:14:16 2010 From usa.mak@gmail.com Jan 5 09:14:16 2015" 183 | #print (filesdata) 184 | for line in filesdata: 185 | #line = 
line.rstrip() 186 | if not line.startswith('From ') : continue 187 | words = line.split() 188 | print (words[2]) 189 | 190 | 191 | # In[117]: 192 | 193 | 194 | a = [1, 2, 3] 195 | b = a 196 | print (a) 197 | print (b) 198 | 199 | 200 | # In[118]: 201 | 202 | 203 | a.append(77) 204 | print (a) 205 | print (b) 206 | 207 | 208 | # In[119]: 209 | 210 | 211 | b is a 212 | 213 | 214 | # In[120]: 215 | 216 | 217 | a = [1, 2, 3] 218 | b = [1, 2, 3] 219 | print (a) 220 | print (b) 221 | 222 | 223 | # In[121]: 224 | 225 | 226 | a.append(77) 227 | print (a) 228 | print (b) 229 | 230 | 231 | # In[122]: 232 | 233 | 234 | b is a 235 | 236 | 237 | # In[124]: 238 | 239 | 240 | Students =["Ahmed", "Ali", "Salim", "Abdullah", "Salwa"] 241 | def displaynames (x): 242 | for name in x: 243 | print (name) 244 | 245 | displaynames(Students) # Call the function displaynames 246 | 247 | 248 | # # Dictionaries 249 | 250 | # In[36]: 251 | 252 | 253 | Prices = {"Honda":40000, "Suzuki":50000, "Mercedes":85000, "Nissan":35000, "Mitsubishi":43000 } 254 | print (Prices) 255 | 256 | 257 | # In[37]: 258 | 259 | 260 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 261 | print(Staff_Salary) 262 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 263 | print(STDMarks) 264 | 265 | 266 | # In[38]: 267 | 268 | 269 | STDMarks = dict() 270 | STDMarks['Salwa Ahmed']=50 271 | STDMarks['Abdullah Mohamed']=80 272 | STDMarks['Sultan Ghanim']=90 273 | print (STDMarks) 274 | 275 | 276 | # In[39]: 277 | 278 | 279 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 280 | STDMarks['Salwa Ahmed'] = 85 # update current value of the key 'Salwa Ahmed' 281 | STDMarks['Omar Majid'] = 74 # Add a new item to the dictionary 282 | print (STDMarks) 283 | 284 | 285 | # In[40]: 286 | 287 | 288 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 289 | print (STDMarks) 290 | del STDMarks['Abdullah Mohamed'] # 
remove entry with key 'Abdullah Mohamed' 291 | print (STDMarks) 292 | STDMarks.clear() # remove all entries in STDMarks dictionary 293 | print (STDMarks) 294 | del STDMarks # delete entire dictionary 295 | 296 | 297 | # In[2]: 298 | 299 | 300 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 301 | print('Salary package for Ossama Hashim is ', end='') 302 | print(Staff_Salary['Ossama Hashim']) # access specific dictionary element 303 | 304 | 305 | # In[3]: 306 | 307 | 308 | # Define a function to return salary after dicount tax 5% 309 | def Netsalary (salary): 310 | return salary - (salary * 0.05) # also could be retunr salary *0.95 311 | 312 | #iterate all elements in a dcitionary 313 | print ("Name " , '\t', "Net Salary" ) 314 | for key, value in Staff_Salary.items(): 315 | print (key , '\t', Netsalary(value)) 316 | 317 | 318 | # In[43]: 319 | 320 | 321 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 322 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 323 | 324 | 325 | # In[52]: 326 | 327 | 328 | def cmp(a, b): 329 | for key, value in a.items(): 330 | for key1, value1 in b.items(): 331 | return (key >key1) - (key < key1) 332 | 333 | 334 | # In[54]: 335 | 336 | 337 | print (cmp(STDMarks,Staff_Salary) ) 338 | print (cmp(STDMarks,STDMarks) ) 339 | print (len(STDMarks) ) 340 | print (str(STDMarks) ) 341 | print (type(STDMarks) ) 342 | 343 | 344 | # In[ ]: 345 | 346 | 347 | 348 | 349 | 350 | # In[71]: 351 | 352 | 353 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 354 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 355 | dic3 = Staff_Salary.copy() 356 | Staff_Salary.clear() # clear all elements in Staff_Salary dictionary 357 | print (Staff_Salary) 358 | print (dic3) 359 | 360 | dict1= dict() 361 | sequence=('Id' , 'Number' , 'Email') 362 | print 
(dict1.fromkeys(sequence)) 363 | print (dict1.fromkeys(sequence, '####')) 364 | 365 | 366 | # In[89]: 367 | 368 | 369 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 370 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 371 | print (Staff_Salary.get('Ali Ziad') ) 372 | print (STDMarks.items()) 373 | print (Staff_Salary.keys()) 374 | 375 | print() 376 | STDMarks.setdefault('Ali Ziad') 377 | print (STDMarks) 378 | print (STDMarks.update(dict1)) 379 | print (STDMarks) 380 | 381 | 382 | # In[96]: 383 | 384 | 385 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 386 | print ("\nSorted by key") 387 | for k in sorted(Staff_Salary): 388 | print (k, Staff_Salary[k]) 389 | 390 | 391 | # In[97]: 392 | 393 | 394 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 395 | print ("\nSorted by value") 396 | for w in sorted(Staff_Salary, key=Staff_Salary.get, reverse=True): 397 | print (w, Staff_Salary[w]) 398 | 399 | 400 | # # Tuples 401 | 402 | # In[1]: 403 | 404 | 405 | Names = ('Omar', 'Ali', 'Bahaa') 406 | Marks = ( 75, 65, 95 ) 407 | 408 | print (Names[2]) 409 | print (Marks) 410 | print (max(Marks)) 411 | 412 | 413 | # In[2]: 414 | 415 | 416 | for name in Names: 417 | print (name) 418 | 419 | 420 | # In[3]: 421 | 422 | 423 | Marks[1]=66 424 | 425 | 426 | # In[4]: 427 | 428 | 429 | Names = ( 'Omar Ahmed', 'Ali Ziad' , 'Ossama Hashim', 'Majid Hatem') 430 | print (Names) 431 | Names.sort(reverse=True) 432 | print (Names) 433 | 434 | 435 | # In[9]: 436 | 437 | 438 | MarksCIS=(70,85,90) 439 | print (MarksCIS) 440 | 441 | 442 | 443 | # In[14]: 444 | 445 | 446 | 447 | MarksCIS.sort(key=lambda x: int(x[0])) 448 | 449 | 450 | # In[1]: 451 | 452 | 453 | import operator 454 | MarksCIS = [(88,65),(70,90,85), (55,88,44)] 455 | print (MarksCIS) # original tuples 456 | print (sorted(MarksCIS)) # 
direct sorting 457 | 458 | 459 | # In[2]: 460 | 461 | 462 | print (MarksCIS) # original tuples 463 | #create a new sorted tuple 464 | MarksCIS2 = sorted(MarksCIS, key=lambda x: (x[0], x[1])) 465 | print (MarksCIS2) 466 | 467 | 468 | # In[3]: 469 | 470 | 471 | print (MarksCIS) # original tuples 472 | MarksCIS.sort(key=lambda x: (x[0], x[1])) # sort in tuple 473 | print (MarksCIS) 474 | 475 | 476 | # In[4]: 477 | 478 | 479 | MarksCIS = (70, 85, 55) 480 | MarksCIN = (90, 75, 60) 481 | print ("The third mark in CIS is ", MarksCIS[2]) 482 | print ("The third mark in CIN is ", MarksCIN[2]) 483 | 484 | 485 | # In[5]: 486 | 487 | 488 | MarksCIN = (90, 75, 60) 489 | print (MarksCIN) 490 | del MarksCIN 491 | print (MarksCIN) 492 | 493 | 494 | # In[6]: 495 | 496 | 497 | MarksCIS = (88, 65, 70,90,85,45,78,95,55) 498 | print ("\nForward slicing") 499 | print (MarksCIS[1:4]) 500 | print (MarksCIS[:3]) 501 | print (MarksCIS[6:]) 502 | print (MarksCIS[4:6]) 503 | 504 | print ("\nBackward slicing") 505 | print (MarksCIS[-4:-2]) 506 | print (MarksCIS[-3]) 507 | print (MarksCIS[-3:]) 508 | print (MarksCIS[ :-3]) 509 | 510 | 511 | # In[8]: 512 | 513 | 514 | import operator 515 | MarksCIS = [(88,65),(70,90,85), (55,88,44)] 516 | print (MarksCIS) # original tuples 517 | print (sorted(MarksCIS)) # direct sorting 518 | 519 | 520 | MarksCIS2 = sorted(MarksCIS, key=lambda x: (x[0], x[1])) 521 | 522 | print (MarksCIS2) 523 | 524 | MarksCIS.sort(key=lambda x: (x[0], x[1])) # sorts in place 525 | print (MarksCIS) 526 | 527 | 528 | # In[ ]: 529 | 530 | 531 | students = [ 532 | ('John', 'A', 2), 533 | ('Zoro', 'C', 1), 534 | ('Dave', 'B', 3), 535 | ] 536 | print(students) 537 | 538 | 539 | # In[5]: 540 | 541 | 542 | MarksCIS=(70,85,55) 543 | MarksCIN=(90,75,60) 544 | Combind=MarksCIS+MarksCIN 545 | print (Combind) 546 | 547 | 548 | # # a series from ndarray with labels. 

# In[8]:


import numpy as np
import pandas as pd
Series1 = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])
print(Series1)
print(Series1.index)


# In[9]:


import numpy as np
import pandas as pd
Series2 = pd.Series(np.random.randn(4))  # no index given: default 0..3
print(Series2)
print(Series2.index)


# In[10]:


# Series1 is indexed by labels, so positions are selected explicitly with
# .iloc rather than relying on integer keys being treated as positions.
print(" \n Series slicing ")
print(Series1.iloc[:3])
print("\nIndex accessing")
print(Series1.iloc[[3, 1, 0]])
print("\nSingle index")
x = Series1.iloc[0]
print(x)


# In[19]:


print("\nSeries Sample operations")
print("\n Series values greater than the mean: %.4f" % Series1.mean())
print(Series1[Series1 > Series1.mean()])
print("\n Series values greater than the Meadian:%.4f" % Series1.median())
print(Series1[Series1 > Series1.median()])
print("\nExponential value ")
Series1Exp = np.exp(Series1)
print(Series1Exp)


# In[12]:


# Named `date_parts` rather than `dict` so the built-in dict type stays
# usable by the cells that call dict() later in this file.
date_parts = {'m': 2, 'y': 2018, 'd': 'Sunday'}
print("\nSeries of non declared index")
SeriesDict1 = pd.Series(date_parts)
print(SeriesDict1)

print("\nSeries of declared index")
# 's' is not a key of date_parts, so that entry is created as NaN.
SeriesDict2 = pd.Series(date_parts, index=['y', 'm', 'd', 's'])
print(SeriesDict2)


# In[13]:


print("\nUse the get and set methods to access "
      "a series values by index label\n")
SeriesDict2 = pd.Series(date_parts, index=['y', 'm', 'd', 's'])
print(SeriesDict2['y'])        # Display the year
SeriesDict2['y'] = 1999        # change the year value
print(SeriesDict2)             # Display all series values
print(SeriesDict2.get('y'))    # get specific value by its key


# In[14]:


print("\n CREATE SERIES FORM SCALAR VALUE ")
Scl = pd.Series(8., index=['a', 'b', 'c', 'd'])  # scalar repeated for every label
print(Scl)

627 | 628 | # In[18]: 629 | 630 | 631 | SerX = pd.Series([1,2,3,4], index=['a', 'b', 'c', 'd']) 632 | print ("Addition"); 633 | print( SerX + SerX) 634 | print ("Addition with non matched labels"); 635 | print (SerX[1:] + SerX[:-1]) 636 | print ("Multiplication"); 637 | print (SerX * SerX) 638 | print ("Expponential"); 639 | print (np.exp(SerX)) 640 | 641 | 642 | # In[17]: 643 | 644 | 645 | std = pd.Series([77,89,65,90], name='StudentsMarks') 646 | print (std.name) 647 | std = std.rename("Marks") 648 | print (std.name) 649 | 650 | 651 | # In[4]: 652 | 653 | 654 | # read data from file and add it to dictionary for processing 655 | handle = open("Egypt.txt") 656 | text = handle.read() 657 | words = text.split() 658 | #print (words) 659 | counts = dict() 660 | for word in words: 661 | counts[word] = counts.get(word,0) + 1 662 | 663 | print (counts) 664 | bigcount = None 665 | bigword = None 666 | for word,count in counts.items(): 667 | if bigcount is None or count > bigcount: 668 | bigword = word 669 | bigcount = count 670 | print ("\n bigword and bigcount") 671 | print (bigword, bigcount) 672 | 673 | 674 | # In[14]: 675 | 676 | 677 | print ((100, 1, 2) > (150, 1, 2)) 678 | print ((0, 1, 120) < (0, 3, 4)) 679 | print (( 'Javed', 'Salwa' ) > ('Omar', 'Sam')) 680 | print (( 'Khalid', 'Ahmed') < ('Ziad', 'Majid')) 681 | 682 | 683 | # In[5]: 684 | 685 | 686 | import pandas as pd 687 | dict1 = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 688 | 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} 689 | df = pd.DataFrame(dict1) 690 | df 691 | 692 | 693 | # In[6]: 694 | 695 | 696 | # set index for DataFrame 697 | pd.DataFrame(dict1, index=['d', 'b', 'a']) 698 | 699 | 700 | # In[8]: 701 | 702 | 703 | # Control the labels appearance of the DataFrame 704 | pd.DataFrame(dict1, index=['d', 'b', 'a'], columns=['two', 'three', 'one']) 705 | 706 | 707 | # In[11]: 708 | 709 | 710 | # without index 711 | ndarrdict = {'one' : [1., 2., 3., 4.], 712 | 'two' : [4., 
3., 2., 1.]} 713 | pd.DataFrame(ndarrdict) 714 | 715 | 716 | # In[12]: 717 | 718 | 719 | # Assign index 720 | pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd']) 721 | 722 | 723 | # In[18]: 724 | 725 | 726 | import pandas as pd 727 | import numpy as np 728 | data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')]) 729 | data[:] = [(1,2.,'Hello'), (2,3.,"World")] 730 | pd.DataFrame(data) 731 | 732 | 733 | # In[16]: 734 | 735 | 736 | pd.DataFrame(data, index=['First', 'Second']) 737 | 738 | 739 | # In[17]: 740 | 741 | 742 | pd.DataFrame(data, columns=['C', 'A', 'B']) 743 | 744 | 745 | # In[19]: 746 | 747 | 748 | data2 = [{'A': 1, 'B': 2}, {'A': 5, 'B': 10, 'C': 20}] 749 | pd.DataFrame(data2) 750 | 751 | 752 | # In[20]: 753 | 754 | 755 | pd.DataFrame(data2, index=['First', 'Second']) 756 | 757 | 758 | # In[21]: 759 | 760 | 761 | pd.DataFrame(data2, columns=['A', 'B']) 762 | 763 | 764 | # In[22]: 765 | 766 | 767 | pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2}, 768 | ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4}, 769 | ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6}, 770 | ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8}, 771 | ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}) 772 | 773 | 774 | # In[25]: 775 | 776 | 777 | # DATAFRAME COLUMN SELECTION, ADDITION, DELETION 778 | ndarrdict = {'one' : [1., 2., 3., 4.], 779 | 'two' : [4., 3., 2., 1.]} 780 | df = pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd']) 781 | df 782 | 783 | 784 | # In[26]: 785 | 786 | 787 | df['three'] = df['one'] * df['two'] # Add column 788 | df['flag'] = df['one'] > 2 # Add column 789 | df 790 | 791 | 792 | # In[27]: 793 | 794 | 795 | df['Filler'] = 'HCT' 796 | df['Slic'] = df['one'][:2] 797 | df 798 | 799 | 800 | # In[28]: 801 | 802 | 803 | # Delet columns 804 | del df['two'] 805 | Three = df.pop('three') 806 | df 807 | 808 | 809 | # In[29]: 810 | 811 | 812 | df.insert(1, 'bar', df['one']) 813 | df 814 | 815 | 816 | # In[54]: 817 | 818 | 819 | import numpy as np 820 | import pandas as pd 821 
| df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 822 | df = df.assign(C=lambda x: x['A'] + x['B']) # new column C = A + B 823 | df = df.assign( D=lambda x: x['A'] + x['C']) # D is built from the C column added on the previous line 824 | df 825 | 826 | 827 | # In[55]: 828 | 829 | 830 | df = df.assign( A=lambda x: x['A'] *2) # assigning to an existing name replaces column A with its doubled values 831 | df 832 | 833 | 834 | # In[56]: 835 | 836 | 837 | df 838 | 839 | 840 | # In[61]: 841 | 842 | 843 | df['B'] 844 | 845 | 846 | # In[59]: 847 | 848 | 849 | df.iloc[2] 850 | 851 | 852 | # In[62]: 853 | 854 | 855 | df[1:] 856 | 857 | 858 | # In[65]: 859 | 860 | 861 | df[df['C']>7] 862 | 863 | 864 | # In[69]: 865 | 866 | 867 | df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 868 | df2 = pd.DataFrame({"A": [7, 4, 6], "B": [10, 4, 15]}) 869 | print (df1) 870 | print() 871 | print(df2) 872 | 873 | 874 | # In[70]: 875 | 876 | 877 | df1+df2 878 | 879 | 880 | # In[71]: 881 | 882 | 883 | df1-df2 884 | 885 | 886 | # In[72]: 887 | 888 | 889 | df2 - df1.iloc[2] # subtracts row 2 of df1 from each row of df2 890 | 891 | 892 | # In[75]: 893 | 894 | 895 | df2 896 | 897 | 898 | # In[78]: 899 | 900 | 901 | df2*2+1 902 | 903 | 904 | # In[3]: 905 | 906 | 907 | import pandas as pd 908 | import numpy as np 909 | P1 = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], 910 | major_axis=pd.date_range('10/05/2018', periods=5), 911 | minor_axis=['A', 'B', 'C', 'D']) # NOTE(review): pd.Panel is no longer available in recent pandas releases - confirm the pandas version these cells target 912 | P1 913 | 914 | 915 | # In[4]: 916 | 917 | 918 | data = {'Item1' : pd.DataFrame(np.random.randn(4, 3)), 919 | 'Item2' : pd.DataFrame(np.random.randn(4, 2))} 920 | P2 = pd.Panel(data) 921 | P2 922 | 923 | 924 | # In[5]: 925 | 926 | 927 | p3 = pd.Panel.from_dict(data, orient='minor') 928 | p3 929 | 930 | 931 | # In[26]: 932 | 933 | 934 | df = pd.DataFrame({'Item': ['TV', 'Mobile', 'Laptop'], 935 | 'Price': np.random.randn(3)**2*1000}) 936 | df 937 | 938 | 939 | # In[29]: 940 | 941 | 942 | data = {'stock1': df, 'stock2': df} 943 | panel = pd.Panel.from_dict(data, orient='minor') 944 | panel['Item'] 945 | 946 | 947 | # In[30]: 948 | 949 | 950 | wp['Price'] # NOTE(review): `wp` is not assigned anywhere in the visible part of this file; the preceding cell names its panel `panel` - confirm which was intended 951 | 952 | 953 | # In[33]: 954 | 955 | 956 | import pandas
as pd 957 | import numpy as np 958 | P1 = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], 959 | major_axis=pd.date_range('10/05/2018', periods=5), 960 | minor_axis=['A', 'B', 'C', 'D']) 961 | P1['Item1'] 962 | 963 | 964 | # In[34]: 965 | 966 | 967 | P1.major_xs(P1.major_axis[2]) 968 | 969 | 970 | # In[35]: 971 | 972 | 973 | P1.minor_axis 974 | 975 | 976 | # In[36]: 977 | 978 | 979 | P1.minor_xs('C') 980 | 981 | 982 | # In[28]: 983 | 984 | 985 | data = {'Omar': 2.5, 'Ali': 3.5, 'Osama': 3.0} 986 | pd.Series(data) 987 | 988 | 989 | # In[30]: 990 | 991 | 992 | pd.Series(data, index = ['Omar', 'Ali', 'Osama']) # same dict, with the label order given explicitly 993 | 994 | 995 | # In[31]: 996 | 997 | 998 | data = {'Omar': [90, 50, 89], 999 | 'Ali': [78, 75, 73], 1000 | 'Osama': [67, 85, 80]} 1001 | df1 = pd.DataFrame (data, index= ['Course1', 'Course2', 'Course3']) 1002 | df1 1003 | 1004 | 1005 | # In[32]: 1006 | 1007 | 1008 | df1['Omar'] 1009 | 1010 | 1011 | # In[33]: 1012 | 1013 | 1014 | df1['Mean'] = (df1['Ali'] + df1['Omar'] + df1['Osama'])/3 # per-row average of the three student columns 1015 | df1 1016 | 1017 | -------------------------------------------------------------------------------- /Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .pdf -------------------------------------------------------------------------------- /Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Ch04 File processing and Regular expressions 5 | 6 | # # File processing 7 | 8 | # In[2]: 9 | 10 | 11 | Name = input ("Enter your name: ") 12 | Name 13 | 14 | 15 | # In[3]: 16 | 17 | 18 | Mark = input("Enter your mark: ") 19 | Mark = float(Mark) # input() returns text; convert it before the numeric comparison in the next cell 20 | 21 | 22 | # In[4]: 
# Grade report for the name and mark read by the two input cells that precede
# this one in the file.
print("Welcome to Grading System \nHCT 2018")
print("\nCampus\t Name\t\tMark\tGrade")
# Only the "B+" band (mark >= 85) is defined by this example. Bind Grade for
# every mark so the report row below never hits an unbound name.
# NOTE(review): the original indentation was lost in this dump; this assumes
# the row is meant to be printed for every mark - confirm.
Grade = "B+" if Mark >= 85 else "N/A"
print("FMC\t", Name, "\t", Mark, "\t", Grade)


# ### Files attributes

# In[41]:


# Open a file and find its attributes.
# The context manager closes the handle once the attributes are printed.
with open("Egypt.txt", "r") as Filehndl:
    print("Name of the file: ", Filehndl.name)
    print("Closed or not : ", Filehndl.closed)
    print("Opening mode : ", Filehndl.mode)


# ### Open and close files

# In[40]:


# Explicit open()/close() is kept here on purpose: the two prints show the
# value of .closed before and after close().
Filehndl = open("Egypt.txt", "r")
print("Closed or not : ", Filehndl.closed)
Filehndl.close()
print("Closed or not : ", Filehndl.closed)


# In[39]:


# "w+" truncates (or creates) the file; the with-block closes it even if
# write() raises.
with open("Egypt.txt", "w+") as Filehndl:
    Filehndl.write("Python Processing Files\nMay 2018!!\n")


# ### Rename and delete files

# In[34]:


import os
os.rename("Egypt.txt", "test2.txt")
os.remove("test2.txt")


# ## Directories in Python

# In[ ]:


import os
# Remember the starting directory: the relative names passed to rmdir() below
# only resolve from here, not from inside "Data_2".
_start_dir = os.getcwd()
os.mkdir("Data 1")     # create a directory
os.mkdir("Data_2")
os.chdir("Data_2")     # make Data_2 the current working directory
os.getcwd()            # Get the current working directory
os.chdir(_start_dir)   # step back out before removing the directories

os.rmdir('Data 1')     # remove a directory
os.rmdir('Data_2')     # remove a directory


# In[44]:


import os
os.getcwd()  # Get the current working directory


# In[43]:


# NOTE(review): absolute path; presumably specific to the notebook host these
# examples were written on - confirm it exists where this script is run.
os.chdir('/home/nbuser/library')


# ## open and process files

# In[45]:


print("\nSearching Through a File\n")
# Print only the lines that start with 'From:'; the handle is closed when the
# loop finishes.
with open('Emails.txt') as fhand:
    for line in fhand:
        line = line.rstrip()
        if line.startswith('From:'):
            print(line)


# In[46]:


print("\nUsing in to select lines // only print lines which has specific string ")
| fhand = open('Emails.txt') 121 | for line in fhand: 122 | line = line.rstrip() 123 | if not '@uct.ac.za' in line : # skip every line that does not contain this address domain 124 | continue 125 | print (line) 126 | 127 | 128 | # In[47]: 129 | 130 | 131 | print("\nSearching Through a File\n") 132 | fhand = open('Emails.txt') 133 | for line in fhand: 134 | line = line.rstrip() 135 | if line.startswith('From:') : 136 | line = line.split() 137 | print (line[1]) # second whitespace-separated field of the 'From:' line 138 | 139 | 140 | # ## Regular Expressions 141 | 142 | # In[48]: 143 | 144 | 145 | import re 146 | print ("\nRegular Expressions\n'^X.*:' \n") 147 | hand = open('Data.txt') 148 | for line in hand: 149 | line = line.rstrip() 150 | y = re.findall('^X.*:',line) # greedy: from a leading 'X' up to the last ':' on the line 151 | print (y) 152 | 153 | 154 | # In[49]: 155 | 156 | 157 | print ("\nRegular Expressions\nWild-Card Characters '^X-\S+:'\n") 158 | hand = open('Data.txt') 159 | for line in hand: 160 | line = line.rstrip() 161 | y = re.findall('^X-\S+:',line) # \S+ matches one or more non-whitespace characters 162 | print (y) 163 | 164 | 165 | 166 | # In[50]: 167 | 168 | 169 | print ("\n Matching and Extracting Data \n") 170 | x = 'My 2 favorite numbers are 19 and 42' 171 | y = re.findall('[0-9]+',x) 172 | print (y) 173 | 174 | 175 | # In[51]: 176 | 177 | 178 | y = re.findall('[AEsOUn]+',x) # runs of one or more of the characters A, E, s, O, U, n 179 | print (y) 180 | 181 | 182 | # In[52]: 183 | 184 | 185 | print ("\nGreedy Matching \n") 186 | x = 'From: Using the : character' 187 | y = re.findall('^F.+:', x) 188 | print (y) 189 | 190 | 191 | # In[53]: 192 | 193 | 194 | print ("\nNon-Greedy Matching \n") 195 | x = 'From: Using the : character' 196 | y = re.findall('^F.+?:', x) 197 | print (y) 198 | 199 | 200 | # In[54]: 201 | 202 | 203 | import re 204 | print ("\nFine-Tuning String Extraction \n") 205 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 206 | Extract = re.findall('\S+@\S+',mystr) 207 | print (Extract) 208 | E_xtracted = re.findall('^From.*? 
(\S+@\S+)',mystr) # non-greedy '.*?' stops at the first space followed by an address; the group captures that address 209 | print (E_xtracted) 210 | print (E_xtracted[0]) 211 | 212 | 213 | # In[57]: 214 | 215 | 216 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 217 | atpos = mystr.find('@') 218 | sppos = mystr.find(' ',atpos) # first space at or after the '@' 219 | host = mystr[atpos+1 : sppos] # text between the '@' and that space 220 | print (host) 221 | usernamepos =mystr.find(' ') 222 | username = mystr[usernamepos+1 : atpos] # text between the first space and the '@' 223 | print (username) 224 | 225 | 226 | # In[58]: 227 | 228 | 229 | print ("\n The Regex Version\n") 230 | import re 231 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 232 | Extract = re.findall('@([^ ]*)',mystr) 233 | print (Extract) 234 | Extract = re.findall('^From .*@([^ ]*)',mystr) 235 | print (Extract) 236 | 237 | 238 | # In[59]: 239 | 240 | 241 | print ("\nScape character \n") 242 | mystr = 'We just received $10.00 for cookies and $20.23 for juice' 243 | Extract = re.findall('\$[0-9.]+',mystr) # the backslash makes '$' match a literal dollar sign 244 | print (Extract) 245 | 246 | 247 | # ## Exercises 248 | 249 | # In[60]: 250 | 251 | 252 | import re 253 | CoursesData = """101 COM Computers 254 | 205 MAT Mathematics 255 | 189 ENG English""" 256 | 257 | 258 | # In[61]: 259 | 260 | 261 | # Extract all course numbers 262 | Course_numbers = re.findall('[0-9]+', CoursesData) 263 | print (Course_numbers) 264 | 265 | # Extract all course codes 266 | Course_codes = re.findall('[A-Z]{3}', CoursesData) 267 | print (Course_codes) 268 | 269 | # Extract all course names 270 | Course_names = re.findall('[A-Za-z]{4,}', CoursesData) 271 | print (Course_names) 272 | 273 | 274 | # In[62]: 275 | 276 | 277 | # compile the pattern and locate its first match 278 | regex_num = re.compile('\d+') 279 | s = regex_num.search(CoursesData) 280 | 281 | print('Starting Position: ', s.start()) 282 | print('Ending Position: ', s.end()) 283 | print(CoursesData[s.start():s.end()]) 284 | 285 | 286 | # In[63]: 287 | 288 | 289 | # define the course text pattern groups and extract 290 | course_pattern
= '([0-9]+)\s*([A-Z]{3})\s*([A-Za-z]{4,})' 291 | re.findall(course_pattern, CoursesData) # with three groups, findall returns one (number, code, name) tuple per match 292 | 293 | 294 | # In[64]: 295 | 296 | 297 | print(re.findall('[a-zA-Z]+', CoursesData)) # [] matches any one character listed inside; + repeats it 298 | 299 | 300 | # In[65]: 301 | 302 | 303 | print(re.findall('[0-9]+', CoursesData)) # [] matches any one character listed inside; + repeats it 304 | 305 | 306 | # In[66]: 307 | 308 | 309 | import re 310 | CoursesData = """10 COM Computers 311 | 205 MAT Mathematics 312 | 1899 ENG English""" 313 | print(re.findall('\d{4}', CoursesData)) # {n} matches exactly n repetitions 314 | print(re.findall('\d{2,4}', CoursesData)) # {m,n} matches between m and n repetitions 315 | 316 | -------------------------------------------------------------------------------- /Ch04/Wild-Card.txt: -------------------------------------------------------------------------------- 1 | X-Sieve: CMU Sieve 2.3 2 | X-DSPAM-Result: Innocent 3 | X-DSPAM-Confidence: 0.8475 4 | X- Content-Type-Message-Body: text/plain 5 | X-Plane is behind schedule: two weeks -------------------------------------------------------------------------------- /Ch05/1. 
Export1_Columns.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,2004,2005,2006,2007 2 | Benin,BEN,811,940,869,1076 3 | Burkina Faso,BFA,548,532,673,714 4 | Bangladesh,BGD,7257,9995,11745,13530 5 | Bulgaria,BGR,10713,12703,16151,23263 6 | Bahrain,BHR,10337,13397,15662,17314 7 | "Bahamas, The",BHS,3161,3482,3558,3888 8 | Bosnia and Herzegovina,BIH,3232,3550,4505,4078 9 | Belarus,BLR,15710,18065,22200,27592 10 | Belize,BLZ,535,609,744,788 11 | Bermuda,BMU,0,0,0,0 12 | Bolivia,BOL,2732,3395,4784,5484 13 | Brazil,BRA,110744,135919,159214,186203 14 | Barbados,BRB,1436,1712,1939,2044 15 | Brunei Darussalam,BRN,5416,6688,8227,8310 16 | Bhutan,BTN,220,313,489,657 17 | Botswana,BWA,4444,5256,5292,5964 18 | Central African Republic,CAF,178,179,209,240 19 | Canada,CAN,381529,430267,464728,500338 20 | Central Europe and the Baltics,CEB,354718,422597,512548,648165 21 | Switzerland,CHE,202880,219836,243432,294204 22 | Channel Islands,CHI,0,0,0,0 23 | Chile,CHL,38094,47749,65633,75856 24 | China,CHN,593264,764531,973211,1230720 25 | Cote d'Ivoire,CIV,7682,8525,9322,9607 26 | Cameroon,CMR,3061,3393,4130,4889 27 | "Congo, Rep.",COG,3744,5123,6507,6592 28 | Colombia,COL,19634,24696,28677,34305 29 | Comoros,COM,57,57,60,71 30 | Cabo Verde,CPV,296,367,500,477 31 | Costa Rica,CRI,8602,9683,11067,12822 32 | Caribbean small states,CSS,19017,23308,29288,29312 33 | Cuba,CUB,6121,8963,9870,11918 34 | Curacao,CUW,0,0,0,0 35 | Cayman Islands,CYM,0,0,0,0 36 | Cyprus,CYP,9854,10266,10725,12765 37 | Czech Republic,CZE,68328,84742,101341,125664 38 | Germany,DEU,999334,1079830,1236570,1479310 39 | Djibouti,DJI,246,288,307,484 40 | Dominica,DMA,130,129,144,148 41 | Denmark,DNK,110049,125389,142993,164020 42 | Dominican Republic,DOM,9381,10040,10731,11722 43 | Algeria,DZA,34175,48715,57122,63531 44 | East Asia & Pacific (developing only),EAP,1020490,1256000,1545380,1890140 45 | East Asia & Pacific (all income 
levels),EAS,2853990,3288690,3815750,4442960 46 | Europe & Central Asia (developing only),ECA,241546,285610,345567,436385 47 | Europe & Central Asia (all income levels),ECS,5377840,5933000,6756610,7964430 48 | Ecuador,ECU,8985,11463,14196,16288 49 | "Egypt, Arab Rep.",EGY,22258,27214,32191,39470 50 | Euro area,EMU,3545370,3813120,4271090,5075590 51 | Eritrea,ERI,64,68,84,76 52 | Spain,ESP,269302,285474,314519,380305 53 | Estonia,EST,7419,9231,10770,14053 54 | Ethiopia,ETH,1494,1858,2105,2489 55 | European Union,EUU,4653420,5063310,5728280,6732610 56 | Fragile and conflict affected situations,FCS,97395,121788,147124,178795 57 | Finland,FIN,75931,82297,93488,112361 58 | Fiji,FJI,1468,1598,1548,1670 59 | France,FRA,550336,581084,631842,722460 60 | Faeroe Islands,FRO,705,736,825,960 61 | "Micronesia, Fed. Sts.",FSM,0,0,0,0 62 | Gabon,GAB,4269,5742,6255,7280 63 | United Kingdom,GBR,559810,621255,718296,759840 64 | Georgia,GEO,1617,2164,2546,3175 65 | Ghana,GHA,3491,3912,5142,6072 66 | Guinea,GIN,903,1022,1190,1189 67 | "Gambia, The",GMB,118,129,97,118 68 | Guinea-Bissau,GNB,98,117,101,140 69 | Equatorial Guinea,GNQ,4879,7183,8606,10565 70 | Greece,GRC,49803,52821,57897,71752 71 | Grenada,GRD,198,149,162,210 72 | Greenland,GRL,0,0,0,0 73 | Guatemala,GTM,6465,6818,7537,8721 74 | Guam,GUM,0,0,0,0 75 | Guyana,GUY,753,698,0,0 76 | High income,HIC,9114030,10169100,11523000,13268000 77 | "Hong Kong SAR, China",HKG,315629,353428,390556,431069 78 | Honduras,HND,5125,5707,6077,6568 79 | Heavily indebted poor countries (HIPC),HPC,58552,68096,82231,98241 80 | Croatia,HRV,16403,17846,20005,23432 81 | Haiti,HTI,543,605,689,779 82 | Hungary,HUN,61919,70662,85253,108939 83 | Indonesia,IDN,82744,97388,113143,127226 84 | Isle of Man,IMN,0,0,0,0 85 | India,IND,126648,160838,199974,253077 86 | Not classified,INX,0,0,0,0 87 | Ireland,IRL,155263,164817,176526,208616 88 | "Iran, Islamic Rep.",IRN,47413,63474,72326,92050 89 | Iraq,IRQ,20611,27149,33242,40777 90 | Iceland,ISL,4479,5112,5314,7157 
91 | Israel,ISR,52835,57878,62478,72029 92 | Italy,ITA,432727,456712,509513,604236 93 | Jamaica,JAM,3820,3966,4788,5085 94 | Jordan,JOR,5962,6635,8112,9280 95 | Japan,JPN,615047,654356,704556,773111 96 | Kazakhstan,KAZ,22655,30387,41292,51704 97 | Kenya,KEN,4283,5342,5936,7005 98 | Kyrgyz Republic,KGZ,941,942,1182,2012 99 | Cambodia,KHM,3395,4033,4990,5644 100 | Kiribati,KIR,12,15,11,21 101 | St. Kitts and Nevis,KNA,194,227,236,233 102 | "Korea, Rep.",KOR,292911,330601,376047,439918 103 | Kosovo,KSV,0,0,530,748 104 | Kuwait,KWT,33831,51692,66566,72695 105 | Latin America & Caribbean (developing only),LAC,421379,496874,579544,655234 106 | Lao PDR,LAO,723,934,1395,1457 107 | Lebanon,LBN,7591,8050,7995,9395 108 | Liberia,LBR,124,129,186,239 109 | Libya,LBY,21117,30160,40430,49084 110 | St. Lucia,LCA,464,525,440,457 111 | Latin America & Caribbean (all income levels),LCN,635611,754182,882680,987691 112 | Least developed countries: UN classification,LDC,71671,96350,120298,154304 113 | Low income,LIC,30325,34116,39013,48263 114 | Liechtenstein,LIE,0,0,0,0 115 | Sri Lanka,LKA,7300,7892,8520,9419 116 | Lower middle income,LMC,506014,609332,747227,876659 117 | Low & middle income,LMY,2218060,2714100,3299440,3972260 118 | Lesotho,LSO,696,669,765,832 119 | Lithuania,LTU,10735,14087,16828,20035 120 | Luxembourg,LUX,52634,60030,73949,92081 121 | Latvia,LVA,5935,7375,8655,11928 122 | "Macao SAR, China",MAC,10988,11154,13121,16879 123 | St. 
Martin (French part),MAF,0,0,0,0 124 | Morocco,MAR,16726,19234,22450,26891 125 | Monaco,MCO,0,0,0,0 126 | Moldova,MDA,1317,1528,1542,2088 127 | Madagascar,MDG,1424,1422,1640,2227 128 | Maldives,MDV,689,485,777,1804 129 | Middle East & North Africa (all income levels),MEA,565362,748777,899078,1053270 130 | Mexico,MEX,202070,230169,266428,289465 131 | Marshall Islands,MHL,0,0,0,0 132 | Middle income,MIC,2188160,2680310,3260680,3924370 133 | "Macedonia, FYR",MKD,1745,2178,2593,3678 134 | Mali,MLI,1237,1359,1884,1871 135 | Malta,MLT,4425,4602,5505,6661 136 | Myanmar,MMR,0,0,0,0 137 | Middle East & North Africa (developing only),MNA,204958,264651,311776,377041 138 | Montenegro,MNE,871,983,1331,1629 139 | Mongolia,MNG,1211,1483,2029,2525 140 | Northern Mariana Islands,MNP,0,0,0,0 141 | Mozambique,MOZ,1759,2087,2722,2888 142 | Mauritania,MRT,470,671,1420,1506 143 | Mauritius,MUS,3450,3761,4068,4509 144 | Malawi,MWI,655,662,705,1033 145 | Malaysia,MYS,143928,162049,182517,205489 146 | North America,NAC,1563100,1739230,1941120,2165040 147 | Namibia,NAM,2630,2937,3628,4413 148 | New Caledonia,NCL,0,0,0,0 149 | Niger,NER,530,565,599,748 150 | Nigeria,NGA,26495,35534,62697,56142 151 | Nicaragua,NIC,1336,1541,1835,2164 152 | Netherlands,NLD,409334,446527,498358,591200 153 | High income: nonOECD,NOC,1530190,1857670,2179260,2471570 154 | Norway,NOR,108690,134064,154283,173600 155 | Nepal,NPL,1213,1186,1216,1327 156 | New Zealand,NZL,30697,32449,33025,40159 157 | High income: OECD,OEC,7581110,8310100,9343040,10795300 158 | OECD members,OED,7875540,8645830,9729830,11229200 159 | Oman,OMN,12723,18114,20905,23771 160 | Other small states,OSS,25694,31313,35452,42265 161 | Pakistan,PAK,15350,17180,19401,20137 162 | Panama,PAN,9587,11674,13147,16072 163 | Peru,PER,14951,19913,26334,31161 164 | Philippines,PHL,44381,47554,56923,64614 165 | Palau,PLW,70,83,84,91 166 | Papua New Guinea,PNG,2834,0,0,0 167 | Poland,POL,87825,106314,131127,166464 168 | Puerto Rico,PRI,66393,68553,72625,72906 
169 | "Korea, Dem. Rep.",PRK,0,0,0,0 170 | Portugal,PRT,51562,52748,62397,74466 171 | Paraguay,PRY,4372,5084,6252,7818 172 | Pacific island small states,PSS,2089,2295,2278,2526 173 | French Polynesia,PYF,0,0,0,0 174 | Qatar,QAT,20363,28983,38245,48048 175 | Romania,ROU,27162,32810,39605,49992 176 | Russian Federation,RUS,203415,268957,333908,392044 177 | Rwanda,RWA,232,296,382,600 178 | South Asia,SAS,160474,199608,243990,301707 179 | Saudi Arabia,SAU,131921,187389,225507,249318 180 | Sudan,SDN,3810,5087,6833,10046 181 | Senegal,SEN,2180,2352,2398,2872 182 | Singapore,SGP,247027,288066,340079,386495 183 | Solomon Islands,SLB,116,141,165,215 184 | Sierra Leone,SLE,239,290,318,337 185 | El Salvador,SLV,4259,4383,4764,5204 186 | San Marino,SMR,0,0,0,0 187 | Somalia,SOM,0,0,0,0 188 | Serbia,SRB,6021,7125,9264,11428 189 | Sub-Saharan Africa (developing only),SSA,168818,210630,272350,310879 190 | South Sudan,SSD,0,0,0,0 191 | Sub-Saharan Africa (all income levels),SSF,174317,218533,281811,322381 192 | Small states,SST,46759,56877,66892,74131 193 | Sao Tome and Principe,STP,15,16,16,14 194 | Suriname,SUR,387,546,0,0 195 | Slovak Republic,SVK,39331,45171,57202,71871 196 | Slovenia,SVN,18947,21656,25611,32524 197 | Sweden,SWE,165839,178466,202413,235411 198 | Swaziland,SWZ,2056,2250,2259,2311 199 | Sint Maarten (Dutch part),SXM,0,0,0,0 200 | Seychelles,SYC,621,720,858,937 201 | Syrian Arab Republic,SYR,10048,11656,13168,15614 202 | Turks and Caicos Islands,TCA,0,0,0,0 203 | Chad,TCD,2252,3189,3532,3845 204 | Togo,TGO,747,847,841,957 205 | Thailand,THA,114062,129738,152515,181342 206 | Tajikistan,TJK,1211,601,656,767 207 | Turkmenistan,TKM,4216,5270,7512,9548 208 | Timor-Leste,TLS,39,40,45,60 209 | Tonga,TON,47,47,42,37 210 | Trinidad and Tobago,TTO,7220,10520,14954,14101 211 | Tunisia,TUN,13166,14501,15823,19877 212 | Turkey,TUR,92361,105558,120355,144466 213 | Tuvalu,TUV,0,0,0,0 214 | "Taiwan, China",TWN,0,0,0,0 215 | Tanzania,TZA,2520,2864,3183,4068 216 | 
Uganda,UGA,1008,1278,1519,2056 217 | Ukraine,UKR,39716,44344,50239,64001 218 | Upper middle income,UMC,1682610,2071850,2514560,3048460 219 | Uruguay,URY,4395,5279,5932,6810 220 | United States,USA,1181510,1308900,1476320,1664620 221 | Uzbekistan,UZB,4837,5416,6326,8851 222 | St. Vincent and the Grenadines,VCT,185,201,212,212 223 | "Venezuela, RB",VEN,40706,57709,67003,71714 224 | Virgin Islands (U.S.),VIR,0,0,0,0 225 | Vietnam,VNM,27135,36712,44945,54591 226 | Vanuatu,VUT,166,179,181,215 227 | West Bank and Gaza,PSE,597,724,737,1067 228 | World,WLD,11332200,12883600,14822400,17239700 229 | Samoa,WSM,119,132,146,167 230 | "Yemen, Rep.",YEM,5048,6852,7873,0 231 | South Africa,ZAF,58216,68172,79519,93339 232 | "Congo, Dem. Rep.",COD,2341,2442,2765,6540 233 | Zambia,ZMB,2087,2550,4158,4722 234 | Zimbabwe,ZWE,2001,1931,1957,2000 235 | -------------------------------------------------------------------------------- /Ch05/1. Export1_Columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/1. Export1_Columns.xlsx -------------------------------------------------------------------------------- /Ch05/1. 
Export2_Columns.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,2008,2009,2010,2011,2012,2013,2014 2 | Benin,BEN,1312,1039,991,1040,1154,1518,1656 3 | Burkina Faso,BFA,834,1063,1727,2681,2849,3166,3551 4 | Bangladesh,BGD,16181,17360,18472,25627,26887,29305,34344 5 | Bulgaria,BGR,28591,21964,26836,35488,33975,37260,37845 6 | Bahrain,BHR,21231,15705,17880,22945,22853,0,0 7 | "Bahamas, The",BHS,3797,3117,3223,3443,3733,3715,3739 8 | Bosnia and Herzegovina,BIH,4911,4265,4901,5703,5219,5706,0 9 | Belarus,BLR,37021,24865,29402,48462,51745,44058,43555 10 | Belize,BLZ,854,690,813,894,983,988,0 11 | Bermuda,BMU,0,2699,2705,2644,2589,2657,0 12 | Bolivia,BOL,7488,6194,8093,10566,12775,13518,0 13 | Brazil,BRA,229517,180892,237189,299972,288558,287520,270214 14 | Barbados,BRB,2090,1905,2055,1699,1702,1592,0 15 | Brunei Darussalam,BRN,11270,7812,10074,13297,13795,12270,0 16 | Bhutan,BTN,586,565,673,750,706,728,711 17 | Botswana,BWA,4999,3573,4926,6964,6278,8131,7876 18 | Central African Republic,CAF,218,212,234,295,271,219,181 19 | Canada,CAN,531714,389895,469047,546709,553579,555263,563927 20 | Central Europe and the Baltics,CEB,769904,607197,695190,833145,809264,869375,0 21 | Switzerland,CHE,347368,309906,373423,458002,446652,494530,0 22 | Channel Islands,CHI,0,0,0,0,0,0,0 23 | Chile,CHL,74557,63918,82795,95443,90993,89662,87167 24 | China,CHN,1444800,1200770,1580770,1907310,2049320,2213300,2342540 25 | Cote d'Ivoire,CIV,11414,12308,12600,13660,13108,13693,14864 26 | Cameroon,CMR,5628,3750,4098,4889,4975,6108,7016 27 | "Congo, Rep.",COG,8912,6756,10221,12591,11459,10780,11321 28 | Colombia,COL,43403,37484,45739,62844,67495,66949,60583 29 | Comoros,COM,73,75,82,93,94,105,113 30 | Cabo Verde,CPV,574,462,481,587,0,0,0 31 | Costa Rica,CRI,13557,12423,13855,15345,16849,17459,17372 32 | Caribbean small states,CSS,36306,22964,24721,28811,27252,29601,0 33 | Cuba,CUB,12506,10839,14519,17319,18659,18593,0 34 | 
Curacao,CUW,0,0,0,0,0,0,0 35 | Cayman Islands,CYM,0,0,0,0,0,0,0 36 | Cyprus,CYP,13795,12116,12046,13419,12403,12228,12875 37 | Czech Republic,CZE,149036,120995,136996,162791,158218,161291,171867 38 | Germany,DEU,1630570,1291720,1443240,1681100,1622620,1699680,1757960 39 | Djibouti,DJI,0,0,0,0,0,0,0 40 | Dominica,DMA,157,148,174,191,160,175,184 41 | Denmark,DNK,189863,149383,159010,180803,173929,182292,183561 42 | Dominican Republic,DOM,11400,10117,11630,13752,14758,15568,16465 43 | Algeria,DZA,82035,48534,61975,77581,77123,69659,63810 44 | East Asia & Pacific (developing only),EAP,2199840,1842590,2408330,2893220,3055750,3238590,3389990 45 | East Asia & Pacific (all income levels),EAS,5037450,4199380,5380790,6326360,6567820,6733200,6940060 46 | Europe & Central Asia (developing only),ECA,561284,427165,502542,637040,661386,673275,672209 47 | Europe & Central Asia (all income levels),ECS,8988100,7097250,7943420,9357270,9158240,9552280,9756010 48 | Ecuador,ECU,21100,15786,19402,24672,26315,27570,28518 49 | "Egypt, Arab Rep.",EGY,53800,47164,46731,48540,45809,49111,43520 50 | Euro area,EMU,5613920,4491910,4908200,5690140,5503870,5785780,5930130 51 | Eritrea,ERI,61,84,101,375,0,0,0 52 | Spain,ESP,413929,339897,365360,430324,411004,439579,449768 53 | Estonia,EST,16161,11950,14634,20050,20031,21419,21949 54 | Ethiopia,ETH,3085,3405,4071,5332,5963,5934,6416 55 | European Union,EUU,7428790,5931830,6528220,7578880,7338580,7693330,7890080 56 | Fragile and conflict affected situations,FCS,240623,171820,216666,224537,282436,268347,0 57 | Finland,FIN,127905,91221,95849,107163,101351,103094,100887 58 | Fiji,FJI,2013,1421,1816,2227,2412,2266,2460 59 | France,FRA,800627,648448,689363,795876,764789,800948,811712 60 | Faeroe Islands,FRO,1104,934,1026,1213,1149,1323,0 61 | "Micronesia, Fed. 
Sts.",FSM,0,0,0,0,0,0,0 62 | Gabon,GAB,9518,6199,8329,10546,10957,9728,8786 63 | United Kingdom,GBR,773713,623555,690756,800276,790992,806509,834035 64 | Georgia,GEO,3662,3202,4068,5231,6046,7213,7090 65 | Ghana,GHA,7140,7609,9484,14596,16812,16206,15022 66 | Guinea,GIN,1578,1223,1343,1533,1684,1773,1831 67 | "Gambia, The",GMB,156,206,259,303,328,329,0 68 | Guinea-Bissau,GNB,172,155,169,296,143,165,168 69 | Equatorial Guinea,GNQ,14832,8550,10298,14295,15310,13782,12585 70 | Greece,GRC,82859,62722,66204,73506,70460,73221,78310 71 | Grenada,GRD,208,187,184,196,207,214,234 72 | Greenland,GRL,0,0,0,0,0,0,0 73 | Guatemala,GTM,9674,9047,10668,12688,12531,12773,13537 74 | Guam,GUM,0,0,0,0,0,0,0 75 | Guyana,GUY,0,0,0,0,0,0,0 76 | High income,HIC,14929800,11953700,13925100,16342400,16401700,16851900,17090000 77 | "Hong Kong SAR, China",HKG,457803,409319,501661,560284,592390,628504,638875 78 | Honduras,HND,7078,5766,7248,9078,9432,8867,9086 79 | Heavily indebted poor countries (HIPC),HPC,119684,102927,129749,155392,159097,165390,172395 80 | Croatia,HRV,27108,21624,22516,25147,23483,24851,26170 81 | Haiti,HTI,834,929,802,1010,1047,1213,1303 82 | Hungary,HUN,125194,97100,107066,122056,110865,118429,0 83 | Indonesia,IDN,152090,130358,183481,235095,225744,218307,210801 84 | Isle of Man,IMN,0,0,0,0,0,0,0 85 | India,IND,288902,273752,375353,445636,447414,468478,487653 86 | Not classified,INX,0,0,0,0,0,0,0 87 | Ireland,IRL,219979,203338,209021,232257,234495,244379,0 88 | "Iran, Islamic Rep.",IRN,0,0,0,0,0,0,0 89 | Iraq,IRQ,66239,43995,54599,82505,96986,94800,0 90 | Iceland,ISL,7253,6372,7099,8278,8074,8561,9129 91 | Israel,ISR,82626,69163,81621,91635,93223,95664,96718 92 | Italy,ITA,644649,491150,535296,614705,592542,615764,629732 93 | Jamaica,JAM,5750,4179,4143,4387,4482,4277,0 94 | Jordan,JOR,12416,10928,12745,13744,14307,14270,15507 95 | Japan,JPN,858847,639245,833705,893378,874354,794578,0 96 | Kazakhstan,KAZ,76257,48243,65502,89503,91746,88692,0 97 | 
Kenya,KEN,8139,7416,8263,9073,9990,9828,9993 98 | Kyrgyz Republic,KGZ,2752,2565,2472,3380,2933,3099,2732 99 | Cambodia,KHM,6785,5120,6080,6938,8825,10016,11425 100 | Kiribati,KIR,19,18,16,21,20,18,18 101 | St. Kitts and Nevis,KNA,235,177,212,248,264,309,328 102 | "Korea, Rep.",KOR,500723,428868,540896,670343,688933,703396,714235 103 | Kosovo,KSV,892,965,1163,1311,1185,1231,1448 104 | Kuwait,KWT,98390,62981,76952,112784,130069,125823,0 105 | Latin America & Caribbean (developing only),LAC,745902,606782,770915,942982,970434,983844,978871 106 | Lao PDR,LAO,1743,1801,2552,3080,3635,4165,4742 107 | Lebanon,LBN,11432,11988,13782,14519,24406,25223,26295 108 | Liberia,LBR,292,176,248,423,561,658,522 109 | Libya,LBY,63183,37335,49055,19025,61096,46140,13775 110 | St. Lucia,LCA,536,544,609,573,604,585,616 111 | Latin America & Caribbean (all income levels),LCN,1132140,911877,1170890,1368240,1387920,1393640,1373460 112 | Least developed countries: UN classification,LDC,200221,151845,186796,233531,230799,238511,262146 113 | Low income,LIC,57765,49488,60406,71752,68204,73576,83146 114 | Liechtenstein,LIE,0,0,0,0,0,0,0 115 | Sri Lanka,LKA,10114,8972,11091,13644,13561,15102,16735 116 | Lower middle income,LMC,1053780,906363,1198050,1456330,1486690,1478910,1509660 117 | Low & middle income,LMY,4726390,3856620,4952850,5984130,6271960,6441410,6595450 118 | Lesotho,LSO,914,783,971,1221,1047,897,0 119 | Lithuania,LTU,27342,19446,24262,32641,34991,39012,39386 120 | Luxembourg,LUX,105101,84328,94204,109162,108854,122259,0 121 | Latvia,LVA,14077,11103,12646,16378,17376,18359,18521 122 | "Macao SAR, China",MAC,20117,20063,30047,40970,46869,55205,55018 123 | St. 
Martin (French part),MAF,0,0,0,0,0,0,0 124 | Morocco,MAR,33310,26094,30169,35295,34441,34964,36975 125 | Monaco,MCO,0,0,0,0,0,0,0 126 | Moldova,MDA,2472,2006,2280,3155,3168,3461,3345 127 | Madagascar,MDG,2498,1913,2180,2646,2878,3191,0 128 | Maldives,MDV,1970,1712,2007,2420,2835,3193,3193 129 | Middle East & North Africa (all income levels),MEA,1391620,990522,1206430,1588050,1789860,1786960,1679060 130 | Mexico,MEX,307236,244146,313989,366164,387301,400639,419849 131 | Marshall Islands,MHL,0,0,0,0,0,0,0 132 | Middle income,MIC,4669090,3807150,4892950,5913180,6205610,6369360,6513050 133 | "Macedonia, FYR",MKD,4283,3084,3743,4946,4422,4715,5420 134 | Mali,MLI,2551,2128,2449,2802,3233,0,0 135 | Malta,MLT,7509,6380,7199,8708,0,0,0 136 | Myanmar,MMR,0,0,0,0,0,0,0 137 | Middle East & North Africa (developing only),MNA,507006,359184,425431,458794,549908,520883,0 138 | Montenegro,MNE,1784,1330,1427,1922,1785,1846,1817 139 | Mongolia,MNG,3037,2305,3356,5471,5356,5021,6428 140 | Northern Mariana Islands,MNP,0,0,0,0,0,0,0 141 | Mozambique,MOZ,3194,3114,3040,3899,4195,4304,4458 142 | Mauritania,MRT,1852,1499,2201,2985,2802,2820,2419 143 | Mauritius,MUS,5103,4326,5101,6011,6246,6475,6771 144 | Malawi,MWI,1206,1240,1586,1663,1580,1847,1950 145 | Malaysia,MYS,229657,184897,230988,264778,259985,255787,260387 146 | North America,NAC,2373750,1977710,2321000,2652210,2746680,2816410,0 147 | Namibia,NAM,4613,4646,5388,5650,5668,5685,5319 148 | New Caledonia,NCL,0,0,0,0,0,0,0 149 | Niger,NER,958,1097,1269,1340,1517,1736,1454 150 | Nigeria,NGA,82983,52147,93240,128999,144918,92907,91530 151 | Nicaragua,NIC,2660,2589,3361,4168,4724,4605,4997 152 | Netherlands,NLD,670035,548366,601852,691336,675271,707933,722933 153 | High income: nonOECD,NOC,2974090,2274000,2884430,3551030,3732430,3811260,3793210 154 | Norway,NOR,212250,151524,170638,205923,207022,202677,190043 155 | Nepal,NPL,1603,1597,1533,1684,1899,2060,2385 156 | New Zealand,NZL,42707,34875,44357,51152,50849,55088,0 157 | High income: 
OECD,OEC,11956500,9679110,11042400,12795100,12675200,13045000,13303300 158 | OECD members,OED,12438400,10066500,11511400,13347000,13269900,13656700,13952900 159 | Oman,OMN,35618,24502,33503,46655,48549,0,0 160 | Other small states,OSS,48713,35863,43700,55051,56047,55819,53069 161 | Pakistan,PAK,21060,20844,23979,29854,27849,30708,30481 162 | Panama,PAN,19596,19582,20337,26380,30284,0,0 163 | Peru,PER,34518,30523,39447,50581,52279,48305,45168 164 | Philippines,PHL,64299,54258,69464,71795,77025,75934,82863 165 | Palau,PLW,100,93,96,115,118,139,158 166 | Papua New Guinea,PNG,0,0,0,0,0,0,0 167 | Poland,POL,203158,164011,192916,226188,223544,242468,0 168 | Puerto Rico,PRI,76613,74213,74310,77273,73909,77915,0 169 | "Korea, Dem. Rep.",PRK,0,0,0,0,0,0,0 170 | Portugal,PRT,81551,66008,71193,83973,81594,89037,91621 171 | Paraguay,PRY,9994,8212,11046,13186,12278,14268,14001 172 | Pacific island small states,PSS,3039,2395,2962,3717,4052,3913,4219 173 | French Polynesia,PYF,0,0,0,0,0,0,0 174 | Qatar,QAT,70732,50309,75065,121692,143640,0,0 175 | Romania,ROU,62182,50292,58372,73107,67751,79614,81865 176 | Russian Federation,RUS,520004,341584,445513,576568,597056,594797,0 177 | Rwanda,RWA,611,632,689,925,1021,1175,1179 178 | South Asia,SAS,342249,326638,434707,520711,522284,550850,576855 179 | Saudi Arabia,SAU,322854,202056,261831,376224,399420,387644,354541 180 | Sudan,SDN,13139,8487,12958,11828,6281,6371,6695 181 | Senegal,SEN,3498,3117,3216,3634,3412,4148,4239 182 | Singapore,SGP,442649,369190,471089,554241,566663,578961,577704 183 | Solomon Islands,SLB,269,235,336,542,635,598,0 184 | Sierra Leone,SLE,339,331,433,479,1242,2196,2022 185 | El Salvador,SLV,5761,4793,5553,6474,6094,6403,0 186 | San Marino,SMR,0,0,0,0,0,0,0 187 | Somalia,SOM,0,0,0,0,0,0,0 188 | Serbia,SRB,14343,11441,12995,15788,15045,18754,19448 189 | Sub-Saharan Africa (developing only),SSA,386870,291864,396406,496099,497409,446234,456068 190 | South Sudan,SSD,10267,7377,9662,11779,1049,2147,5505 191 | 
Sub-Saharan Africa (all income levels),SSF,402684,301388,407732,511532,513868,461304,469922 192 | Small states,SST,88025,61288,71544,87839,87651,89592,0 193 | Sao Tome and Principe,STP,18,20,24,29,34,34,40 194 | Suriname,SUR,0,0,0,0,0,0,0 195 | Slovak Republic,SVK,80305,59950,68087,83209,85143,90827,91738 196 | Slovenia,SVN,36750,28761,30858,36089,33886,35844,37954 197 | Sweden,SWE,256022,190992,225559,262878,251943,254850,254268 198 | Swaziland,SWZ,1793,1860,2063,2205,2168,2128,0 199 | Sint Maarten (Dutch part),SXM,0,0,0,0,0,0,0 200 | Seychelles,SYC,981,915,910,1021,1052,1206,1182 201 | Syrian Arab Republic,SYR,0,0,0,0,0,0,0 202 | Turks and Caicos Islands,TCA,0,0,0,0,0,0,0 203 | Chad,TCD,4420,3252,3927,4726,4758,4347,4756 204 | Togo,TGO,1123,1162,1274,1481,1686,1923,2277 205 | Thailand,THA,208371,180251,227336,265972,274400,284890,280535 206 | Tajikistan,TJK,865,754,866,1164,1644,1631,0 207 | Turkmenistan,TKM,12345,15079,17234,21836,25761,0,0 208 | Timor-Leste,TLS,70,75,91,108,154,0,0 209 | Tonga,TON,47,46,49,79,83,93,78 210 | Trinidad and Tobago,TTO,19906,10037,11282,14922,12924,15430,0 211 | Tunisia,TUN,25197,19917,22236,22603,22250,22083,0 212 | Turkey,TUR,174608,143292,155074,185760,207440,211045,221605 213 | Tuvalu,TUV,0,0,0,0,0,0,0 214 | "Taiwan, China",TWN,0,0,0,0,0,0,0 215 | Tanzania,TZA,5108,4964,5831,6966,8261,7825,9582 216 | Uganda,UGA,3457,3367,3283,3441,4723,4999,5220 217 | Ukraine,UKR,84458,54364,69228,81280,83884,78743,64788 218 | Upper middle income,UMC,3616360,2902330,3697350,4459940,4721540,4893010,5006080 219 | Uruguay,URY,9172,8580,10612,12673,13288,13507,13433 220 | United States,USA,1841940,1587740,1852330,2106370,2194150,2262220,0 221 | Uzbekistan,UZB,12158,11679,12453,14994,14165,16835,18377 222 | St. 
Vincent and the Grenadines,VCT,210,192,183,183,191,186,188 223 | "Venezuela, RB",VEN,97273,59531,112353,94764,99786,91961,0 224 | Virgin Islands (U.S.),VIR,0,0,0,0,0,0,0 225 | Vietnam,VNM,69725,66759,83474,107606,124701,143186,160890 226 | Vanuatu,VUT,275,300,327,351,384,383,0 227 | West Bank and Gaza,PSE,1165,1133,1367,1799,1871,2072,2293 228 | World,WLD,19649200,15803000,18858700,22300100,22641000,23259800,23666400 229 | Samoa,WSM,183,177,192,219,223,243,0 230 | "Yemen, Rep.",YEM,0,0,0,0,0,0,0 231 | South Africa,ZAF,102154,82601,107407,126830,118126,113388,109341 232 | "Congo, Dem. Rep.",COD,7723,5000,8928,10818,9336,10166,10992 233 | Zambia,ZMB,5180,4484,7504,9034,10511,11601,11071 234 | Zimbabwe,ZWE,1831,2250,3245,3557,3884,3507,3625 235 | -------------------------------------------------------------------------------- /Ch05/1. Export2_Columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/1. 
Export2_Columns.xlsx -------------------------------------------------------------------------------- /Ch05/Embarak _Ch05_Data Gathering and Cleaning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/Embarak _Ch05_Data Gathering and Cleaning.pdf -------------------------------------------------------------------------------- /Ch05/Embarak _Ch05_Data Gathering and Cleaning.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 5: Data Gathering and Cleaning 5 | 6 | # In[46]: 7 | 8 | 9 | import numpy as np 10 | np.random.randn(5, 3) 11 | 12 | 13 | # In[47]: 14 | 15 | 16 | import pandas as pd 17 | import numpy as np 18 | 19 | dataset = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 20 | 'h'],columns=['stock1', 'stock2', 'stock3']) 21 | dataset.rename(columns={"one":'stock1',"two":'stock2', "three":'stock3'}, inplace=True) # NOTE(review): the frame is created with columns stock1..stock3, so the keys "one"/"two"/"three" match no column here 22 | dataset = dataset.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']) # labels b, d and g are not in the original index, so their rows come back as NaN 23 | 24 | print (dataset) 25 | 26 | 27 | # In[48]: 28 | 29 | 30 | print (dataset['stock1'].isnull()) 31 | 32 | 33 | # In[49]: 34 | 35 | 36 | print (dataset) 37 | dataset.fillna(0) # result is not assigned back, so dataset keeps its NaN rows 38 | 39 | 40 | # In[50]: 41 | 42 | 43 | # Fill missing values forward (each NaN takes the last valid value above it) 44 | print (dataset) 45 | dataset.fillna(method='pad') 46 | 47 | 48 | # In[51]: 49 | 50 | 51 | print (dataset) 52 | dataset.dropna() # result is not assigned back; dataset is unchanged 53 | 54 | 55 | # In[52]: 56 | 57 | 58 | print (dataset) 59 | dataset.replace(np.nan, 0 ) 60 | 61 | 62 | # # Read CSV files 63 | 64 | # In[53]: 65 | 66 | 67 | import pandas as pd 68 | sales = pd.read_csv("Sales.csv") 69 | print ("\n\n<<<<<<< First 5 records <<<<<<<\n\n" ) 70 | print (sales.head()) 71 | 72 | 73 | # In[54]: 74 | 75 | 76 | print ("\n\n<<<<<<< Last 5 records <<<<<<<\n\n" ) 77 | print (sales.tail()) 78 | 79 | 80 | # In[55]: 81 | 82 | 83 | 
# pandas is already imported as `pd` by an earlier cell.
# Read only the first four data rows of the file.
salesNrows = pd.read_csv("Sales.csv", nrows=4)
salesNrows


# In[56]:


# Shorten two column names in place.
salesNrows.rename(columns={"SALES_ID": 'ID', "SALES_BY_REGION": 'REGION'}, inplace=True)
salesNrows


# # Find unique values

# In[57]:


# Number of distinct values per column, then the distinct values themselves.
print(len(salesNrows['JANUARY'].unique()))
print(len(salesNrows['REGION'].unique()))
print(salesNrows['JANUARY'].unique())


# In[58]:


# usecols accepts either column positions or column names;
# here the columns at positions 0, 1 and 6 are selected.
salesNrows = pd.read_csv("Sales.csv", nrows=4, usecols=[0, 1, 6])
salesNrows


# In[60]:


# Read specific fields of data, this time selected by column name.
salesNrows = pd.read_csv(
    "Sales.csv",
    nrows=4,
    usecols=['SALES_ID', 'SALES_BY_REGION', 'FEBRUARY', 'MARCH'],
)
salesNrows


# In[61]:


# Treat the listed markers as missing values (NaN) while parsing.
# NOTE(review): 'not avilable' presumably mirrors the spelling used inside
# Sales.csv -- confirm against the data before "correcting" it.
sales = pd.read_csv("Sales.csv", nrows=7, na_values=["n.a.", "not avilable"])
mydata = sales.head(7)
mydata


# In[62]:


# Same as above, additionally treating -1 as a missing-value marker.
sales = pd.read_csv("Sales.csv", nrows=7, na_values=["n.a.", "not avilable", -1])
mydata = sales.head(7)
mydata


# # Data Integration
# ## Read Data

# In[63]:


import pandas as pd

a = pd.read_csv("1. Export1_Columns.csv")
b = pd.read_csv("1. Export2_Columns.csv")


# In[64]:


a.head()


# In[65]:


b.head()


# In[66]:


a.head()


# In[67]:


# Remove columns from `b` in three steps: one by label, a list of labels,
# and finally by position. The positional drop is evaluated against the
# columns that remain after the first two drops, so the order matters.
b.drop('2014', axis=1, inplace=True)
columns = ['2013', '2012']
b.drop(columns, inplace=True, axis=1)
b.drop(b.columns[[3]], axis=1, inplace=True)
b.head()


# In[68]:


# Join the two frames on an explicitly named key column.
mergedDataSet = a.merge(b, on="Country Name")
mergedDataSet.head()


# In[69]:


# Without `on`, merge joins on every column name the two frames share.
dataX = a.merge(b)
dataX.head()


# # Merge two data sets using Index
# ### Rows Union

# In[70]:


# First five rows of `a`; reset_index() moves the old index into a column.
Data1 = a.head()
Data1 = Data1.reset_index()
Data1


# In[71]:


# Last five rows of `a`, treated the same way.
Data2 = a.tail()
Data2 = Data2.reset_index()
Data2


# In[72]:


# Stack the DataFrames on top of each other.
VerticalStack = pd.concat((Data1, Data2), axis=0)
VerticalStack


# # Read JSON data

# In[73]:


import json

# A JSON object with nested objects, parsed into nested dicts.
data = '''{
    "name" : "Ossama",
    "phone" : {
        "type" : "intl",
        "number" : "+971 50 244 5467"
    },
    "email" : {
        "hide" : "No"
    }
}'''
info = json.loads(data)
print('Name:', info["name"])
print('Hide:', info["email"]["hide"])


# In[74]:


# A JSON array of objects, parsed into a list of dicts.
# Named `users_json` rather than `input` so the builtin input() stays usable.
users_json = '''[
    { "id" : "001",
      "x" : "5",
      "name" : "Ossama"
    } ,
    { "id" : "009",
      "x" : "10",
      "name" : "Omar"
    }
]'''
info = json.loads(users_json)
print('User count:', len(info))
for item in info:
    print('\nName', item['name'])
    print('Id', item['id'])
    print('Attribute', item['x'])


# ## Read JSON from the cloud

# In[91]:


import urllib.request
import json


comments_url = "http://python-data.dr-chuck.net/comments_244984.json"

# Announce the URL itself (not the response object) before fetching it.
print('Retrieving', comments_url)
with urllib.request.urlopen(comments_url) as url:
    # read() returns bytes; decode once so the text can be counted and parsed.
    data = url.read().decode("utf-8")

print('Retrieved', len(data), 'characters')

# Parse the decoded text. Calling str() on the raw bytes would produce the
# "b'...'" repr, which is never valid JSON, so the payload would always
# be reported as null.
try:
    js = json.loads(data)
except json.JSONDecodeError:
    js = None

print(json.dumps(js, indent=4))


# In[99]:


from urllib.request import urlopen
import json

# json.loads accepts the raw bytes directly. The parsed document gets its
# own name so the `json` module is not overwritten, and the response is
# closed as soon as it has been read.
with urlopen(comments_url) as req:
    comments_doc = json.loads(req.read())
print(comments_doc)
print(comments_doc['comments'])


# In[100]:


# Print every comment's name and count, then the number of comments and the
# total of their counts. `total` avoids shadowing the builtin sum().
total = 0
counter = 0
for comment in comments_doc["comments"]:
    counter += 1
    total += int(comment["count"])
    print(comment["name"], " ", comment["count"])

print("\nCount: ", counter)
print("Sum: ", total)


# In[101]:


import json

# Same kind of document, this time loaded from a local file.
with open('comments.json') as json_data:
    jasondta = json.load(json_data)
    print(jasondta)


# In[102]:


total = 0
counter = 0
for comment in jasondta["comments"]:
    counter += 1
    total += int(comment["count"])
    print(comment["name"], " ", comment["count"])

print("\nCount: ", counter)
print("Sum: ", total)


# # Read and process HTML tags

# In[103]:


import urllib.request

with urllib.request.urlopen("http://python-data.dr-chuck.net/known_by_Rona.html") as url:
    strhtml = url.read()
# strhtml now holds the page source as raw bytes.
350 | print(strhtml[:700]) 351 | 352 | 353 | # In[104]: 354 | 355 | 356 | import urllib 357 | from bs4 import BeautifulSoup 358 | 359 | response = urllib.request.urlopen('http://python-data.dr-chuck.net/known_by_Rona.html') 360 | html_doc = response.read() 361 | 362 | soup = BeautifulSoup(html_doc, 'html.parser') 363 | 364 | print(html_doc[:700]) 365 | print("\n") 366 | print (soup.title) 367 | print(soup.title.string) 368 | print(soup.a.string) 369 | 370 | 371 | # In[106]: 372 | 373 | 374 | for x in soup.find_all('b'): 375 | print(x.string) 376 | 377 | 378 | # In[107]: 379 | 380 | 381 | import urllib 382 | from bs4 import BeautifulSoup 383 | 384 | response = urllib.request.urlopen('http://python-data.dr-chuck.net/known_by_Rona.html') 385 | html_doc = response.read() 386 | print (html_doc[:300]) 387 | soup = BeautifulSoup(html_doc, 'html.parser') 388 | 389 | print ("\n") 390 | counter=0 391 | for link in soup.findAll("a"): 392 | print(link.get("href")) 393 | if counter<10: 394 | counter+=1 395 | continue 396 | else: break 397 | 398 | 399 | # In[108]: 400 | 401 | 402 | htmldata=""" 403 |
404 |410 | 411 | The Dormouse's story 412 | 413 |
414 |415 | Once upon a time there were three little sisters; and their names were 416 | 417 | Elsie 418 | 419 | , 420 | 421 | Lacie 422 | 423 | and 424 | 425 | Tillie 426 | 427 | ; and they lived at the bottom of a well. 428 |
429 |430 | ... 431 |
432 | 433 | 434 | """ 435 | 436 | from bs4 import BeautifulSoup 437 | soup = BeautifulSoup(htmldata, 'html.parser') 438 | print(soup.prettify()) 439 | 440 | 441 | # In[109]: 442 | 443 | 444 | soup.title 445 | 446 | 447 | # In[110]: 448 | 449 | 450 | soup.title.name 451 | 452 | 453 | # In[111]: 454 | 455 | 456 | soup.title.string 457 | 458 | 459 | # In[112]: 460 | 461 | 462 | soup.title.parent.name 463 | 464 | 465 | # In[113]: 466 | 467 | 468 | soup.p 469 | 470 | 471 | # In[114]: 472 | 473 | 474 | soup.p['class'] 475 | 476 | 477 | # In[115]: 478 | 479 | 480 | soup.a 481 | 482 | 483 | # In[116]: 484 | 485 | 486 | soup.find_all('a') 487 | 488 | 489 | # In[117]: 490 | 491 | 492 | soup.find(id="link2") 493 | 494 | 495 | # In[118]: 496 | 497 | 498 | for link in soup.find_all('a'): 499 | print(link.get('href')) 500 | 501 | 502 | # In[119]: 503 | 504 | 505 | print(soup.get_text()) 506 | 507 | 508 | # In[120]: 509 | 510 | 511 | htmldata=""" 512 | 513 |519 | 520 | Author Name: Ossama Embarak 521 | 522 |
523 |524 | Python techniques for gathering and cleaning data 525 | 530 | Data Cleaning 531 | 532 | , Data Processing and Visulization 533 | 538 | Data Visualization 539 | 540 | 541 |
542 |543 | @July 2018 544 |
545 | 546 | 547 | """ 548 | 549 | from bs4 import BeautifulSoup 550 | soup = BeautifulSoup(htmldata, 'html.parser') 551 | print(soup.prettify()) 552 | 553 | 554 | # In[121]: 555 | 556 | 557 | print(soup.get_text()) 558 | 559 | 560 | # In[128]: 561 | 562 | 563 | xmldata = """ 564 | 565 | 566 |