├── .gitattributes ├── 9781484241080.jpg ├── Ch01 ├── Embarak _Ch01_Introduction- String Processing.pdf ├── Embarak _Ch01_Introduction- String Processing.py ├── Embarak _Ch01_Introduction_ Part 1.pdf ├── Embarak _Ch01_Introduction_ Part 1.py ├── Embarak _Ch01_Introduction_ Part 2.pdf ├── Embarak _Ch01_Introduction_ Part 2.py ├── Embarak _Ch01_Introduction_Functions and Modules.pdf └── Embarak _Ch01_Introduction_Functions and Modules.py ├── Ch02 ├── Embarak _Ch02_The importance of data visualization in business.pdf └── Embarak _Ch02_The importance of data visualization in business.py ├── Ch03 ├── Embarak _Ch03_Data Collections Structure .pdf └── Embarak _Ch03_Data Collections Structure .py ├── Ch04 ├── Embarak _Ch04_File IO Processing _ Regular Expressions .pdf ├── Embarak _Ch04_File IO Processing _ Regular Expressions .py ├── MailsData.txt └── Wild-Card.txt ├── Ch05 ├── 1. Export1_Columns.csv ├── 1. Export1_Columns.xlsx ├── 1. Export2_Columns.csv ├── 1. Export2_Columns.xlsx ├── Embarak _Ch05_Data Gathering and Cleaning.pdf ├── Embarak _Ch05_Data Gathering and Cleaning.py ├── Import_1.xlsx ├── Import_2.xlsx ├── Sales.csv └── Sales.xlsx ├── Ch06 ├── Embarak _Ch06_Data Exploring and Analysis.pdf └── Embarak _Ch06_Data Exploring and Analysis.py ├── Ch07 ├── Embarak _Ch07_Data Visualization.pdf ├── Embarak _Ch07_Data Visualization.py └── Salaries.csv ├── Ch08 ├── Death data.csv ├── Embarak Ch08 NCHS Case Study.pdf ├── Embarak Ch08 NCHS Case Study.py └── NCHS.csv ├── Contributing.md ├── LICENSE.txt ├── README.md └── errata.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484241080.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/9781484241080.jpg -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction- String Processing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction- String Processing.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction- String Processing.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[3]: 5 | 6 | 7 | var1 = 'Welcome to Dubai' 8 | var2 = "Python Programming" 9 | 10 | print ("var1[0]:", var1[0]) 11 | print ("var2[1:5]:", var2[1:5]) 12 | 13 | 14 | # In[5]: 15 | 16 | 17 | st1="Hello" 18 | st2=' World' 19 | fullst=st1 + st2 20 | print (fullst) 21 | 22 | 23 | # In[11]: 24 | 25 | 26 | # looking inside strings 27 | fruit = 'banana' 28 | letter= fruit[1] 29 | print (letter) 30 | 31 | index=3 32 | w = fruit[index-1] 33 | 34 | print (w) 35 | print (len(fruit)) 36 | 37 | 38 | # In[14]: 39 | 40 | 41 | # Convert string to int 42 | str3 = '123' 43 | str3= int (str3)+1 44 | print (str3) 45 | 46 | 47 | # In[15]: 48 | 49 | 50 | # Read and convert data 51 | name=input('Enter your name: ') 52 | age=input('Enter your age: ') 53 | age= int(age) + 1 54 | 55 | print ("Name:%s"% name ,"\t Age:%d"% age) 56 | 57 | 58 | # In[30]: 59 | 60 | 61 | # Looking through string 62 | fruit ='banana' 63 | index=0 64 | while index< len(fruit): 65 | letter = fruit [index] 66 | print (index, letter ) 67 | index=index+1 68 | 69 | 70 | # In[31]: 71 | 72 | 73 | print ("\n Implementing iteration with continue") 74 | while True: 75 | line = input('Enter your data>') 76 | if line[0]=='#': 77 | continue 78 | 
if line =='done': 79 | break 80 | print (line ) 81 | print ('End!') 82 | 83 | 84 | # In[32]: 85 | 86 | 87 | print ("\nPrinting in reverse order") 88 | index=len(fruit)-1 89 | while index>=0 : 90 | letter = fruit [index] 91 | print (index, letter ) 92 | index=index-1 93 | 94 | 95 | # In[33]: 96 | 97 | 98 | Country='Egypt' 99 | for letter in Country: 100 | print (letter) 101 | 102 | 103 | # In[2]: 104 | 105 | 106 | # Looking and counting 107 | word='banana' 108 | count=0 109 | for letter in word: 110 | if letter =='a': 111 | count +=1 112 | print ("Number of a in ", word, "is :", count ) 113 | 114 | 115 | # In[3]: 116 | 117 | 118 | # Slicing Strings 119 | s="Welcome to Higher Colleges of Technology" 120 | print (s[0:4]) 121 | print (s[6:7]) 122 | print (s[6:20]) 123 | print (s[:12]) 124 | print (s[2:]) 125 | print (s [:]) 126 | print (s) 127 | 128 | 129 | # In[43]: 130 | 131 | 132 | var1 =' Higher Colleges of Technology ' 133 | var2='College' 134 | var3='g' 135 | 136 | print ( var2 in var1) 137 | print ( var2 not in var1) 138 | 139 | 140 | # In[29]: 141 | 142 | 143 | var1 =' Higher Colleges of Technology ' 144 | var2='College' 145 | var3='g' 146 | 147 | print (var1.upper()) 148 | print (var1.lower()) 149 | print ('WELCOME TO'.lower()) 150 | print (len(var1)) 151 | print (var1.count(var3, 2, 29) ) # find how many g letters in var1 152 | print ( var2.count(var3) ) 153 | 154 | 155 | # In[33]: 156 | 157 | 158 | print (var1.endswith('r')) 159 | print (var1.startswith('O')) 160 | print (var1.find('h', 0, 29)) 161 | 162 | print (var1.lstrip()) # It removes all leading whitespace of a string in var1 163 | print (var1.rstrip()) # It removes all trailing whitespace of a string in var1 164 | print (var1.strip()) 165 | print ('\n') 166 | print (var1.replace('Colleges', 'University')) 167 | 168 | 169 | # In[39]: 170 | 171 | 172 | # Parsing and Extracting strings 173 | Maindata = 'From ossama.embarak@hct.ac.ae Sunday Jan 4 09:30:50 2017' 174 | atpost = Maindata.find('@') 175 | 
print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 176 | print (atpost) 177 | print (Maindata[ :atpost]) 178 | data = Maindata[ :atpost] 179 | name=data.split(' ') 180 | print (name) 181 | print (name[1].replace('.', ' ').upper()) 182 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 183 | 184 | 185 | # In[41]: 186 | 187 | 188 | # Another way to split strings 189 | Maindata = 'From ossama.embarak@hct.ac.ae Sunday Jan 4 09:30:50 2017' 190 | name= Maindata[ :atpost].replace('From','').upper() 191 | print (name.replace('.',' ').upper().lstrip()) 192 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 193 | sppos=Maindata.find(' ', atpost) 194 | print (sppos) 195 | print (Maindata[ :sppos]) 196 | host = Maindata [atpost + 1 : sppos ] 197 | print (host) 198 | print ("\n<<<<<<<<<<<<<<>>>>>>>>>>>>>") 199 | 200 | 201 | # # EXERCISES AND ANSWERS 202 | 203 | # In[47]: 204 | 205 | 206 | var1 ='HCT' 207 | index=0 208 | while index< len(var1): 209 | letter = var1[index] 210 | print (letter) 211 | index+=1 212 | 213 | 214 | # In[48]: 215 | 216 | 217 | var1 ='HCT' 218 | index=0 219 | while len(var1)> index: 220 | letter = var1[index] 221 | print (letter) 222 | index+=1 223 | 224 | 225 | # In[54]: 226 | 227 | 228 | strvar1 = 'X-DSPAM-Confidence: 0.8475' 229 | post = strvar1.find(':') 230 | numer=float(strvar1[post+1:]) 231 | print (numer ) 232 | 233 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_ Part 1.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 1.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 1 Getting Started with Python 5 | 6 | # 
In[47]: 7 | 8 | 9 | get_ipython().magic(u'pinfo help') 10 | 11 | 12 | # In[4]: 13 | 14 | 15 | age,mark,code=10,75,"CIS2403" 16 | print (age) 17 | print (mark) 18 | print (code) 19 | 20 | 21 | # In[5]: 22 | 23 | 24 | TV=15 25 | Mobile=20 26 | Tablet = 30 27 | 28 | total = TV + 29 | Mobile + 30 | Tablet 31 | print (total) 32 | 33 | 34 | # In[6]: 35 | 36 | 37 | TV=15 38 | Mobile=20 39 | Tablet = 30 40 | 41 | total = TV + Mobile + Tablet 42 | print (total) 43 | 44 | 45 | # In[7]: 46 | 47 | 48 | days = ['Monday', 'Tuesday', 'Wednesday', 49 | 'Thursday', 'Friday'] 50 | print (days) 51 | 52 | 53 | # In[8]: 54 | 55 | 56 | sms1 = 'Hellow World' 57 | sms2 = "Hellow World" 58 | sms3 = """ Hellow World""" 59 | sms4 = """ Hellow 60 | World""" 61 | 62 | print (sms1) 63 | print (sms2) 64 | print (sms3) 65 | print (sms4) 66 | 67 | 68 | # In[9]: 69 | 70 | 71 | TV=15; name="Nour"; print (name); print ("Welcome to\nDubai Festivale 2018") 72 | 73 | 74 | # In[10]: 75 | 76 | 77 | name = input("Enter your name ") 78 | age = int (input("Enter your age")) 79 | 80 | print ("\nName=", name); print ("\nAge=", age) 81 | 82 | 83 | # ### 1.2 Declaring Variable and Assigning Values 84 | 85 | # In[11]: 86 | 87 | 88 | age = 11 89 | name ="Nour" 90 | tall=100.50 91 | 92 | 93 | # In[12]: 94 | 95 | 96 | print (age) 97 | print (name) 98 | print (tall) 99 | 100 | 101 | # In[13]: 102 | 103 | 104 | age= mark = code =25 105 | print (age) 106 | print (mark) 107 | print (code) 108 | 109 | 110 | # In[14]: 111 | 112 | 113 | age,mark,code=10,75,"CIS2403" 114 | print (age) 115 | print (mark) 116 | print (code) 117 | 118 | 119 | # In[16]: 120 | 121 | 122 | # Expressions 123 | x=0.6 124 | x=3.9 * x * (1-x) 125 | print (round(x, 2)) 126 | 127 | 128 | # In[18]: 129 | 130 | 131 | # Python single line comment 132 | 133 | 134 | # In[19]: 135 | 136 | 137 | ''' This 138 | Is 139 | Multipline comment''' 140 | 141 | 142 | # In[20]: 143 | 144 | 145 | print ("pi=%s"%"3.14159") 146 | 147 | 148 | # In[1]: 149 | 150 | 151 | 
print("The value of %s is = %02f" % ("pi", 3.14159)) 152 | 153 | 154 | # In[21]: 155 | 156 | 157 | print ("Your name is %s, and your height is %.2f while your weight is %.2d" % 158 | ('Ossama', 172.156783, 75.56647)) 159 | 160 | 161 | # In[23]: 162 | 163 | 164 | print ("Hi %(Name)s, your height is %(height).2f" %{'Name':"Ossama", 165 | 'height': 172.156783}) 166 | 167 | 168 | # In[24]: 169 | 170 | 171 | x = "price is" 172 | print ("{1} {0} {2}".format(x, "The", 1920.345)) 173 | 174 | 175 | # In[34]: 176 | 177 | 178 | class A():x=9 179 | w=A() 180 | print ("{0} {1[2]} {2[test]} {3.x}".format("This", ["a", "or", "is"], 181 | {"test": "another"},w)) 182 | print ("{1[1]} {0} {1[2]} {2[test]} {3.x}".format("This", 183 | ["a", "or", "is"], {"test": "another"},w)) 184 | 185 | 186 | # In[42]: 187 | 188 | 189 | import time 190 | localtime = time.asctime(time.localtime(time.time())) 191 | print ("Formatted time :", localtime) 192 | print (time.localtime()) 193 | print (time.time()) 194 | 195 | 196 | # In[45]: 197 | 198 | 199 | import calendar 200 | calendar.prcal(2018) 201 | 202 | 203 | # In[46]: 204 | 205 | 206 | ########### End 207 | 208 | 209 | # In[48]: 210 | 211 | 212 | print (13//5) 213 | 214 | 215 | # In[50]: 216 | 217 | 218 | print (13<5) 219 | print (13>5) 220 | print (13<=5) 221 | print (2>=5) 222 | print (13==5) 223 | print (13!=5) 224 | 225 | 226 | # In[56]: 227 | 228 | 229 | x=10 230 | print (x) 231 | x=10; x/=2 232 | print (x) 233 | x=10; x+=7 234 | print (x) 235 | x=10; x-=5 236 | print (x) 237 | x=10; x*=5 238 | print (x) 239 | x=13; x%=5 240 | print (x) 241 | x=10; x**=3 242 | print(x) 243 | x=10; x//=2 244 | print(x) 245 | 246 | 247 | # In[57]: 248 | 249 | 250 | x=10>5 and 4>20 251 | print (x) 252 | 253 | x=10>5 or 4>20 254 | print (x) 255 | 256 | x=not(10<4) 257 | print (x) 258 | 259 | 260 | # In[45]: 261 | 262 | 263 | print (13/5) 264 | 265 | 266 | # In[46]: 267 | 268 | 269 | print (13%5) 270 | 271 | 272 | # In[47]: 273 | 274 | 275 | print (2**3) 276 | 
277 | 278 | # In[7]: 279 | 280 | 281 | 282 | 283 | 284 | # In[10]: 285 | 286 | 287 | #single line comment 288 | 289 | '''This is 290 | multiline comment''' 291 | 292 | 293 | # In[5]: 294 | 295 | 296 | # Expressions 297 | x=0.6 298 | x=3.9 *x *(1-x) 299 | print (round( x,2) ) 300 | 301 | 302 | # In[10]: 303 | 304 | 305 | largest = None 306 | print ('Before:', largest) 307 | for val in [30, 45, 12, 90, 74, 15]: 308 | if largest is None or val > largest : 309 | largest = val 310 | print ('Loop:', val, largest) 311 | print ('Largest:', largest) 312 | 313 | 314 | # 315 | # # Pandas and other libraries 316 | 317 | # In[34]: 318 | 319 | 320 | #Create series from array using pandas and numpy 321 | import pandas as pd 322 | import numpy as np 323 | data = np.array([90,75,50,66]) 324 | s = pd.Series(data,index=['A','B','C','D']) 325 | print (s) 326 | 327 | 328 | # In[36]: 329 | 330 | 331 | print (s[1]) 332 | 333 | 334 | # In[37]: 335 | 336 | 337 | #Create series from dictionary using pandas and numpy 338 | import pandas as pd 339 | import numpy as np 340 | data = {'Ahmed' : 92, 'Ali' : 55, 'Omar' : 83} 341 | s = pd.Series(data,index=['Ali','Ahmed','Omar']) 342 | print (s) 343 | 344 | 345 | # In[38]: 346 | 347 | 348 | print (s[1:]) 349 | 350 | 351 | # # DataFrame 352 | 353 | # In[39]: 354 | 355 | 356 | import pandas as pd 357 | data = [['Ahmed',35],['Ali',17],['Omar',25]] 358 | DataFrame1 = pd.DataFrame(data,columns=['Name','Age']) 359 | print (DataFrame1) 360 | 361 | 362 | # In[40]: 363 | 364 | 365 | DataFrame1[1:] 366 | 367 | 368 | # In[41]: 369 | 370 | 371 | import pandas as pd 372 | data = {'Name':['Ahmed', 'Ali', 'Omar', 'Salwa'],'Age':[35,17,25,30]} 373 | dataframe2 = pd.DataFrame(data, index=[100, 101, 102, 103]) 374 | print (dataframe2) 375 | 376 | 377 | # In[42]: 378 | 379 | 380 | dataframe2[:2] 381 | 382 | 383 | # In[43]: 384 | 385 | 386 | dataframe2['Name'] 387 | 388 | 389 | # # Panel 390 | 391 | # In[44]: 392 | 393 | 394 | # creating a panel 395 | import pandas as 
pd 396 | import numpy as np 397 | data = {'Temprature Day1' : pd.DataFrame(np.random.randn(4, 3)), 398 | 'Temprature Day2' : pd.DataFrame(np.random.randn(4, 2))} 399 | p = pd.Panel(data) 400 | print (p['Temprature Day1']) 401 | 402 | 403 | # # 1.6.3 PYTHON LAMBDAS, AND THE NUMPY LIBRARY. 404 | 405 | # In[46]: 406 | 407 | 408 | result = lambda x, y : x * y 409 | result(2,5) 410 | 411 | 412 | # In[47]: 413 | 414 | 415 | result(4,10) 416 | 417 | 418 | # In[65]: 419 | 420 | 421 | def fahrenheit(T): 422 | return ((float(9)/5)*T + 32) 423 | def celsius(T): 424 | return (float(5)/9)*(T-32) 425 | Temp = (15.8, 25, 30.5,25) 426 | 427 | F = list ( map(fahrenheit, Temp)) 428 | C = list ( map(celsius, F)) 429 | print (F) 430 | print (C) 431 | 432 | 433 | # In[72]: 434 | 435 | 436 | Celsius = [39.2, 36.5, 37.3, 37.8] 437 | Fahrenheit = map(lambda x: (float(9)/5)*x + 32, Celsius) 438 | for x in Fahrenheit: 439 | print(x) 440 | 441 | 442 | # In[79]: 443 | 444 | 445 | fib = [0,1,1,2,3,5,8,13,21,34,55] 446 | result = filter(lambda x: x % 2==0, fib) 447 | for x in result: 448 | print(x) 449 | 450 | 451 | # In[81]: 452 | 453 | 454 | f = lambda a,b: a if (a > b) else b 455 | reduce(f, [47,11,42,102,13]) 456 | 457 | 458 | # In[82]: 459 | 460 | 461 | reduce(lambda x,y: x+y, [47,11,42,13]) 462 | 463 | 464 | # In[83]: 465 | 466 | 467 | a=np.array([[1,2,3],[4,5,6]]) 468 | b=np.array([[7,8,9],[10,11,12]]) 469 | np.add(a,b) 470 | 471 | 472 | # In[84]: 473 | 474 | 475 | np.subtract(a,b) #Same as a-b 476 | 477 | 478 | # # Series 479 | 480 | # In[6]: 481 | 482 | 483 | import pandas as pd 484 | animals = ["Lion", "Tiger", "Bear"] 485 | pd.Series(animals) 486 | 487 | 488 | # In[5]: 489 | 490 | 491 | marks = [95, 84, 55, 75] 492 | pd.Series(marks) 493 | 494 | 495 | # In[11]: 496 | 497 | 498 | # Create series from dictionary where indices are the dictionary keys 499 | quiz1 = {"Ahmed":75, "Omar": 84, "Salwa": 70} 500 | q = pd.Series(quiz1) 501 | q 502 | 503 | 504 | # In[13]: 505 | 506 | 507 | # 
query series 508 | q.loc['Ahmed'] 509 | 510 | 511 | # In[20]: 512 | 513 | 514 | q['Ahmed'] 515 | 516 | 517 | # In[19]: 518 | 519 | 520 | q.iloc[2] 521 | 522 | 523 | # In[21]: 524 | 525 | 526 | q[2] 527 | 528 | 529 | # In[25]: 530 | 531 | 532 | # implement numpy operation on a series 533 | s = pd.Series([70,90,65,25, 99]) 534 | s 535 | 536 | 537 | # In[27]: 538 | 539 | 540 | total =0 541 | for val in s: 542 | total += val 543 | print (total) 544 | 545 | 546 | # In[28]: 547 | 548 | 549 | import numpy as np 550 | total = np.sum(s) 551 | print (total) 552 | 553 | 554 | # In[29]: 555 | 556 | 557 | # add new values to series 558 | s = pd.Series ([99,55,66,88]) 559 | s.loc['Ahmed'] = 85 560 | s 561 | 562 | 563 | # In[32]: 564 | 565 | 566 | # Append Series 567 | test = [95, 84, 55, 75] 568 | marks = pd.Series(test) 569 | s = pd.Series ([99,55,66,88]) 570 | s.loc['Ahmed'] = 85 571 | s 572 | NewSeries = s.append(marks) 573 | NewSeries 574 | 575 | 576 | # # 1.6.6 RUN BASIC INFERENTIAL STATISTICAL ANALYSES. 
577 | 578 | # In[37]: 579 | 580 | 581 | import numpy as np 582 | x = np.random.binomial(20, .5, 10000) 583 | print((x>=15).mean()) 584 | 585 | 586 | # In[ ]: 587 | 588 | 589 | sb.regplot(x = "Total Bill", y = "Bill's Tips", data = df) 590 | 591 | 592 | # # Regression 593 | 594 | # In[65]: 595 | 596 | 597 | import seaborn as sb 598 | from matplotlib import pyplot as plt 599 | df = sb.load_dataset('tips') 600 | sb.regplot(x = "total_bill", y = "tip", data = df) 601 | plt.xlabel('Total Bill') 602 | plt.ylabel('Bill Tips') 603 | 604 | plt.show() 605 | 606 | 607 | # In[39]: 608 | 609 | 610 | df 611 | 612 | 613 | # # Python - Chi-Square Test 614 | 615 | # In[41]: 616 | 617 | 618 | from scipy import stats 619 | import numpy as np 620 | import matplotlib.pyplot as plt 621 | 622 | x = np.linspace(0, 10, 100) 623 | fig,ax = plt.subplots(1,1) 624 | 625 | linestyles = [':', '--', '-.', '-'] 626 | deg_of_freedom = [1, 4, 7, 6] 627 | for df, ls in zip(deg_of_freedom, linestyles): 628 | ax.plot(x, stats.chi2.pdf(x, df), linestyle=ls) 629 | 630 | plt.xlim(0, 10) 631 | plt.ylim(0, 0.4) 632 | 633 | plt.xlabel('Value') 634 | plt.ylabel('Frequency') 635 | plt.title('Chi-Square Distribution') 636 | 637 | plt.legend() 638 | plt.show() 639 | 640 | 641 | # # correlation 642 | 643 | # In[42]: 644 | 645 | 646 | import matplotlib.pyplot as plt 647 | import seaborn as sns 648 | df = sns.load_dataset('iris') 649 | 650 | 651 | #without regression 652 | sns.pairplot(df, kind="scatter") 653 | plt.show() 654 | 655 | 656 | # In[46]: 657 | 658 | 659 | from scipy.stats import binom 660 | import seaborn as sb 661 | 662 | data_binom = binom.rvs(n=20,p=0.8,loc=0,size=1000) 663 | ax = sb.distplot(data_binom, 664 | kde=True, 665 | color='blue', 666 | hist_kws={"linewidth": 25,'alpha':1}) 667 | ax.set(xlabel='Binomial', ylabel='Frequency') 668 | 669 | 670 | # In[58]: 671 | 672 | 673 | import pandas as pd 674 | 675 | d = {'Name':pd.Series(['Ahmed','Omar','Ali','Salwa','Majid','Othman','Gameel', 676 | 
'Ziad','Ahlam','Zahrah','Ayman','Alaa']), 677 | 'Age':pd.Series([34,26,25,27,30,54,23,43,40,30,28,46]), 678 | 'Height':pd.Series([114.23,173.24,153.98,172.0,153.20,164.6,183.8,163.78,172.0,164.80,174.10,183.65])} 679 | 680 | #Create a DataFrame 681 | df = pd.DataFrame(d) 682 | 683 | # Calculate the standard deviation 684 | print (df.std()) 685 | 686 | 687 | # In[59]: 688 | 689 | 690 | print (df.describe()) 691 | 692 | 693 | # In[60]: 694 | 695 | 696 | print ("Mean Values in the Distribution") 697 | print (df.mean()) 698 | print ("*******************************") 699 | print ("Median Values in the Distribution") 700 | print (df.median()) 701 | print ("*******************************") 702 | print ("Mode Values in the Distribution") 703 | print (df['Height'].mode()) 704 | 705 | 706 | # ### 1.5 EXERCISES 707 | 708 | # In[2]: 709 | 710 | 711 | # Store input numbers: 712 | num1 = input('Enter first number: ') 713 | num2 = input('Enter second number: ') 714 | 715 | sumval = float(num1) + float(num2) # Add two numbers 716 | minval = float(num1) - float(num2) # Subtract two numbers 717 | mulval = float(num1) * float(num2) # Multiply two numbers 718 | divval = float(num1) / float(num2) #Divide two numbers 719 | 720 | # Display the sum 721 | print('The sum of {0} and {1} is {2}'.format(num1, num2, sumval)) 722 | # Display the subtraction 723 | print('The subtraction of {0} and {1} is {2}'.format(num1, num2, minval)) 724 | # Display the multiplication 725 | print('The multiplication of {0} and {1} is {2}'.format(num1, num2, mulval)) 726 | # Display the division 727 | print('The division of {0} and {1} is {2}'.format(num1, num2, divval)) 728 | 729 | 730 | # In[3]: 731 | 732 | 733 | # A. write a python script to prompt the user to enter the triangle first side (a), 734 | #second side (b) and third side (c) lengths. Then calculate the semi-perimeter (s). 735 | #calculate the triangle area and display the result to the user. 736 | #Area of a triangle = (s*(s-a)*(s-b)*(s-c))-1/2. 
737 | a = float(input('Enter first side: ')) 738 | b = float(input('Enter second side: ')) 739 | c = float(input('Enter third side: ')) 740 | s = (a + b + c) / 2 # calculate the semi-perimeter 741 | 742 | # calculate the area 743 | area = (s*(s-a)*(s-b)*(s-c)) ** 0.5 744 | print('The area of the triangle is %0.2f' %area) 745 | 746 | 747 | # In[7]: 748 | 749 | 750 | import random 751 | a = int(input('Enter the starting value : ')) 752 | b = int(input('Enter the end value : ')) 753 | print(random.randint(a,b)) 754 | random.sample(range(a, b), 3) 755 | 756 | 757 | # In[9]: 758 | 759 | 760 | # convert kilometers to miles 761 | kilometers = float(input('Enter the distance in kilometers: ')) 762 | # conversion factor 763 | Miles = kilometers * 0.62137 764 | print('%0.2f kilometers is equal to %0.2f miles' %(kilometers,Miles)) 765 | 766 | 767 | # In[11]: 768 | 769 | 770 | # convert convert Celsius to Fahrenheit 771 | Celsius = float(input('Enter temperature in Celsius: ')) 772 | # conversion factor 773 | Fahrenheit = (Celsius * 1.8) + 32 774 | print('%0.2f Celsius is equal to %0.2f Fahrenheit' %(Celsius,Fahrenheit)) 775 | 776 | 777 | # ## End Chapter 1 778 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_ Part 2.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_ Part 2.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 1 Control Statements 5 | 6 | # ## Control Statements 7 | 8 | # ## 1) If Statements 9 | 10 | # In[13]: 11 | 12 | 13 | # Comparison operators 14 | x=5 15 | if x==5: 16 | print ('Equal 5') 17 | elif 
x>5: 18 | print ('Greater than 5') 19 | elif x<5: 20 | print ('Less than 5') 21 | 22 | 23 | # In[12]: 24 | 25 | 26 | # Identation 27 | x=5 28 | if x<2: 29 | print ("Bigger than 2") 30 | print (" X Value bigger than 2") 31 | 32 | print ("Now we are out of if block\n") 33 | 34 | 35 | # In[14]: 36 | 37 | 38 | year=2000 39 | if year%4==0: 40 | print("Year(", year ,")is Leap") 41 | else: 42 | print (year , "Year is not Leap" ) 43 | 44 | 45 | 46 | # In[2]: 47 | 48 | 49 | a=10 50 | if a>=20: 51 | print ("Condition is True" ) 52 | else: 53 | if a>=15: 54 | print ("Checking second value" ) 55 | else: 56 | print ("All Conditions are false" ) 57 | 58 | 59 | # In[23]: 60 | 61 | 62 | # use the range statement 63 | for a in range (1,4): 64 | print ( a ) 65 | 66 | 67 | # In[24]: 68 | 69 | 70 | # use the range statement 71 | for a in range (4): 72 | print ( a ) 73 | 74 | 75 | # In[32]: 76 | 77 | 78 | ticket=4 79 | while ticket>0: 80 | print ("Your ticket number is ",ticket) 81 | ticket -=1 82 | 83 | 84 | # ### use break, continue and pass statements 85 | 86 | # In[44]: 87 | 88 | 89 | for letter in 'Python3': 90 | if letter == 'o': 91 | break 92 | print (letter) 93 | 94 | 95 | # In[45]: 96 | 97 | 98 | a=0 99 | while a<=5: 100 | a=a+1 101 | if a%2==0: 102 | continue 103 | print (a) 104 | print ("End of Loop" ) 105 | 106 | 107 | # In[46]: 108 | 109 | 110 | for i in [1,2,3,4,5]: 111 | if i==3: 112 | pass 113 | print ("Pass when value is",i ) 114 | print (i), 115 | 116 | 117 | # ## Excercise , using try and except 118 | # Write a program to prompt the user for hours and 119 | # rate per hour to compute gross pay, the program 120 | # should gives employee 1.5 time the hourse worked 121 | # above 30 hours 122 | # Enter Hours: 50 123 | # Enter Rate: 10 124 | # Pay: 550.0 125 | # 126 | 127 | # In[6]: 128 | 129 | 130 | Hflage=True 131 | Rflage=True 132 | while Hflage & Rflage : 133 | hours = input ('Enter Hours:') 134 | try: 135 | hours = int(hours) 136 | Hflage=False 137 | except: 138 | 
print ("Incorrect hours number !!!!") 139 | try: 140 | rate = input ('Enter Rate:') 141 | rate=float(rate) 142 | Rflage=False 143 | except: 144 | print ("Incorrect rate !!") 145 | 146 | if hours>40: 147 | pay= 40 * rate + (rate*1.5) * (hours-40) 148 | else: 149 | pay= hours * rate 150 | 151 | print ('Pay:',pay) 152 | 153 | 154 | # In[14]: 155 | 156 | 157 | # Try and Except 158 | astr='Fujairah' 159 | errosms='' 160 | try: 161 | istr=int(astr) # error 162 | except: 163 | istr=-1 164 | errosms="\nIncorrect entery" 165 | 166 | print ("Firs Try:", istr , errosms) 167 | 168 | 169 | # In[15]: 170 | 171 | 172 | # Try and Except 173 | astr='12' 174 | errosms='' 175 | try: 176 | istr=int(astr) # error 177 | except: 178 | istr=-1 179 | errosms="\nIncorrect entery" 180 | 181 | print ("Firs Try:", istr , errosms) 182 | 183 | 184 | # ### Python Program to check if a Number is Positive, Negative or Zero 185 | 186 | # In[1]: 187 | 188 | 189 | Val = float(input("Enter a number: ")) 190 | 191 | if Val > 0: 192 | print("{0} is a positive number".format(Val)) 193 | elif Val == 0: 194 | print("{0} is zero".format(Val)) 195 | else: 196 | print("{0} is negative number".format(Val)) 197 | 198 | 199 | # In[4]: 200 | 201 | 202 | # Check if a Number is Odd or Even 203 | val = int(input("Enter a number: ")) 204 | if (val % 2) == 0: 205 | print("{0} is an Even number".format(val)) 206 | else: 207 | print("{0} is an Odd number".format(val)) 208 | 209 | 210 | # In[5]: 211 | 212 | 213 | # Write a python program that displays specific messages using the IF Statement: 214 | #It should ask the user to enter the age of a person, and then using a conditional statement, 215 | #it should print one of the following messages: 216 | 217 | 218 | # In[6]: 219 | 220 | 221 | age = int(input("Enter age of a person")) 222 | if(age < 13): 223 | print("This is a child") 224 | elif(age >= 13 and age <=17): 225 | print("This is a teenager") 226 | elif(age >= 18 and age <=59): 227 | print("This is an adult") 228 | 
else: 229 | print("This is a senior") 230 | 231 | 232 | # In[7]: 233 | 234 | 235 | Speed = int(input("Enter your car speed")) 236 | if(Speed < 80): 237 | print("No Fines") 238 | elif(Speed >= 81 and Speed <=99): 239 | print("200 AE Fine ") 240 | elif(Speed >= 100 and Speed <=109): 241 | print("350 AE Fine ") 242 | else: 243 | print("500 AE Fine ") 244 | 245 | 246 | # In[11]: 247 | 248 | 249 | year = int(input("Enter a year: ")) 250 | if (year % 4) == 0: 251 | if (year % 100) == 0: 252 | if (year % 400) == 0: 253 | print("{0} is a leap year".format(year)) 254 | else: 255 | print("{0} is not a leap year".format(year)) 256 | else: 257 | print("{0} is a leap year".format(year)) 258 | else: 259 | print("{0} is not a leap year".format(year)) 260 | 261 | 262 | # ## Print the Fibonacci sequence 263 | 264 | # In[14]: 265 | 266 | 267 | nterms = int(input("How many terms you want? ")) 268 | # first two terms 269 | n1 = 0 270 | n2 = 1 271 | count = 2 272 | # check if the number of terms is valid 273 | if nterms <= 0: 274 | print("Plese enter a positive integer") 275 | elif nterms == 1: 276 | print("Fibonacci sequence:") 277 | print(n1) 278 | else: 279 | print("Fibonacci sequence:") 280 | print(n1,",",n2,end=', ') 281 | while count < nterms: 282 | nth = n1 + n2 283 | print(nth,end=' , ') 284 | # update values 285 | n1 = n2 286 | n2 = nth 287 | count += 1 288 | 289 | 290 | # In[2]: 291 | 292 | 293 | largest = None 294 | print ('Before:', largest) 295 | for val in [30, 45, 12, 90, 74, 15]: 296 | if largest is None or val>largest: 297 | largest = val 298 | print ("Loop", val, largest) 299 | print ("Largest", largest) 300 | 301 | -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_Functions and Modules.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch01/Embarak _Ch01_Introduction_Functions and Modules.pdf -------------------------------------------------------------------------------- /Ch01/Embarak _Ch01_Introduction_Functions and Modules.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Functions 5 | 6 | # In[20]: 7 | 8 | 9 | def thing(): # function header {def, function name, function argumets } 10 | print ('Hello ', end='') # function body 11 | print ('World') 12 | 13 | 14 | 15 | thing() # function calling 16 | 17 | 18 | # In[13]: 19 | 20 | 21 | def print_Sms(): 22 | print ("Welcome to Python PD session") 23 | print ("Jan 2017\n") 24 | 25 | print ("\nPD Session on HCT Dubai") 26 | print_Sms() 27 | 28 | 29 | # In[18]: 30 | 31 | 32 | def sumvalues(x,y): 33 | print ('The summation of ',x,'+',y,'= ', end='') 34 | return x+y 35 | 36 | a=5 37 | b=a+2 38 | print (sumvalues(a,b) ) # Function calling 39 | 40 | 41 | # In[3]: 42 | 43 | 44 | def Details(name, mark): 45 | if mark>60: 46 | print ("Congratulation ",name," you pass the course") 47 | else: 48 | print ("Unfortunately ",name," you didn’t pass the course") 49 | 50 | 51 | # In[4]: 52 | 53 | 54 | Details("Ossama", 90) 55 | 56 | 57 | # In[5]: 58 | 59 | 60 | Details( 90,"Ossama") 61 | 62 | 63 | # In[6]: 64 | 65 | 66 | Details( mark=90, name="Ossama") 67 | 68 | 69 | # In[7]: 70 | 71 | 72 | 73 | 74 | 75 | # In[9]: 76 | 77 | 78 | def Details(name, mark): 79 | if mark>60: 80 | print ("Congratulation ",name," you pass the course") 81 | else: 82 | print ("Unfortunately ",name," you didn’t pass the course") 83 | 84 | 85 | # In[10]: 86 | 87 | 88 | Details( "Ossama") 89 | 90 | 91 | # In[11]: 92 | 93 | 94 | def Details(name, mark=0): 95 | if mark>60: 96 | print ("Congratulation ",name," you pass the course") 97 | else: 98 | print ("Unfortunately ",name," you didn’t pass the 
course") 99 | 100 | 101 | # In[12]: 102 | 103 | 104 | Details( "Ossama") 105 | 106 | 107 | # In[1]: 108 | 109 | 110 | max('Welcome to Egypt') 111 | 112 | 113 | # In[2]: 114 | 115 | 116 | min(3,5,8,9,100,2) 117 | 118 | 119 | # In[3]: 120 | 121 | 122 | len('Welcome to Egypt') 123 | 124 | 125 | # In[8]: 126 | 127 | 128 | mark=input("Enter your exam mark: ") 129 | mark=float(mark) 130 | if (mark>59.5): 131 | print ("Pass") 132 | else: 133 | print ("Fail") 134 | 135 | 136 | # # Convert Celsius to Fahrenheit 137 | # ## F = 1.8 C + 32 138 | 139 | # In[9]: 140 | 141 | 142 | value = input("Enter the Celsius value: ") 143 | c = int(value) 144 | f = 1.8 * (c) + 32 145 | print (c , " Celsius = ", f , "Fahrenheit") 146 | 147 | 148 | # In[2]: 149 | 150 | 151 | import random 152 | for i in range(5): 153 | x = random.random() 154 | print (x) 155 | 156 | 157 | # In[4]: 158 | 159 | 160 | import random 161 | for i in range(5): 162 | x = random.random() 163 | print (round(x,3)) 164 | 165 | 166 | # In[5]: 167 | 168 | 169 | random.randint(5, 10) 170 | 171 | 172 | # In[9]: 173 | 174 | 175 | random.randint(5, 10) 176 | 177 | 178 | # In[7]: 179 | 180 | 181 | random.randint(5, 10) 182 | 183 | 184 | # In[12]: 185 | 186 | 187 | random.randint(5, 10) 188 | 189 | 190 | # In[16]: 191 | 192 | 193 | t = [30, "Omar", 7, 10] 194 | random.choice(t) 195 | 196 | 197 | # In[17]: 198 | 199 | 200 | random.choice(t) 201 | 202 | 203 | # In[18]: 204 | 205 | 206 | random.choice(t) 207 | 208 | 209 | # In[23]: 210 | 211 | 212 | import math 213 | value = 120 214 | decibels = 10 * math.log10(value) 215 | print (decibels) 216 | 217 | 218 | # In[24]: 219 | 220 | 221 | degrees = 45 222 | radians = degrees / 360.0 * 2 * math.pi 223 | val= math.sin(radians) 224 | print (val) 225 | 226 | 227 | # In[30]: 228 | 229 | 230 | print (math.sqrt(16)) 231 | 232 | 233 | # In[34]: 234 | 235 | 236 | # Anonymous Function Definiton 237 | summation=lambda val1, val2: val1 + val2 238 | 239 | #Calling summation as a function 240 | 
print ("The summation of 7 + 10 = ", summation(7,10) ) 241 | 242 | 243 | # In[35]: 244 | 245 | 246 | quiz = 50 247 | def readgrade(): 248 | quiz = input("Enter your quiz mark: ") 249 | quiz = int(quiz) 250 | print ("Your quiz score is ", quiz) 251 | 252 | readgrade() 253 | print ("Your quiz score is ", quiz) 254 | 255 | 256 | # In[ ]: 257 | 258 | 259 | print ("\n******** Greeting ***********") 260 | def greeting(lang): 261 | if lang=='es': 262 | print ('Hola') 263 | elif lang=='fr': 264 | print ('Bonjour') 265 | else: 266 | print ('Hello') 267 | 268 | greeting('en') 269 | greeting('es') 270 | greeting('fr') 271 | 272 | 273 | # In[1]: 274 | 275 | 276 | def computepay(hours, rate): 277 | if hours>40: 278 | pay= 40 * rate + (rate*1.5) * (hours-40) 279 | else: 280 | pay= hours * rate 281 | return pay 282 | 283 | 284 | hours = input ('Enter Hours: ') 285 | try: 286 | hours = int(hours) 287 | except: 288 | print ("Incorrect hours number !!!!") 289 | 290 | try: 291 | rate = input ("Enter Rate: ") 292 | rate=float(rate) 293 | except: 294 | print ("Incorrect rate !!") 295 | 296 | fullpay =computepay(hours, rate) 297 | 298 | print ("Gross Pay: ", fullpay) 299 | 300 | 301 | # ## Exrcises 302 | # ### find the Highest Common Factor of two values. 
303 | 304 | # In[5]: 305 | 306 | 307 | def HCF(x, y): 308 | if x > y: 309 | smaller = y 310 | else: 311 | smaller = x 312 | for i in range(1,smaller + 1): 313 | if((x % i == 0) and (y % i == 0)): 314 | HCF = i 315 | return HCF 316 | 317 | Number1 = int(input("Enter first number: ")) 318 | Number2 = int(input("Enter second number: ")) 319 | print("The Highest Common Factor of", Number1,"and", Number2,"is", HCF(Number1, Number2)) 320 | 321 | 322 | # In[6]: 323 | 324 | 325 | #Find Factorial of Number Using Recursion 326 | 327 | 328 | # In[9]: 329 | 330 | 331 | def RecurFactorial(n): 332 | if n == 1: 333 | return n 334 | else: 335 | return n*RecurFactorial(n-1) 336 | 337 | # read the value from the user 338 | Number = int(input("Enter a number: ")) 339 | 340 | # check is the number is negative 341 | if Number < 0: 342 | print("Sorry, factorial does not exist for negative numbers") 343 | elif Number == 0: 344 | print("The factorial of 0 is 1") 345 | else: 346 | print("The factorial of",Number,"is",RecurFactorial(Number)) 347 | 348 | 349 | # In[12]: 350 | 351 | 352 | def RecurFibo(n): 353 | if n <= 1: 354 | return n 355 | else: 356 | return(RecurFibo(n-1) + RecurFibo(n-2)) 357 | 358 | # read input from the user 359 | nlength = int(input("Enter your length? 
")) 360 | # check if the number of terms is valid 361 | if nlength <= 0: 362 | print("Plese enter a positive integer") 363 | else: 364 | print("Fibonacci sequence:") 365 | for i in range(nlength): 366 | print(RecurFibo(i), end=' , ') 367 | 368 | 369 | # ## 4.6 CREATE PYTHON MODULES 370 | 371 | # In[6]: 372 | 373 | 374 | import addition 375 | addition.add(10,20) 376 | addition.add(30,40) 377 | 378 | 379 | # In[7]: 380 | 381 | 382 | "{1} {0}".format(x, "The") 383 | "{first} {second}".format(first="The", second=x) 384 | 385 | -------------------------------------------------------------------------------- /Ch02/Embarak _Ch02_The importance of data visualization in business.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch02/Embarak _Ch02_The importance of data visualization in business.pdf -------------------------------------------------------------------------------- /Ch02/Embarak _Ch02_The importance of data visualization in business.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import pandas as pd 10 | import seaborn as sns 11 | import pygal 12 | from mayavi import mlab 13 | 14 | 15 | # In[5]: 16 | 17 | 18 | try: 19 | import matplotlib 20 | except: 21 | import pip 22 | pip.main(['install', 'matplotlib']) 23 | import matplotlib 24 | 25 | 26 | # # Matplotlib 27 | 28 | # In[23]: 29 | 30 | 31 | import numpy as np 32 | import matplotlib.pyplot as plt 33 | get_ipython().magic(u'matplotlib inline') 34 | plt.style.use('seaborn-whitegrid') 35 | 36 | X = [590,540,740,130,810,300,320,230,470,620,770,250] 37 | Y = [32,36,39,52,61,72,77,75,68,57,48,48] 38 | 39 | plt.scatter(X,Y) 40 | plt.xlim(0,1000) 41 | plt.ylim(0,100) 42 | 43 | #scatter plot color 44 | 
plt.scatter(X, Y, s=800, c='red', marker='+') 45 | 46 | #change axes ranges 47 | plt.xlim(0,1000) 48 | plt.ylim(0,100) 49 | 50 | #add title 51 | plt.title('Relationship Between Temperature and Iced Coffee Sales') 52 | 53 | #add x and y labels 54 | plt.xlabel('Sold Coffee') 55 | plt.ylabel('Temperature in Fahrenheit') 56 | 57 | #show plot 58 | plt.show() 59 | 60 | 61 | # In[20]: 62 | 63 | 64 | get_ipython().magic(u'matplotlib inline') 65 | import matplotlib.pyplot as plt 66 | plt.style.use('seaborn-whitegrid') 67 | import numpy as np 68 | 69 | # Create empty figure 70 | fig = plt.figure() 71 | ax = plt.axes() 72 | 73 | x = np.linspace(0, 10, 1000) 74 | ax.plot(x, np.sin(x)); 75 | 76 | plt.plot(x, np.sin(x)) 77 | plt.plot(x, np.cos(x)) 78 | plt.xlim(0, 11) 79 | plt.ylim(-2, 2) 80 | plt.axis('tight') 81 | #add title 82 | plt.title('Plotting data using sin and cos') 83 | 84 | 85 | # In[18]: 86 | 87 | 88 | plt.plot(x, np.sin(x - 0), color='blue') # specify color by name 89 | plt.plot(x, np.sin(x - 1), color='g') # short color code (rgbcmyk) 90 | plt.plot(x, np.sin(x - 2), color='0.75') # Grayscale between 0 and 1 91 | plt.plot(x, np.sin(x - 3), color='#FFDD44') # Hex code (RRGGBB from 00 to FF) 92 | plt.plot(x, np.sin(x - 4), color=(1.0,0.2,0.3)) # RGB tuple, values 0 to 1 93 | plt.plot(x, np.sin(x - 5), color='chartreuse'); # all HTML color names supported 94 | 95 | 96 | # # Seaborn 97 | 98 | # In[34]: 99 | 100 | 101 | import matplotlib.pyplot as plt 102 | get_ipython().magic(u'matplotlib inline') 103 | import numpy as np 104 | import pandas as pd 105 | import seaborn as sns 106 | plt.style.use('classic') 107 | plt.style.use('seaborn-whitegrid') 108 | 109 | # Create some data 110 | data = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000) 111 | data = pd.DataFrame(data, columns=['x', 'y']) 112 | 113 | # Plot the data with seaborn 114 | sns.distplot(data['x']) 115 | sns.distplot(data['y']); 116 | 117 | 118 | # In[35]: 119 | 120 | 121 | for col in 'xy': 
122 | sns.kdeplot(data[col], shade=True) 123 | 124 | 125 | # In[36]: 126 | 127 | 128 | sns.kdeplot(data); 129 | 130 | 131 | # In[37]: 132 | 133 | 134 | with sns.axes_style('white'): 135 | sns.jointplot("x", "y", data, kind='kde'); 136 | 137 | 138 | # In[38]: 139 | 140 | 141 | with sns.axes_style('white'): 142 | sns.jointplot("x", "y", data, kind='hex') 143 | 144 | 145 | # In[41]: 146 | 147 | 148 | sns.pairplot(data); 149 | 150 | 151 | # In[45]: 152 | 153 | 154 | sns.stripplot( x = data['x']) 155 | sns.stripplot( x = data['y']) 156 | 157 | 158 | # In[47]: 159 | 160 | 161 | # box plot per rank 162 | sns.boxplot(x = 'x', y = 'y', data=data) 163 | 164 | 165 | # In[50]: 166 | 167 | 168 | # box plot salaries 169 | sns.boxplot(x = data['y'], whis=2) 170 | 171 | 172 | # # Plotly 173 | 174 | # In[64]: 175 | 176 | 177 | from plotly import __version__ 178 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 179 | init_notebook_mode(connected=True) 180 | print (__version__) 181 | 182 | 183 | # In[91]: 184 | 185 | 186 | import plotly.graph_objs as go 187 | 188 | plot([go.Scatter(x=[95, 77, 84], y=[75, 67, 56])]) 189 | 190 | 191 | # In[67]: 192 | 193 | 194 | import plotly.graph_objs as go 195 | import numpy as np 196 | 197 | x = np.random.randn(2000) 198 | y = np.random.randn(2000) 199 | iplot([go.Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')), 200 | go.Scatter(x=x, y=y, mode='markers', marker=dict(color='white', size=3, opacity=0.3))], show_link=False) 201 | 202 | 203 | # In[90]: 204 | 205 | 206 | import plotly.offline as offline 207 | import plotly.graph_objs as go 208 | 209 | offline.plot({'data': [{'y': [14, 22, 30, 44]}], 210 | 'layout': {'title': 'Offline Plotly', 'font': dict(size=16)}}, image='png') 211 | 212 | 213 | # In[88]: 214 | 215 | 216 | import plotly.plotly as py 217 | import plotly.graph_objs as go 218 | import plotly 219 | import plotly.offline as offline 220 | 221 | 222 | df = 
pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv") 223 | 224 | 225 | schools = df.School 226 | 227 | 228 | data = [go.Bar(x=df.School,y=df.Gap)] 229 | 230 | py.iplot(data, filename='jupyter-basic_bar') 231 | 232 | 233 | # # geoplotlib 234 | 235 | # In[ ]: 236 | 237 | 238 | import geoplotlib 239 | from geoplotlib.utils import read_csv 240 | 241 | data = read_csv('bus.csv') 242 | geoplotlib.dot(data) 243 | geoplotlib.show() 244 | 245 | 246 | # # Direct plotting 247 | 248 | # In[116]: 249 | 250 | 251 | import pandas as pd 252 | import numpy as np 253 | 254 | df = pd.DataFrame(np.random.randn(200,6),index=pd.date_range('1/9/2009', 255 | periods=200), columns=list('ABCDEF')) 256 | 257 | 258 | df.plot(figsize=(20, 10)).legend(bbox_to_anchor=(1, 1)) 259 | #Shape of passed values is (10, 200), indices imply (4, 10) 260 | 261 | 262 | # In[123]: 263 | 264 | 265 | import pandas as pd 266 | import numpy as np 267 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 268 | df.plot.bar(figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 269 | 270 | 271 | # In[124]: 272 | 273 | 274 | import pandas as pd 275 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 276 | df.plot.bar(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 277 | 278 | 279 | # In[126]: 280 | 281 | 282 | import pandas as pd 283 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 284 | df.plot.barh(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 285 | 286 | 287 | # In[131]: 288 | 289 | 290 | import pandas as pd 291 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 292 | df.plot.hist(bins= 20, figsize=(10, 8)).legend(bbox_to_anchor=(1.2, 1)) 293 | 294 | 295 | # In[139]: 296 | 297 | 298 | import pandas as pd 299 | import numpy as np 300 | 301 | 
df=pd.DataFrame({'April':np.random.randn(1000)+1,'May':np.random.randn(1000),'June': 302 | np.random.randn(1000) - 1}, columns=['April', 'May', 'June']) 303 | 304 | df.hist(bins=20) 305 | 306 | 307 | # In[140]: 308 | 309 | 310 | import pandas as pd 311 | import numpy as np 312 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 313 | df.plot.box() 314 | 315 | 316 | # In[145]: 317 | 318 | 319 | import pandas as pd 320 | import numpy as np 321 | 322 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 323 | df.plot.area(figsize=(6, 4)).legend(bbox_to_anchor=(1.3, 1)) 324 | 325 | 326 | # In[150]: 327 | 328 | 329 | import pandas as pd 330 | import numpy as np 331 | df = pd.DataFrame(np.random.rand(20,5),columns=['Jan','Feb','March','April', 'May']) 332 | df.plot.scatter(x='Feb', y='Jan', title='Temprature over two months ') 333 | 334 | 335 | # In[155]: 336 | 337 | 338 | import pandas as pd 339 | import numpy as np 340 | 341 | df = pd.DataFrame(10 * np.random.rand(5), index=['Jan','Feb','March','April', 'May'], columns=['Month']) 342 | df.plot.pie(subplots=True) 343 | 344 | 345 | # # Exercise 346 | 347 | # In[14]: 348 | 349 | 350 | import pandas as pd 351 | import numpy as np 352 | import matplotlib.pyplot as plt 353 | 354 | salesMen = ['Ahmed', 'Omar', 'Ali', 'Ziad', 'Salwa', 'Lila'] 355 | Mobile_Sales = [2540, 1370, 1320, 2000, 2100, 2150] 356 | TV_Sales = [2200, 1900, 2150, 1850, 1770, 2000] 357 | 358 | df = pd.DataFrame() 359 | df ['Name'] =salesMen 360 | df ['Mobile_Sales'] = Mobile_Sales 361 | df ['TV_Sales'] = TV_Sales 362 | df.set_index("Name",drop=True,inplace=True) 363 | 364 | 365 | # In[15]: 366 | 367 | 368 | df 369 | 370 | 371 | # In[16]: 372 | 373 | 374 | df.plot.bar( figsize=(20, 10), rot=0).legend(bbox_to_anchor=(1.1, 1)) 375 | plt.xlabel('Salesmen') 376 | plt.ylabel('Sales') 377 | plt.title('Sales Volume for two salesmen in \nJanuray and April 2017') 378 | plt.show() 379 | 380 | 381 | # 
In[17]: 382 | 383 | 384 | import pandas as pd 385 | import numpy as np 386 | import matplotlib.pyplot as plt 387 | 388 | salesMen = ['Ahmed', 'Omar', 'Ali', 'Ziad', 'Salwa', 'Lila'] 389 | Mobile_Sales = [2540, 1370, 1320, 2000, 2100, 2150] 390 | TV_Sales = [2200, 1900, 2150, 1850, 1770, 2000] 391 | 392 | df = pd.DataFrame() 393 | df ['Name'] =salesMen 394 | df ['Mobile_Sales'] = Mobile_Sales 395 | df ['TV_Sales'] = TV_Sales 396 | df.set_index("Name",drop=True,inplace=True) 397 | 398 | 399 | df.plot.pie(subplots=True) 400 | 401 | 402 | # In[18]: 403 | 404 | 405 | df.plot.box() 406 | 407 | 408 | # In[19]: 409 | 410 | 411 | df.plot.area(figsize=(6, 4)).legend(bbox_to_anchor=(1.3, 1)) 412 | 413 | 414 | # In[20]: 415 | 416 | 417 | df.plot.bar(stacked=True, figsize=(20, 10)).legend(bbox_to_anchor=(1.1, 1)) 418 | 419 | -------------------------------------------------------------------------------- /Ch03/Embarak _Ch03_Data Collections Structure .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch03/Embarak _Ch03_Data Collections Structure .pdf -------------------------------------------------------------------------------- /Ch03/Embarak _Ch03_Data Collections Structure .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter : Collections 5 | 6 | # In[1]: 7 | 8 | 9 | # Create List 10 | List1 = [1, 24, 76] 11 | print (List1) 12 | 13 | colors=['red', 'yellow', 'blue'] 14 | print (colors) 15 | 16 | mix=['red', 24, 98.6] 17 | print (mix) 18 | 19 | nested= [ 1, [5, 6], 7] 20 | print (nested) 21 | 22 | print ([]) 23 | 24 | 25 | # In[9]: 26 | 27 | 28 | list1 = ['Egypt', 'chemistry', 2017, 2018]; 29 | list2 = [1, 2, 3, [4, 5] ]; 30 | list3 = ["a", 3.7, '330', "Omar"] 31 | 32 | print (list1[2]) 33 | print (list2 [3:]) 34 | print (list3 [-3:-1]) 35 
| print (list3[-3]) 36 | 37 | 38 | # In[50]: 39 | 40 | 41 | courses=["OOP","Networking","MIS","Project"] 42 | students=["Ahmed", "Ali", "Salim", "Abdullah", "Salwa"] 43 | OOP_marks = [65, 85, 92] 44 | 45 | OOP_marks.append(50) # Add new element 46 | OOP_marks.append(77)# Add new element 47 | print (OOP_marks[ : ]) # Print list before updateing 48 | 49 | OOP_marks[0]=70 # update new element 50 | OOP_marks[1]=45 # update new element 51 | list1 = [88, 93] 52 | OOP_marks.extend(list1) # extend list with another list 53 | print (OOP_marks[ : ]) # Print list after updateing 54 | 55 | 56 | # In[28]: 57 | 58 | 59 | 60 | 61 | 62 | # In[48]: 63 | 64 | 65 | OOP_marks = [70, 45, 92, 50, 77, 45] 66 | print (OOP_marks) 67 | 68 | del OOP_marks[0] # delete an element using del 69 | print (OOP_marks) 70 | 71 | OOP_marks.remove (45) # remove an element using remove() method 72 | print (OOP_marks) 73 | 74 | 75 | OOP_marks.pop (2) # remove an element using remove() method 76 | print (OOP_marks) 77 | 78 | 79 | # In[42]: 80 | 81 | 82 | len([5, "Omar", 3]) # find the list length. 83 | [3, 4, 1] + ["0", 5, 6] # concatenate lists. 84 | ['Hi!'] * 4 # repeate an element in a list. 85 | 3 in [1, 2, 3] # check if element in a list 86 | for x in [1, 2, 3]: print (x) # traverse list elements 87 | 88 | 89 | # In[46]: 90 | 91 | 92 | print (len([5, "Omar", 3])) # find the list length. 93 | print ([3, 4, 1] + ["Omar", 5, 6]) # concatenate lists. 94 | print (['Eg!'] * 4) # repeate an element in a list. 
95 | print (3 in [1, 2, 3]) # check if element in a list 96 | for x in [1, 2, 3]: print (x, end=' ') # traverse list elements 97 | 98 | 99 | # In[51]: 100 | 101 | 102 | #Built-in Functions and Lists 103 | tickets = [3, 41, 12, 9, 74, 15] 104 | print (tickets) 105 | print (len(tickets)) 106 | print (max(tickets)) 107 | print (min(tickets)) 108 | print (sum(tickets)) 109 | print (sum(tickets)/len(tickets)) 110 | 111 | 112 | # In[58]: 113 | 114 | 115 | #List sorting and Traversing 116 | seq=(41, 12, 9, 74, 3, 15) # use sequence for creating a list 117 | tickets=list(seq) 118 | 119 | print (tickets) 120 | tickets.sort() 121 | print (tickets) 122 | 123 | print ("\nSorted list elements ") 124 | for ticket in tickets: 125 | print (ticket) 126 | 127 | 128 | # ## LISTS AND STRINGS 129 | 130 | # In[63]: 131 | 132 | 133 | # convert string to a list of characters 134 | Word = 'Egypt' 135 | List1 = list(Word) 136 | print (List1) 137 | 138 | 139 | # In[70]: 140 | 141 | 142 | # we can break a string into words using the split method 143 | Greeting= 'Welcome to Egypt' 144 | List2 =Greeting.split() 145 | print (List2) 146 | print (List2[2]) 147 | 148 | 149 | # In[69]: 150 | 151 | 152 | # use the delimiter 153 | Greeting= 'Welcome-to-Egypt' 154 | List2 =Greeting.split("-") 155 | print (List2) 156 | 157 | Greeting= 'Welcome-to-Egypt' 158 | delimiter='-' 159 | List2 =Greeting.split(delimiter) 160 | print (List2) 161 | 162 | 163 | # In[73]: 164 | 165 | 166 | List1 = ['Welcome', 'to', 'Egypt'] 167 | delimiter = ' ' 168 | delimiter.join(List1) 169 | 170 | 171 | # In[74]: 172 | 173 | 174 | List1 = ['Welcome', 'to', 'Egypt'] 175 | delimiter = '-' 176 | delimiter.join(List1) 177 | 178 | 179 | # In[105]: 180 | 181 | 182 | filesdata="From oembarak@hct.ac.ae Sat Jan 5 09:14:16 2016 mak.jon@ec.ac.ae Sat Jan 5 09:14:16 2011 From ossama.embarak@ar.ac.eg Sat Jan 5 09:14:16 2010 From usa.mak@gmail.com Jan 5 09:14:16 2015" 183 | #print (filesdata) 184 | for line in filesdata: 185 | #line = 
line.rstrip() 186 | if not line.startswith('From ') : continue 187 | words = line.split() 188 | print (words[2]) 189 | 190 | 191 | # In[117]: 192 | 193 | 194 | a = [1, 2, 3] 195 | b = a 196 | print (a) 197 | print (b) 198 | 199 | 200 | # In[118]: 201 | 202 | 203 | a.append(77) 204 | print (a) 205 | print (b) 206 | 207 | 208 | # In[119]: 209 | 210 | 211 | b is a 212 | 213 | 214 | # In[120]: 215 | 216 | 217 | a = [1, 2, 3] 218 | b = [1, 2, 3] 219 | print (a) 220 | print (b) 221 | 222 | 223 | # In[121]: 224 | 225 | 226 | a.append(77) 227 | print (a) 228 | print (b) 229 | 230 | 231 | # In[122]: 232 | 233 | 234 | b is a 235 | 236 | 237 | # In[124]: 238 | 239 | 240 | Students =["Ahmed", "Ali", "Salim", "Abdullah", "Salwa"] 241 | def displaynames (x): 242 | for name in x: 243 | print (name) 244 | 245 | displaynames(Students) # Call the function displaynames 246 | 247 | 248 | # # Dictionaries 249 | 250 | # In[36]: 251 | 252 | 253 | Prices = {"Honda":40000, "Suzuki":50000, "Mercedes":85000, "Nissan":35000, "Mitsubishi":43000 } 254 | print (Prices) 255 | 256 | 257 | # In[37]: 258 | 259 | 260 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 261 | print(Staff_Salary) 262 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 263 | print(STDMarks) 264 | 265 | 266 | # In[38]: 267 | 268 | 269 | STDMarks = dict() 270 | STDMarks['Salwa Ahmed']=50 271 | STDMarks['Abdullah Mohamed']=80 272 | STDMarks['Sultan Ghanim']=90 273 | print (STDMarks) 274 | 275 | 276 | # In[39]: 277 | 278 | 279 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 280 | STDMarks['Salwa Ahmed'] = 85 # update current value of the key 'Salwa Ahmed' 281 | STDMarks['Omar Majid'] = 74 # Add a new item to the dictionary 282 | print (STDMarks) 283 | 284 | 285 | # In[40]: 286 | 287 | 288 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 289 | print (STDMarks) 290 | del STDMarks['Abdullah Mohamed'] # 
remove entry with key 'Abdullah Mohamed' 291 | print (STDMarks) 292 | STDMarks.clear() # remove all entries in STDMarks dictionary 293 | print (STDMarks) 294 | del STDMarks # delete entire dictionary 295 | 296 | 297 | # In[2]: 298 | 299 | 300 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 301 | print('Salary package for Ossama Hashim is ', end='') 302 | print(Staff_Salary['Ossama Hashim']) # access specific dictionary element 303 | 304 | 305 | # In[3]: 306 | 307 | 308 | # Define a function to return salary after dicount tax 5% 309 | def Netsalary (salary): 310 | return salary - (salary * 0.05) # also could be retunr salary *0.95 311 | 312 | #iterate all elements in a dcitionary 313 | print ("Name " , '\t', "Net Salary" ) 314 | for key, value in Staff_Salary.items(): 315 | print (key , '\t', Netsalary(value)) 316 | 317 | 318 | # In[43]: 319 | 320 | 321 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 322 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 323 | 324 | 325 | # In[52]: 326 | 327 | 328 | def cmp(a, b): 329 | for key, value in a.items(): 330 | for key1, value1 in b.items(): 331 | return (key >key1) - (key < key1) 332 | 333 | 334 | # In[54]: 335 | 336 | 337 | print (cmp(STDMarks,Staff_Salary) ) 338 | print (cmp(STDMarks,STDMarks) ) 339 | print (len(STDMarks) ) 340 | print (str(STDMarks) ) 341 | print (type(STDMarks) ) 342 | 343 | 344 | # In[ ]: 345 | 346 | 347 | 348 | 349 | 350 | # In[71]: 351 | 352 | 353 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 354 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 355 | dic3 = Staff_Salary.copy() 356 | Staff_Salary.clear() # clear all elements in Staff_Salary dictionary 357 | print (Staff_Salary) 358 | print (dic3) 359 | 360 | dict1= dict() 361 | sequence=('Id' , 'Number' , 'Email') 362 | print 
(dict1.fromkeys(sequence)) 363 | print (dict1.fromkeys(sequence, '####')) 364 | 365 | 366 | # In[89]: 367 | 368 | 369 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 370 | STDMarks={"Salwa Ahmed":50, "Abdullah Mohamed":80, "Sultan Ghanim":90} 371 | print (Staff_Salary.get('Ali Ziad') ) 372 | print (STDMarks.items()) 373 | print (Staff_Salary.keys()) 374 | 375 | print() 376 | STDMarks.setdefault('Ali Ziad') 377 | print (STDMarks) 378 | print (STDMarks.update(dict1)) 379 | print (STDMarks) 380 | 381 | 382 | # In[96]: 383 | 384 | 385 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 386 | print ("\nSorted by key") 387 | for k in sorted(Staff_Salary): 388 | print (k, Staff_Salary[k]) 389 | 390 | 391 | # In[97]: 392 | 393 | 394 | Staff_Salary = { 'Omar Ahmed' : 30000 , 'Ali Ziad' : 24000, 'Ossama Hashim': 25000, 'Majid Hatem':10000} 395 | print ("\nSorted by value") 396 | for w in sorted(Staff_Salary, key=Staff_Salary.get, reverse=True): 397 | print (w, Staff_Salary[w]) 398 | 399 | 400 | # # Tuples 401 | 402 | # In[1]: 403 | 404 | 405 | Names = ('Omar', 'Ali', 'Bahaa') 406 | Marks = ( 75, 65, 95 ) 407 | 408 | print (Names[2]) 409 | print (Marks) 410 | print (max(Marks)) 411 | 412 | 413 | # In[2]: 414 | 415 | 416 | for name in Names: 417 | print (name) 418 | 419 | 420 | # In[3]: 421 | 422 | 423 | Marks[1]=66 424 | 425 | 426 | # In[4]: 427 | 428 | 429 | Names = ( 'Omar Ahmed', 'Ali Ziad' , 'Ossama Hashim', 'Majid Hatem') 430 | print (Names) 431 | Names.sort(reverse=True) 432 | print (Names) 433 | 434 | 435 | # In[9]: 436 | 437 | 438 | MarksCIS=(70,85,90) 439 | print (MarksCIS) 440 | 441 | 442 | 443 | # In[14]: 444 | 445 | 446 | 447 | MarksCIS.sort(key=lambda x: int(x[0])) 448 | 449 | 450 | # In[1]: 451 | 452 | 453 | import operator 454 | MarksCIS = [(88,65),(70,90,85), (55,88,44)] 455 | print (MarksCIS) # original tuples 456 | print (sorted(MarksCIS)) # 
direct sorting 457 | 458 | 459 | # In[2]: 460 | 461 | 462 | print (MarksCIS) # original tuples 463 | #create a new sorted tuple 464 | MarksCIS2 = sorted(MarksCIS, key=lambda x: (x[0], x[1])) 465 | print (MarksCIS2) 466 | 467 | 468 | # In[3]: 469 | 470 | 471 | print (MarksCIS) # original tuples 472 | MarksCIS.sort(key=lambda x: (x[0], x[1])) # sort in tuple 473 | print (MarksCIS) 474 | 475 | 476 | # In[4]: 477 | 478 | 479 | MarksCIS = (70, 85, 55) 480 | MarksCIN = (90, 75, 60) 481 | print ("The third mark in CIS is ", MarksCIS[2]) 482 | print ("The third mark in CIN is ", MarksCIN[2]) 483 | 484 | 485 | # In[5]: 486 | 487 | 488 | MarksCIN = (90, 75, 60) 489 | print (MarksCIN) 490 | del MarksCIN 491 | print (MarksCIN) 492 | 493 | 494 | # In[6]: 495 | 496 | 497 | MarksCIS = (88, 65, 70,90,85,45,78,95,55) 498 | print ("\nForward slicing") 499 | print (MarksCIS[1:4]) 500 | print (MarksCIS[:3]) 501 | print (MarksCIS[6:]) 502 | print (MarksCIS[4:6]) 503 | 504 | print ("\nBackward slicing") 505 | print (MarksCIS[-4:-2]) 506 | print (MarksCIS[-3]) 507 | print (MarksCIS[-3:]) 508 | print (MarksCIS[ :-3]) 509 | 510 | 511 | # In[8]: 512 | 513 | 514 | import operator 515 | MarksCIS = [(88,65),(70,90,85), (55,88,44)] 516 | print (MarksCIS) # original tuples 517 | print (sorted(MarksCIS)) # direct sorting 518 | 519 | 520 | MarksCIS2 = sorted(MarksCIS, key=lambda x: (x[0], x[1])) 521 | 522 | print (MarksCIS2) 523 | 524 | MarksCIS.sort(key=lambda x: (x[0], x[1])) # sorts in place 525 | print (MarksCIS) 526 | 527 | 528 | # In[ ]: 529 | 530 | 531 | students = [ 532 | ('John', 'A', 2), 533 | ('Zoro', 'C', 1), 534 | ('Dave', 'B', 3), 535 | ] 536 | print(students) 537 | 538 | 539 | # In[5]: 540 | 541 | 542 | MarksCIS=(70,85,55) 543 | MarksCIN=(90,75,60) 544 | Combind=MarksCIS+MarksCIN 545 | print (Combind) 546 | 547 | 548 | # # a series from ndarray with labels. 
549 | 550 | # In[8]: 551 | 552 | 553 | import numpy as np 554 | import pandas as pd 555 | Series1 = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']) 556 | print(Series1) 557 | print(Series1.index) 558 | 559 | 560 | # In[9]: 561 | 562 | 563 | import numpy as np 564 | import pandas as pd 565 | Series2 = pd.Series(np.random.randn(4)) 566 | print(Series2) 567 | print(Series2.index) 568 | 569 | 570 | # In[10]: 571 | 572 | 573 | print (" \n Series slicing ") 574 | print (Series1[:3]) 575 | print ("\nIndex accessing") 576 | print (Series1[[3,1,0]]) 577 | print ("\nSingle index") 578 | x = Series1[0] 579 | print (x) 580 | 581 | 582 | # In[19]: 583 | 584 | 585 | print ("\nSeries Sample operations") 586 | print ("\n Series values greater than the mean: %.4f" % Series1.mean()) 587 | print (Series1 [Series1> Series1.mean()]) 588 | print ("\n Series values greater than the Meadian:%.4f" % Series1.median()) 589 | print (Series1 [Series1> Series1.median()]) 590 | print ("\nExponential value ") 591 | Series1Exp = np.exp(Series1) 592 | print (Series1Exp) 593 | 594 | 595 | # In[12]: 596 | 597 | 598 | dict = {'m' : 2, 'y' : 2018, 'd' : 'Sunday'} 599 | print ("\nSeries of non declared index") 600 | SeriesDict1 = pd.Series(dict) 601 | print(SeriesDict1) 602 | 603 | print ("\nSeries of declared index") 604 | SeriesDict2 = pd.Series(dict, index=['y', 'm', 'd', 's']) 605 | print(SeriesDict2) 606 | 607 | 608 | # In[13]: 609 | 610 | 611 | print ("\nUse the get and set methods to access" 612 | "a series values by index label\n") 613 | SeriesDict2 = pd.Series(dict, index=['y', 'm', 'd', 's']) 614 | print (SeriesDict2['y']) # Display the year 615 | SeriesDict2['y']=1999 # change the year vlaue 616 | print (SeriesDict2) # Display all dictionary values 617 | print (SeriesDict2.get('y')) # get specific value by its key 618 | 619 | 620 | # In[14]: 621 | 622 | 623 | print ("\n CREATE SERIES FORM SCALAR VALUE ") 624 | Scl = pd.Series(8., index=['a', 'b', 'c', 'd']) 625 | print (Scl) 626 | 
627 | 628 | # In[18]: 629 | 630 | 631 | SerX = pd.Series([1,2,3,4], index=['a', 'b', 'c', 'd']) 632 | print ("Addition"); 633 | print( SerX + SerX) 634 | print ("Addition with non matched labels"); 635 | print (SerX[1:] + SerX[:-1]) 636 | print ("Multiplication"); 637 | print (SerX * SerX) 638 | print ("Expponential"); 639 | print (np.exp(SerX)) 640 | 641 | 642 | # In[17]: 643 | 644 | 645 | std = pd.Series([77,89,65,90], name='StudentsMarks') 646 | print (std.name) 647 | std = std.rename("Marks") 648 | print (std.name) 649 | 650 | 651 | # In[4]: 652 | 653 | 654 | # read data from file and add it to dictionary for processing 655 | handle = open("Egypt.txt") 656 | text = handle.read() 657 | words = text.split() 658 | #print (words) 659 | counts = dict() 660 | for word in words: 661 | counts[word] = counts.get(word,0) + 1 662 | 663 | print (counts) 664 | bigcount = None 665 | bigword = None 666 | for word,count in counts.items(): 667 | if bigcount is None or count > bigcount: 668 | bigword = word 669 | bigcount = count 670 | print ("\n bigword and bigcount") 671 | print (bigword, bigcount) 672 | 673 | 674 | # In[14]: 675 | 676 | 677 | print ((100, 1, 2) > (150, 1, 2)) 678 | print ((0, 1, 120) < (0, 3, 4)) 679 | print (( 'Javed', 'Salwa' ) > ('Omar', 'Sam')) 680 | print (( 'Khalid', 'Ahmed') < ('Ziad', 'Majid')) 681 | 682 | 683 | # In[5]: 684 | 685 | 686 | import pandas as pd 687 | dict1 = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 688 | 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} 689 | df = pd.DataFrame(dict1) 690 | df 691 | 692 | 693 | # In[6]: 694 | 695 | 696 | # set index for DataFrame 697 | pd.DataFrame(dict1, index=['d', 'b', 'a']) 698 | 699 | 700 | # In[8]: 701 | 702 | 703 | # Control the labels appearance of the DataFrame 704 | pd.DataFrame(dict1, index=['d', 'b', 'a'], columns=['two', 'three', 'one']) 705 | 706 | 707 | # In[11]: 708 | 709 | 710 | # without index 711 | ndarrdict = {'one' : [1., 2., 3., 4.], 712 | 'two' : [4., 
3., 2., 1.]} 713 | pd.DataFrame(ndarrdict) 714 | 715 | 716 | # In[12]: 717 | 718 | 719 | # Assign index 720 | pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd']) 721 | 722 | 723 | # In[18]: 724 | 725 | 726 | import pandas as pd 727 | import numpy as np 728 | data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')]) 729 | data[:] = [(1,2.,'Hello'), (2,3.,"World")] 730 | pd.DataFrame(data) 731 | 732 | 733 | # In[16]: 734 | 735 | 736 | pd.DataFrame(data, index=['First', 'Second']) 737 | 738 | 739 | # In[17]: 740 | 741 | 742 | pd.DataFrame(data, columns=['C', 'A', 'B']) 743 | 744 | 745 | # In[19]: 746 | 747 | 748 | data2 = [{'A': 1, 'B': 2}, {'A': 5, 'B': 10, 'C': 20}] 749 | pd.DataFrame(data2) 750 | 751 | 752 | # In[20]: 753 | 754 | 755 | pd.DataFrame(data2, index=['First', 'Second']) 756 | 757 | 758 | # In[21]: 759 | 760 | 761 | pd.DataFrame(data2, columns=['A', 'B']) 762 | 763 | 764 | # In[22]: 765 | 766 | 767 | pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2}, 768 | ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4}, 769 | ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6}, 770 | ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8}, 771 | ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}) 772 | 773 | 774 | # In[25]: 775 | 776 | 777 | # DATAFRAME COLUMN SELECTION, ADDITION, DELETION 778 | ndarrdict = {'one' : [1., 2., 3., 4.], 779 | 'two' : [4., 3., 2., 1.]} 780 | df = pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd']) 781 | df 782 | 783 | 784 | # In[26]: 785 | 786 | 787 | df['three'] = df['one'] * df['two'] # Add column 788 | df['flag'] = df['one'] > 2 # Add column 789 | df 790 | 791 | 792 | # In[27]: 793 | 794 | 795 | df['Filler'] = 'HCT' 796 | df['Slic'] = df['one'][:2] 797 | df 798 | 799 | 800 | # In[28]: 801 | 802 | 803 | # Delet columns 804 | del df['two'] 805 | Three = df.pop('three') 806 | df 807 | 808 | 809 | # In[29]: 810 | 811 | 812 | df.insert(1, 'bar', df['one']) 813 | df 814 | 815 | 816 | # In[54]: 817 | 818 | 819 | import numpy as np 820 | import pandas as pd 821 
| df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 822 | df = df.assign(C=lambda x: x['A'] + x['B']) 823 | df = df.assign( D=lambda x: x['A'] + x['C']) 824 | df 825 | 826 | 827 | # In[55]: 828 | 829 | 830 | df = df.assign( A=lambda x: x['A'] *2) 831 | df 832 | 833 | 834 | # In[56]: 835 | 836 | 837 | df 838 | 839 | 840 | # In[61]: 841 | 842 | 843 | df['B'] 844 | 845 | 846 | # In[59]: 847 | 848 | 849 | df.iloc[2] 850 | 851 | 852 | # In[62]: 853 | 854 | 855 | df[1:] 856 | 857 | 858 | # In[65]: 859 | 860 | 861 | df[df['C']>7] 862 | 863 | 864 | # In[69]: 865 | 866 | 867 | df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 868 | df2 = pd.DataFrame({"A": [7, 4, 6], "B": [10, 4, 15]}) 869 | print (df1) 870 | print() 871 | print(df2) 872 | 873 | 874 | # In[70]: 875 | 876 | 877 | df1+df2 878 | 879 | 880 | # In[71]: 881 | 882 | 883 | df1-df2 884 | 885 | 886 | # In[72]: 887 | 888 | 889 | df2 - df1.iloc[2] 890 | 891 | 892 | # In[75]: 893 | 894 | 895 | df2 896 | 897 | 898 | # In[78]: 899 | 900 | 901 | df2*2+1 902 | 903 | 904 | # In[3]: 905 | 906 | 907 | import pandas as pd 908 | import numpy as np 909 | P1 = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], 910 | major_axis=pd.date_range('10/05/2018', periods=5), 911 | minor_axis=['A', 'B', 'C', 'D']) 912 | P1 913 | 914 | 915 | # In[4]: 916 | 917 | 918 | data = {'Item1' : pd.DataFrame(np.random.randn(4, 3)), 919 | 'Item2' : pd.DataFrame(np.random.randn(4, 2))} 920 | P2 = pd.Panel(data) 921 | P2 922 | 923 | 924 | # In[5]: 925 | 926 | 927 | p3 = pd.Panel.from_dict(data, orient='minor') 928 | p3 929 | 930 | 931 | # In[26]: 932 | 933 | 934 | df = pd.DataFrame({'Item': ['TV', 'Mobile', 'Laptop'], 935 | 'Price': np.random.randn(3)**2*1000}) 936 | df 937 | 938 | 939 | # In[29]: 940 | 941 | 942 | data = {'stock1': df, 'stock2': df} 943 | panel = pd.Panel.from_dict(data, orient='minor') 944 | panel['Item'] 945 | 946 | 947 | # In[30]: 948 | 949 | 950 | wp['Price'] 951 | 952 | 953 | # In[33]: 954 | 955 | 956 | import pandas 
as pd 957 | import numpy as np 958 | P1 = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], 959 | major_axis=pd.date_range('10/05/2018', periods=5), 960 | minor_axis=['A', 'B', 'C', 'D']) 961 | P1['Item1'] 962 | 963 | 964 | # In[34]: 965 | 966 | 967 | P1.major_xs(P1.major_axis[2]) 968 | 969 | 970 | # In[35]: 971 | 972 | 973 | P1.minor_axis 974 | 975 | 976 | # In[36]: 977 | 978 | 979 | P1.minor_xs('C') 980 | 981 | 982 | # In[28]: 983 | 984 | 985 | data = {'Omar': 2.5, 'Ali': 3.5, 'Osama': 3.0} 986 | pd.Series(data) 987 | 988 | 989 | # In[30]: 990 | 991 | 992 | pd.Series(data, index = ['Omar', 'Ali', 'Osama']) 993 | 994 | 995 | # In[31]: 996 | 997 | 998 | data = {'Omar': [90, 50, 89], 999 | 'Ali': [78, 75, 73], 1000 | 'Osama': [67, 85, 80]} 1001 | df1 = pd.DataFrame (data, index= ['Course1', 'Course2', 'Course3']) 1002 | df1 1003 | 1004 | 1005 | # In[32]: 1006 | 1007 | 1008 | df1['Omar'] 1009 | 1010 | 1011 | # In[33]: 1012 | 1013 | 1014 | df1['Mean'] = (df1['Ali'] + df1['Omar'] + df1['Osama'])/3 1015 | df1 1016 | 1017 | -------------------------------------------------------------------------------- /Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .pdf -------------------------------------------------------------------------------- /Ch04/Embarak _Ch04_File IO Processing _ Regular Expressions .py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Ch04 File processing and Regular expressions 5 | 6 | # # File processing 7 | 8 | # In[2]: 9 | 10 | 11 | Name = input ("Enter your name: ") 12 | Name 13 | 14 | 15 | # In[3]: 16 | 17 | 18 | Mark = input("Enter your mark: ") 19 | Mark = float(Mark) 20 | 21 | 22 | # In[4]: 
23 | 24 | 25 | print ("Welcome to Grading System \nHCT 2018") 26 | print ("\nCampus\t Name\t\tMark\tGrade") 27 | if (Mark>=85): 28 | Grade="B+" 29 | print ("FMC\t", Name,"\t",Mark,"\t", Grade) 30 | 31 | 32 | # ### Files attributes 33 | 34 | # In[41]: 35 | 36 | 37 | # Open a file and find its attributes 38 | Filehndl = open("Egypt.txt", "r") 39 | print ("Name of the file: ", Filehndl.name) 40 | print ("Closed or not : ", Filehndl.closed) 41 | print ("Opening mode : ", Filehndl.mode) 42 | 43 | 44 | # ### Open and close files 45 | 46 | # In[40]: 47 | 48 | 49 | Filehndl = open("Egypt.txt", "r") 50 | print ("Closed or not : ", Filehndl.closed) 51 | Filehndl.close() 52 | print ("Closed or not : ", Filehndl.closed) 53 | 54 | 55 | # In[39]: 56 | 57 | 58 | Filehndl = open("Egypt.txt", "w+") 59 | Filehndl.write( "Python Processing Files\nMay 2018!!\n") 60 | 61 | # Close opend file 62 | Filehndl.close() 63 | 64 | 65 | # ### Rename and delete files 66 | 67 | # In[34]: 68 | 69 | 70 | import os 71 | os.rename( "Egypt.txt", "test2.txt" ) 72 | os.remove( "test2.txt" ) 73 | 74 | 75 | # ## Directories in Python 76 | 77 | # In[ ]: 78 | 79 | 80 | import os 81 | os.mkdir("Data 1") # create a directory 82 | os.mkdir("Data_2") 83 | os.chdir("Data_2") # create a childe directory 84 | os.getcwd() # Get the current working directory 85 | 86 | os.rmdir('Data 1') # remove a directory 87 | os.rmdir('Data_2') # remove a directory 88 | 89 | 90 | # In[44]: 91 | 92 | 93 | import os 94 | os.getcwd() # Get the current working directory 95 | 96 | 97 | # In[43]: 98 | 99 | 100 | os.chdir('/home/nbuser/library') 101 | 102 | 103 | # ## open and process files 104 | 105 | # In[45]: 106 | 107 | 108 | print("\nSearching Through a File\n") 109 | fhand = open('Emails.txt') 110 | for line in fhand: 111 | line = line.rstrip() 112 | if line.startswith('From:') : 113 | print (line) 114 | 115 | 116 | # In[46]: 117 | 118 | 119 | print ("\nUsing in to select lines // only print lines which has specific string ") 120 
| fhand = open('Emails.txt') 121 | for line in fhand: 122 | line = line.rstrip() 123 | if not '@uct.ac.za' in line : 124 | continue 125 | print (line) 126 | 127 | 128 | # In[47]: 129 | 130 | 131 | print("\nSearching Through a File\n") 132 | fhand = open('Emails.txt') 133 | for line in fhand: 134 | line = line.rstrip() 135 | if line.startswith('From:') : 136 | line = line.split() 137 | print (line[1]) 138 | 139 | 140 | # ## Regular Expressions 141 | 142 | # In[48]: 143 | 144 | 145 | import re 146 | print ("\nRegular Expressions\n'^X.*:' \n") 147 | hand = open('Data.txt') 148 | for line in hand: 149 | line = line.rstrip() 150 | y = re.findall('^X.*:',line) 151 | print (y) 152 | 153 | 154 | # In[49]: 155 | 156 | 157 | print ("\nRegular Expressions\nWild-Card Characters '^X-\S+:'\n") 158 | hand = open('Data.txt') 159 | for line in hand: 160 | line = line.rstrip() 161 | y = re.findall('^X-\S+:',line) # match any non white space characters 162 | print (y) 163 | 164 | 165 | 166 | # In[50]: 167 | 168 | 169 | print ("\n Matching and Extracting Data \n") 170 | x = 'My 2 favorite numbers are 19 and 42' 171 | y = re.findall('[0-9]+',x) 172 | print (y) 173 | 174 | 175 | # In[51]: 176 | 177 | 178 | y = re.findall('[AEsOUn]+',x) # find any of these characters in string 179 | print (y) 180 | 181 | 182 | # In[52]: 183 | 184 | 185 | print ("\nGreedy Matching \n") 186 | x = 'From: Using the : character' 187 | y = re.findall('^F.+:', x) 188 | print (y) 189 | 190 | 191 | # In[53]: 192 | 193 | 194 | print ("\nNon-Greedy Matching \n") 195 | x = 'From: Using the : character' 196 | y = re.findall('^F.+?:', x) 197 | print (y) 198 | 199 | 200 | # In[54]: 201 | 202 | 203 | import re 204 | print ("\nFine-Tuning String Extraction \n") 205 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 206 | Extract = re.findall('\S+@\S+',mystr) 207 | print (Extract) 208 | E_xtracted = re.findall('^From.*? 
(\S+@\S+)',mystr) # non greedy white space 209 | print (E_xtracted) 210 | print (E_xtracted[0]) 211 | 212 | 213 | # In[57]: 214 | 215 | 216 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 217 | atpos = mystr.find('@') 218 | sppos = mystr.find(' ',atpos) # find white space starting from atpos 219 | host = mystr[atpos+1 : sppos] 220 | print (host) 221 | usernamepos =mystr.find(' ') 222 | username = mystr[usernamepos+1 : atpos] 223 | print (username) 224 | 225 | 226 | # In[58]: 227 | 228 | 229 | print ("\n The Regex Version\n") 230 | import re 231 | mystr="From ossama.embarak@hct.ac.ae Sat Jun 5 08:14:16 2018" 232 | Extract = re.findall('@([^ ]*)',mystr) 233 | print (Extract) 234 | Extract = re.findall('^From .*@([^ ]*)',mystr) 235 | print (Extract) 236 | 237 | 238 | # In[59]: 239 | 240 | 241 | print ("\nScape character \n") 242 | mystr = 'We just received $10.00 for cookies and $20.23 for juice' 243 | Extract = re.findall('\$[0-9.]+',mystr) 244 | print (Extract) 245 | 246 | 247 | # ## Exercises 248 | 249 | # In[60]: 250 | 251 | 252 | import re 253 | CoursesData = """101 COM Computers 254 | 205 MAT Mathematics 255 | 189 ENG English""" 256 | 257 | 258 | # In[61]: 259 | 260 | 261 | # Extract all course numbers 262 | Course_numbers = re.findall('[0-9]+', CoursesData) 263 | print (Course_numbers) 264 | 265 | # Extract all course codes 266 | Course_codes = re.findall('[A-Z]{3}', CoursesData) 267 | print (Course_codes) 268 | 269 | # Extract all course names 270 | Course_names = re.findall('[A-Za-z]{4,}', CoursesData) 271 | print (Course_names) 272 | 273 | 274 | # In[62]: 275 | 276 | 277 | # compile the regex and search the pattern 278 | regex_num = re.compile('\d+') 279 | s = regex_num.search(CoursesData) 280 | 281 | print('Starting Position: ', s.start()) 282 | print('Ending Position: ', s.end()) 283 | print(CoursesData[s.start():s.end()]) 284 | 285 | 286 | # In[63]: 287 | 288 | 289 | # define the course text pattern groups and extract 290 | course_pattern 
= '([0-9]+)\s*([A-Z]{3})\s*([A-Za-z]{4,})' 291 | re.findall(course_pattern, CoursesData) 292 | 293 | 294 | # In[64]: 295 | 296 | 297 | print(re.findall('[a-zA-Z]+', CoursesData)) # [] Matches any character inside 298 | 299 | 300 | # In[65]: 301 | 302 | 303 | print(re.findall('[0-9]+', CoursesData)) # [] Matches any character inside 304 | 305 | 306 | # In[66]: 307 | 308 | 309 | import re 310 | CoursesData = """10 COM Computers 311 | 205 MAT Mathematics 312 | 1899 ENG English""" 313 | print(re.findall('\d{4}', CoursesData)) # {n} Matches repeat n times. 314 | print(re.findall('\d{2,4}', CoursesData)) 315 | 316 | -------------------------------------------------------------------------------- /Ch04/Wild-Card.txt: -------------------------------------------------------------------------------- 1 | X-Sieve: CMU Sieve 2.3 2 | X-DSPAM-Result: Innocent 3 | X-DSPAM-Confidence: 0.8475 4 | X- Content-Type-Message-Body: text/plain 5 | X-Plane is behind schedule: two weeks -------------------------------------------------------------------------------- /Ch05/1. 
Export1_Columns.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,2004,2005,2006,2007 2 | Benin,BEN,811,940,869,1076 3 | Burkina Faso,BFA,548,532,673,714 4 | Bangladesh,BGD,7257,9995,11745,13530 5 | Bulgaria,BGR,10713,12703,16151,23263 6 | Bahrain,BHR,10337,13397,15662,17314 7 | "Bahamas, The",BHS,3161,3482,3558,3888 8 | Bosnia and Herzegovina,BIH,3232,3550,4505,4078 9 | Belarus,BLR,15710,18065,22200,27592 10 | Belize,BLZ,535,609,744,788 11 | Bermuda,BMU,0,0,0,0 12 | Bolivia,BOL,2732,3395,4784,5484 13 | Brazil,BRA,110744,135919,159214,186203 14 | Barbados,BRB,1436,1712,1939,2044 15 | Brunei Darussalam,BRN,5416,6688,8227,8310 16 | Bhutan,BTN,220,313,489,657 17 | Botswana,BWA,4444,5256,5292,5964 18 | Central African Republic,CAF,178,179,209,240 19 | Canada,CAN,381529,430267,464728,500338 20 | Central Europe and the Baltics,CEB,354718,422597,512548,648165 21 | Switzerland,CHE,202880,219836,243432,294204 22 | Channel Islands,CHI,0,0,0,0 23 | Chile,CHL,38094,47749,65633,75856 24 | China,CHN,593264,764531,973211,1230720 25 | Cote d'Ivoire,CIV,7682,8525,9322,9607 26 | Cameroon,CMR,3061,3393,4130,4889 27 | "Congo, Rep.",COG,3744,5123,6507,6592 28 | Colombia,COL,19634,24696,28677,34305 29 | Comoros,COM,57,57,60,71 30 | Cabo Verde,CPV,296,367,500,477 31 | Costa Rica,CRI,8602,9683,11067,12822 32 | Caribbean small states,CSS,19017,23308,29288,29312 33 | Cuba,CUB,6121,8963,9870,11918 34 | Curacao,CUW,0,0,0,0 35 | Cayman Islands,CYM,0,0,0,0 36 | Cyprus,CYP,9854,10266,10725,12765 37 | Czech Republic,CZE,68328,84742,101341,125664 38 | Germany,DEU,999334,1079830,1236570,1479310 39 | Djibouti,DJI,246,288,307,484 40 | Dominica,DMA,130,129,144,148 41 | Denmark,DNK,110049,125389,142993,164020 42 | Dominican Republic,DOM,9381,10040,10731,11722 43 | Algeria,DZA,34175,48715,57122,63531 44 | East Asia & Pacific (developing only),EAP,1020490,1256000,1545380,1890140 45 | East Asia & Pacific (all income 
levels),EAS,2853990,3288690,3815750,4442960 46 | Europe & Central Asia (developing only),ECA,241546,285610,345567,436385 47 | Europe & Central Asia (all income levels),ECS,5377840,5933000,6756610,7964430 48 | Ecuador,ECU,8985,11463,14196,16288 49 | "Egypt, Arab Rep.",EGY,22258,27214,32191,39470 50 | Euro area,EMU,3545370,3813120,4271090,5075590 51 | Eritrea,ERI,64,68,84,76 52 | Spain,ESP,269302,285474,314519,380305 53 | Estonia,EST,7419,9231,10770,14053 54 | Ethiopia,ETH,1494,1858,2105,2489 55 | European Union,EUU,4653420,5063310,5728280,6732610 56 | Fragile and conflict affected situations,FCS,97395,121788,147124,178795 57 | Finland,FIN,75931,82297,93488,112361 58 | Fiji,FJI,1468,1598,1548,1670 59 | France,FRA,550336,581084,631842,722460 60 | Faeroe Islands,FRO,705,736,825,960 61 | "Micronesia, Fed. Sts.",FSM,0,0,0,0 62 | Gabon,GAB,4269,5742,6255,7280 63 | United Kingdom,GBR,559810,621255,718296,759840 64 | Georgia,GEO,1617,2164,2546,3175 65 | Ghana,GHA,3491,3912,5142,6072 66 | Guinea,GIN,903,1022,1190,1189 67 | "Gambia, The",GMB,118,129,97,118 68 | Guinea-Bissau,GNB,98,117,101,140 69 | Equatorial Guinea,GNQ,4879,7183,8606,10565 70 | Greece,GRC,49803,52821,57897,71752 71 | Grenada,GRD,198,149,162,210 72 | Greenland,GRL,0,0,0,0 73 | Guatemala,GTM,6465,6818,7537,8721 74 | Guam,GUM,0,0,0,0 75 | Guyana,GUY,753,698,0,0 76 | High income,HIC,9114030,10169100,11523000,13268000 77 | "Hong Kong SAR, China",HKG,315629,353428,390556,431069 78 | Honduras,HND,5125,5707,6077,6568 79 | Heavily indebted poor countries (HIPC),HPC,58552,68096,82231,98241 80 | Croatia,HRV,16403,17846,20005,23432 81 | Haiti,HTI,543,605,689,779 82 | Hungary,HUN,61919,70662,85253,108939 83 | Indonesia,IDN,82744,97388,113143,127226 84 | Isle of Man,IMN,0,0,0,0 85 | India,IND,126648,160838,199974,253077 86 | Not classified,INX,0,0,0,0 87 | Ireland,IRL,155263,164817,176526,208616 88 | "Iran, Islamic Rep.",IRN,47413,63474,72326,92050 89 | Iraq,IRQ,20611,27149,33242,40777 90 | Iceland,ISL,4479,5112,5314,7157 
91 | Israel,ISR,52835,57878,62478,72029 92 | Italy,ITA,432727,456712,509513,604236 93 | Jamaica,JAM,3820,3966,4788,5085 94 | Jordan,JOR,5962,6635,8112,9280 95 | Japan,JPN,615047,654356,704556,773111 96 | Kazakhstan,KAZ,22655,30387,41292,51704 97 | Kenya,KEN,4283,5342,5936,7005 98 | Kyrgyz Republic,KGZ,941,942,1182,2012 99 | Cambodia,KHM,3395,4033,4990,5644 100 | Kiribati,KIR,12,15,11,21 101 | St. Kitts and Nevis,KNA,194,227,236,233 102 | "Korea, Rep.",KOR,292911,330601,376047,439918 103 | Kosovo,KSV,0,0,530,748 104 | Kuwait,KWT,33831,51692,66566,72695 105 | Latin America & Caribbean (developing only),LAC,421379,496874,579544,655234 106 | Lao PDR,LAO,723,934,1395,1457 107 | Lebanon,LBN,7591,8050,7995,9395 108 | Liberia,LBR,124,129,186,239 109 | Libya,LBY,21117,30160,40430,49084 110 | St. Lucia,LCA,464,525,440,457 111 | Latin America & Caribbean (all income levels),LCN,635611,754182,882680,987691 112 | Least developed countries: UN classification,LDC,71671,96350,120298,154304 113 | Low income,LIC,30325,34116,39013,48263 114 | Liechtenstein,LIE,0,0,0,0 115 | Sri Lanka,LKA,7300,7892,8520,9419 116 | Lower middle income,LMC,506014,609332,747227,876659 117 | Low & middle income,LMY,2218060,2714100,3299440,3972260 118 | Lesotho,LSO,696,669,765,832 119 | Lithuania,LTU,10735,14087,16828,20035 120 | Luxembourg,LUX,52634,60030,73949,92081 121 | Latvia,LVA,5935,7375,8655,11928 122 | "Macao SAR, China",MAC,10988,11154,13121,16879 123 | St. 
Martin (French part),MAF,0,0,0,0 124 | Morocco,MAR,16726,19234,22450,26891 125 | Monaco,MCO,0,0,0,0 126 | Moldova,MDA,1317,1528,1542,2088 127 | Madagascar,MDG,1424,1422,1640,2227 128 | Maldives,MDV,689,485,777,1804 129 | Middle East & North Africa (all income levels),MEA,565362,748777,899078,1053270 130 | Mexico,MEX,202070,230169,266428,289465 131 | Marshall Islands,MHL,0,0,0,0 132 | Middle income,MIC,2188160,2680310,3260680,3924370 133 | "Macedonia, FYR",MKD,1745,2178,2593,3678 134 | Mali,MLI,1237,1359,1884,1871 135 | Malta,MLT,4425,4602,5505,6661 136 | Myanmar,MMR,0,0,0,0 137 | Middle East & North Africa (developing only),MNA,204958,264651,311776,377041 138 | Montenegro,MNE,871,983,1331,1629 139 | Mongolia,MNG,1211,1483,2029,2525 140 | Northern Mariana Islands,MNP,0,0,0,0 141 | Mozambique,MOZ,1759,2087,2722,2888 142 | Mauritania,MRT,470,671,1420,1506 143 | Mauritius,MUS,3450,3761,4068,4509 144 | Malawi,MWI,655,662,705,1033 145 | Malaysia,MYS,143928,162049,182517,205489 146 | North America,NAC,1563100,1739230,1941120,2165040 147 | Namibia,NAM,2630,2937,3628,4413 148 | New Caledonia,NCL,0,0,0,0 149 | Niger,NER,530,565,599,748 150 | Nigeria,NGA,26495,35534,62697,56142 151 | Nicaragua,NIC,1336,1541,1835,2164 152 | Netherlands,NLD,409334,446527,498358,591200 153 | High income: nonOECD,NOC,1530190,1857670,2179260,2471570 154 | Norway,NOR,108690,134064,154283,173600 155 | Nepal,NPL,1213,1186,1216,1327 156 | New Zealand,NZL,30697,32449,33025,40159 157 | High income: OECD,OEC,7581110,8310100,9343040,10795300 158 | OECD members,OED,7875540,8645830,9729830,11229200 159 | Oman,OMN,12723,18114,20905,23771 160 | Other small states,OSS,25694,31313,35452,42265 161 | Pakistan,PAK,15350,17180,19401,20137 162 | Panama,PAN,9587,11674,13147,16072 163 | Peru,PER,14951,19913,26334,31161 164 | Philippines,PHL,44381,47554,56923,64614 165 | Palau,PLW,70,83,84,91 166 | Papua New Guinea,PNG,2834,0,0,0 167 | Poland,POL,87825,106314,131127,166464 168 | Puerto Rico,PRI,66393,68553,72625,72906 
169 | "Korea, Dem. Rep.",PRK,0,0,0,0 170 | Portugal,PRT,51562,52748,62397,74466 171 | Paraguay,PRY,4372,5084,6252,7818 172 | Pacific island small states,PSS,2089,2295,2278,2526 173 | French Polynesia,PYF,0,0,0,0 174 | Qatar,QAT,20363,28983,38245,48048 175 | Romania,ROU,27162,32810,39605,49992 176 | Russian Federation,RUS,203415,268957,333908,392044 177 | Rwanda,RWA,232,296,382,600 178 | South Asia,SAS,160474,199608,243990,301707 179 | Saudi Arabia,SAU,131921,187389,225507,249318 180 | Sudan,SDN,3810,5087,6833,10046 181 | Senegal,SEN,2180,2352,2398,2872 182 | Singapore,SGP,247027,288066,340079,386495 183 | Solomon Islands,SLB,116,141,165,215 184 | Sierra Leone,SLE,239,290,318,337 185 | El Salvador,SLV,4259,4383,4764,5204 186 | San Marino,SMR,0,0,0,0 187 | Somalia,SOM,0,0,0,0 188 | Serbia,SRB,6021,7125,9264,11428 189 | Sub-Saharan Africa (developing only),SSA,168818,210630,272350,310879 190 | South Sudan,SSD,0,0,0,0 191 | Sub-Saharan Africa (all income levels),SSF,174317,218533,281811,322381 192 | Small states,SST,46759,56877,66892,74131 193 | Sao Tome and Principe,STP,15,16,16,14 194 | Suriname,SUR,387,546,0,0 195 | Slovak Republic,SVK,39331,45171,57202,71871 196 | Slovenia,SVN,18947,21656,25611,32524 197 | Sweden,SWE,165839,178466,202413,235411 198 | Swaziland,SWZ,2056,2250,2259,2311 199 | Sint Maarten (Dutch part),SXM,0,0,0,0 200 | Seychelles,SYC,621,720,858,937 201 | Syrian Arab Republic,SYR,10048,11656,13168,15614 202 | Turks and Caicos Islands,TCA,0,0,0,0 203 | Chad,TCD,2252,3189,3532,3845 204 | Togo,TGO,747,847,841,957 205 | Thailand,THA,114062,129738,152515,181342 206 | Tajikistan,TJK,1211,601,656,767 207 | Turkmenistan,TKM,4216,5270,7512,9548 208 | Timor-Leste,TLS,39,40,45,60 209 | Tonga,TON,47,47,42,37 210 | Trinidad and Tobago,TTO,7220,10520,14954,14101 211 | Tunisia,TUN,13166,14501,15823,19877 212 | Turkey,TUR,92361,105558,120355,144466 213 | Tuvalu,TUV,0,0,0,0 214 | "Taiwan, China",TWN,0,0,0,0 215 | Tanzania,TZA,2520,2864,3183,4068 216 | 
Uganda,UGA,1008,1278,1519,2056 217 | Ukraine,UKR,39716,44344,50239,64001 218 | Upper middle income,UMC,1682610,2071850,2514560,3048460 219 | Uruguay,URY,4395,5279,5932,6810 220 | United States,USA,1181510,1308900,1476320,1664620 221 | Uzbekistan,UZB,4837,5416,6326,8851 222 | St. Vincent and the Grenadines,VCT,185,201,212,212 223 | "Venezuela, RB",VEN,40706,57709,67003,71714 224 | Virgin Islands (U.S.),VIR,0,0,0,0 225 | Vietnam,VNM,27135,36712,44945,54591 226 | Vanuatu,VUT,166,179,181,215 227 | West Bank and Gaza,PSE,597,724,737,1067 228 | World,WLD,11332200,12883600,14822400,17239700 229 | Samoa,WSM,119,132,146,167 230 | "Yemen, Rep.",YEM,5048,6852,7873,0 231 | South Africa,ZAF,58216,68172,79519,93339 232 | "Congo, Dem. Rep.",COD,2341,2442,2765,6540 233 | Zambia,ZMB,2087,2550,4158,4722 234 | Zimbabwe,ZWE,2001,1931,1957,2000 235 | -------------------------------------------------------------------------------- /Ch05/1. Export1_Columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/1. Export1_Columns.xlsx -------------------------------------------------------------------------------- /Ch05/1. 
Export2_Columns.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,2008,2009,2010,2011,2012,2013,2014 2 | Benin,BEN,1312,1039,991,1040,1154,1518,1656 3 | Burkina Faso,BFA,834,1063,1727,2681,2849,3166,3551 4 | Bangladesh,BGD,16181,17360,18472,25627,26887,29305,34344 5 | Bulgaria,BGR,28591,21964,26836,35488,33975,37260,37845 6 | Bahrain,BHR,21231,15705,17880,22945,22853,0,0 7 | "Bahamas, The",BHS,3797,3117,3223,3443,3733,3715,3739 8 | Bosnia and Herzegovina,BIH,4911,4265,4901,5703,5219,5706,0 9 | Belarus,BLR,37021,24865,29402,48462,51745,44058,43555 10 | Belize,BLZ,854,690,813,894,983,988,0 11 | Bermuda,BMU,0,2699,2705,2644,2589,2657,0 12 | Bolivia,BOL,7488,6194,8093,10566,12775,13518,0 13 | Brazil,BRA,229517,180892,237189,299972,288558,287520,270214 14 | Barbados,BRB,2090,1905,2055,1699,1702,1592,0 15 | Brunei Darussalam,BRN,11270,7812,10074,13297,13795,12270,0 16 | Bhutan,BTN,586,565,673,750,706,728,711 17 | Botswana,BWA,4999,3573,4926,6964,6278,8131,7876 18 | Central African Republic,CAF,218,212,234,295,271,219,181 19 | Canada,CAN,531714,389895,469047,546709,553579,555263,563927 20 | Central Europe and the Baltics,CEB,769904,607197,695190,833145,809264,869375,0 21 | Switzerland,CHE,347368,309906,373423,458002,446652,494530,0 22 | Channel Islands,CHI,0,0,0,0,0,0,0 23 | Chile,CHL,74557,63918,82795,95443,90993,89662,87167 24 | China,CHN,1444800,1200770,1580770,1907310,2049320,2213300,2342540 25 | Cote d'Ivoire,CIV,11414,12308,12600,13660,13108,13693,14864 26 | Cameroon,CMR,5628,3750,4098,4889,4975,6108,7016 27 | "Congo, Rep.",COG,8912,6756,10221,12591,11459,10780,11321 28 | Colombia,COL,43403,37484,45739,62844,67495,66949,60583 29 | Comoros,COM,73,75,82,93,94,105,113 30 | Cabo Verde,CPV,574,462,481,587,0,0,0 31 | Costa Rica,CRI,13557,12423,13855,15345,16849,17459,17372 32 | Caribbean small states,CSS,36306,22964,24721,28811,27252,29601,0 33 | Cuba,CUB,12506,10839,14519,17319,18659,18593,0 34 | 
Curacao,CUW,0,0,0,0,0,0,0 35 | Cayman Islands,CYM,0,0,0,0,0,0,0 36 | Cyprus,CYP,13795,12116,12046,13419,12403,12228,12875 37 | Czech Republic,CZE,149036,120995,136996,162791,158218,161291,171867 38 | Germany,DEU,1630570,1291720,1443240,1681100,1622620,1699680,1757960 39 | Djibouti,DJI,0,0,0,0,0,0,0 40 | Dominica,DMA,157,148,174,191,160,175,184 41 | Denmark,DNK,189863,149383,159010,180803,173929,182292,183561 42 | Dominican Republic,DOM,11400,10117,11630,13752,14758,15568,16465 43 | Algeria,DZA,82035,48534,61975,77581,77123,69659,63810 44 | East Asia & Pacific (developing only),EAP,2199840,1842590,2408330,2893220,3055750,3238590,3389990 45 | East Asia & Pacific (all income levels),EAS,5037450,4199380,5380790,6326360,6567820,6733200,6940060 46 | Europe & Central Asia (developing only),ECA,561284,427165,502542,637040,661386,673275,672209 47 | Europe & Central Asia (all income levels),ECS,8988100,7097250,7943420,9357270,9158240,9552280,9756010 48 | Ecuador,ECU,21100,15786,19402,24672,26315,27570,28518 49 | "Egypt, Arab Rep.",EGY,53800,47164,46731,48540,45809,49111,43520 50 | Euro area,EMU,5613920,4491910,4908200,5690140,5503870,5785780,5930130 51 | Eritrea,ERI,61,84,101,375,0,0,0 52 | Spain,ESP,413929,339897,365360,430324,411004,439579,449768 53 | Estonia,EST,16161,11950,14634,20050,20031,21419,21949 54 | Ethiopia,ETH,3085,3405,4071,5332,5963,5934,6416 55 | European Union,EUU,7428790,5931830,6528220,7578880,7338580,7693330,7890080 56 | Fragile and conflict affected situations,FCS,240623,171820,216666,224537,282436,268347,0 57 | Finland,FIN,127905,91221,95849,107163,101351,103094,100887 58 | Fiji,FJI,2013,1421,1816,2227,2412,2266,2460 59 | France,FRA,800627,648448,689363,795876,764789,800948,811712 60 | Faeroe Islands,FRO,1104,934,1026,1213,1149,1323,0 61 | "Micronesia, Fed. 
Sts.",FSM,0,0,0,0,0,0,0 62 | Gabon,GAB,9518,6199,8329,10546,10957,9728,8786 63 | United Kingdom,GBR,773713,623555,690756,800276,790992,806509,834035 64 | Georgia,GEO,3662,3202,4068,5231,6046,7213,7090 65 | Ghana,GHA,7140,7609,9484,14596,16812,16206,15022 66 | Guinea,GIN,1578,1223,1343,1533,1684,1773,1831 67 | "Gambia, The",GMB,156,206,259,303,328,329,0 68 | Guinea-Bissau,GNB,172,155,169,296,143,165,168 69 | Equatorial Guinea,GNQ,14832,8550,10298,14295,15310,13782,12585 70 | Greece,GRC,82859,62722,66204,73506,70460,73221,78310 71 | Grenada,GRD,208,187,184,196,207,214,234 72 | Greenland,GRL,0,0,0,0,0,0,0 73 | Guatemala,GTM,9674,9047,10668,12688,12531,12773,13537 74 | Guam,GUM,0,0,0,0,0,0,0 75 | Guyana,GUY,0,0,0,0,0,0,0 76 | High income,HIC,14929800,11953700,13925100,16342400,16401700,16851900,17090000 77 | "Hong Kong SAR, China",HKG,457803,409319,501661,560284,592390,628504,638875 78 | Honduras,HND,7078,5766,7248,9078,9432,8867,9086 79 | Heavily indebted poor countries (HIPC),HPC,119684,102927,129749,155392,159097,165390,172395 80 | Croatia,HRV,27108,21624,22516,25147,23483,24851,26170 81 | Haiti,HTI,834,929,802,1010,1047,1213,1303 82 | Hungary,HUN,125194,97100,107066,122056,110865,118429,0 83 | Indonesia,IDN,152090,130358,183481,235095,225744,218307,210801 84 | Isle of Man,IMN,0,0,0,0,0,0,0 85 | India,IND,288902,273752,375353,445636,447414,468478,487653 86 | Not classified,INX,0,0,0,0,0,0,0 87 | Ireland,IRL,219979,203338,209021,232257,234495,244379,0 88 | "Iran, Islamic Rep.",IRN,0,0,0,0,0,0,0 89 | Iraq,IRQ,66239,43995,54599,82505,96986,94800,0 90 | Iceland,ISL,7253,6372,7099,8278,8074,8561,9129 91 | Israel,ISR,82626,69163,81621,91635,93223,95664,96718 92 | Italy,ITA,644649,491150,535296,614705,592542,615764,629732 93 | Jamaica,JAM,5750,4179,4143,4387,4482,4277,0 94 | Jordan,JOR,12416,10928,12745,13744,14307,14270,15507 95 | Japan,JPN,858847,639245,833705,893378,874354,794578,0 96 | Kazakhstan,KAZ,76257,48243,65502,89503,91746,88692,0 97 | 
Kenya,KEN,8139,7416,8263,9073,9990,9828,9993 98 | Kyrgyz Republic,KGZ,2752,2565,2472,3380,2933,3099,2732 99 | Cambodia,KHM,6785,5120,6080,6938,8825,10016,11425 100 | Kiribati,KIR,19,18,16,21,20,18,18 101 | St. Kitts and Nevis,KNA,235,177,212,248,264,309,328 102 | "Korea, Rep.",KOR,500723,428868,540896,670343,688933,703396,714235 103 | Kosovo,KSV,892,965,1163,1311,1185,1231,1448 104 | Kuwait,KWT,98390,62981,76952,112784,130069,125823,0 105 | Latin America & Caribbean (developing only),LAC,745902,606782,770915,942982,970434,983844,978871 106 | Lao PDR,LAO,1743,1801,2552,3080,3635,4165,4742 107 | Lebanon,LBN,11432,11988,13782,14519,24406,25223,26295 108 | Liberia,LBR,292,176,248,423,561,658,522 109 | Libya,LBY,63183,37335,49055,19025,61096,46140,13775 110 | St. Lucia,LCA,536,544,609,573,604,585,616 111 | Latin America & Caribbean (all income levels),LCN,1132140,911877,1170890,1368240,1387920,1393640,1373460 112 | Least developed countries: UN classification,LDC,200221,151845,186796,233531,230799,238511,262146 113 | Low income,LIC,57765,49488,60406,71752,68204,73576,83146 114 | Liechtenstein,LIE,0,0,0,0,0,0,0 115 | Sri Lanka,LKA,10114,8972,11091,13644,13561,15102,16735 116 | Lower middle income,LMC,1053780,906363,1198050,1456330,1486690,1478910,1509660 117 | Low & middle income,LMY,4726390,3856620,4952850,5984130,6271960,6441410,6595450 118 | Lesotho,LSO,914,783,971,1221,1047,897,0 119 | Lithuania,LTU,27342,19446,24262,32641,34991,39012,39386 120 | Luxembourg,LUX,105101,84328,94204,109162,108854,122259,0 121 | Latvia,LVA,14077,11103,12646,16378,17376,18359,18521 122 | "Macao SAR, China",MAC,20117,20063,30047,40970,46869,55205,55018 123 | St. 
Martin (French part),MAF,0,0,0,0,0,0,0 124 | Morocco,MAR,33310,26094,30169,35295,34441,34964,36975 125 | Monaco,MCO,0,0,0,0,0,0,0 126 | Moldova,MDA,2472,2006,2280,3155,3168,3461,3345 127 | Madagascar,MDG,2498,1913,2180,2646,2878,3191,0 128 | Maldives,MDV,1970,1712,2007,2420,2835,3193,3193 129 | Middle East & North Africa (all income levels),MEA,1391620,990522,1206430,1588050,1789860,1786960,1679060 130 | Mexico,MEX,307236,244146,313989,366164,387301,400639,419849 131 | Marshall Islands,MHL,0,0,0,0,0,0,0 132 | Middle income,MIC,4669090,3807150,4892950,5913180,6205610,6369360,6513050 133 | "Macedonia, FYR",MKD,4283,3084,3743,4946,4422,4715,5420 134 | Mali,MLI,2551,2128,2449,2802,3233,0,0 135 | Malta,MLT,7509,6380,7199,8708,0,0,0 136 | Myanmar,MMR,0,0,0,0,0,0,0 137 | Middle East & North Africa (developing only),MNA,507006,359184,425431,458794,549908,520883,0 138 | Montenegro,MNE,1784,1330,1427,1922,1785,1846,1817 139 | Mongolia,MNG,3037,2305,3356,5471,5356,5021,6428 140 | Northern Mariana Islands,MNP,0,0,0,0,0,0,0 141 | Mozambique,MOZ,3194,3114,3040,3899,4195,4304,4458 142 | Mauritania,MRT,1852,1499,2201,2985,2802,2820,2419 143 | Mauritius,MUS,5103,4326,5101,6011,6246,6475,6771 144 | Malawi,MWI,1206,1240,1586,1663,1580,1847,1950 145 | Malaysia,MYS,229657,184897,230988,264778,259985,255787,260387 146 | North America,NAC,2373750,1977710,2321000,2652210,2746680,2816410,0 147 | Namibia,NAM,4613,4646,5388,5650,5668,5685,5319 148 | New Caledonia,NCL,0,0,0,0,0,0,0 149 | Niger,NER,958,1097,1269,1340,1517,1736,1454 150 | Nigeria,NGA,82983,52147,93240,128999,144918,92907,91530 151 | Nicaragua,NIC,2660,2589,3361,4168,4724,4605,4997 152 | Netherlands,NLD,670035,548366,601852,691336,675271,707933,722933 153 | High income: nonOECD,NOC,2974090,2274000,2884430,3551030,3732430,3811260,3793210 154 | Norway,NOR,212250,151524,170638,205923,207022,202677,190043 155 | Nepal,NPL,1603,1597,1533,1684,1899,2060,2385 156 | New Zealand,NZL,42707,34875,44357,51152,50849,55088,0 157 | High income: 
OECD,OEC,11956500,9679110,11042400,12795100,12675200,13045000,13303300 158 | OECD members,OED,12438400,10066500,11511400,13347000,13269900,13656700,13952900 159 | Oman,OMN,35618,24502,33503,46655,48549,0,0 160 | Other small states,OSS,48713,35863,43700,55051,56047,55819,53069 161 | Pakistan,PAK,21060,20844,23979,29854,27849,30708,30481 162 | Panama,PAN,19596,19582,20337,26380,30284,0,0 163 | Peru,PER,34518,30523,39447,50581,52279,48305,45168 164 | Philippines,PHL,64299,54258,69464,71795,77025,75934,82863 165 | Palau,PLW,100,93,96,115,118,139,158 166 | Papua New Guinea,PNG,0,0,0,0,0,0,0 167 | Poland,POL,203158,164011,192916,226188,223544,242468,0 168 | Puerto Rico,PRI,76613,74213,74310,77273,73909,77915,0 169 | "Korea, Dem. Rep.",PRK,0,0,0,0,0,0,0 170 | Portugal,PRT,81551,66008,71193,83973,81594,89037,91621 171 | Paraguay,PRY,9994,8212,11046,13186,12278,14268,14001 172 | Pacific island small states,PSS,3039,2395,2962,3717,4052,3913,4219 173 | French Polynesia,PYF,0,0,0,0,0,0,0 174 | Qatar,QAT,70732,50309,75065,121692,143640,0,0 175 | Romania,ROU,62182,50292,58372,73107,67751,79614,81865 176 | Russian Federation,RUS,520004,341584,445513,576568,597056,594797,0 177 | Rwanda,RWA,611,632,689,925,1021,1175,1179 178 | South Asia,SAS,342249,326638,434707,520711,522284,550850,576855 179 | Saudi Arabia,SAU,322854,202056,261831,376224,399420,387644,354541 180 | Sudan,SDN,13139,8487,12958,11828,6281,6371,6695 181 | Senegal,SEN,3498,3117,3216,3634,3412,4148,4239 182 | Singapore,SGP,442649,369190,471089,554241,566663,578961,577704 183 | Solomon Islands,SLB,269,235,336,542,635,598,0 184 | Sierra Leone,SLE,339,331,433,479,1242,2196,2022 185 | El Salvador,SLV,5761,4793,5553,6474,6094,6403,0 186 | San Marino,SMR,0,0,0,0,0,0,0 187 | Somalia,SOM,0,0,0,0,0,0,0 188 | Serbia,SRB,14343,11441,12995,15788,15045,18754,19448 189 | Sub-Saharan Africa (developing only),SSA,386870,291864,396406,496099,497409,446234,456068 190 | South Sudan,SSD,10267,7377,9662,11779,1049,2147,5505 191 | 
Sub-Saharan Africa (all income levels),SSF,402684,301388,407732,511532,513868,461304,469922 192 | Small states,SST,88025,61288,71544,87839,87651,89592,0 193 | Sao Tome and Principe,STP,18,20,24,29,34,34,40 194 | Suriname,SUR,0,0,0,0,0,0,0 195 | Slovak Republic,SVK,80305,59950,68087,83209,85143,90827,91738 196 | Slovenia,SVN,36750,28761,30858,36089,33886,35844,37954 197 | Sweden,SWE,256022,190992,225559,262878,251943,254850,254268 198 | Swaziland,SWZ,1793,1860,2063,2205,2168,2128,0 199 | Sint Maarten (Dutch part),SXM,0,0,0,0,0,0,0 200 | Seychelles,SYC,981,915,910,1021,1052,1206,1182 201 | Syrian Arab Republic,SYR,0,0,0,0,0,0,0 202 | Turks and Caicos Islands,TCA,0,0,0,0,0,0,0 203 | Chad,TCD,4420,3252,3927,4726,4758,4347,4756 204 | Togo,TGO,1123,1162,1274,1481,1686,1923,2277 205 | Thailand,THA,208371,180251,227336,265972,274400,284890,280535 206 | Tajikistan,TJK,865,754,866,1164,1644,1631,0 207 | Turkmenistan,TKM,12345,15079,17234,21836,25761,0,0 208 | Timor-Leste,TLS,70,75,91,108,154,0,0 209 | Tonga,TON,47,46,49,79,83,93,78 210 | Trinidad and Tobago,TTO,19906,10037,11282,14922,12924,15430,0 211 | Tunisia,TUN,25197,19917,22236,22603,22250,22083,0 212 | Turkey,TUR,174608,143292,155074,185760,207440,211045,221605 213 | Tuvalu,TUV,0,0,0,0,0,0,0 214 | "Taiwan, China",TWN,0,0,0,0,0,0,0 215 | Tanzania,TZA,5108,4964,5831,6966,8261,7825,9582 216 | Uganda,UGA,3457,3367,3283,3441,4723,4999,5220 217 | Ukraine,UKR,84458,54364,69228,81280,83884,78743,64788 218 | Upper middle income,UMC,3616360,2902330,3697350,4459940,4721540,4893010,5006080 219 | Uruguay,URY,9172,8580,10612,12673,13288,13507,13433 220 | United States,USA,1841940,1587740,1852330,2106370,2194150,2262220,0 221 | Uzbekistan,UZB,12158,11679,12453,14994,14165,16835,18377 222 | St. 
Vincent and the Grenadines,VCT,210,192,183,183,191,186,188 223 | "Venezuela, RB",VEN,97273,59531,112353,94764,99786,91961,0 224 | Virgin Islands (U.S.),VIR,0,0,0,0,0,0,0 225 | Vietnam,VNM,69725,66759,83474,107606,124701,143186,160890 226 | Vanuatu,VUT,275,300,327,351,384,383,0 227 | West Bank and Gaza,PSE,1165,1133,1367,1799,1871,2072,2293 228 | World,WLD,19649200,15803000,18858700,22300100,22641000,23259800,23666400 229 | Samoa,WSM,183,177,192,219,223,243,0 230 | "Yemen, Rep.",YEM,0,0,0,0,0,0,0 231 | South Africa,ZAF,102154,82601,107407,126830,118126,113388,109341 232 | "Congo, Dem. Rep.",COD,7723,5000,8928,10818,9336,10166,10992 233 | Zambia,ZMB,5180,4484,7504,9034,10511,11601,11071 234 | Zimbabwe,ZWE,1831,2250,3245,3557,3884,3507,3625 235 | -------------------------------------------------------------------------------- /Ch05/1. Export2_Columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/1. 
Export2_Columns.xlsx -------------------------------------------------------------------------------- /Ch05/Embarak _Ch05_Data Gathering and Cleaning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/Embarak _Ch05_Data Gathering and Cleaning.pdf -------------------------------------------------------------------------------- /Ch05/Embarak _Ch05_Data Gathering and Cleaning.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 5: Data Gathering and Cleaning 5 | 6 | # In[46]: 7 | 8 | 9 | import numpy as np 10 | np.random.randn(5, 3) 11 | 12 | 13 | # In[47]: 14 | 15 | 16 | import pandas as pd 17 | import numpy as np 18 | 19 | dataset = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 20 | 'h'],columns=['stock1', 'stock2', 'stock3']) 21 | dataset.rename(columns={"one":'stock1',"two":'stock2', "three":'stock3'}, inplace=True) 22 | dataset = dataset.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']) 23 | 24 | print (dataset) 25 | 26 | 27 | # In[48]: 28 | 29 | 30 | print (dataset['stock1'].isnull()) 31 | 32 | 33 | # In[49]: 34 | 35 | 36 | print (dataset) 37 | dataset.fillna(0) 38 | 39 | 40 | # In[50]: 41 | 42 | 43 | # Fill missing values forward 44 | print (dataset) 45 | dataset.fillna(method='pad') 46 | 47 | 48 | # In[51]: 49 | 50 | 51 | print (dataset) 52 | dataset.dropna() 53 | 54 | 55 | # In[52]: 56 | 57 | 58 | print (dataset) 59 | dataset.replace(np.nan, 0 ) 60 | 61 | 62 | # # Read CSV files 63 | 64 | # In[53]: 65 | 66 | 67 | import pandas as pd 68 | sales = pd.read_csv("Sales.csv") 69 | print ("\n\n<<<<<<< First 5 records <<<<<<<\n\n" ) 70 | print (sales.head()) 71 | 72 | 73 | # In[54]: 74 | 75 | 76 | print ("\n\n<<<<<<< Last 5 records <<<<<<<\n\n" ) 77 | print (sales.tail()) 78 | 79 | 80 | # In[55]: 81 | 82 | 83 | 
#import pandas as pd
# Load only the first four rows — a cheap way to inspect the file layout.
salesNrows = pd.read_csv("Sales.csv", nrows=4)
salesNrows


# In[56]:


# Rename the two identifier columns in place; the month columns keep their names.
salesNrows.rename(columns={"SALES_ID":'ID',"SALES_BY_REGION":'REGION'}, inplace=True)
salesNrows


# # Find unique values

# In[57]:


# Count and list the distinct values of a numeric and a categorical column.
print (len(salesNrows['JANUARY'].unique()))
print (len(salesNrows['REGION'].unique()))
print (salesNrows['JANUARY'].unique())


# In[58]:


# Columns can be selected by position [0, 1, 2] or by name
# ['SALES_ID' , 'SALES_BY_REGION', 'JANUARY'] — here by position.
salesNrows = pd.read_csv("Sales.csv", nrows=4, usecols=[0, 1, 6])
salesNrows


# In[60]:


# Read specific fields of data [0, 1, 2] or
# ['SALES_ID' , 'SALES_BY_REGION', 'JANUARY'] — here selected by name.
salesNrows = pd.read_csv("Sales.csv", nrows=4,
usecols=['SALES_ID' , 'SALES_BY_REGION', 'FEBRUARY', 'MARCH'])
salesNrows


# In[61]:


# Map the file's ad-hoc missing-value markers to NaN while parsing.
# NOTE: "not avilable" is misspelled on purpose — it matches the raw CSV text.
sales = pd.read_csv("Sales.csv", nrows=7,
na_values =["n.a.", "not avilable"])
mydata = sales.head(7)
mydata


# In[62]:


# Same as above, but additionally treat the sentinel -1 as missing.
sales = pd.read_csv("Sales.csv", nrows=7,
na_values =["n.a.", "not avilable", -1])
mydata = sales.head(7)
mydata


# # Data Integration
# ## Read Data

# In[63]:


import pandas as pd

a = pd.read_csv("1. Export1_Columns.csv")
b = pd.read_csv("1. Export2_Columns.csv")


# In[64]:


a.head()


# In[65]:


b.head()


# In[66]:


a.head()


# In[67]:


# Trim data set `b` before merging: drop one column by name,
# then a list of columns, then one more by position.
b.drop('2014', axis=1, inplace=True)
columns = ['2013', '2012']
b.drop(columns, inplace=True, axis=1)
b.drop(b.columns[[3]], axis=1, inplace=True)
b.head()


# In[68]:


# Join the two frames on the explicit key column.
mergedDataSet = a.merge(b, on="Country Name")
mergedDataSet.head()


# In[69]:


# Without `on=`, merge joins on every column name the two frames share.
dataX = a.merge(b)
dataX.head()


# # Merge two data sets using Index
# ### Rows Union

# In[70]:


# First five rows, with the index materialized as a column.
Data1 = a.head()
Data1=Data1.reset_index()
Data1


# In[71]:


# Last five rows, same treatment.
Data2 = a.tail()
Data2=Data2.reset_index()
Data2


# In[72]:


# stack the DataFrames on top of each other (row-wise union)
VerticalStack = pd.concat((Data1, Data2), axis=0)
VerticalStack


# # Read JSON data

# In[73]:


# Parse a JSON object held in a string and pull out nested fields.
import json
data = '''{
"name" : "Ossama",
"phone" : {
"type" : "intl",
"number" : "+971 50 244 5467"
},
"email" : {
"hide" : "No"
}
}'''
info = json.loads(data)
print ('Name:',info["name"])
print ('Hide:',info["email"]["hide"])


# In[74]:


# Parse a JSON array of records and iterate over it.
# NOTE(review): the name `input` shadows the input() builtin from here on.
input = '''[
{ "id" : "001",
"x" : "5",
"name" : "Ossama"
} ,
{ "id" : "009",
"x" : "10",
"name" : "Omar"
}
]'''
info = json.loads(input)
print ('User count:', len(info))
for item in info:
    print ('\nName', item['name'])
    print ('Id', item['id'])
    print ('Attribute', item['x'])


# ## Read JSON from the cloud

# In[91]:


import urllib.request
import json


# Download a JSON document over HTTP and pretty-print it.
# Network I/O: requires python-data.dr-chuck.net to be reachable.
with urllib.request.urlopen("http://python-data.dr-chuck.net/comments_244984.json") as url:
    uh = url.read()

print ('Retrieving', url)

data = uh
print ('Retrieved',len(data),'characters')

try:
    # urlopen().read() returns bytes, so decode before parsing.
    # BUG FIX: the original called json.loads(str(data)); str() on bytes
    # produces "b'...'", which never parses, so js was silently always None.
    js = json.loads(data.decode('utf-8'))
except json.JSONDecodeError:
    # was a bare `except:`; keep the None fallback but only for parse errors
    js = None

print (json.dumps(js, indent=4))


# In[99]:


from urllib.request import urlopen
import json
req = urlopen("http://python-data.dr-chuck.net/comments_244984.json")
# BUG FIX: the original bound this result to the name `json`, shadowing the
# json module for the rest of the script. Use a distinct name instead.
jsdata = json.loads(req.read())
print (jsdata)
print (jsdata['comments'])


# In[100]:


# Total the `count` field across all comment records.
total = 0    # was `sum`, which shadowed the builtin
counter = 0
for item in jsdata["comments"]:
    counter += 1
    Name = item["name"]
    Count = item["count"]
    total += int(Count)
    print (Name," ", Count)

print ("\nCount: ", counter)
print ("Sum: ", total)


# In[101]:


# Same structure, read from a local file instead of the network.
import json
with open('comments.json') as json_data:
    jasondta = json.load(json_data)
    print(jasondta)


# In[102]:


total = 0    # was `sum`, which shadowed the builtin
counter = 0
for item in jasondta["comments"]:
    counter += 1
    Name = item["name"]
    Count = item["count"]
    total += int(Count)
    print (Name," ", Count)

print ("\nCount: ", counter)
print ("Sum: ", total)


# # Read and process HTML tags

# In[103]:


# Fetch a web page; the raw HTML bytes are printed in the next cell.
import urllib.request
with urllib.request.urlopen("http://python-data.dr-chuck.net/known_by_Rona.html") as url:
    strhtml = url.read()
350 | print(strhtml[:700]) 351 | 352 | 353 | # In[104]: 354 | 355 | 356 | import urllib 357 | from bs4 import BeautifulSoup 358 | 359 | response = urllib.request.urlopen('http://python-data.dr-chuck.net/known_by_Rona.html') 360 | html_doc = response.read() 361 | 362 | soup = BeautifulSoup(html_doc, 'html.parser') 363 | 364 | print(html_doc[:700]) 365 | print("\n") 366 | print (soup.title) 367 | print(soup.title.string) 368 | print(soup.a.string) 369 | 370 | 371 | # In[106]: 372 | 373 | 374 | for x in soup.find_all('b'): 375 | print(x.string) 376 | 377 | 378 | # In[107]: 379 | 380 | 381 | import urllib 382 | from bs4 import BeautifulSoup 383 | 384 | response = urllib.request.urlopen('http://python-data.dr-chuck.net/known_by_Rona.html') 385 | html_doc = response.read() 386 | print (html_doc[:300]) 387 | soup = BeautifulSoup(html_doc, 'html.parser') 388 | 389 | print ("\n") 390 | counter=0 391 | for link in soup.findAll("a"): 392 | print(link.get("href")) 393 | if counter<10: 394 | counter+=1 395 | continue 396 | else: break 397 | 398 | 399 | # In[108]: 400 | 401 | 402 | htmldata=""" 403 | 404 | 405 | The Dormouse's story 406 | 407 | 408 | 409 |

410 | 411 | The Dormouse's story 412 | 413 |

414 |

415 | Once upon a time there were three little sisters; and their names were 416 | 417 | Elsie 418 | 419 | , 420 | 421 | Lacie 422 | 423 | and 424 | 425 | Tillie 426 | 427 | ; and they lived at the bottom of a well. 428 |

429 |

430 | ... 431 |

432 | 433 | 434 | """ 435 | 436 | from bs4 import BeautifulSoup 437 | soup = BeautifulSoup(htmldata, 'html.parser') 438 | print(soup.prettify()) 439 | 440 | 441 | # In[109]: 442 | 443 | 444 | soup.title 445 | 446 | 447 | # In[110]: 448 | 449 | 450 | soup.title.name 451 | 452 | 453 | # In[111]: 454 | 455 | 456 | soup.title.string 457 | 458 | 459 | # In[112]: 460 | 461 | 462 | soup.title.parent.name 463 | 464 | 465 | # In[113]: 466 | 467 | 468 | soup.p 469 | 470 | 471 | # In[114]: 472 | 473 | 474 | soup.p['class'] 475 | 476 | 477 | # In[115]: 478 | 479 | 480 | soup.a 481 | 482 | 483 | # In[116]: 484 | 485 | 486 | soup.find_all('a') 487 | 488 | 489 | # In[117]: 490 | 491 | 492 | soup.find(id="link2") 493 | 494 | 495 | # In[118]: 496 | 497 | 498 | for link in soup.find_all('a'): 499 | print(link.get('href')) 500 | 501 | 502 | # In[119]: 503 | 504 | 505 | print(soup.get_text()) 506 | 507 | 508 | # In[120]: 509 | 510 | 511 | htmldata=""" 512 | 513 | 514 | Python Book Verion 2018 515 | 516 | 517 | 518 |

519 | 520 | Author Name: Ossama Embarak 521 | 522 |

523 |

524 | Python techniques for gathering and cleaning data 525 | 530 | Data Cleaning 531 | 532 | , Data Processing and Visulization 533 | 538 | Data Visualization 539 | 540 | 541 |

542 |

543 | @July 2018 544 |

545 | 546 | 547 | """ 548 | 549 | from bs4 import BeautifulSoup 550 | soup = BeautifulSoup(htmldata, 'html.parser') 551 | print(soup.prettify()) 552 | 553 | 554 | # In[121]: 555 | 556 | 557 | print(soup.get_text()) 558 | 559 | 560 | # In[128]: 561 | 562 | 563 | xmldata = """ 564 | 565 | 566 | 567 | 2 568 | 2017 569 | 3.5 570 | 571 | 572 | 573 | 3 574 | 2016 575 | 2.8 576 | 577 | 578 | 579 | 1 580 | 2018 581 | 3.7 582 | 583 | 584 | 585 | """.strip() 586 | 587 | 588 | # In[129]: 589 | 590 | 591 | from xml.etree import ElementTree as ET 592 | stuff = ET.fromstring(xmldata) 593 | lst = stuff.findall('student') 594 | 595 | print ('Students count:', len(lst)) 596 | for item in lst: 597 | print ("\nName:", item.get("name")) 598 | print ('concentration:', item.find("concentration").get("name")) 599 | print ('Rank:', item.find('rank').text) 600 | print ('GPA:', item.find("GPA").text) 601 | 602 | 603 | # In[131]: 604 | 605 | 606 | value = ET.fromstring(xmldata).find('response/result/value') 607 | if value: 608 | print ('Found value:', value.text) 609 | 610 | -------------------------------------------------------------------------------- /Ch05/Import_1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/Import_1.xlsx -------------------------------------------------------------------------------- /Ch05/Import_2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/Import_2.xlsx -------------------------------------------------------------------------------- /Ch05/Sales.csv: -------------------------------------------------------------------------------- 1 | 
SALES_ID,SALES_BY_REGION,JANUARY,FEBRUARY,MARCH,APRIL,MAY,JUNE,JULY,AUGUST,SEPTEMBER,OCTOBER,NOVEMBER,DECEMBER 2 | 1,AUH,"3,469.00",n.a.,not avilable,"3,642.00","5,803.00","5,662.00","1,896.00","2,293.00","2,583.00","5,233.00","4,421.00","4,071.00" 3 | 1,SHJ,"5,840.00","5,270.00","4,114.00","5,605.00","4,387.00","5,026.00","4,055.00","2,782.00","4,578.00","4,993.00","2,859.00","4,853.00" 4 | 1,-1,"2,967.00","2,425.00","5,353.00",n.a.,"5,027.00","4,078.00","3,858.00","1,927.00","3,527.00","4,179.00","1,571.00","5,551.00" 5 | 2,AUH,"1,328.00",-1,"1,574.00","2,343.00","3,826.00","4,932.00","1,710.00","3,221.00","3,381.00","1,313.00","1,765.00","1,214.00" 6 | 3,SHJ,"2,473.00","1,421.00","3,606.00","1,314.00","1,413.00","2,091.00","3,270.00","3,346.00","2,080.00","1,539.00","2,630.00","1,697.00" 7 | 3,not avilable,n.a.,956,"1,297.00","1,984.00","2,744.00","5,793.00","2,261.00","5,607.00","2,437.00","4,328.00","3,317.00","5,390.00" 8 | 3,AUH,"2,634.00","2,143.00","3,698.00","5,767.00","2,782.00","4,444.00","5,036.00","4,805.00","5,792.00","5,256.00","4,096.00","3,170.00" 9 | 4,AJM,"4,673.00","1,322.00","2,615.00","3,423.00","5,694.00","3,544.00","2,093.00","2,676.00","5,979.00","5,481.00","4,786.00","1,637.00" 10 | 4,RAK,"4,862.00",947,"3,400.00","3,913.00","5,654.00","2,504.00","1,922.00","1,577.00","2,766.00","2,318.00","5,906.00","5,477.00" 11 | 4,RAK,"5,829.00","1,802.00","2,345.00","5,131.00","3,662.00","1,323.00","3,214.00","1,150.00","1,191.00","1,328.00","3,766.00","2,050.00" 12 | 4,-1,"4,870.00","4,645.00","5,521.00","4,824.00","3,899.00","1,118.00","3,665.00","5,241.00","2,433.00","5,215.00","1,858.00","4,667.00" 13 | 5, FUJ,"2,152.00","3,064.00","3,981.00","2,958.00","3,794.00","5,453.00","3,324.00",n.a.,"1,516.00","5,640.00","1,301.00","5,513.00" 14 | 5,AJM,"5,584.00","1,103.00","2,838.00","3,520.00","2,250.00","1,435.00","3,035.00","4,572.00","5,450.00","2,374.00","1,549.00","4,885.00" 15 | 
5,DXB,"4,258.00","1,397.00","3,824.00","4,715.00","3,150.00","5,572.00",968,"1,014.00","2,784.00","5,913.00",969,"4,153.00" 16 | 5,SHJ,"1,738.00","5,212.00","4,647.00","3,637.00","4,616.00","4,651.00","2,298.00","3,097.00","4,239.00","3,001.00","2,457.00",975 17 | 5,RAK,"5,847.00","5,635.00","2,110.00",944,"2,597.00","2,381.00","2,094.00","5,849.00","2,393.00","3,246.00","5,784.00","1,486.00" 18 | 6,DXB,"2,223.00","2,054.00","3,339.00","4,368.00","4,539.00","5,852.00","2,304.00","1,841.00","4,616.00","4,180.00","2,503.00","4,211.00" 19 | 7, FUJ,"5,564.00","1,972.00","3,522.00","2,779.00","1,371.00","5,419.00","1,398.00","4,277.00","5,467.00","4,831.00","1,507.00","3,749.00" 20 | 7, FUJ,"1,119.00","1,666.00","3,257.00","1,188.00","5,301.00","1,362.00","3,452.00",912,"4,756.00",962,"3,618.00","1,351.00" 21 | 7,SHJ,"1,723.00","1,196.00","2,044.00",925,"2,973.00","1,581.00","1,812.00","4,429.00","1,230.00","5,813.00","2,119.00","3,105.00" 22 | 7,RAK,972,"1,938.00","5,041.00","2,119.00","3,172.00","2,327.00","2,104.00","4,796.00","2,454.00","1,500.00","4,799.00","2,987.00" 23 | 8,DXB,"3,490.00",not avilable,"1,317.00","2,351.00","1,864.00",n.a.,"4,161.00","1,131.00","2,173.00","4,005.00","5,238.00","1,396.00" 24 | 8, FUJ,"1,297.00","1,100.00","3,360.00","4,455.00","3,467.00","3,968.00","3,258.00","2,397.00","3,334.00","1,161.00","4,090.00","5,740.00" 25 | 8,AUH,"3,662.00","1,282.00","5,629.00","3,035.00","4,231.00","3,388.00","3,543.00","2,794.00","2,834.00","1,203.00","3,920.00","4,093.00" 26 | 9,AJM,"4,057.00","4,054.00","5,585.00","1,622.00","5,750.00","3,851.00","1,057.00","2,056.00","5,845.00","3,177.00","4,319.00","1,216.00" 27 | 9,AJM,"4,778.00","1,397.00","4,680.00","5,631.00","3,471.00","3,156.00","5,855.00","5,871.00","1,644.00","5,179.00","4,054.00","1,572.00" 28 | 10, FUJ,"1,085.00","5,189.00",n.a.,"1,142.00","1,951.00","3,967.00","1,993.00","5,116.00","3,785.00","2,733.00","5,686.00",n.a. 
29 | 10,AJM,"1,614.00","4,722.00","1,546.00","5,531.00","1,483.00","2,202.00","2,015.00","1,453.00","5,529.00","4,173.00",901,"3,190.00" 30 | 10,AJM,"5,172.00","4,640.00","2,336.00","4,525.00","3,747.00","3,300.00","2,283.00","2,671.00","1,918.00","5,942.00","4,969.00","3,420.00" 31 | 10,UAQ,"5,782.00","1,537.00","1,576.00","2,592.00","3,592.00","5,357.00","3,371.00","5,250.00","1,550.00","2,449.00","1,531.00","4,984.00" 32 | 10,UAQ,"4,075.00","3,173.00","3,093.00","1,994.00","1,924.00","5,040.00","1,973.00","3,972.00","2,122.00","1,804.00","1,656.00","4,904.00" 33 | 11,SHJ,"3,736.00","3,164.00",930,"2,791.00",944,"4,051.00","2,912.00","3,054.00","2,983.00",994,"3,465.00","2,643.00" 34 | 11,UAQ,"5,458.00","2,735.00","5,601.00","3,696.00","3,152.00","1,452.00","2,443.00","2,110.00","5,752.00","5,045.00","2,048.00","2,156.00" 35 | 11,DXB,"4,175.00","3,031.00","2,984.00","4,860.00","3,064.00","2,574.00","5,353.00","3,274.00","3,752.00","1,053.00","3,143.00","3,768.00" 36 | 12,DXB,"2,087.00","4,927.00","2,188.00","4,005.00",952,"4,442.00","4,744.00","4,200.00","3,328.00","3,988.00","2,312.00","4,314.00" 37 | 12,AUH,"1,412.00","5,799.00","2,445.00","1,710.00","3,216.00","5,702.00","3,770.00","1,129.00","2,416.00","4,194.00","5,751.00","4,963.00" 38 | 12,AJM,"2,098.00","5,692.00","1,697.00","3,659.00","1,356.00","1,447.00","3,316.00","2,757.00","1,130.00","5,918.00","4,138.00",953 39 | 13,DXB,"3,644.00","2,625.00","2,655.00","2,245.00","5,184.00","1,853.00","1,659.00","1,777.00","5,669.00","3,493.00","4,469.00","2,158.00" 40 | 13, FUJ,"2,299.00","4,117.00","3,266.00","5,573.00","5,823.00","5,486.00","5,033.00","3,064.00","1,322.00","2,243.00","1,962.00","1,884.00" 41 | 14,AUH,"5,394.00","2,572.00","2,003.00","5,599.00","3,283.00","5,743.00","1,905.00","2,885.00","4,412.00","1,104.00",971,"5,760.00" 42 | 15,AUH,"5,783.00","1,660.00","2,749.00","5,773.00","2,209.00","5,604.00","1,843.00","2,921.00","3,870.00","4,072.00","5,460.00","2,330.00" 43 | 
15,UAQ,"5,181.00","5,134.00","1,136.00","5,120.00","2,687.00","2,914.00","2,099.00","3,187.00","4,578.00","2,761.00","3,896.00","1,361.00" 44 | 16,UAQ,n.a.,"5,941.00","3,487.00",-1,"5,433.00","3,511.00","2,555.00",n.a.,"1,929.00","4,285.00","1,198.00",-1 45 | 16,DXB,"4,947.00","5,882.00","3,488.00","3,975.00","5,306.00","5,433.00","3,698.00","2,541.00","3,601.00","2,155.00","5,835.00",996 46 | 16,DXB,"1,900.00","4,003.00","3,662.00","5,126.00","2,988.00","3,602.00","5,351.00","4,534.00","5,439.00","2,501.00","5,619.00","3,875.00" 47 | 17,AJM,"3,718.00","2,921.00",n.a.,"1,567.00","2,975.00","1,001.00","4,736.00","1,104.00","1,247.00","4,191.00","3,513.00","1,645.00" 48 | 17,DXB,"1,755.00","4,724.00","5,373.00","4,205.00","5,086.00","3,074.00","3,311.00","3,221.00","4,513.00","4,043.00","1,511.00","2,833.00" 49 | 17,SHJ,"3,218.00","1,086.00","2,651.00","3,154.00","4,572.00","3,979.00","4,408.00","3,083.00","3,164.00","1,184.00","1,610.00","1,996.00" 50 | 19,AJM,"2,321.00","4,358.00",954,"5,306.00","5,970.00","2,627.00","2,537.00","3,061.00","1,015.00","5,257.00","3,304.00","3,270.00" 51 | 20,DXB,"1,895.00","2,478.00","1,050.00","3,564.00","4,513.00","1,879.00","2,951.00","3,464.00","4,849.00","1,386.00","2,331.00","2,931.00" 52 | 20, FUJ,"5,185.00","2,965.00","3,086.00","5,544.00","4,954.00","1,850.00","4,227.00","5,194.00","1,789.00","4,312.00","3,810.00","3,051.00" 53 | 20,SHJ,"4,300.00","1,906.00","1,362.00","4,339.00","1,100.00","2,488.00","4,218.00","1,396.00","2,802.00","1,623.00","2,953.00","2,978.00" 54 | 20,AUH,"2,551.00","4,957.00","4,401.00",939,"2,539.00","5,449.00","4,153.00","5,768.00","1,350.00","1,110.00","4,871.00","2,402.00" 55 | 20,SHJ,"4,982.00","2,453.00","2,996.00","2,664.00","2,897.00","3,852.00","1,966.00","4,062.00","4,937.00","3,645.00","3,263.00","5,757.00" 56 | 20,AJM,"2,091.00","1,116.00","5,130.00","1,358.00","3,521.00","5,272.00","2,727.00","2,937.00","1,381.00","4,395.00","3,684.00","3,414.00" 57 | 
21,SHJ,"4,786.00","3,886.00","1,189.00","4,838.00","1,382.00","4,390.00","1,793.00","3,089.00","3,042.00","5,337.00","5,378.00","1,919.00" 58 | 21,UAQ,"1,094.00","1,759.00","3,711.00","5,719.00","2,635.00","5,147.00","3,268.00","3,995.00","1,582.00","2,178.00","2,589.00","5,593.00" 59 | 21, FUJ,"1,721.00","2,764.00","2,929.00","4,390.00","4,411.00","1,073.00","2,053.00","3,536.00","2,579.00","3,186.00","5,049.00","1,061.00" 60 | 21,RAK,"1,446.00","3,690.00","2,340.00","2,455.00","2,681.00","5,830.00","4,533.00","4,422.00","3,505.00","2,312.00","5,464.00","4,993.00" 61 | 22,AUH,"5,104.00","1,705.00","4,359.00","3,943.00","2,558.00","2,959.00","2,168.00","2,421.00","1,428.00","2,145.00","5,290.00","3,518.00" 62 | 22,AUH,"5,895.00","1,099.00","2,597.00","4,715.00",903,"5,241.00","1,379.00",919,"4,051.00","4,583.00","2,708.00","4,437.00" 63 | 22,SHJ,"4,275.00","5,737.00","5,899.00","3,974.00","1,751.00","3,739.00","3,221.00","2,145.00","5,540.00","3,978.00","3,952.00","5,297.00" 64 | 22,SHJ,"1,371.00","1,019.00","1,887.00","2,783.00","3,723.00","3,223.00","1,126.00","3,850.00","1,561.00","5,736.00","4,837.00","4,161.00" 65 | 22, FUJ,"2,994.00","4,994.00","5,566.00","1,828.00","3,021.00","5,685.00","5,131.00","3,388.00","5,078.00","1,196.00","4,969.00","2,115.00" 66 | 23,DXB,n.a.,"1,565.00","5,691.00","4,346.00","5,537.00","4,459.00","3,929.00","3,355.00","4,024.00","2,838.00","2,521.00","3,346.00" 67 | 23,DXB,"3,897.00","2,795.00","3,500.00","5,302.00","2,662.00",n.a.,not avilable,"4,620.00","1,226.00","5,505.00","1,529.00","4,534.00" 68 | 23,RAK,not avilable,"3,845.00","4,040.00","4,905.00","2,615.00","3,069.00","1,101.00","5,548.00","1,504.00","1,527.00","1,856.00","4,383.00" 69 | 23,AUH,997,"1,268.00","3,981.00","5,091.00","4,021.00","5,931.00","4,691.00","3,481.00","3,487.00","2,082.00","3,179.00","2,537.00" 70 | 23,SHJ,"2,081.00","5,045.00","4,649.00","1,884.00","4,768.00","3,305.00","5,057.00","1,856.00","4,863.00","5,718.00","2,934.00","1,066.00" 71 | 
23,SHJ,"2,783.00","3,818.00","1,335.00","2,503.00","2,770.00","1,618.00","5,763.00","1,165.00","2,807.00","3,700.00","4,310.00","3,425.00" 72 | 24,SHJ,"3,435.00","4,180.00","4,428.00","3,803.00","4,870.00","5,462.00","2,084.00","4,043.00",989,"3,482.00","3,999.00","2,884.00" 73 | 24,AJM,"4,500.00","5,624.00","2,172.00","1,018.00","4,830.00","4,805.00","4,757.00","5,604.00","4,298.00","2,382.00","2,006.00","5,594.00" 74 | 24,AJM,"3,357.00","5,226.00","1,798.00","1,355.00","1,872.00","1,807.00","4,542.00","3,924.00","2,053.00","3,276.00","3,081.00","1,297.00" 75 | 25,DXB,"4,195.00","2,733.00","3,133.00","3,719.00",938,"5,499.00","3,244.00","2,725.00","4,310.00","2,034.00","5,559.00","5,951.00" 76 | 25,SHJ,"2,475.00","4,512.00","3,976.00","1,286.00","4,627.00","1,370.00","4,346.00","4,296.00","4,558.00","3,247.00","4,791.00",949 77 | 25,RAK,"4,322.00","1,695.00","3,745.00","1,801.00","2,511.00","2,073.00","1,941.00","1,830.00","1,712.00","2,555.00","1,310.00","4,762.00" 78 | 25,AJM,"2,907.00","1,289.00","1,912.00","4,458.00","1,778.00","2,860.00","5,233.00","1,372.00","4,938.00","4,815.00","4,523.00","4,835.00" 79 | 25,AJM,"2,926.00","3,504.00","2,663.00","2,270.00","5,023.00","5,796.00","3,577.00","3,664.00","5,803.00","4,233.00","3,801.00","1,264.00" 80 | 26,DXB,"1,934.00","2,849.00","5,937.00","3,231.00","3,136.00","3,640.00","4,022.00","4,022.00","4,142.00","5,265.00","1,444.00","3,718.00" 81 | 26,RAK,"2,695.00","4,511.00","2,119.00","4,689.00","1,858.00","5,949.00","5,739.00","1,406.00","1,214.00","1,456.00","4,959.00","3,343.00" 82 | 26, FUJ,"5,875.00","3,569.00","4,970.00","4,298.00","3,061.00","2,990.00","1,101.00","2,366.00",925,"5,375.00","2,681.00","5,602.00" 83 | 26,AUH,"2,348.00","2,759.00","2,304.00","5,093.00","5,551.00","2,759.00","3,339.00","5,626.00","5,363.00","1,110.00","4,452.00","2,148.00" 84 | 27, FUJ,-1,"5,616.00","2,827.00","5,290.00","1,548.00","1,352.00","2,047.00","1,006.00","1,189.00","1,625.00","4,233.00","1,848.00" 85 | 
27,UAQ,"2,713.00","3,298.00","1,806.00","4,704.00","3,180.00","4,835.00","4,023.00","4,954.00","5,690.00","3,492.00","2,055.00","4,200.00" 86 | 27,AJM,"3,937.00","2,509.00","5,189.00","3,105.00","5,919.00","5,777.00","2,928.00","3,377.00","1,967.00","2,794.00","1,397.00","4,087.00" 87 | 28,AUH,-1,"1,796.00","2,472.00","4,829.00","2,194.00",-1,"1,142.00","1,674.00","5,857.00","1,500.00","3,219.00","5,033.00" 88 | 28,RAK,n.a.,"5,612.00","5,113.00","5,552.00","4,383.00","3,397.00","3,270.00","4,902.00","2,095.00",n.a.,"5,708.00","1,864.00" 89 | 28,DXB,"2,890.00","1,850.00","1,201.00","2,790.00","5,640.00","3,479.00","3,317.00",940,"2,036.00","2,533.00","4,840.00","5,290.00" 90 | 28,AJM,"2,359.00","3,735.00","5,507.00","5,119.00","2,121.00","1,373.00","3,289.00","4,712.00","3,232.00","3,017.00","2,527.00","2,343.00" 91 | 28,AJM,"1,994.00",936,"1,273.00","3,545.00","5,690.00","4,430.00","2,311.00","1,589.00","1,919.00","3,776.00","3,436.00","2,042.00" 92 | 28,AJM,"2,780.00","5,269.00","2,199.00","2,280.00","4,474.00","3,074.00","3,272.00","1,465.00","3,147.00","2,672.00","3,451.00","2,008.00" 93 | 29,UAQ,"5,325.00","1,905.00","5,049.00","1,311.00","4,146.00","1,706.00","1,689.00","3,190.00","2,915.00","2,183.00","3,301.00","4,365.00" 94 | 29, FUJ,"1,677.00","4,350.00","2,725.00","4,171.00","3,561.00","4,974.00","4,353.00","1,735.00","3,414.00","2,825.00","5,899.00","1,784.00" 95 | 29, FUJ,"2,492.00","1,833.00","2,982.00","4,292.00","5,540.00","5,847.00",909,"2,339.00","4,868.00","5,207.00","5,938.00","1,793.00" 96 | 30,AJM,"2,832.00","5,978.00","1,684.00","1,550.00","1,194.00","3,737.00","5,779.00","4,441.00","1,213.00","3,711.00","5,384.00","1,293.00" 97 | 30, FUJ,"3,402.00","5,283.00","2,229.00","3,758.00","1,427.00","1,057.00","5,277.00","5,231.00","3,909.00","4,345.00","5,287.00","2,638.00" 98 | 30,AJM,"2,028.00","2,006.00","5,120.00","5,959.00","3,127.00","3,962.00","4,780.00","3,200.00","1,836.00","2,623.00","1,607.00","2,371.00" 99 | 30, 
FUJ,"5,549.00","1,302.00","1,929.00","2,822.00","5,379.00","1,243.00","3,075.00","4,358.00","5,106.00","2,322.00","2,409.00","1,069.00" 100 | -------------------------------------------------------------------------------- /Ch05/Sales.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch05/Sales.xlsx -------------------------------------------------------------------------------- /Ch06/Embarak _Ch06_Data Exploring and Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch06/Embarak _Ch06_Data Exploring and Analysis.pdf -------------------------------------------------------------------------------- /Ch06/Embarak _Ch06_Data Exploring and Analysis.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 6: Data Exploring Analysis 5 | 6 | # In[5]: 7 | 8 | 9 | import pandas as pd 10 | import numpy as np 11 | data = np.array(['O','S','S','A']) 12 | S1 = pd.Series(data) # without adding index 13 | S2 = pd.Series(data,index=[100,101,102,103]) # with adding index 14 | print (S1) 15 | print ("\n") 16 | print (S2) 17 | 18 | 19 | # ### Create series from dictionary 20 | 21 | # In[6]: 22 | 23 | 24 | import pandas as pd 25 | import numpy as np 26 | data = {'X' : 0., 'Y' : 1., 'Z' : 2.} 27 | SERIES1 = pd.Series(data) 28 | print (SERIES1) 29 | 30 | 31 | # In[7]: 32 | 33 | 34 | import pandas as pd 35 | import numpy as np 36 | data = {'X' : 0., 'Y' : 1., 'Z' : 2.} 37 | SERIES1 = pd.Series(data,index=['Y','Z','W','X']) 38 | print (SERIES1) 39 | 40 | 41 | # In[9]: 42 | 43 | 44 | # Use sclara to create a series 45 | import pandas as pd 46 | import numpy as np 47 | Series1 = pd.Series(7, 
index=[0, 1, 2, 3, 4]) 48 | print (Series1) 49 | 50 | 51 | # ### Accessing Data from Series 52 | 53 | # In[18]: 54 | 55 | 56 | import pandas as pd 57 | Series1 = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e']) 58 | 59 | print ("Example 1:Retrieve the first element") 60 | print (Series1[0] ) 61 | print ("\nExample 2:Retrieve the first three element") 62 | print (Series1[:3]) 63 | 64 | print ("\nExample 3:Retrieve the last three element") 65 | print(Series1[-3:]) 66 | 67 | print ("\nExample 4:Retrieve a single element") 68 | print (Series1['a']) 69 | 70 | print ("\nExample 5:Retrieve multiple elements") 71 | print (Series1[['a','c','d']]) 72 | 73 | 74 | # In[5]: 75 | 76 | 77 | 78 | 79 | 80 | # In[20]: 81 | 82 | 83 | import pandas as pd 84 | import numpy as np 85 | my_series1 = pd.Series([5, 6, 7, 8, 9, 10]) 86 | print ("my_series1\n", my_series1) 87 | print ("\n Series Analysis\n ") 88 | print ("Series mean value : ", my_series1.mean()) # find mean value in a series 89 | print ("Series max value : ",my_series1.max()) # find max value in a series 90 | print ("Series min value : ",my_series1.min()) # find min value in a series 91 | print ("Series standred deviation value : ",my_series1.std()) # find standred deviation of a series 92 | 93 | 94 | # In[11]: 95 | 96 | 97 | my_series1.describe() 98 | 99 | 100 | # In[17]: 101 | 102 | 103 | my_series_11 = my_series1 104 | print (my_series1) 105 | my_series_11.index = ['A', 'B', 'C', 'D', 'E', 'F'] 106 | print (my_series_11) 107 | print (my_series1) 108 | 109 | 110 | # In[21]: 111 | 112 | 113 | my_series_11 = my_series1.copy() 114 | print (my_series1) 115 | my_series_11.index = ['A', 'B', 'C', 'D', 'E', 'F'] 116 | print (my_series_11) 117 | print (my_series1) 118 | 119 | 120 | # In[23]: 121 | 122 | 123 | 'F' in my_series_11 124 | 125 | 126 | # In[27]: 127 | 128 | 129 | temp = my_series_11 < 8 130 | temp 131 | 132 | 133 | # In[35]: 134 | 135 | 136 | len(my_series_11) 137 | 138 | 139 | # In[28]: 140 | 141 | 142 | temp = 
my_series_11[my_series_11 < 8 ] * 2 143 | temp 144 | 145 | 146 | # In[37]: 147 | 148 | 149 | def AddSeries(x,y): 150 | for i in range (len(x)): 151 | print (x[i] + y[i]) 152 | 153 | 154 | # In[39]: 155 | 156 | 157 | print ("Add two series\n") 158 | AddSeries (my_series_11, my_series1) 159 | 160 | 161 | # In[40]: 162 | 163 | 164 | import pandas as pd 165 | import numpy as np 166 | my_series2 = np.random.randn(5, 10) 167 | print ("\nmy_series2\n", my_series2) 168 | 169 | 170 | # In[49]: 171 | 172 | 173 | import matplotlib.pyplot as plt 174 | plt.plot(my_series2) 175 | plt.ylabel('index') 176 | plt.show() 177 | 178 | 179 | # In[54]: 180 | 181 | 182 | from numpy import * 183 | import math 184 | import matplotlib.pyplot as plt 185 | 186 | t = linspace(0, 2*math.pi, 400) 187 | a = sin(t) 188 | b = cos(t) 189 | c = a + b 190 | 191 | 192 | # In[50]: 193 | 194 | 195 | plt.plot(t, a, 'r') # plotting t, a separately 196 | plt.plot(t, b, 'b') # plotting t, b separately 197 | plt.plot(t, c, 'g') # plotting t, c separately 198 | plt.show() 199 | 200 | 201 | # ### create Data frame from lisits 202 | 203 | # In[19]: 204 | 205 | 206 | import pandas as pd 207 | data = [10,20,30,40,50] 208 | DF1 = pd.DataFrame(data) 209 | print (DF1) 210 | 211 | 212 | # In[22]: 213 | 214 | 215 | import pandas as pd 216 | data = [['Ossama',25],['Ali',43],['Ziad',32]] 217 | DF1 = pd.DataFrame(data,columns=['Name','Age']) 218 | print (DF1) 219 | 220 | 221 | # In[21]: 222 | 223 | 224 | import pandas as pd 225 | data = [['Ossama',25],['Ali',43],['Ziad',32]] 226 | DF1 = pd.DataFrame(data,columns=['Name','Age'],dtype=float) 227 | print (DF1) 228 | 229 | 230 | # In[ ]: 231 | 232 | 233 | Create data frame from dictionaries 234 | 235 | 236 | # In[24]: 237 | 238 | 239 | import pandas as pd 240 | data = {'Name':['Omar', 'Ali', 'Mohammed', 'Ossama'],'Age':[30,25,44,4237]} 241 | DF1 = pd.DataFrame(data) 242 | print (DF1) 243 | 244 | 245 | # In[26]: 246 | 247 | 248 | import pandas as pd 249 | data = 
{'Name':['Omar', 'Ali', 'Mohammed', 'Ossama'],'Age':[30,25,44,4237]} 250 | DF1 = pd.DataFrame(data, index=['Employee1','Employee2','Employee3','Employee4']) 251 | print (DF1) 252 | 253 | 254 | # In[3]: 255 | 256 | 257 | import pandas as pd 258 | data = [{'Test1': 10, 'Test2': 20},{'Test3': 30, 'Project': 20, 'Final': 20}] 259 | df = pd.DataFrame(data) 260 | print (df) 261 | 262 | 263 | # In[13]: 264 | 265 | 266 | import pandas as pd 267 | data = [{'Test1': 10, 'Test2': 20},{'Test1': 30, 'Test2': 20, 'Project': 20}] 268 | 269 | #With three column indices, values same as dictionary keys 270 | df1 = pd.DataFrame(data, index=['First', 'Second'], columns=['Test2', 'Project' , 'Test1']) 271 | 272 | #With two column indices with one index with other name 273 | df2 = pd.DataFrame(data, index=['First', 'Second'], columns=['Project', 'Test_1','Test2']) 274 | print (df1) 275 | print ("\n") 276 | print (df2) 277 | 278 | 279 | # In[16]: 280 | 281 | 282 | import pandas as pd 283 | 284 | data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']), 285 | 'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])} 286 | 287 | df1 = pd.DataFrame(data) 288 | print (df1) 289 | 290 | 291 | # In[51]: 292 | 293 | 294 | import pandas as pd 295 | 296 | data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']), 297 | 'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])} 298 | df1 = pd.DataFrame(data) 299 | print (df1['Test2']) # Column selection 300 | print("\n") 301 | print (df1[:]) # Column selection 302 | 303 | 304 | # In[46]: 305 | 306 | 307 | df1.iloc[:, [1,0 ]] 308 | 309 | 310 | # In[39]: 311 | 312 | 313 | df1[0:4:1] 314 | 315 | 316 | # In[66]: 317 | 318 | 319 | # add a new Column 320 | import pandas as pd 321 | data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']), 322 | 'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])} 323 | df1 = pd.DataFrame(data) 324 | 
print (df1)
# Add a Project column (note: Series index order differs from df1's — pandas
# aligns by label, not position) and a derived Average column.
df1['Project'] = pd.Series([90,83,67, 87],index=['Ali','Omar','Salwa', 'Ahmed'])
print ("\n")
df1['Average'] = round((df1['Test1']+df1['Test2']+df1['Project'])/3, 2)

print (df1)


# In[70]:


import pandas as pd
data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']),
'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])}
print (df1)
# Plain assignment creates an alias, not a copy: df2 and df1 are the same object.
df2 = df1
print ("\n")
print (df2)


# In[71]:


# Delete a column in data frame using del function
# (because df2 aliases df1, these deletions affect df1 as well)
print ("Deleting the first column using DEL function:")
del df2['Test2']
print (df2)

# Delete a column in data frame using pop function
print ("\nDeleting another column using POP function:")
df2.pop('Project')
print (df2)


# In[72]:


print (df1)


# In[73]:


print (df2)


# In[83]:


# Rebuild df1 from scratch, then demonstrate that copy() (unlike plain
# assignment above) gives an independent frame.
import pandas as pd
data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']),
'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])}
df1 = pd.DataFrame(data)
df1['Project'] = pd.Series([90,83,67, 87],index=['Ali','Omar','Salwa', 'Ahmed'])
print ("\n")
df1['Average'] = round((df1['Test1']+df1['Test2']+df1['Project'])/3, 2)
print (df1)

print ("\n")
df2= df1.copy() # copy df1 into df2 using copy() method
print (df2)
#delete columns using del and pop methods — df1 is unaffected this time
del df2['Test2']
df2.pop('Project')
print ("\n")
print (df1)
print ("\n")
print (df2)


# In[106]:


# Rebuild df1 once more, then select rows by position and by slice.
import pandas as pd
data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']),
'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])}
df1 = pd.DataFrame(data)
df1['Project'] = pd.Series([90,83,67, 87],index=['Ali','Omar','Salwa', 'Ahmed'])
print ("\n")
df1['Average'] = round((df1['Test1']+df1['Test2']+df1['Project'])/3, 2)
print (df1)
print ("\nselect iloc function to retrieve row number 2")
print (df1.iloc[2])
print ("\nslice rows")
print (df1[2:4] )


# In[108]:


print (df1)


# In[ ]:


# Append a new row (Khalid) to a fully-specified grade table.
import pandas as pd
data = {'Test1' : pd.Series([70, 55, 89], index=['Ahmed', 'Omar', 'Ali']),
'Test2' : pd.Series([56, 82, 77, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa']),
'Project' : pd.Series([87, 83, 90, 67], index=['Ahmed', 'Omar', 'Ali', 'Salwa']),
'Average' : pd.Series([71, 73.33, 85.33, 66], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])}

data = pd.DataFrame(data)
print (data)
print("\n")
df2 = pd.DataFrame([[80, 70, 90, 80]], columns = ['Test1','Test2','Project','Average'], index=['Khalid'])
# BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat produces the identical row-wise result.
data = pd.concat([data, df2])
print (data)


# In[138]:


# Drop a row by its index label.
print (data)
print ('\n')
data = data.drop('Omar')
print (data)


# In[74]:


# A mixed-type frame (numeric + categorical) used by the describe() cells below.
import pandas as pd
data = {'Age' : pd.Series([30, 25, 44, ], index=['Ahmed', 'Omar', 'Ali']),
'Salary' : pd.Series([25000, 17000, 30000, 12000], index=['Ahmed', 'Omar', 'Ali', 'Salwa']),
'Height' : pd.Series([160, 154, 175, 165], index=['Ahmed', 'Omar', 'Ali', 'Salwa']),
'Weight' : pd.Series([85, 70, 92, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa']),
'Gender' : pd.Series(['Male', 'Male', 'Male', 'Female'], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])}

data = pd.DataFrame(data)
print (data)
print("\n")
df2 = pd.DataFrame([[42, 31000, 170, 80, 'Female']], columns = ['Age','Salary','Height','Weight', 'Gender']
, index=['Mona'])
460 | data = data.append(df2) 461 | print (data) 462 | 463 | 464 | # In[63]: 465 | 466 | 467 | data.describe() 468 | 469 | 470 | # In[64]: 471 | 472 | 473 | data.describe(include='all') 474 | 475 | 476 | # In[66]: 477 | 478 | 479 | data.Salary.describe() 480 | 481 | 482 | # In[67]: 483 | 484 | 485 | data.describe(include=[np.number]) 486 | 487 | 488 | # In[68]: 489 | 490 | 491 | data.describe(include=[np.object]) 492 | 493 | 494 | # In[70]: 495 | 496 | 497 | data.describe(exclude=[np.number]) 498 | 499 | 500 | # In[71]: 501 | 502 | 503 | data 504 | 505 | 506 | # In[75]: 507 | 508 | 509 | OptimalWeight = data['Height']- 100 510 | OptimalWeight 511 | 512 | 513 | # In[93]: 514 | 515 | 516 | unOptimalCases = data['Weight'] <= OptimalWeight 517 | unOptimalCases 518 | 519 | 520 | # ## Create Panel 521 | 522 | # In[141]: 523 | 524 | 525 | np.random.randn(4, 3) 526 | 527 | 528 | # In[143]: 529 | 530 | 531 | # creating an empty panel 532 | import pandas as pd 533 | import numpy as np 534 | 535 | data = np.random.rand(2,4,5) 536 | Paneldf = pd.Panel(data) 537 | print (Paneldf) 538 | 539 | 540 | # In[94]: 541 | 542 | 543 | data = {'Item1' : pd.DataFrame(np.random.randn(4, 3)), 544 | 'Item2' : pd.DataFrame(np.random.randn(4, 2))} 545 | p = pd.Panel(data) 546 | 547 | 548 | # In[95]: 549 | 550 | 551 | p 552 | 553 | 554 | # In[97]: 555 | 556 | 557 | p['Item1'].describe() 558 | 559 | 560 | # In[104]: 561 | 562 | 563 | import pandas as pd 564 | data1 = {'Age' : pd.Series([30, 25, 44, ], index=['Ahmed', 'Omar', 'Ali']), 565 | 'Salary' : pd.Series([25000, 17000, 30000, 12000], index=['Ahmed', 'Omar', 'Ali', 'Salwa']), 566 | 'Height' : pd.Series([160, 154, 175, 165], index=['Ahmed', 'Omar', 'Ali', 'Salwa']), 567 | 'Weight' : pd.Series([85, 70, 92, 65], index=['Ahmed', 'Omar', 'Ali', 'Salwa']), 568 | 'Gender' : pd.Series(['Male', 'Male', 'Male', 'Female'], index=['Ahmed', 'Omar', 'Ali', 'Salwa'])} 569 | 570 | data2 = {'Age' : pd.Series([24, 19, 33,25 ], index=['Ziad', 'Majid', 'Ayman', 
'Ahlam']), 571 | 'Salary' : pd.Series([17000, 7000, 22000, 21000], index=['Ziad', 'Majid', 'Ayman', 'Ahlam']), 572 | 'Height' : pd.Series([170, 175, 162, 177], index=['Ziad', 'Majid', 'Ayman', 'Ahlam']), 573 | 'Weight' : pd.Series([77, 84, 74, 90], index=['Ziad', 'Majid', 'Ayman', 'Ahlam']), 574 | 'Gender' : pd.Series(['Male', 'Male', 'Male', 'Female'], index=['Ziad', 'Majid', 'Ayman', 'Ahlam'])} 575 | 576 | 577 | # In[105]: 578 | 579 | 580 | data = {'Group1' :data1, 581 | 'Group2' :data2} 582 | p = pd.Panel(data) 583 | 584 | 585 | # In[106]: 586 | 587 | 588 | p['Group1'].describe() 589 | 590 | 591 | # In[107]: 592 | 593 | 594 | p['Group1']['Salary'].describe() 595 | 596 | 597 | # In[147]: 598 | 599 | 600 | # creating an empty panel 601 | import pandas as pd 602 | import numpy as np 603 | data = {'Item1' : pd.DataFrame(np.random.randn(4, 3)), 604 | 'Item2' : pd.DataFrame(np.random.randn(4, 2))} 605 | Paneldf = pd.Panel(data) 606 | print (Paneldf['Item1']) 607 | print ("\n") 608 | print (Paneldf['Item2']) 609 | 610 | 611 | # In[149]: 612 | 613 | 614 | print (Paneldf.major_xs(1)) 615 | 616 | 617 | # In[150]: 618 | 619 | 620 | print (Paneldf.minor_xs(1)) 621 | 622 | 623 | # ## Data anlysis 624 | 625 | # In[11]: 626 | 627 | 628 | import pandas as pd 629 | import numpy as np 630 | Number = [1,2,3,4,5,6,7,8,9,10] 631 | Names = ['Ali Ahmed','Mohamed Ziad','Majid Salim','Salwa Ahmed', 'Ahlam Mohamed', 'Omar Ali', 'Amna Mohammed', 632 | 'Khalid Yousif', 'Safa Humaid', 'Amjad Tayel'] 633 | 634 | City = ['Fujairah','Dubai','Sharjah', 'AbuDhabi','Fujairah','Dubai','Sharjah', 'AbuDhabi','Sharjah','Fujairah'] 635 | columns = ['Number', 'Name', 'City' ] 636 | dataset= pd.DataFrame({'Number': Number , 'Name': Names, 'City': City}, columns = columns ) 637 | Gender= pd.DataFrame({'Gender':['Male','Male','Male','Female', 'Female', 'Male', 'Female','Male', 638 | 'Female', 'Male']}) 639 | Height = pd.DataFrame(np.random.randint(120,175, size=(12, 1))) 640 | Weight = 
pd.DataFrame(np.random.randint(50,110, size=(12, 1))) 641 | 642 | dataset['Gender']= Gender 643 | dataset['Height']= Height 644 | dataset['Weight']= Weight 645 | dataset.set_index('Number') 646 | 647 | 648 | # In[186]: 649 | 650 | 651 | print ( dataset.describe()) # Summary statistics for numerical columns 652 | 653 | 654 | # In[187]: 655 | 656 | 657 | print (dataset.mean()) # Returns the mean of all columns 658 | 659 | 660 | # In[188]: 661 | 662 | 663 | print (dataset.corr()) # Returns the correlation between columns in a DataFrame 664 | 665 | 666 | # In[189]: 667 | 668 | 669 | print (dataset.count()) # Returns the number of non-null values in each DataFrame column 670 | 671 | 672 | # In[190]: 673 | 674 | 675 | print (dataset.max()) # Returns the highest value in each column 676 | 677 | 678 | # In[191]: 679 | 680 | 681 | print (dataset.min()) # Returns the lowest value in each column 682 | 683 | 684 | # In[192]: 685 | 686 | 687 | print (dataset.median()) # Returns the median of each column 688 | 689 | 690 | # In[193]: 691 | 692 | 693 | print (dataset.std()) # Returns the standard deviation of each column 694 | 695 | 696 | # ### Grouping 697 | 698 | # print (dataset) 699 | 700 | # In[3]: 701 | 702 | 703 | dataset.groupby('City')['Gender'].count() 704 | 705 | 706 | # In[4]: 707 | 708 | 709 | print (dataset.groupby('City').groups) 710 | 711 | 712 | # In[5]: 713 | 714 | 715 | print (dataset.groupby(['City','Gender']).groups) 716 | 717 | 718 | # In[7]: 719 | 720 | 721 | grouped = dataset.groupby('Gender') 722 | 723 | for name,group in grouped: 724 | print (name) 725 | print (group) 726 | print ("\n") 727 | 728 | 729 | # In[9]: 730 | 731 | 732 | grouped = dataset.groupby('Gender') 733 | print (grouped.get_group('Female')) 734 | 735 | 736 | # In[18]: 737 | 738 | 739 | # Aggregation 740 | grouped = dataset.groupby('Gender') 741 | print (grouped['Height'].agg(np.mean)) 742 | print ("\n") 743 | print (grouped['Weight'].agg(np.mean)) 744 | print ("\n") 745 | print 
(grouped.agg(np.size)) 746 | print ("\n") 747 | print (grouped['Height'].agg([np.sum, np.mean, np.std])) 748 | 749 | 750 | # In[19]: 751 | 752 | 753 | ### Transformations 754 | 755 | 756 | # In[ ]: 757 | 758 | 759 | dataset = dataset.set_index(['Number']) 760 | print (dataset) 761 | 762 | 763 | # In[26]: 764 | 765 | 766 | 767 | 768 | 769 | # In[28]: 770 | 771 | 772 | 773 | grouped = dataset.groupby('Gender') 774 | score = lambda x: (x - x.mean()) / x.std()*10 775 | print (grouped.transform(score)) 776 | 777 | 778 | # ### Filtration 779 | 780 | # In[30]: 781 | 782 | 783 | print (dataset.groupby('City').filter(lambda x: len(x) >= 3)) 784 | 785 | -------------------------------------------------------------------------------- /Ch07/Embarak _Ch07_Data Visualization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch07/Embarak _Ch07_Data Visualization.pdf -------------------------------------------------------------------------------- /Ch07/Embarak _Ch07_Data Visualization.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Chapter 7: Data Visualization 5 | 6 | # In[3]: 7 | 8 | 9 | import pandas as pd 10 | 11 | dataset = pd.read_csv("./Data/Salaries.csv") 12 | 13 | rank = dataset['rank'] 14 | discipline = dataset['discipline'] 15 | phd = dataset['phd'] 16 | service = dataset['service'] 17 | sex = dataset['sex'] 18 | salary = dataset['salary'] 19 | 20 | dataset.head() 21 | 22 | 23 | # # Line plotting 24 | 25 | # In[4]: 26 | 27 | 28 | dataset.plot() 29 | 30 | 31 | # In[5]: 32 | 33 | 34 | dataset[["rank", "discipline","phd","service", "sex", "salary"]].plot() 35 | 36 | 37 | # In[6]: 38 | 39 | 40 | dataset[["phd","service"]].plot() 41 | 42 | 43 | # # Visualize grouped data 44 | 45 | # In[7]: 46 | 47 | 48 | dataset1 = 
dataset.groupby(['service']).sum() 49 | dataset1.sort_values("salary", ascending = False, inplace=True) 50 | dataset1.head() 51 | 52 | 53 | # In[8]: 54 | 55 | 56 | dataset1["salary"].plot.bar() 57 | 58 | 59 | # # Bar plotting 60 | 61 | # In[9]: 62 | 63 | 64 | dataset[[ 'phd', 'service' ]].head(10).plot.bar() 65 | 66 | 67 | # In[10]: 68 | 69 | 70 | dataset[['phd', 'service']].head(10).plot.bar(title="Ph.D. Vs Service\n 2018") 71 | 72 | 73 | # In[11]: 74 | 75 | 76 | dataset[['phd', 'service']].head(10).plot.bar(title="Ph.D. Vs Service\n 2018" , color=['g','red']) 77 | 78 | 79 | # # Pie Chart 80 | 81 | # In[12]: 82 | 83 | 84 | dataset["salary"].head(10).plot.pie(autopct='%.2f') 85 | 86 | 87 | # # Box Plotting 88 | 89 | # In[13]: 90 | 91 | 92 | dataset[["phd","salary"]].head(100).plot.box() 93 | 94 | 95 | # In[14]: 96 | 97 | 98 | dataset[["phd","service"]].plot.box() 99 | 100 | 101 | # # Histogram 102 | 103 | # In[15]: 104 | 105 | 106 | dataset["salary"].head(20).plot.hist() 107 | 108 | 109 | # In[ ]: 110 | 111 | 112 | # A scatterplot 113 | 114 | 115 | # In[3]: 116 | 117 | 118 | # Exercises 119 | 120 | 121 | # In[4]: 122 | 123 | 124 | import matplotlib.pyplot as plt 125 | plt.style.use('classic') 126 | get_ipython().magic(u'matplotlib inline') 127 | import numpy as np 128 | import pandas as pd 129 | 130 | 131 | # In[30]: 132 | 133 | 134 | # Create temprature data 135 | rng = np.random.RandomState(0) 136 | season1 = np.cumsum(rng.randn(500, 6), 0) 137 | 138 | 139 | # In[32]: 140 | 141 | 142 | # Plot the data with Matplotlib defaults 143 | plt.plot(season1) 144 | plt.legend('ABCDEF', ncol=2, loc='upper left'); 145 | 146 | 147 | # In[33]: 148 | 149 | 150 | import seaborn as sns 151 | iris = sns.load_dataset("iris") 152 | iris.head() 153 | sns.pairplot(iris, hue='species', size=2.5); 154 | 155 | 156 | # In[36]: 157 | 158 | 159 | import seaborn as sns 160 | tips = sns.load_dataset('tips') 161 | tips.head() 162 | 163 | 164 | # In[37]: 165 | 166 | 167 | tips['Tips 
Percentage'] = 100 * tips['tip'] / tips['total_bill'] 168 | grid = sns.FacetGrid(tips, row="sex", col="time", margin_titles=True) 169 | grid.map(plt.hist, "Tips Percentage", bins=np.linspace(0, 40, 15)); 170 | 171 | 172 | # In[39]: 173 | 174 | 175 | import seaborn as sns 176 | tips = sns.load_dataset('tips') 177 | with sns.axes_style(style='ticks'): 178 | g = sns.factorplot("day", "total_bill", "sex", data=tips, kind="box") 179 | g.set_axis_labels("Bill Day", "Total Bill Amount"); 180 | 181 | 182 | # In[43]: 183 | 184 | 185 | import seaborn as sns 186 | tips = sns.load_dataset('tips') 187 | with sns.axes_style('white'): 188 | sns.jointplot( "total_bill", "tip", data=tips, kind='hex') 189 | 190 | 191 | # In[25]: 192 | 193 | 194 | import seaborn as sns 195 | planets = sns.load_dataset('planets') 196 | planets.head() 197 | 198 | -------------------------------------------------------------------------------- /Ch07/Salaries.csv: -------------------------------------------------------------------------------- 1 | rank,discipline,phd,service,sex,salary 2 | Prof,B,56,49,Male,186960 3 | Prof,A,12,6,Male,93000 4 | Prof,A,23,20,Male,110515 5 | Prof,A,40,31,Male,131205 6 | Prof,B,20,18,Male,104800 7 | Prof,A,20,20,Male,122400 8 | AssocProf,A,20,17,Male,81285 9 | Prof,A,18,18,Male,126300 10 | Prof,A,29,19,Male,94350 11 | Prof,A,51,51,Male,57800 12 | Prof,B,39,33,Male,128250 13 | Prof,B,23,23,Male,134778 14 | AsstProf,B,1,0,Male,88000 15 | Prof,B,35,33,Male,162200 16 | Prof,B,25,19,Male,153750 17 | Prof,B,17,3,Male,150480 18 | AsstProf,B,8,3,Male,75044 19 | AsstProf,B,4,0,Male,92000 20 | Prof,A,19,7,Male,107300 21 | Prof,A,29,27,Male,150500 22 | AsstProf,B,4,4,Male,92000 23 | Prof,A,33,30,Male,103106 24 | AsstProf,A,4,2,Male,73000 25 | AsstProf,A,2,0,Male,85000 26 | Prof,A,30,23,Male,91100 27 | Prof,B,35,31,Male,99418 28 | Prof,A,38,19,Male,148750 29 | Prof,A,45,43,Male,155865 30 | AsstProf,B,7,2,Male,91300 31 | Prof,B,21,20,Male,123683 32 | AssocProf,B,9,7,Male,107008 33 | 
Prof,B,22,21,Male,155750 34 | Prof,A,27,19,Male,103275 35 | Prof,B,18,18,Male,120000 36 | AssocProf,B,12,8,Male,119800 37 | Prof,B,28,23,Male,126933 38 | Prof,B,45,45,Male,146856 39 | Prof,A,20,8,Male,102000 40 | AsstProf,B,4,3,Male,91000 41 | Prof,B,18,18,Female,129000 42 | Prof,A,39,36,Female,137000 43 | AssocProf,A,13,8,Female,74830 44 | AsstProf,B,4,2,Female,80225 45 | AsstProf,B,5,0,Female,77000 46 | Prof,B,23,19,Female,151768 47 | Prof,B,25,25,Female,140096 48 | AsstProf,B,11,3,Female,74692 49 | AssocProf,B,11,11,Female,103613 50 | Prof,B,17,17,Female,111512 51 | Prof,B,17,18,Female,122960 52 | AsstProf,B,10,5,Female,97032 53 | Prof,B,20,14,Female,127512 54 | Prof,A,12,0,Female,105000 55 | AsstProf,A,5,3,Female,73500 56 | AssocProf,A,25,22,Female,62884 57 | AsstProf,A,2,0,Female,72500 58 | AssocProf,A,10,8,Female,77500 59 | AsstProf,A,3,1,Female,72500 60 | Prof,B,36,26,Female,144651 61 | AssocProf,B,12,10,Female,103994 62 | AsstProf,B,3,3,Female,92000 63 | AssocProf,B,13,10,Female,103750 64 | AssocProf,B,14,7,Female,109650 65 | Prof,A,29,27,Female,91000 66 | AssocProf,A,26,24,Female,73300 67 | Prof,A,36,19,Female,117555 68 | AsstProf,A,7,6,Female,63100 69 | Prof,A,17,11,Female,90450 70 | AsstProf,A,4,2,Female,77500 71 | Prof,A,28,7,Female,116450 72 | AsstProf,A,8,3,Female,78500 73 | AssocProf,B,12,9,Female,71065 74 | Prof,B,24,15,Female,161101 75 | Prof,B,18,10,Female,105450 76 | AssocProf,B,19,6,Female,104542 77 | Prof,B,17,17,Female,124312 78 | Prof,A,28,14,Female,109954 79 | Prof,A,23,15,Female,109646 80 | -------------------------------------------------------------------------------- /Ch08/Embarak Ch08 NCHS Case Study.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/08f77634d7aa29a06866c22f98a8c5838aa7093a/Ch08/Embarak Ch08 NCHS Case Study.pdf -------------------------------------------------------------------------------- 
/Ch08/Embarak Ch08 NCHS Case Study.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Embarak Ch08 Case Study --> NCHS Case Study 5 | 6 | #

Prepared by:

Ossama Embarak

7 | 8 | # In[2]: 9 | 10 | 11 | import pandas as pd 12 | data = pd.read_csv("NCHS.csv") 13 | data.head(3) 14 | 15 | 16 | # #### See how many rows and how many columns 17 | 18 | # In[3]: 19 | 20 | 21 | data.shape # 15028 rows and 6 columns 22 | 23 | 24 | # #### Remove all rows with na cases 25 | 26 | # In[4]: 27 | 28 | 29 | data = data.dropna() 30 | data.shape 31 | 32 | 33 | # ##### What are the unique causes of death in this data set? 34 | 35 | # In[5]: 36 | 37 | 38 | data.head(2) 39 | 40 | 41 | # In[7]: 42 | 43 | 44 | causes = data["Cause Name"].unique() 45 | causes 46 | 47 | 48 | # #### Remove 'All Causes' from the Cause death Name column 49 | 50 | # In[8]: 51 | 52 | 53 | data = data[data["Cause Name"] !="All Causes"] 54 | causes = data["Cause Name"].unique() 55 | causes 56 | 57 | 58 | # In[9]: 59 | 60 | 61 | len(causes) 62 | 63 | 64 | # #### Find the unique causes of “State”, 65 | 66 | # In[10]: 67 | 68 | 69 | data.head(3) 70 | 71 | 72 | # In[11]: 73 | 74 | 75 | state = data["State"].unique() 76 | state 77 | 78 | 79 | # In[12]: 80 | 81 | 82 | data1 = data[data["State"] !="United States"] 83 | 84 | state = data1["State"].unique() 85 | state 86 | 87 | 88 | # In[13]: 89 | 90 | 91 | len(state) 92 | 93 | 94 | # ### What were the total number of deaths in the United States from 1999 to 2015? 95 | 96 | # In[14]: 97 | 98 | 99 | data.head(0) 100 | 101 | 102 | # In[15]: 103 | 104 | 105 | data["Deaths"].sum() 106 | 107 | 108 | # ### What is the trend of number of deaths per year? 109 | 110 | # In[16]: 111 | 112 | 113 | dyear= data.groupby(["Year"]).sum() 114 | dyear 115 | 116 | 117 | # In[18]: 118 | 119 | 120 | dyear["Deaths"].plot(title="Death per year \n 1999-2015") 121 | 122 | 123 | # #### Which 10 states had the highest number of deaths in all years? 
124 | 125 | # In[19]: 126 | 127 | 128 | data1 = data[data["State"] !="United States"] 129 | dataset2 = data1.groupby("State").sum() 130 | dataset2.sort_values("Deaths", ascending=False , inplace = True) 131 | dataset2.head(10) 132 | 133 | 134 | # In[20]: 135 | 136 | 137 | dataset2["Deaths"].head(10).plot.bar(title="Top ten states with highest death number \n 1999-2015 ") 138 | 139 | 140 | # ## 6. What were the top causes of deaths in the United States during this period? 141 | 142 | # In[21]: 143 | 144 | 145 | dataset1 = data[data["Cause Name"] !="All Causes"] 146 | dataset2 = dataset1.groupby("Cause Name").sum() 147 | dataset2.sort_values("Deaths", ascending=False , inplace = True) 148 | dataset2.head(10) 149 | 150 | 151 | # In[22]: 152 | 153 | 154 | dataset2["Deaths"].head(10).plot.bar(title="Top ten casues of death in USA \n 1999-2015 ") 155 | 156 | 157 | # #### Analyze guns deaths in the US 158 | 159 | # In[3]: 160 | 161 | 162 | import pandas as pd 163 | import numpy as np 164 | import matplotlib.pyplot as plt 165 | import seaborn as sns 166 | sns.set(style='white', color_codes=True) 167 | get_ipython().magic(u'matplotlib inline') 168 | 169 | 170 | # In[2]: 171 | 172 | 173 | dataset = pd.read_csv('Death data.csv', index_col=0) 174 | print(dataset.shape) 175 | dataset.index.name = 'Index' 176 | dataset.columns = map(str.capitalize, dataset.columns) 177 | dataset.head(5) 178 | 179 | 180 | # In[5]: 181 | 182 | 183 | # Organizing the data by the year, then by month: 184 | dataset_Gun = dataset 185 | dataset_Gun.sort_values(['Year', 'Month'], inplace=True) 186 | 187 | 188 | # #### Annual U.S. 
suicide gun deaths 2012-2014, by gender 189 | 190 | # In[6]: 191 | 192 | 193 | dataset_Gun.Sex.value_counts(normalize=False) 194 | 195 | 196 | # In[8]: 197 | 198 | 199 | dataset_byGender = dataset_Gun.groupby('Sex').count() 200 | dataset_byGender 201 | 202 | 203 | # In[29]: 204 | 205 | 206 | dataset_Gun.Sex.value_counts(normalize=False).plot.bar(title='Annual U.S.\suicide gun deaths \n 2012-2014, by gender') 207 | 208 | 209 | # In[30]: 210 | 211 | 212 | dataset_byGender = dataset_Gun.groupby(['Sex']).count() 213 | dataset_byGender 214 | 215 | 216 | # In[31]: 217 | 218 | 219 | dataset_byGender.plot.bar(title='Annual U.S. suicide gun deaths \n 2012-2014, by gender') 220 | 221 | 222 | # ### Average annual death toll from guns in the United States from 2012 to 2014, by race 223 | 224 | # In[12]: 225 | 226 | 227 | dataset_byRace = dataset 228 | (dataset_byRace.Race.value_counts(ascending=False) *100/100000) 229 | 230 | 231 | # In[13]: 232 | 233 | 234 | (dataset_byRace.Race.value_counts(ascending=False) *100/100000).plot.bar(title=' Percentage of Average annual\death toll from guns in the United States \nfrom 2012 to 2014, by race') 235 | 236 | 237 | # In[34]: 238 | 239 | 240 | dataset_byRace.Race.value_counts(normalize=False) 241 | dataset_byRace.Race.value_counts(normalize=False).plot.bar(title='Annual U.S.\suicide gun deaths \n 2012-2014, by Race') 242 | 243 | 244 | # #### 3. Rate of gun deaths in the U.S. per 100,000 population 2012-2014, by race. 245 | 246 | # In[35]: 247 | 248 | 249 | dataset_byRace = dataset 250 | print (dataset_byRace.shape) 251 | dataset_byRace.head(2) 252 | 253 | 254 | # In[36]: 255 | 256 | 257 | dataset_byRace = dataset 258 | (dataset_byRace.Race.value_counts(ascending=False) *100/100000) 259 | 260 | 261 | # In[37]: 262 | 263 | 264 | (dataset_byRace.Race.value_counts(ascending=False) *100/100000).plot.bar(title='Rate of\gun deaths in the U.S. per 100,000 population \n2012-2014, by race') 265 | 266 | 267 | # 4. 
Annual number of gun deaths in the United States on average from 2012 to 2014, by cause 268 | 269 | # In[18]: 270 | 271 | 272 | dataset_byRace.Intent.value_counts(sort =True , ascending=False) 273 | 274 | 275 | # In[17]: 276 | 277 | 278 | dataset_byRace.Intent.value_counts(sort=True).plot.bar(title='Annual number\of gun deaths in the United States on average \n from 2012 to 2014, by cause') 279 | 280 | 281 | # 5. Average annual death toll from guns in the United States from 2012 to 2014, by cause 282 | 283 | # In[40]: 284 | 285 | 286 | dataset_byRace.Intent.value_counts(ascending=False) *100/100000 287 | 288 | 289 | # In[21]: 290 | 291 | 292 | (dataset_byRace.Intent.value_counts(ascending=False) *100/100000).plot.bar(title='The 100k Percentage of gun deaths tools in the U.S.\n2012-2014, by cause') 293 | 294 | 295 | # 6. Percentage of annual suicide gun deaths in the United States from 2012 to 2014, by year 296 | 297 | # In[42]: 298 | 299 | 300 | dataset_byRace.Year.value_counts(ascending=True) *100/100000 301 | 302 | 303 | # In[22]: 304 | 305 | 306 | (dataset_byRace.Year.value_counts(ascending=True) *100/100000).plot.bar(title='Percentage of annual suicide gun deaths in the United States \nfrom 2012 to 2014, by year') 307 | 308 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. 
Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2018 Dr. Ossama Embarak 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*%title%*](https://www.apress.com/9781484241080) by Dr. Ossama Embarak (Apress, 2018). 4 | 5 | [comment]: #cover 6 | ![Cover image](9781484241080.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. -------------------------------------------------------------------------------- /errata.md: -------------------------------------------------------------------------------- 1 | # Errata for *Book Title* 2 | 3 | On **page xx** [Summary of error]: 4 | 5 | Details of error here. Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 12 | 13 | *** --------------------------------------------------------------------------------