','',dealer))
26 | name = re.findall(r'\'>(.*?)(.*)<',dealerInfo[1])[0]
28 | city = re.findall(r'>(.*),\s*(.*)<',dealerInfo[2])[0][0]
29 | state = re.findall(r'>(.*),\s*(.*)<',dealerInfo[2])[0][1]
30 | zip = re.findall(r'>(.*)<',dealerInfo[3])[0]
31 | lat = latlngs[d][0]
32 | lng = latlngs[d][1]
33 | d+=1
34 | dataSet.append([name,address,city,state,zip,lat,lng])
35 |
36 | print(dataSet) #[[name,address, city, state, zip, lat,lng],]
37 |
38 |
39 | #Findall found total LatLngs: 55
40 | #Findall found total Address: 55
41 | #[['Akins Feed & Seed', '206 N Hill Street', 'Griffin', 'GA', '30223', '33.2509855', '-84.2633946'], ['Alf's Farm and Garden', '101 East 1st Street', 'Donalsonville', 'GA', '39845', '31.0426107', '-84.8821949'], ['American Cowboy Shop', '513 D Murphy Hwy', 'Blairsville', 'GA', '30512', '34.8761989', '-83.9582412'], ['Anderson's General Store', '23736 US Hwy 80 E', 'Statesboro', 'GA', '30458', '32.43158', '-81.749293'], ['Bar G Horse & Cattle Supply', '1060 Astondale Road', 'Bishop', 'GA', '30621', '33.8192864', '-83.4387722'], ['Beggs Farm Supply', '5845 Royston Hwy', 'Canon', 'GA', '30520', '34.2959968', '-83.0062267'], ['Big Creek Feed', '218 Hwy 49 N', 'Byron', 'GA', '31025', '32.6537561', '-83.7596295'], ['Blue Ribbon Show Supply', '9416 Lucy Moore Road', 'Nichols', 'GA', '31554', '31.462497', '-82.5866503'], ['Burdette Mill', '216 Depot Street', 'Washington', 'GA', '30673', '33.7340136', '-82.7472304'], ['Burke Feed', '369 Hwy 56 N', 'Waynesboro', 'GA', '30830', '33.1064245', '-81.9852452'], ['Candler Feed and Seed', '1275 Smokey Park Hwy', 'Candler', 'NC', '28715', '35.5401542', '-82.7570303'], ['Cash & Carry Feed', '135 N McGriff St.', 'Whigham', 'GA', '39897', '30.8848506', '-84.3248931'], ['Cherokee Feed and Seed', '869 Grove St', 'Gainesville', 'GA', '30501', '34.289323', '-83.8219858'], ['Cherokee Feed and Seed', '2370 Hightower Rd', 'Ball Ground', 'GA', '30107', '34.3372664', '-84.3779515'], ['Claxton Family Cattle', '240 Old Douglas Road', 'Hazelhurst', 'GA', '31539', '31.836371', '-82.6232915'], ['D&D Irringation', '51 S Rentz St', 'Lenox', 'GA', '31637', '31.2713852', '-83.4629421'], ['Double D Stables and Tack', '4111 Logan Rd', 'Rocky Face', 'GA', '30740', '34.805079', '-85.0274471'], ['Eatonton Co-op', '504 S Jefferson Ave', 'Eatonton', 'GA', '31024', '33.3267997', '-83.3884961'], ['Edenfields Feed and Seed', '709 Hwy 25N', 'Millen', 'GA', '30442', '32.8088128', '-81.9491768'], ['Family Feed', '6424 COLUMBUS HWY 80', 'Box Springs', 'GA', 
'31801', '32.5580349', '-84.6513774'], ['Farm & Garden Inc.', '646 Clarksville Street', 'Cornelia', 'GA', '30531', '34.5114883', '-83.5271166'], ['Farmer Seed Company', '800 W Broad St', 'Doerun', 'GA', '31744', '31.3200669', '-83.9234872'], ['Farmers Feed', '204 N West St', 'Greensboro', 'GA', '30642', '33.5781281', '-83.1845358'], ['Feed South', '2623 Knight Avenue', 'Waycross', 'GA', '31503', '31.2028754', '-82.316785'], ['Forsyth Feed & Seed', '45 W Jefferson Street', 'Forsyth', 'GA', '31029', '33.035097', '-83.940067'], ['Georgia Deer Farm', '850 Hwy 27 N', 'Roopville', 'GA', '30170', '33.476202', '-85.1082285'], ['H&M Trailers and Feed', '6446 JFH Pkwy', 'Adairsville', 'GA', '30103', '34.3924623', '-84.9333769'], ['Hill Farm Supply', '12700 Augusta Hwy', 'Sparta', 'GA', '31087', '33.2791285', '-82.9646478'], ['Ijon Webb', '1130 Stillwell Rd', 'Springfield', 'GA', '31329', '32.369773', '-81.266672'], ['Jesup Milling', '601 SW Broad Street', 'Jesup', 'GA', '31545', '31.5990992', '-81.8905051'], ['Jump N Run Farm', '1569 Liberty Church Grove Rd', 'Wrightsville', 'GA', '31096', '32.6481899', '-82.6139868'], ['L & C Farm and Garden', '1143 East Fairplay Road', 'Fairplay', 'SC', '29643', '34.5101355', '-82.9602795'], ['Maddox Feed', '1915 Winder Hwy', 'Jefferson', 'GA', '30549', '34.1001367', '-83.5969643'], ['Miller Farm Supply', '2001 Bob Culvern Rd', 'Louisville', 'GA', '30434', '32.9859964', '-82.3913739'], ['North Fulton Feed', '12950 Hwy 9 N', 'Alpharetta', 'GA', '30004', '34.096767', '-84.2735144'], ['North Georgia Co-Op', '951 Progress Rd', 'Ellijay', 'GA', '30540', '34.6739981', '-84.4902665'], ['Oglethorpe Feed and Farm Supply', '900 Athens Road', 'Crawford', 'GA', '30648', '33.8898662', '-83.1358665'], ['Owens Farm Supply', '6414 Mize Road', 'Toccoa', 'GA', '30577', '34.4855944', '-83.3394454'], ['Patricks', '10285 Covington Bypass', 'Covington', 'GA', '30014', '33.5770654', '-83.8354943'], ['Perry Feed and Tack', '309 Kellwood Drive', 'Perry', 'GA', 
'31069', '32.4443895', '-83.7439432'], ['Pine Ridge Outdoor Supply', '4999 HWY 114', 'Lyerly', 'GA', '30730', '34.4166444', '-85.3925577'], ['Reeves Hardware', '95 BO James St', 'Clayton', 'GA', '30525', '34.8686254', '-83.4026817'], ['Roberts Milling Company', '116 West Albany Ave', 'Pearson', 'GA', '31642', '31.2987063', '-82.8577173'], ['Roche Farm and Garden', '803 E Jackson St', 'Dublin', 'GA', '31040', '32.5444125', '-82.8945945'], ['Roche Farm and Garden', '781 East Court Street', 'Wrightsville', 'GA', '31040', '32.7302168', '-82.7117232'], ['Rodgers Fertilizer', '409 N Main St', 'Saluda', 'SC', '29138', '34.0082425', '-81.7729772'], ['Rogers Feed', '1041 Easley Hwy', 'Pelzer', 'SC', '29669', '34.6639864', '-82.5126743'], ['Ronnie Spivey', '654 Mary Richardson Road', 'Wray', 'GA', '31796', '31.525261', '-83.06603'], ['Shirley Feed & Seed Inc', '2439 North Elm Street', 'Commerce', 'GA', '30529', '34.2068698', '-83.4689814'], ['Southern Home and Farm LLC', '3127 Hamilton Road', 'Lagrange', 'GA', '30241', '32.9765932', '-84.98978'], ['Southland Power Fence', '752 E 5th Ave', 'Colbert', 'GA', '30628', '34.0412765', '-83.2001394'], ['Town & Country General Store', '59 Hwy 212 West', 'Monticello', 'GA', '31064', '33.3066615', '-83.6976187'], ['Twisted Fitterz', '10329 Nashville Enigma Rd', 'Alapaha', 'GA', '31622', '31.3441482', '-83.3002373'], ['Westside Feed II', '230 SE 7th Avenue', 'Lake Butler', 'FL', '32054', '30.02116', '-82.329495'], ['White Co. Farmers Exchange', '951 S Main St', 'Cleveland', 'GA', '30528', '34.58403', '-83.760829']]
42 |
--------------------------------------------------------------------------------
/Chapter09/regex.py:
--------------------------------------------------------------------------------
1 | """
2 | Python Regular Expressions: re
3 | https://regexone.com/references/python
4 | http://www.regular-expressions.info/python.html
5 | https://developers.google.com/edu/python/regular-expressions
6 | # Anchors: ^ beginning of line, $ end of line
7 | # re.search(pattern,str,re.I|re.MULTILINE|re.M)
8 | """
9 | import re
10 |
# Sample paragraph that every findall/match/search/split example below runs on.
sentence = """The course assumes a working knowledge of key data science topics
(statistics, machine learning, and general data analytic methods).
Programming experience in some language (such as R, MATLAB, SAS, Mathematica, Java, C, C++, VB, or FORTRAN)
is expected. In particular, participants need to be comfortable with general programming concepts like
variables, loops, and functions. Experience with Python is helpful (but not required)."""
#source: https://www.enthought.com/training/course/python-for-data-science/#/syllabus
splitSentence = sentence.split()

print("Length of Sentence: ", len(sentence), '& splitSentence: ', len(splitSentence))
print(splitSentence)

#Findall
# Upper-case letters and '+' directly followed by a comma ('C++' is included).
matches = re.findall(r"([A-Z+]+)\,", sentence)
print("Findall found total ", len(matches), " Matches >> ", matches)
#Findall found total 6 Matches >> ['R', 'MATLAB', 'SAS', 'C', 'C++', 'VB']

# Same search without '+' in the character class, so 'C++' no longer matches.
matches = re.findall(r"([A-Z]+)\,", sentence)
print("Findall found total ", len(matches), " Matches >> ", matches)
#Findall found total 5 Matches >> ['R', 'MATLAB', 'SAS', 'C', 'VB']

# Whitespace, 'o', 'r', upper-case letters and '+' directly followed by ')'.
matches = re.findall(r"\s*([\sorA-Z+]+)\)", sentence)  #r'\s*([A-Z]+)\)' matches 'FORTRAN'
print("Findall found total ", len(matches), " Matches >> ", matches)
#Findall found total 1 Matches >> ['or FORTRAN']


#re.match
fortran = matches[0]  # 'or FORTRAN'
# re.match only tests at the start of the string; strip the leading 'or ' if present.
if re.match(r'or', fortran):
    fortran = re.sub(r'or\s*', '', fortran)
print(fortran)
#FORTRAN

#re.search
if re.search(r'^F.*N$', fortran):
    print("True")
#True

# The labels below are raw strings: "\," and "\s" are invalid escape sequences
# in a normal string literal and trigger a warning on current Python versions.
# The printed text is unchanged.
matches = re.findall(r'\s(MAT.*?)\,', sentence, flags=re.IGNORECASE)
print(r"(MAT.*?)\,: ", matches)  #r'(?i)\s(MAT.*?)\,' can also be used
#(MAT.*?)\,: ['MATLAB', 'Mathematica']

matches = re.findall(r'\s(MAT.*?)\,', sentence)
print(r"(MAT.*?)\,: ", matches)
#(MAT.*?)\,: ['MATLAB']

matches = re.findall(r'\s(C.*?)\,', sentence)
print(r"\s(C.*?)\,: ", matches)
#\s(C.*?)\,: ['C', 'C++']


#re.split
matchesOne = re.split(r"\W+", sentence)  #\w (word characters, \W - nonword)
print(r"Regular Split '\W+' found total: ", len(matchesOne), "\n", matchesOne)
#Regular Split '\W+' found total: 63
#['The', 'course', 'assumes', 'a', 'working', 'knowledge', 'of', 'key', 'data', 'science', 'topics', 'statistics', ......, 'such', 'as', 'R', 'MATLAB', 'SAS', 'Mathematica', 'Java', 'C', 'C', 'VB', 'or', 'FORTRAN', 'is', 'expected', .........., 'and', 'functions', 'Experience', 'with', 'Python', 'is', 'helpful', 'but', 'not', 'required', '']

matchesTwo = re.split(r"\s", sentence)
print(r"Regular Split '\s' found total: ", len(matchesTwo), "\n", matchesTwo)
#Regular Split '\s' found total: 63 :
#['The', 'course', 'assumes', 'a', 'working', 'knowledge', 'of', 'key', 'data', 'science', 'topics', '(statistics,', ........., '(such', 'as', 'R,', 'MATLAB,', 'SAS,', 'Mathematica,', 'Java,', 'C,', 'C++,', 'VB,', 'or', 'FORTRAN)', 'is', ......., 'and', 'functions.', 'Experience', 'with', 'Python', 'is', 'helpful', '(but', 'not', 'required).']
71 |
72 |
# NOTE(review): the original literal held <time datetime="..."> tags that were
# lost when this listing was extracted, leaving an empty string on which every
# search below fails. The tags are rebuilt from the documented output further
# down: the five dates, their offsets, and the final time value 12:53:00+00:00
# are taken from those comments. The time-of-day on the first four lines is an
# assumption -- confirm against the original source.
timeDate= '''<time datetime="2019-02-11T18:00:00+00:00"></time>
<time datetime="2018-02-11T13:59:00+00:00"></time>
<time datetime="2019-02-06T13:44:00.000002+00:00"></time>
<time datetime="2019-02-05T17:39:00.000001+00:00"></time>
<time datetime="2019-02-04T12:53:00+00:00"></time>
'''

# Five positional groups: year, separator, month, separator, day.
pattern = r'(20\d+)([-]+)(0[1-9]|1[012])([-]+)(0[1-9]|[12][0-9]|3[01])'
recompiled = re.compile(pattern)
# search() returns only the first match in the text.
dateMatches = recompiled.search(timeDate)


print("Group : ", dateMatches.group())
#Group : 2019-02-11

print("Groups : ", dateMatches.groups())
#Groups : ('2019', '-', '02', '-', '11')

print("Group 1 : ", dateMatches.group(1))
#Group 1 : 2019

print("Group 5 : ", dateMatches.group(5))
#Group 5 : 11


# finditer() yields one match object per occurrence; the compiled pattern
# could be used instead: recompiled.finditer(timeDate)
for match in re.finditer(pattern, timeDate):
    s = match.start()
    e = match.end()
    l = match.lastindex
    g = match.groups()
    print('Found {} at {}:{}, groups{} lastindex:{}'.format(timeDate[s:e], s, e, g, l))


# Found 2019-02-11 at 16:26, groups('2019', '-', '02', '-', '11') lastindex:5
# Found 2018-02-11 at 67:77, groups('2018', '-', '02', '-', '11') lastindex:5
# Found 2019-02-06 at 118:128, groups('2019', '-', '02', '-', '06') lastindex:5
# Found 2019-02-05 at 176:186, groups('2019', '-', '02', '-', '05') lastindex:5
# Found 2019-02-04 at 234:244, groups('2019', '-', '02', '-', '04') lastindex:5


# Named groups (?P<name>...). The names year, sep and day are the ones this
# script reads back; month is taken from the documented "Group Month" output.
pDate = r'(?P<year>[0-9]{4})(?P<sep>[-])(?P<month>0[1-9]|1[012])-(?P<day>0[1-9]|[12][0-9]|3[01])'
recompiled = re.compile(pDate)
for match in recompiled.finditer(timeDate):
    s = match.start()
    e = match.end()
    l = match.lastindex
    # groups() takes a default for unmatched groups, not a group index, so it
    # is called without an argument here.
    print("Group ALL or 0: ", match.groups())
    print("Group Year: ", match.group('year'))
    print("Group Month: ", match.group('month'))
    print("Group Day: ", match.group('day'))
    print("Group Delimiter: ", match.group('sep'))
    print('Found {} at {}:{}, lastindex: {}'.format(timeDate[s:e], s, e, l))
    print('year :', match.groupdict()['year'])
    print('day :', match.groupdict()['day'])
    print('lastgroup :', match.lastgroup)


# Output for the first match:
# Group ALL or 0: ('2019', '-', '02', '11')
# Group Year: 2019
# Group Month: 02
# Group Day: 11
# Group Delimiter: -
# Found 2019-02-11 at 16:26, lastindex: 4
# year : 2019
# day : 11
# lastgroup : day


# hour, min and sec_mil are the names read back below; the name of the second
# group is never used by this script, so 'sep' is an assumption.
pTime = r'(?P<hour>[0-9]{2})(?P<sep>[:])(?P<min>[0-9]{2}):(?P<sec_mil>[0-9.:+]+)'
recompiled = re.compile(pTime)
for match in recompiled.finditer(timeDate):
    print("Group String: ", match.group())
    print("Group ALL or 0: ", match.groups())
    print("Group Span: ", match.span())
    print("Group Span 1: ", match.span(1))
    print("Group Span 4: ", match.span(4))
    print('hour :', match.groupdict()['hour'])
    print('minute :', match.groupdict()['min'])
    print('second :', match.groupdict()['sec_mil'])
    print('lastgroup :', match.lastgroup)


# Output for the last match:
# Group String: 12:53:00+00:00
# Group ALL or 0: ('12', ':', '53', '00+00:00')
# Group Span: (245, 259)
# Group Span 1: (245, 247)
# Group Span 4: (251, 259)
# hour : 12
# minute : 53
# second : 00+00:00
# lastgroup : sec_mil
163 |
--------------------------------------------------------------------------------
/Chapter05/bs4_exploring.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup,SoupStrainer
2 | import re
3 | html_doc = """
4 | The Dormouse's story
5 |
6 |
The Dormouse's story
7 |
Once upon a time there were three little sisters; and their names were
8 | Elsie,
9 | Lacie and
10 | Tillie;
11 | and they lived at the bottom of a well.
12 |
...
13 |
Secret agents
14 |
15 |
Jason Walters, 003: Found dead in "A View to a Kill".
16 |
Alex Trevelyan, 006: Agent turned terrorist leader; James' nemesis in "Goldeneye".
17 |
James Bond, 007: The main man; shaken but not stirred.
# Python 3: Fibonacci series up to n
369 | >>> def fib(n):
370 | >>> a, b = 0, 1
371 | >>> while a < n:
372 | >>> print(a, end=' ')
373 | >>> a, b = b, a+b
374 | >>> print()
375 | >>> fib(1000)
376 | 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987
377 |
378 |
Functions Defined
379 |
The core of extensible programming is defining functions. Python allows mandatory and
380 | optional arguments, keyword arguments, and even arbitrary argument lists. More
382 | about defining functions in Python 3
383 |
384 |
385 |
# Python 3: List comprehensions
387 | >>> fruits = ['Banana', 'Apple', 'Lime']
388 | >>> loud_fruits = [fruit.upper() for fruit in fruits]
389 | >>> print(loud_fruits)
390 | ['BANANA', 'APPLE', 'LIME']
391 | # List and the enumerate function
392 | >>> list(enumerate(fruits))
393 | [(0, 'Banana'), (1, 'Apple'), (2, 'Lime')]
394 |
395 |
Compound Data Types
396 |
Lists (known as arrays in other languages) are one of the compound data types that
397 | Python understands. Lists can be indexed, sliced and manipulated with other built-in
398 | functions. More about
399 | lists in Python 3
Calculations are simple with Python, and expression syntax is straightforward: the
415 | operators +, -, * and / work as
416 | expected; parentheses () can be used for grouping. More
418 | about simple math functions in Python 3.
419 |
420 |
421 |
# Python 3: Simple output (with Unicode)
422 | >>> print("Hello, I'm Python!")
423 | Hello, I'm Python!
424 | # Input, assignment
425 | >>> name = input('What is your name?\n')
426 | >>> print('Hi, %s.' % name)
427 | What is your name?
428 | Python
429 | Hi, Python.
430 |
431 |
Quick & Easy to Learn
432 |
Experienced programmers in any other language can pick up Python very quickly, and
433 | beginners find the clean syntax and indentation structure easy to learn. Whet your appetite with our Python 3
435 | overview.
436 |
437 |
438 |
439 |
# For loop on a list
440 | >>> numbers = [2, 4, 6, 8]
441 | >>> product = 1
442 | >>> for number in numbers:
443 | ... product = product * number
444 | ...
445 | >>> print('The product is:', product)
446 | The product is: 384
447 |
448 |
All the Flow You’d Expect
449 |
Python knows the usual control flow statements that other languages speak —
450 | if, for, while and range —
451 | with some of its own twists, of course. More control flow tools
453 | in Python 3
454 |
455 |
456 |
457 |
458 |
459 |
Python is a programming language that lets you work quickly and
460 | integrate systems more effectively. Learn More
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
Get Started
471 |
472 |
Whether you're new to programming or an experienced developer, it's easy to learn and use
473 | Python.
The mission of the Python Software Foundation is to promote, protect, and advance the Python
640 | programming language, and to support and facilitate the growth of a diverse and international
641 | community of Python programmers. Learn more