├── 2016 ├── 2016-09-10 │ ├── Pandas - Python Data Analysis.ipynb │ ├── Salaries.csv │ ├── movie_metadata.csv │ └── stock_data.csv ├── 2016-10-22 │ ├── Advanced Python Hands-on.pptx │ └── Example │ │ ├── __init__.py │ │ ├── class_example.py │ │ ├── decorator_example.py │ │ ├── function_example.py │ │ ├── higher_function_example.py │ │ ├── inheritance_example.py │ │ ├── lambda_expression.py │ │ ├── logger_example.py │ │ ├── magic_method.py │ │ └── package_example.py └── Python 101 Workshop- NMIMS │ ├── NMIMS_Workshop.ipynb │ ├── cs228-python-tutorial.ipynb │ ├── stock_data.csv │ └── store_sales.txt ├── 2017 ├── 2017-04-08 │ ├── Python 101 Session.pdf │ ├── Python 101.ipynb │ ├── Python Fundamentals.pdf │ └── group_1.fasta ├── 2017-07-08 │ └── Machine Learning with TensorFlow & Keras.pdf └── 2017-08-12 │ ├── AI Consciousness.pdf │ └── Data pre-processing in python with scikit.odp ├── 2018 └── 2018-03-17 │ ├── Introduction to data analysis in Python │ ├── UCI Liver patient data.ipynb │ ├── indian-liver-patient-records.zip │ ├── indian_liver_patient.csv │ ├── indian_liver_patient_original.csv │ └── plots.py │ └── Practical Python Design Patterns │ └── PythonDesignPatterns.ipynb ├── 2019 └── 2019-05-18 │ ├── Link Prediction on Hike's Network.pptx │ └── code │ └── code │ ├── prepare_data.py │ ├── prepare_data.sh │ ├── readme.txt │ └── train.py ├── 2020 └── 2020-02-29 │ ├── Linux Essentials.pptx │ ├── Memory Management in Python.pptx │ ├── MetaProgramming In Python.ipynb │ ├── MetaProgramming in Python.pptx │ ├── Python.and.Netflix_Meetup.pptx │ └── Writing your own container in Python.pptx ├── 2023 ├── 13- May-2023 │ ├── Datafication of Indian judicial texts using Natural Language Processing (NLP).pptx │ └── Meetup_details.txt └── readme.txt ├── 2024 ├── 17-Feb-2024 │ ├── Getting started with asyncio.pptx │ ├── HYD MEETUP.pdf │ └── readme.txt ├── April Meetup │ ├── Sharding Using Postgres FDW.pdf │ └── readme.md ├── March Meetup │ └── readme.md ├── May Meetup │ ├── The 
Guide to building Indic LLMs.pdf │ └── readme.md ├── October Meetup │ ├── Ensuring Data Quality in Web Scraping with Data Contracts │ │ ├── Data_Quality_With_Contracts.pptx │ │ ├── contracts_test.py │ │ └── without_data_contracts.py │ ├── Intro to GenAI Architecture Modelling.pdf │ ├── PandasOptimization │ │ ├── Pandas Optimization - Advanced Techniques.ipynb │ │ ├── Pandas Optimization - Best Practices.ipynb │ │ ├── Pandas Optimization - Use Case.ipynb │ │ ├── Pandas-Optimization.pptx │ │ ├── README.md │ │ └── memory.py │ ├── SplitFXM - Oct '24.pdf │ └── readme.md └── readme.md ├── .github └── ISSUE_TEMPLATE │ └── propose-talk-workshop.md ├── .gitignore ├── LICENSE └── README.md /.github/ISSUE_TEMPLATE/propose-talk-workshop.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Propose a talk/workshop 3 | about: Propose a talk or workshop for upcoming meetup/event 4 | title: '' 5 | labels: 'talk-proposal' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 25 | 26 | **Title of the talk/workshop** 27 | 28 | 29 | **Abstract of the talk/workshop** 30 | 31 | 32 | **Category of the talk/workshop** 33 | 34 | 35 | **Duration (including Q&A)** 36 | 37 | 38 | **Level of Audience** 39 | 40 | 41 | **Speaker Bio** 42 | 48 | 49 | **Prerequisites(if any)** 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /2016/2016-10-22/Advanced Python Hands-on.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2016/2016-10-22/Advanced Python Hands-on.pptx -------------------------------------------------------------------------------- 
/2016/2016-10-22/Example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2016/2016-10-22/Example/__init__.py -------------------------------------------------------------------------------- /2016/2016-10-22/Example/class_example.py: -------------------------------------------------------------------------------- 1 | class Ops(object): 2 | ''' this sample class''' 3 | VAR = 10 4 | def __init__(self, a, b): 5 | self.a = a 6 | self.b = b 7 | 8 | def add(self): 9 | print self.x 10 | return self.a+self.b 11 | 12 | 13 | def sub(): 14 | print 'ff' 15 | ## self.x = 10 16 | ## return self.x-self.b 17 | 18 | @classmethod 19 | def clsMethod(cls): 20 | print cls.VAR 21 | print 'Class Method' 22 | 23 | @staticmethod 24 | def stcMethod(): 25 | print 'static method' 26 | 27 | 28 | if __name__ == '__main__': 29 | ## ops = Ops(1,2) 30 | ## print ops.sub() 31 | ## print ops.add() 32 | ## 33 | ## setattr(ops, 'y', 80) 34 | ## print 'after adding y ',ops.y 35 | ## print dir(ops) 36 | ## print ops.__doc__ 37 | 38 | ## print Ops.VAR 39 | ## print Ops.clsMethod() 40 | print Ops.stcMethod() 41 | print Ops.sub() 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/decorator_example.py: -------------------------------------------------------------------------------- 1 | 2 | def tag(x): 3 | def trace(f): 4 | print 'x',x 5 | def inner(*args): 6 | print 'func f called {0} args'.format(args) 7 | for arg in args: 8 | if not isinstance(arg, int): 9 | raise TypeError('All the values should be Integer type') 10 | result = f(*args) 11 | print 'result',result 12 | return result 13 | return inner 14 | return trace 15 | 16 | 17 | def memotize(f): 18 | cache = {} 19 | def wrapper(*args): 20 | if args in cache: 21 | print 'in 
cache' 22 | return cache[args] 23 | else: 24 | print 'not in cache' 25 | result = f(*args) 26 | cache[args] = result 27 | return result 28 | return wrapper 29 | 30 | 31 | #@memotize 32 | @tag(1) 33 | def add(a,b): 34 | return a+b 35 | 36 | ##@trace(1) 37 | ##def mult(a,b): 38 | ## return a*b 39 | 40 | 41 | 42 | 43 | if __name__ == '__main__': 44 | print add(3,4) 45 | #print mult('k',4) 46 | #print add(7,4) 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/function_example.py: -------------------------------------------------------------------------------- 1 | def multi(a,b): 2 | return a*b 3 | 4 | def testFunc(p, a, b): 5 | return p(a,b) 6 | 7 | 8 | def square(x, y): 9 | return (x*x+y*y) 10 | 11 | def odd(x): 12 | return x%2 != 0 13 | if __name__ == '__main__': 14 | ## p = multi 15 | ## print testFunc(p, 1,3) 16 | ## l = [multi] 17 | ## print l[0](3,4) 18 | #print p(2,3) 19 | ## print map(square, range(10), range(10,20)) 20 | print filter(odd, range(10)) 21 | print map(odd, range(10)) 22 | print reduce(multi,range(1,10)) 23 | print sum(range(1,10)) 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/higher_function_example.py: -------------------------------------------------------------------------------- 1 | 2 | class linear(object): 3 | def __init__(self, a, b): 4 | self.a, self.b = a,b 5 | def __call__(self, x): 6 | return self.a * x + self.b 7 | 8 | 9 | if __name__ == '__main__': 10 | l = linear(2,3) 11 | print l(2) -------------------------------------------------------------------------------- /2016/2016-10-22/Example/inheritance_example.py: -------------------------------------------------------------------------------- 1 | 2 | class Base(object): 3 | def __init__(self, a): 4 | self.a = a 5 | 
print 'Base' 6 | 7 | def getMethod_(self): 8 | self.a = 10 9 | print 'Base derived getMethod' 10 | 11 | def getValue(self): 12 | return self.a+10 13 | 14 | 15 | class Base1(object): 16 | def __init__(self, c): 17 | self.c = c 18 | print 'Base 1' 19 | 20 | def getMethod(self): 21 | return 'Base1 derived getMethod' 22 | 23 | class Derived(Base, Base1): 24 | def __init__(self, a, b, c): 25 | #super(Derived, self).__init__(a) 26 | Base.__init__(self, a) 27 | Base1.__init__(self, c) 28 | #superDerived, self).__init__(c) 29 | self.b = b 30 | print 'init Derived' 31 | 32 | 33 | def getMethod_(self): 34 | return 'Derived' 35 | 36 | if __name__ == '__main__': 37 | d = Derived(1, 2, 3) 38 | print d.getMethod() 39 | #print d.getValue() 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/lambda_expression.py: -------------------------------------------------------------------------------- 1 | 2 | # lambda args : expression 3 | 4 | def last(x): 5 | return x[-1] 6 | print sorted( [('a',3),('c',1),('b',2)], key=last) 7 | 8 | print 'with lambda',sorted( [('a',3),('c',1),('b',2)], 9 | key=lambda x:x[-1]) 10 | 11 | p = lambda x,y : x**y 12 | print p(2,3) 13 | 14 | p = lambda x: x*x if x > 5 else (x**3 if x<3 else x) 15 | 16 | print p(6) 17 | print p(2) 18 | print p(5) 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/logger_example.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | import time 4 | import os 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | def timeSized(): 9 | filePath = os.path.join(r'c:\temp\log','time.log') 10 | logHandler = logging.handlers.TimedRotatingFileHandler(filePath, 11 | when='s', 12 | interval=10, 13 | backupCount = 
10) 14 | format = logging.Formatter('%(asctime)s, %(levelname)s, %(message)s', '%Y-%m-%d %H:%M:%S') 15 | 16 | logger.setLevel(logging.INFO) 17 | logHandler.setFormatter(format) 18 | logHandler.suffix = '%Y%m%d%H%M%S' 19 | logger.addHandler(logHandler) 20 | 21 | while True: 22 | logger.info('in Console') 23 | time.sleep(1) 24 | 25 | def Sizedbased(): 26 | filePath = os.path.join(r'c:\temp\log','size.log') 27 | logHandler = logging.handlers.RotatingFileHandler(filePath, 28 | maxBytes=20, 29 | backupCount = 10) 30 | format = logging.Formatter('%(name)s, %(asctime)s, %(levelname)s, %(message)s', '%Y-%m-%d %H:%M:%S') 31 | 32 | logger.setLevel(logging.INFO) 33 | logHandler.setFormatter(format) 34 | logger.addHandler(logHandler) 35 | 36 | while True: 37 | logger.info('in Console') 38 | time.sleep(1) 39 | 40 | 41 | 42 | def console(): 43 | logging.basicConfig(level=logging.INFO,format='%(name)s, %(asctime)s, %(levelname)s, %(message)s') 44 | while True: 45 | logger.info('in Console') 46 | time.sleep(1) 47 | 48 | if __name__ == '__main__': 49 | console() -------------------------------------------------------------------------------- /2016/2016-10-22/Example/magic_method.py: -------------------------------------------------------------------------------- 1 | class MagicMethod(object): 2 | def __init__(self, a): 3 | self.a = a 4 | #self.b = b 5 | 6 | def __len__(self): 7 | return len(self.a) 8 | 9 | ## def __eq__(self, other): 10 | ## return self.__dict__ == other.__dict__ 11 | ## 12 | ## def __cmp__(self, other): 13 | ## return self.a > other.a 14 | 15 | if __name__ == '__main__': 16 | m1 = MagicMethod([3, 2]) 17 | print len(m1) 18 | ## m2 = MagicMethod(1, 2) 19 | ## print m1 == m2 20 | ## print m1 > m2 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /2016/2016-10-22/Example/package_example.py: -------------------------------------------------------------------------------- 1 | 2 | from 
Example.higher_function_example import linear 3 | 4 | 5 | if __name__ == '__main__': 6 | print linear -------------------------------------------------------------------------------- /2017/2017-04-08/Python 101 Session.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2017/2017-04-08/Python 101 Session.pdf -------------------------------------------------------------------------------- /2017/2017-04-08/Python Fundamentals.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2017/2017-04-08/Python Fundamentals.pdf -------------------------------------------------------------------------------- /2017/2017-07-08/Machine Learning with TensorFlow & Keras.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2017/2017-07-08/Machine Learning with TensorFlow & Keras.pdf -------------------------------------------------------------------------------- /2017/2017-08-12/AI Consciousness.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2017/2017-08-12/AI Consciousness.pdf -------------------------------------------------------------------------------- /2017/2017-08-12/Data pre-processing in python with scikit.odp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2017/2017-08-12/Data pre-processing in python with scikit.odp -------------------------------------------------------------------------------- /2018/2018-03-17/Introduction to data 
analysis in Python/indian-liver-patient-records.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2018/2018-03-17/Introduction to data analysis in Python/indian-liver-patient-records.zip -------------------------------------------------------------------------------- /2018/2018-03-17/Introduction to data analysis in Python/indian_liver_patient.csv: -------------------------------------------------------------------------------- 1 | Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Disease 2 | 65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1 3 | 62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1 4 | 62,Male,7.3,4.1,490,60,68,7,3.3,0.89,1 5 | 58,Male,1,0.4,182,14,20,6.8,3.4,1,1 6 | 72,Male,3.9,2,195,27,59,7.3,2.4,0.4,1 7 | 46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1 8 | 26,Female,0.9,0.2,154,16,12,7,3.5,1,1 9 | 29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1 10 | 17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2 11 | 55,Male,0.7,0.2,290,53,58,6.8,3.4,1,1 12 | 57,Male,0.6,0.1,210,51,59,5.9,2.7,0.8,1 13 | 72,Male,2.7,1.3,260,31,56,7.4,3,0.6,1 14 | 64,Male,0.9,0.3,310,61,58,7,3.4,0.9,2 15 | 74,Female,1.1,0.4,214,22,30,8.1,4.1,1,1 16 | 61,Male,0.7,0.2,145,53,41,5.8,2.7,0.87,1 17 | 25,Male,0.6,0.1,183,91,53,5.5,2.3,0.7,2 18 | 38,Male,1.8,0.8,342,168,441,7.6,4.4,1.3,1 19 | 33,Male,1.6,0.5,165,15,23,7.3,3.5,0.92,2 20 | 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1 21 | 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1 22 | 51,Male,2.2,1,610,17,28,7.3,2.6,0.55,1 23 | 51,Male,2.9,1.3,482,22,34,7,2.4,0.5,1 24 | 62,Male,6.8,3,542,116,66,6.4,3.1,0.9,1 25 | 40,Male,1.9,1,231,16,55,4.3,1.6,0.6,1 26 | 63,Male,0.9,0.2,194,52,45,6,3.9,1.85,2 27 | 34,Male,4.1,2,289,875,731,5,2.7,1.1,1 28 | 34,Male,4.1,2,289,875,731,5,2.7,1.1,1 29 | 34,Male,6.2,3,240,1680,850,7.2,4,1.2,1 30 | 
20,Male,1.1,0.5,128,20,30,3.9,1.9,0.95,2 31 | 84,Female,0.7,0.2,188,13,21,6,3.2,1.1,2 32 | 57,Male,4,1.9,190,45,111,5.2,1.5,0.4,1 33 | 52,Male,0.9,0.2,156,35,44,4.9,2.9,1.4,1 34 | 57,Male,1,0.3,187,19,23,5.2,2.9,1.2,2 35 | 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2 36 | 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2 37 | 30,Male,1.3,0.4,482,102,80,6.9,3.3,0.9,1 38 | 17,Female,0.7,0.2,145,18,36,7.2,3.9,1.18,2 39 | 46,Female,14.2,7.8,374,38,77,4.3,2,0.8,1 40 | 48,Male,1.4,0.6,263,38,66,5.8,2.2,0.61,1 41 | 47,Male,2.7,1.3,275,123,73,6.2,3.3,1.1,1 42 | 45,Male,2.4,1.1,168,33,50,5.1,2.6,1,1 43 | 62,Male,0.6,0.1,160,42,110,4.9,2.6,1.1,2 44 | 42,Male,6.8,3.2,630,25,47,6.1,2.3,0.6,2 45 | 50,Male,2.6,1.2,415,407,576,6.4,3.2,1,1 46 | 85,Female,1,0.3,208,17,15,7,3.6,1,2 47 | 35,Male,1.8,0.6,275,48,178,6.5,3.2,0.9,2 48 | 21,Male,3.9,1.8,150,36,27,6.8,3.9,1.34,1 49 | 40,Male,1.1,0.3,230,1630,960,4.9,2.8,1.3,1 50 | 32,Female,0.6,0.1,176,39,28,6,3,1,1 51 | 55,Male,18.4,8.8,206,64,178,6.2,1.8,0.4,1 52 | 45,Female,0.7,0.2,170,21,14,5.7,2.5,0.7,1 53 | 34,Female,0.6,0.1,161,15,19,6.6,3.4,1,1 54 | 38,Male,3.1,1.6,253,80,406,6.8,3.9,1.3,1 55 | 38,Male,1.1,0.3,198,86,150,6.3,3.5,1.2,1 56 | 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1 57 | 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1 58 | 33,Male,0.8,0.2,198,26,23,8,4,1,2 59 | 48,Female,0.9,0.2,175,24,54,5.5,2.7,0.9,2 60 | 51,Male,0.8,0.2,367,42,18,5.2,2,0.6,1 61 | 64,Male,1.1,0.5,145,20,24,5.5,3.2,1.39,2 62 | 31,Female,0.8,0.2,158,21,16,6,3,1,1 63 | 58,Male,1,0.5,158,37,43,7.2,3.6,1,1 64 | 58,Male,1,0.5,158,37,43,7.2,3.6,1,1 65 | 57,Male,0.7,0.2,208,35,97,5.1,2.1,0.7,1 66 | 57,Male,1.3,0.4,259,40,86,6.5,2.5,0.6,1 67 | 57,Male,1.4,0.7,470,62,88,5.6,2.5,0.8,1 68 | 54,Male,2.2,1.2,195,55,95,6,3.7,1.6,1 69 | 37,Male,1.8,0.8,215,53,58,6.4,3.8,1.4,1 70 | 66,Male,0.7,0.2,239,27,26,6.3,3.7,1.4,1 71 | 60,Male,0.8,0.2,215,24,17,6.3,3,0.9,2 72 | 19,Female,0.7,0.2,186,166,397,5.5,3,1.2,1 73 | 75,Female,0.8,0.2,188,20,29,4.4,1.8,0.6,1 74 | 
75,Female,0.8,0.2,205,27,24,4.4,2,0.8,1 75 | 52,Male,0.6,0.1,171,22,16,6.6,3.6,1.2,1 76 | 68,Male,0.7,0.1,145,20,22,5.8,2.9,1,1 77 | 29,Female,0.7,0.1,162,52,41,5.2,2.5,0.9,2 78 | 31,Male,0.9,0.2,518,189,17,5.3,2.3,0.7,1 79 | 68,Female,0.6,0.1,1620,95,127,4.6,2.1,0.8,1 80 | 70,Male,1.4,0.6,146,12,24,6.2,3.8,1.58,2 81 | 58,Female,2.8,1.3,670,48,79,4.7,1.6,0.5,1 82 | 58,Female,2.4,1.1,915,60,142,4.7,1.8,0.6,1 83 | 29,Male,1,0.3,75,25,26,5.1,2.9,1.3,1 84 | 49,Male,0.7,0.1,148,14,12,5.4,2.8,1,2 85 | 33,Male,2,1,258,194,152,5.4,3,1.25,1 86 | 32,Male,0.6,0.1,237,45,31,7.5,4.3,1.34,1 87 | 14,Male,1.4,0.5,269,58,45,6.7,3.9,1.4,1 88 | 13,Male,0.6,0.1,320,28,56,7.2,3.6,1,2 89 | 58,Male,0.8,0.2,298,33,59,6.2,3.1,1,1 90 | 18,Male,0.6,0.2,538,33,34,7.5,3.2,0.7,1 91 | 60,Male,4,1.9,238,119,350,7.1,3.3,0.8,1 92 | 60,Male,5.7,2.8,214,412,850,7.3,3.2,0.78,1 93 | 60,Male,6.8,3.2,308,404,794,6.8,3,0.7,1 94 | 60,Male,8.6,4,298,412,850,7.4,3,0.6,1 95 | 60,Male,5.8,2.7,204,220,400,7,3,0.7,1 96 | 60,Male,5.2,2.4,168,126,202,6.8,2.9,0.7,1 97 | 75,Male,0.9,0.2,282,25,23,4.4,2.2,1,1 98 | 39,Male,3.8,1.5,298,102,630,7.1,3.3,0.8,1 99 | 39,Male,6.6,3,215,190,950,4,1.7,0.7,1 100 | 18,Male,0.6,0.1,265,97,161,5.9,3.1,1.1,1 101 | 18,Male,0.7,0.1,312,308,405,6.9,3.7,1.1,1 102 | 27,Male,0.6,0.2,161,27,28,3.7,1.6,0.76,2 103 | 27,Male,0.7,0.2,243,21,23,5.3,2.3,0.7,2 104 | 17,Male,0.9,0.2,224,36,45,6.9,4.2,1.55,1 105 | 55,Female,0.8,0.2,225,14,23,6.1,3.3,1.2,2 106 | 63,Male,0.5,0.1,170,21,28,5.5,2.5,0.8,1 107 | 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2 108 | 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2 109 | 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2 110 | 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2 111 | 36,Male,0.9,0.1,486,25,34,5.9,2.8,0.9,2 112 | 24,Female,0.7,0.2,188,11,10,5.5,2.3,0.71,2 113 | 48,Male,3.2,1.6,257,33,116,5.7,2.2,0.62,1 114 | 27,Male,1.2,0.4,179,63,39,6.1,3.3,1.1,2 115 | 74,Male,0.6,0.1,272,24,98,5,2,0.6,1 116 | 50,Male,5.8,3,661,181,285,5.7,2.3,0.67,2 117 | 50,Male,7.3,3.6,1580,88,64,5.6,2.3,0.6,2 118 | 
48,Male,0.7,0.1,1630,74,149,5.3,2,0.6,1 119 | 32,Male,12.7,6.2,194,2000,2946,5.7,3.3,1.3,1 120 | 32,Male,15.9,7,280,1350,1600,5.6,2.8,1,1 121 | 32,Male,18,8.2,298,1250,1050,5.4,2.6,0.9,1 122 | 32,Male,23,11.3,300,482,275,7.1,3.5,0.9,1 123 | 32,Male,22.7,10.2,290,322,113,6.6,2.8,0.7,1 124 | 58,Male,1.7,0.8,188,60,84,5.9,3.5,1.4,2 125 | 64,Female,0.8,0.2,178,17,18,6.3,3.1,0.9,1 126 | 28,Male,0.6,0.1,177,36,29,6.9,4.1,1.4,2 127 | 60,Male,1.8,0.5,201,45,25,3.9,1.7,0.7,2 128 | 48,Male,5.8,2.5,802,133,88,6,2.8,0.8,1 129 | 64,Male,3,1.4,248,46,40,6.5,3.2,0.9,1 130 | 58,Female,1.7,0.8,1896,61,83,8,3.9,0.95,1 131 | 45,Male,2.8,1.7,263,57,65,5.1,2.3,0.8,1 132 | 45,Male,3.2,1.4,512,50,58,6,2.7,0.8,1 133 | 70,Female,0.7,0.2,237,18,28,5.8,2.5,0.75,2 134 | 18,Female,0.8,0.2,199,34,31,6.5,3.5,1.16,2 135 | 53,Male,0.9,0.4,238,17,14,6.6,2.9,0.8,1 136 | 18,Male,1.8,0.7,178,35,36,6.8,3.6,1.1,1 137 | 66,Male,11.3,5.6,1110,1250,4929,7,2.4,0.5,1 138 | 46,Female,4.7,2.2,310,62,90,6.4,2.5,0.6,1 139 | 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1 140 | 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1 141 | 15,Male,0.8,0.2,380,25,66,6.1,3.7,1.5,1 142 | 60,Male,0.6,0.1,186,20,21,6.2,3.3,1.1,2 143 | 66,Female,4.2,2.1,159,15,30,7.1,2.2,0.4,1 144 | 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1 145 | 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1 146 | 45,Female,3.5,1.5,189,63,87,5.6,2.9,1,1 147 | 65,Male,0.8,0.2,201,18,22,5.4,2.9,1.1,2 148 | 66,Female,2.9,1.3,168,21,38,5.5,1.8,0.4,1 149 | 65,Male,0.7,0.1,392,20,30,5.3,2.8,1.1,1 150 | 50,Male,0.9,0.2,202,20,26,7.2,4.5,1.66,1 151 | 60,Male,0.8,0.2,286,21,27,7.1,4,1.2,1 152 | 56,Male,1.1,0.5,180,30,42,6.9,3.8,1.2,2 153 | 50,Male,1.6,0.8,218,18,20,5.9,2.9,0.96,1 154 | 46,Female,0.8,0.2,182,20,40,6,2.9,0.9,1 155 | 52,Male,0.6,0.1,178,26,27,6.5,3.6,1.2,2 156 | 34,Male,5.9,2.5,290,45,233,5.6,2.7,0.9,1 157 | 34,Male,8.7,4,298,58,138,5.8,2.4,0.7,1 158 | 32,Male,0.9,0.3,462,70,82,6.2,3.1,1,1 159 | 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1 160 | 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1 
161 | 50,Male,1.2,0.4,282,36,32,7.2,3.9,1.1,1 162 | 60,Male,11,4.9,750,140,350,5.5,2.1,0.6,1 163 | 60,Male,11.5,5,1050,99,187,6.2,2.8,0.8,1 164 | 60,Male,5.8,2.7,599,43,66,5.4,1.8,0.5,1 165 | 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1 166 | 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1 167 | 48,Male,4.5,2.3,282,13,74,7,2.4,0.52,1 168 | 55,Male,75,3.6,332,40,66,6.2,2.5,0.6,1 169 | 47,Female,3,1.5,292,64,67,5.6,1.8,0.47,1 170 | 60,Male,22.8,12.6,962,53,41,6.9,3.3,0.9,1 171 | 60,Male,8.9,4,950,33,32,6.8,3.1,0.8,1 172 | 72,Male,1.7,0.8,200,28,37,6.2,3,0.93,1 173 | 44,Female,1.9,0.6,298,378,602,6.6,3.3,1,1 174 | 55,Male,14.1,7.6,750,35,63,5,1.6,0.47,1 175 | 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1 176 | 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1 177 | 31,Male,0.8,0.2,198,43,31,7.3,4,1.2,1 178 | 55,Male,0.8,0.2,482,112,99,5.7,2.6,0.8,1 179 | 75,Male,14.8,9,1020,71,42,5.3,2.2,0.7,1 180 | 75,Male,10.6,5,562,37,29,5.1,1.8,0.5,1 181 | 75,Male,8,4.6,386,30,25,5.5,1.8,0.48,1 182 | 75,Male,2.8,1.3,250,23,29,2.7,0.9,0.5,1 183 | 75,Male,2.9,1.3,218,33,37,3,1.5,1,1 184 | 65,Male,1.9,0.8,170,36,43,3.8,1.4,0.58,2 185 | 40,Male,0.6,0.1,171,20,17,5.4,2.5,0.8,1 186 | 64,Male,1.1,0.4,201,18,19,6.9,4.1,1.4,1 187 | 38,Male,1.5,0.4,298,60,103,6,3,1,2 188 | 60,Male,3.2,1.8,750,79,145,7.8,3.2,0.69,1 189 | 60,Male,2.1,1,191,114,247,4,1.6,0.6,1 190 | 60,Male,1.9,0.8,614,42,38,4.5,1.8,0.6,1 191 | 48,Female,0.8,0.2,218,32,28,5.2,2.5,0.9,2 192 | 60,Male,6.3,3.2,314,118,114,6.6,3.7,1.27,1 193 | 60,Male,5.8,3,257,107,104,6.6,3.5,1.12,1 194 | 60,Male,2.3,0.6,272,79,51,6.6,3.5,1.1,1 195 | 49,Male,1.3,0.4,206,30,25,6,3.1,1.06,2 196 | 49,Male,2,0.6,209,48,32,5.7,3,1.1,2 197 | 60,Male,2.4,1,1124,30,54,5.2,1.9,0.5,1 198 | 60,Male,2,1.1,664,52,104,6,2.1,0.53,1 199 | 26,Female,0.6,0.2,142,12,32,5.7,2.4,0.75,1 200 | 41,Male,0.9,0.2,169,22,18,6.1,3,0.9,2 201 | 7,Female,27.2,11.8,1420,790,1050,6.1,2,0.4,1 202 | 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1 203 | 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1 204 | 
38,Female,0.8,0.2,145,19,23,6.1,3.1,1.03,2 205 | 21,Male,1,0.3,142,27,21,6.4,3.5,1.2,2 206 | 21,Male,0.7,0.2,135,27,26,6.4,3.3,1,2 207 | 45,Male,2.5,1.2,163,28,22,7.6,4,1.1,1 208 | 40,Male,3.6,1.8,285,50,60,7,2.9,0.7,1 209 | 40,Male,3.9,1.7,350,950,1500,6.7,3.8,1.3,1 210 | 70,Female,0.9,0.3,220,53,95,6.1,2.8,0.68,1 211 | 45,Female,0.9,0.3,189,23,33,6.6,3.9,,1 212 | 28,Male,0.8,0.3,190,20,14,4.1,2.4,1.4,1 213 | 42,Male,2.7,1.3,219,60,180,7,3.2,0.8,1 214 | 22,Male,2.7,1,160,82,127,5.5,3.1,1.2,2 215 | 8,Female,0.9,0.2,401,25,58,7.5,3.4,0.8,1 216 | 38,Male,1.7,1,180,18,34,7.2,3.6,1,1 217 | 66,Male,0.6,0.2,100,17,148,5,3.3,1.9,2 218 | 55,Male,0.9,0.2,116,36,16,6.2,3.2,1,2 219 | 49,Male,1.1,0.5,159,30,31,7,4.3,1.5,1 220 | 6,Male,0.6,0.1,289,38,30,4.8,2,0.7,2 221 | 37,Male,0.8,0.2,125,41,39,6.4,3.4,1.1,1 222 | 37,Male,0.8,0.2,147,27,46,5,2.5,1,1 223 | 47,Male,0.9,0.2,192,38,24,7.3,4.3,1.4,1 224 | 47,Male,0.9,0.2,265,40,28,8,4,1,1 225 | 50,Male,1.1,0.3,175,20,19,7.1,4.5,1.7,2 226 | 70,Male,1.7,0.5,400,56,44,5.7,3.1,1.1,1 227 | 26,Male,0.6,0.2,120,45,51,7.9,4,1,1 228 | 26,Male,1.3,0.4,173,38,62,8,4,1,1 229 | 68,Female,0.7,0.2,186,18,15,6.4,3.8,1.4,1 230 | 65,Female,1,0.3,202,26,13,5.3,2.6,0.9,2 231 | 46,Male,0.6,0.2,290,26,21,6,3,1,1 232 | 61,Male,1.5,0.6,196,61,85,6.7,3.8,1.3,2 233 | 61,Male,0.8,0.1,282,85,231,8.5,4.3,1,1 234 | 50,Male,2.7,1.6,157,149,156,7.9,3.1,0.6,1 235 | 33,Male,2,1.4,2110,48,89,6.2,3,0.9,1 236 | 40,Female,0.9,0.2,285,32,27,7.7,3.5,0.8,1 237 | 60,Male,1.5,0.6,360,230,298,4.5,2,0.8,1 238 | 22,Male,0.8,0.2,300,57,40,7.9,3.8,0.9,2 239 | 35,Female,0.9,0.3,158,20,16,8,4,1,1 240 | 35,Female,0.9,0.2,190,40,35,7.3,4.7,1.8,2 241 | 40,Male,0.9,0.3,196,69,48,6.8,3.1,0.8,1 242 | 48,Male,0.7,0.2,165,32,30,8,4,1,2 243 | 51,Male,0.8,0.2,230,24,46,6.5,3.1,,1 244 | 29,Female,0.8,0.2,205,30,23,8.2,4.1,1,1 245 | 28,Female,0.9,0.2,316,25,23,8.5,5.5,1.8,1 246 | 54,Male,0.8,0.2,218,20,19,6.3,2.5,0.6,1 247 | 54,Male,0.9,0.2,290,15,18,6.1,2.8,0.8,1 248 | 
55,Male,1.8,9,272,22,79,6.1,2.7,0.7,1 249 | 55,Male,0.9,0.2,190,25,28,5.9,2.7,0.8,1 250 | 40,Male,0.7,0.1,202,37,29,5,2.6,1,1 251 | 33,Male,1.2,0.3,498,28,25,7,3,0.7,1 252 | 33,Male,2.1,1.3,480,38,22,6.5,3,0.8,1 253 | 33,Male,0.9,0.8,680,37,40,5.9,2.6,0.8,1 254 | 65,Male,1.1,0.3,258,48,40,7,3.9,1.2,2 255 | 35,Female,0.6,0.2,180,12,15,5.2,2.7,,2 256 | 38,Female,0.7,0.1,152,90,21,7.1,4.2,1.4,2 257 | 38,Male,1.7,0.7,859,89,48,6,3,1,1 258 | 50,Male,0.9,0.3,901,23,17,6.2,3.5,1.2,1 259 | 44,Male,0.8,0.2,335,148,86,5.6,3,1.1,1 260 | 36,Male,0.8,0.2,182,31,34,6.4,3.8,1.4,2 261 | 42,Male,30.5,14.2,285,65,130,5.2,2.1,0.6,1 262 | 42,Male,16.4,8.9,245,56,87,5.4,2,0.5,1 263 | 33,Male,1.5,7,505,205,140,7.5,3.9,1,1 264 | 18,Male,0.8,0.2,228,55,54,6.9,4,1.3,1 265 | 38,Female,0.8,0.2,185,25,21,7,3,0.7,1 266 | 38,Male,0.8,0.2,247,55,92,7.4,4.3,1.38,2 267 | 4,Male,0.9,0.2,348,30,34,8,4,1,2 268 | 62,Male,1.2,0.4,195,38,54,6.3,3.8,1.5,1 269 | 43,Female,0.9,0.3,140,12,29,7.4,3.5,1.8,1 270 | 40,Male,14.5,6.4,358,50,75,5.7,2.1,0.5,1 271 | 26,Male,0.6,0.1,110,15,20,2.8,1.6,1.3,1 272 | 37,Male,0.7,0.2,235,96,54,9.5,4.9,1,1 273 | 4,Male,0.8,0.2,460,152,231,6.5,3.2,0.9,2 274 | 21,Male,18.5,9.5,380,390,500,8.2,4.1,1,1 275 | 30,Male,0.7,0.2,262,15,18,9.6,4.7,1.2,1 276 | 33,Male,1.8,0.8,196,25,22,8,4,1,1 277 | 26,Male,1.9,0.8,180,22,19,8.2,4.1,1,2 278 | 35,Male,0.9,0.2,190,25,20,6.4,3.6,1.2,2 279 | 60,Male,2,0.8,190,45,40,6,2.8,0.8,1 280 | 45,Male,2.2,0.8,209,25,20,8,4,1,1 281 | 48,Female,1,1.4,144,18,14,8.3,4.2,1,1 282 | 58,Male,0.8,0.2,123,56,48,6,3,1,1 283 | 50,Male,0.7,0.2,192,18,15,7.4,4.2,1.3,2 284 | 50,Male,0.7,0.2,188,12,14,7,3.4,0.9,1 285 | 18,Male,1.3,0.7,316,10,21,6,2.1,0.5,2 286 | 18,Male,0.9,0.3,300,30,48,8,4,1,1 287 | 13,Male,1.5,0.5,575,29,24,7.9,3.9,0.9,1 288 | 34,Female,0.8,0.2,192,15,12,8.6,4.7,1.2,1 289 | 43,Male,1.3,0.6,155,15,20,8,4,1,2 290 | 50,Female,1,0.5,239,16,39,7.5,3.7,0.9,1 291 | 57,Male,4.5,2.3,315,120,105,7,4,1.3,1 292 | 45,Female,1,0.3,250,48,44,8.6,4.3,1,1 293 | 
60,Male,0.7,0.2,174,32,14,7.8,4.2,1.1,2 294 | 45,Male,0.6,0.2,245,22,24,7.1,3.4,0.9,1 295 | 23,Male,1.1,0.5,191,37,41,7.7,4.3,1.2,2 296 | 22,Male,2.4,1,340,25,21,8.3,4.5,1.1,1 297 | 22,Male,0.6,0.2,202,78,41,8,3.9,0.9,1 298 | 74,Female,0.9,0.3,234,16,19,7.9,4,1,1 299 | 25,Female,0.9,0.3,159,24,25,6.9,4.4,1.7,2 300 | 31,Female,1.1,0.3,190,26,15,7.9,3.8,0.9,1 301 | 24,Female,0.9,0.2,195,40,35,7.4,4.1,1.2,2 302 | 58,Male,0.8,0.2,180,32,25,8.2,4.4,1.1,2 303 | 51,Female,0.9,0.2,280,21,30,6.7,3.2,0.8,1 304 | 50,Female,1.7,0.6,430,28,32,6.8,3.5,1,1 305 | 50,Male,0.7,0.2,206,18,17,8.4,4.2,1,2 306 | 55,Female,0.8,0.2,155,21,17,6.9,3.8,1.4,1 307 | 54,Female,1.4,0.7,195,36,16,7.9,3.7,0.9,2 308 | 48,Male,1.6,1,588,74,113,7.3,2.4,0.4,1 309 | 30,Male,0.8,0.2,174,21,47,4.6,2.3,1,1 310 | 45,Female,0.8,0.2,165,22,18,8.2,4.1,1,1 311 | 48,Female,1.1,0.7,527,178,250,8,4.2,1.1,1 312 | 51,Male,0.8,0.2,175,48,22,8.1,4.6,1.3,1 313 | 54,Female,23.2,12.6,574,43,47,7.2,3.5,0.9,1 314 | 27,Male,1.3,0.6,106,25,54,8.5,4.8,,2 315 | 30,Female,0.8,0.2,158,25,22,7.9,4.5,1.3,2 316 | 26,Male,2,0.9,195,24,65,7.8,4.3,1.2,1 317 | 22,Male,0.9,0.3,179,18,21,6.7,3.7,1.2,2 318 | 44,Male,0.9,0.2,182,29,82,7.1,3.7,1,2 319 | 35,Male,0.7,0.2,198,42,30,6.8,3.4,1,1 320 | 38,Male,3.7,2.2,216,179,232,7.8,4.5,1.3,1 321 | 14,Male,0.9,0.3,310,21,16,8.1,4.2,1,2 322 | 30,Female,0.7,0.2,63,31,27,5.8,3.4,1.4,1 323 | 30,Female,0.8,0.2,198,30,58,5.2,2.8,1.1,1 324 | 36,Male,1.7,0.5,205,36,34,7.1,3.9,1.2,1 325 | 12,Male,0.8,0.2,302,47,67,6.7,3.5,1.1,2 326 | 60,Male,2.6,1.2,171,42,37,5.4,2.7,1,1 327 | 42,Male,0.8,0.2,158,27,23,6.7,3.1,0.8,2 328 | 36,Female,1.2,0.4,358,160,90,8.3,4.4,1.1,2 329 | 24,Male,3.3,1.6,174,11,33,7.6,3.9,1,2 330 | 43,Male,0.8,0.2,192,29,20,6,2.9,0.9,2 331 | 21,Male,0.7,0.2,211,14,23,7.3,4.1,1.2,2 332 | 26,Male,2,0.9,157,54,68,6.1,2.7,0.8,1 333 | 26,Male,1.7,0.6,210,62,56,5.4,2.2,0.6,1 334 | 26,Male,7.1,3.3,258,80,113,6.2,2.9,0.8,1 335 | 36,Female,0.7,0.2,152,21,25,5.9,3.1,1.1,2 336 | 
13,Female,0.7,0.2,350,17,24,7.4,4,1.1,1 337 | 13,Female,0.7,0.1,182,24,19,8.9,4.9,1.2,1 338 | 75,Male,6.7,3.6,458,198,143,6.2,3.2,1,1 339 | 75,Male,2.5,1.2,375,85,68,6.4,2.9,0.8,1 340 | 75,Male,1.8,0.8,405,79,50,6.1,2.9,0.9,1 341 | 75,Male,1.4,0.4,215,50,30,5.9,2.6,0.7,1 342 | 75,Male,0.9,0.2,206,44,33,6.2,2.9,0.8,1 343 | 36,Female,0.8,0.2,650,70,138,6.6,3.1,0.8,1 344 | 35,Male,0.8,0.2,198,36,32,7,4,1.3,2 345 | 70,Male,3.1,1.6,198,40,28,5.6,2,0.5,1 346 | 37,Male,0.8,0.2,195,60,40,8.2,5,1.5,2 347 | 60,Male,2.9,1.3,230,32,44,5.6,2,0.5,1 348 | 46,Male,0.6,0.2,115,14,11,6.9,3.4,0.9,1 349 | 38,Male,0.7,0.2,216,349,105,7,3.5,1,1 350 | 70,Male,1.3,0.4,358,19,14,6.1,2.8,0.8,1 351 | 49,Female,0.8,0.2,158,19,15,6.6,3.6,1.2,2 352 | 37,Male,1.8,0.8,145,62,58,5.7,2.9,1,1 353 | 37,Male,1.3,0.4,195,41,38,5.3,2.1,0.6,1 354 | 26,Female,0.7,0.2,144,36,33,8.2,4.3,1.1,1 355 | 48,Female,1.4,0.8,621,110,176,7.2,3.9,1.1,1 356 | 48,Female,0.8,0.2,150,25,23,7.5,3.9,1,1 357 | 19,Male,1.4,0.8,178,13,26,8,4.6,1.3,2 358 | 33,Male,0.7,0.2,256,21,30,8.5,3.9,0.8,1 359 | 33,Male,2.1,0.7,205,50,38,6.8,3,0.7,1 360 | 37,Male,0.7,0.2,176,28,34,5.6,2.6,0.8,1 361 | 69,Female,0.8,0.2,146,42,70,8.4,4.9,1.4,2 362 | 24,Male,0.7,0.2,218,47,26,6.6,3.3,1,1 363 | 65,Female,0.7,0.2,182,23,28,6.8,2.9,0.7,2 364 | 55,Male,1.1,0.3,215,21,15,6.2,2.9,0.8,2 365 | 42,Female,0.9,0.2,165,26,29,8.5,4.4,1,2 366 | 21,Male,0.8,0.2,183,33,57,6.8,3.5,1,2 367 | 40,Male,0.7,0.2,176,28,43,5.3,2.4,0.8,2 368 | 16,Male,0.7,0.2,418,28,35,7.2,4.1,1.3,2 369 | 60,Male,2.2,1,271,45,52,6.1,2.9,0.9,2 370 | 42,Female,0.8,0.2,182,22,20,7.2,3.9,1.1,1 371 | 58,Female,0.8,0.2,130,24,25,7,4,1.3,1 372 | 54,Female,22.6,11.4,558,30,37,7.8,3.4,0.8,1 373 | 33,Male,0.8,0.2,135,30,29,7.2,4.4,1.5,2 374 | 48,Male,0.7,0.2,326,29,17,8.7,5.5,1.7,1 375 | 25,Female,0.7,0.1,140,32,25,7.6,4.3,1.3,2 376 | 56,Female,0.7,0.1,145,26,23,7,4,1.3,2 377 | 47,Male,3.5,1.6,206,32,31,6.8,3.4,1,1 378 | 33,Male,0.7,0.1,168,35,33,7,3.7,1.1,1 379 | 
20,Female,0.6,0.2,202,12,13,6.1,3,0.9,2 380 | 50,Female,0.7,0.1,192,20,41,7.3,3.3,0.8,1 381 | 72,Male,0.7,0.2,185,16,22,7.3,3.7,1,2 382 | 50,Male,1.7,0.8,331,36,53,7.3,3.4,0.9,1 383 | 39,Male,0.6,0.2,188,28,43,8.1,3.3,0.6,1 384 | 58,Female,0.7,0.1,172,27,22,6.7,3.2,0.9,1 385 | 60,Female,1.4,0.7,159,10,12,4.9,2.5,1,2 386 | 34,Male,3.7,2.1,490,115,91,6.5,2.8,0.7,1 387 | 50,Male,0.8,0.2,152,29,30,7.4,4.1,1.3,1 388 | 38,Male,2.7,1.4,105,25,21,7.5,4.2,1.2,2 389 | 51,Male,0.8,0.2,160,34,20,6.9,3.7,1.1,1 390 | 46,Male,0.8,0.2,160,31,40,7.3,3.8,1.1,1 391 | 72,Male,0.6,0.1,102,31,35,6.3,3.2,1,1 392 | 72,Male,0.8,0.2,148,23,35,6,3,1,1 393 | 75,Male,0.9,0.2,162,25,20,6.9,3.7,1.1,1 394 | 41,Male,7.5,4.3,149,94,92,6.3,3.1,0.9,1 395 | 41,Male,2.7,1.3,580,142,68,8,4,1,1 396 | 48,Female,1,0.3,310,37,56,5.9,2.5,0.7,1 397 | 45,Male,0.8,0.2,140,24,20,6.3,3.2,1,2 398 | 74,Male,1,0.3,175,30,32,6.4,3.4,1.1,1 399 | 78,Male,1,0.3,152,28,70,6.3,3.1,0.9,1 400 | 38,Male,0.8,0.2,208,25,50,7.1,3.7,1,1 401 | 27,Male,1,0.2,205,137,145,6,3,1,1 402 | 66,Female,0.7,0.2,162,24,20,6.4,3.2,1,2 403 | 50,Male,7.3,3.7,92,44,236,6.8,1.6,0.3,1 404 | 42,Female,0.5,0.1,162,155,108,8.1,4,0.9,1 405 | 65,Male,0.7,0.2,199,19,22,6.3,3.6,1.3,2 406 | 22,Male,0.8,0.2,198,20,26,6.8,3.9,1.3,1 407 | 31,Female,0.8,0.2,215,15,21,7.6,4,1.1,1 408 | 45,Male,0.7,0.2,180,18,58,6.7,3.7,1.2,2 409 | 12,Male,1,0.2,719,157,108,7.2,3.7,1,1 410 | 48,Male,2.4,1.1,554,141,73,7.5,3.6,0.9,1 411 | 48,Male,5,2.6,555,284,190,6.5,3.3,1,1 412 | 18,Male,1.4,0.6,215,440,850,5,1.9,0.6,1 413 | 23,Female,2.3,0.8,509,28,44,6.9,2.9,0.7,2 414 | 65,Male,4.9,2.7,190,33,71,7.1,2.9,0.7,1 415 | 48,Male,0.7,0.2,208,15,30,4.6,2.1,0.8,2 416 | 65,Male,1.4,0.6,260,28,24,5.2,2.2,0.7,2 417 | 70,Male,1.3,0.3,690,93,40,3.6,2.7,0.7,1 418 | 70,Male,0.6,0.1,862,76,180,6.3,2.7,0.75,1 419 | 11,Male,0.7,0.1,592,26,29,7.1,4.2,1.4,2 420 | 50,Male,4.2,2.3,450,69,50,7,3,0.7,1 421 | 55,Female,8.2,3.9,1350,52,65,6.7,2.9,0.7,1 422 | 55,Female,10.9,5.1,1350,48,57,6.4,2.3,0.5,1 
423 | 26,Male,1,0.3,163,48,71,7.1,3.7,1,2 424 | 41,Male,1.2,0.5,246,34,42,6.9,3.4,0.97,1 425 | 53,Male,1.6,0.9,178,44,59,6.5,3.9,1.5,2 426 | 32,Female,0.7,0.1,240,12,15,7,3,0.7,1 427 | 58,Male,0.4,0.1,100,59,126,4.3,2.5,1.4,1 428 | 45,Male,1.3,0.6,166,49,42,5.6,2.5,0.8,2 429 | 65,Male,0.9,0.2,170,33,66,7,3,0.75,1 430 | 52,Female,0.6,0.1,194,10,12,6.9,3.3,0.9,2 431 | 73,Male,1.9,0.7,1750,102,141,5.5,2,0.5,1 432 | 53,Female,0.7,0.1,182,20,33,4.8,1.9,0.6,1 433 | 47,Female,0.8,0.2,236,10,13,6.7,2.9,0.76,2 434 | 29,Male,0.7,0.2,165,55,87,7.5,4.6,1.58,1 435 | 41,Female,0.9,0.2,201,31,24,7.6,3.8,1,2 436 | 30,Female,0.7,0.2,194,32,36,7.5,3.6,0.92,2 437 | 17,Female,0.5,0.1,206,28,21,7.1,4.5,1.7,2 438 | 23,Male,1,0.3,212,41,80,6.2,3.1,1,1 439 | 35,Male,1.6,0.7,157,15,44,5.2,2.5,0.9,1 440 | 65,Male,0.8,0.2,162,30,90,3.8,1.4,0.5,1 441 | 42,Female,0.8,0.2,168,25,18,6.2,3.1,1,1 442 | 49,Female,0.8,0.2,198,23,20,7,4.3,1.5,1 443 | 42,Female,2.3,1.1,292,29,39,4.1,1.8,0.7,1 444 | 42,Female,7.4,3.6,298,52,102,4.6,1.9,0.7,1 445 | 42,Female,0.7,0.2,152,35,81,6.2,3.2,1.06,1 446 | 61,Male,0.8,0.2,163,18,19,6.3,2.8,0.8,2 447 | 17,Male,0.9,0.2,279,40,46,7.3,4,1.2,2 448 | 54,Male,0.8,0.2,181,35,20,5.5,2.7,0.96,1 449 | 45,Female,23.3,12.8,1550,425,511,7.7,3.5,0.8,1 450 | 48,Female,0.8,0.2,142,26,25,6,2.6,0.7,1 451 | 48,Female,0.9,0.2,173,26,27,6.2,3.1,1,1 452 | 65,Male,7.9,4.3,282,50,72,6,3,1,1 453 | 35,Male,0.8,0.2,279,20,25,7.2,3.2,0.8,1 454 | 58,Male,0.9,0.2,1100,25,36,7.1,3.5,0.9,1 455 | 46,Male,0.7,0.2,224,40,23,7.1,3,0.7,1 456 | 28,Male,0.6,0.2,159,15,16,7,3.5,1,2 457 | 21,Female,0.6,0.1,186,25,22,6.8,3.4,1,1 458 | 32,Male,0.7,0.2,189,22,43,7.4,3.1,0.7,2 459 | 61,Male,0.8,0.2,192,28,35,6.9,3.4,0.9,2 460 | 26,Male,6.8,3.2,140,37,19,3.6,0.9,0.3,1 461 | 65,Male,1.1,0.5,686,16,46,5.7,1.5,0.35,1 462 | 22,Female,2.2,1,215,159,51,5.5,2.5,0.8,1 463 | 28,Female,0.8,0.2,309,55,23,6.8,4.1,1.51,1 464 | 38,Male,0.7,0.2,110,22,18,6.4,2.5,0.64,1 465 | 25,Male,0.8,0.1,130,23,42,8,4,1,1 466 | 
45,Female,0.7,0.2,164,21,53,4.5,1.4,0.45,2 467 | 45,Female,0.6,0.1,270,23,42,5.1,2,0.5,2 468 | 28,Female,0.6,0.1,137,22,16,4.9,1.9,0.6,2 469 | 28,Female,1,0.3,90,18,108,6.8,3.1,0.8,2 470 | 66,Male,1,0.3,190,30,54,5.3,2.1,0.6,1 471 | 66,Male,0.8,0.2,165,22,32,4.4,2,0.8,1 472 | 66,Male,1.1,0.5,167,13,56,7.1,4.1,1.36,1 473 | 49,Female,0.6,0.1,185,17,26,6.6,2.9,0.7,2 474 | 42,Male,0.7,0.2,197,64,33,5.8,2.4,0.7,2 475 | 42,Male,1,0.3,154,38,21,6.8,3.9,1.3,2 476 | 35,Male,2,1.1,226,33,135,6,2.7,0.8,2 477 | 38,Male,2.2,1,310,119,42,7.9,4.1,1,2 478 | 38,Male,0.9,0.3,310,15,25,5.5,2.7,1,1 479 | 55,Male,0.6,0.2,220,24,32,5.1,2.4,0.88,1 480 | 33,Male,7.1,3.7,196,622,497,6.9,3.6,1.09,1 481 | 33,Male,3.4,1.6,186,779,844,7.3,3.2,0.7,1 482 | 7,Male,0.5,0.1,352,28,51,7.9,4.2,1.1,2 483 | 45,Male,2.3,1.3,282,132,368,7.3,4,1.2,1 484 | 45,Male,1.1,0.4,92,91,188,7.2,3.8,1.11,1 485 | 30,Male,0.8,0.2,182,46,57,7.8,4.3,1.2,2 486 | 62,Male,5,2.1,103,18,40,5,2.1,1.72,1 487 | 22,Female,6.7,3.2,850,154,248,6.2,2.8,0.8,1 488 | 42,Female,0.8,0.2,195,18,15,6.7,3,0.8,1 489 | 32,Male,0.7,0.2,276,102,190,6,2.9,0.93,1 490 | 60,Male,0.7,0.2,171,31,26,7,3.5,1,2 491 | 65,Male,0.8,0.1,146,17,29,5.9,3.2,1.18,2 492 | 53,Female,0.8,0.2,193,96,57,6.7,3.6,1.16,1 493 | 27,Male,1,0.3,180,56,111,6.8,3.9,1.85,2 494 | 35,Female,1,0.3,805,133,103,7.9,3.3,0.7,1 495 | 65,Male,0.7,0.2,265,30,28,5.2,1.8,0.52,2 496 | 25,Male,0.7,0.2,185,196,401,6.5,3.9,1.5,1 497 | 32,Male,0.7,0.2,165,31,29,6.1,3,0.96,2 498 | 24,Male,1,0.2,189,52,31,8,4.8,1.5,1 499 | 67,Male,2.2,1.1,198,42,39,7.2,3,0.7,1 500 | 68,Male,1.8,0.5,151,18,22,6.5,4,1.6,1 501 | 55,Male,3.6,1.6,349,40,70,7.2,2.9,0.6,1 502 | 70,Male,2.7,1.2,365,62,55,6,2.4,0.6,1 503 | 36,Male,2.8,1.5,305,28,76,5.9,2.5,0.7,1 504 | 42,Male,0.8,0.2,127,29,30,4.9,2.7,1.2,1 505 | 53,Male,19.8,10.4,238,39,221,8.1,2.5,0.4,1 506 | 32,Male,30.5,17.1,218,39,79,5.5,2.7,0.9,1 507 | 32,Male,32.6,14.1,219,95,235,5.8,3.1,1.1,1 508 | 56,Male,17.7,8.8,239,43,185,5.6,2.4,0.7,1 509 | 
50,Male,0.9,0.3,194,190,73,7.5,3.9,1,1 510 | 46,Male,18.4,8.5,450,119,230,7.5,3.3,0.7,1 511 | 46,Male,20,10,254,140,540,5.4,3,1.2,1 512 | 37,Female,0.8,0.2,205,31,36,9.2,4.6,1,2 513 | 45,Male,2.2,1.6,320,37,48,6.8,3.4,1,1 514 | 56,Male,1,0.3,195,22,28,5.8,2.6,0.8,2 515 | 69,Male,0.9,0.2,215,32,24,6.9,3,0.7,1 516 | 49,Male,1,0.3,230,48,58,8.4,4.2,1,1 517 | 49,Male,3.9,2.1,189,65,181,6.9,3,0.7,1 518 | 60,Male,0.9,0.3,168,16,24,6.7,3,0.8,1 519 | 28,Male,0.9,0.2,215,50,28,8,4,1,1 520 | 45,Male,2.9,1.4,210,74,68,7.2,3.6,1,1 521 | 35,Male,26.3,12.1,108,168,630,9.2,2,0.3,1 522 | 62,Male,1.8,0.9,224,69,155,8.6,4,0.8,1 523 | 55,Male,4.4,2.9,230,14,25,7.1,2.1,0.4,1 524 | 46,Female,0.8,0.2,185,24,15,7.9,3.7,0.8,1 525 | 50,Male,0.6,0.2,137,15,16,4.8,2.6,1.1,1 526 | 29,Male,0.8,0.2,156,12,15,6.8,3.7,1.1,2 527 | 53,Female,0.9,0.2,210,35,32,8,3.9,0.9,2 528 | 46,Male,9.4,5.2,268,21,63,6.4,2.8,0.8,1 529 | 40,Male,3.5,1.6,298,68,200,7.1,3.4,0.9,1 530 | 45,Male,1.7,0.8,315,12,38,6.3,2.1,0.5,1 531 | 55,Male,3.3,1.5,214,54,152,5.1,1.8,0.5,1 532 | 22,Female,1.1,0.3,138,14,21,7,3.8,1.1,2 533 | 40,Male,30.8,18.3,285,110,186,7.9,2.7,0.5,1 534 | 62,Male,0.7,0.2,162,12,17,8.2,3.2,0.6,2 535 | 46,Female,1.4,0.4,298,509,623,3.6,1,0.3,1 536 | 39,Male,1.6,0.8,230,88,74,8,4,1,2 537 | 60,Male,19.6,9.5,466,46,52,6.1,2,0.4,1 538 | 46,Male,15.8,7.2,227,67,220,6.9,2.6,0.6,1 539 | 10,Female,0.8,0.1,395,25,75,7.6,3.6,0.9,1 540 | 52,Male,1.8,0.8,97,85,78,6.4,2.7,0.7,1 541 | 65,Female,0.7,0.2,406,24,45,7.2,3.5,0.9,2 542 | 42,Male,0.8,0.2,114,21,23,7,3,0.7,2 543 | 42,Male,0.8,0.2,198,29,19,6.6,3,0.8,2 544 | 62,Male,0.7,0.2,173,46,47,7.3,4.1,1.2,2 545 | 40,Male,1.2,0.6,204,23,27,7.6,4,1.1,1 546 | 54,Female,5.5,3.2,350,67,42,7,3.2,0.8,1 547 | 45,Female,0.7,0.2,153,41,42,4.5,2.2,0.9,2 548 | 45,Male,20.2,11.7,188,47,32,5.4,2.3,0.7,1 549 | 50,Female,27.7,10.8,380,39,348,7.1,2.3,0.4,1 550 | 42,Male,11.1,6.1,214,60,186,6.9,2.8,2.8,1 551 | 40,Female,2.1,1,768,74,141,7.8,4.9,1.6,1 552 | 
46,Male,3.3,1.5,172,25,41,5.6,2.4,0.7,1 553 | 29,Male,1.2,0.4,160,20,22,6.2,3,0.9,2 554 | 45,Male,0.6,0.1,196,29,30,5.8,2.9,1,1 555 | 46,Male,10.2,4.2,232,58,140,7,2.7,0.6,1 556 | 73,Male,1.8,0.9,220,20,43,6.5,3,0.8,1 557 | 55,Male,0.8,0.2,290,139,87,7,3,0.7,1 558 | 51,Male,0.7,0.1,180,25,27,6.1,3.1,1,1 559 | 51,Male,2.9,1.2,189,80,125,6.2,3.1,1,1 560 | 51,Male,4,2.5,275,382,330,7.5,4,1.1,1 561 | 26,Male,42.8,19.7,390,75,138,7.5,2.6,0.5,1 562 | 66,Male,15.2,7.7,356,321,562,6.5,2.2,0.4,1 563 | 66,Male,16.6,7.6,315,233,384,6.9,2,0.4,1 564 | 66,Male,17.3,8.5,388,173,367,7.8,2.6,0.5,1 565 | 64,Male,1.4,0.5,298,31,83,7.2,2.6,0.5,1 566 | 38,Female,0.6,0.1,165,22,34,5.9,2.9,0.9,2 567 | 43,Male,22.5,11.8,143,22,143,6.6,2.1,0.46,1 568 | 50,Female,1,0.3,191,22,31,7.8,4,1,2 569 | 52,Male,2.7,1.4,251,20,40,6,1.7,0.39,1 570 | 20,Female,16.7,8.4,200,91,101,6.9,3.5,1.02,1 571 | 16,Male,7.7,4.1,268,213,168,7.1,4,1.2,1 572 | 16,Male,2.6,1.2,236,131,90,5.4,2.6,0.9,1 573 | 90,Male,1.1,0.3,215,46,134,6.9,3,0.7,1 574 | 32,Male,15.6,9.5,134,54,125,5.6,4,2.5,1 575 | 32,Male,3.7,1.6,612,50,88,6.2,1.9,0.4,1 576 | 32,Male,12.1,6,515,48,92,6.6,2.4,0.5,1 577 | 32,Male,25,13.7,560,41,88,7.9,2.5,2.5,1 578 | 32,Male,15,8.2,289,58,80,5.3,2.2,0.7,1 579 | 32,Male,12.7,8.4,190,28,47,5.4,2.6,0.9,1 580 | 60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2 581 | 40,Male,0.6,0.1,98,35,31,6,3.2,1.1,1 582 | 52,Male,0.8,0.2,245,48,49,6.4,3.2,1,1 583 | 31,Male,1.3,0.5,184,29,32,6.8,3.4,1,1 584 | 38,Male,1,0.3,216,21,24,7.3,4.4,1.5,2 585 | -------------------------------------------------------------------------------- /2018/2018-03-17/Introduction to data analysis in Python/indian_liver_patient_original.csv: -------------------------------------------------------------------------------- 1 | Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset 2 | 65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1 3 | 
62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1 4 | 62,Male,7.3,4.1,490,60,68,7,3.3,0.89,1 5 | 58,Male,1,0.4,182,14,20,6.8,3.4,1,1 6 | 72,Male,3.9,2,195,27,59,7.3,2.4,0.4,1 7 | 46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1 8 | 26,Female,0.9,0.2,154,16,12,7,3.5,1,1 9 | 29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1 10 | 17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2 11 | 55,Male,0.7,0.2,290,53,58,6.8,3.4,1,1 12 | 57,Male,0.6,0.1,210,51,59,5.9,2.7,0.8,1 13 | 72,Male,2.7,1.3,260,31,56,7.4,3,0.6,1 14 | 64,Male,0.9,0.3,310,61,58,7,3.4,0.9,2 15 | 74,Female,1.1,0.4,214,22,30,8.1,4.1,1,1 16 | 61,Male,0.7,0.2,145,53,41,5.8,2.7,0.87,1 17 | 25,Male,0.6,0.1,183,91,53,5.5,2.3,0.7,2 18 | 38,Male,1.8,0.8,342,168,441,7.6,4.4,1.3,1 19 | 33,Male,1.6,0.5,165,15,23,7.3,3.5,0.92,2 20 | 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1 21 | 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1 22 | 51,Male,2.2,1,610,17,28,7.3,2.6,0.55,1 23 | 51,Male,2.9,1.3,482,22,34,7,2.4,0.5,1 24 | 62,Male,6.8,3,542,116,66,6.4,3.1,0.9,1 25 | 40,Male,1.9,1,231,16,55,4.3,1.6,0.6,1 26 | 63,Male,0.9,0.2,194,52,45,6,3.9,1.85,2 27 | 34,Male,4.1,2,289,875,731,5,2.7,1.1,1 28 | 34,Male,4.1,2,289,875,731,5,2.7,1.1,1 29 | 34,Male,6.2,3,240,1680,850,7.2,4,1.2,1 30 | 20,Male,1.1,0.5,128,20,30,3.9,1.9,0.95,2 31 | 84,Female,0.7,0.2,188,13,21,6,3.2,1.1,2 32 | 57,Male,4,1.9,190,45,111,5.2,1.5,0.4,1 33 | 52,Male,0.9,0.2,156,35,44,4.9,2.9,1.4,1 34 | 57,Male,1,0.3,187,19,23,5.2,2.9,1.2,2 35 | 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2 36 | 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2 37 | 30,Male,1.3,0.4,482,102,80,6.9,3.3,0.9,1 38 | 17,Female,0.7,0.2,145,18,36,7.2,3.9,1.18,2 39 | 46,Female,14.2,7.8,374,38,77,4.3,2,0.8,1 40 | 48,Male,1.4,0.6,263,38,66,5.8,2.2,0.61,1 41 | 47,Male,2.7,1.3,275,123,73,6.2,3.3,1.1,1 42 | 45,Male,2.4,1.1,168,33,50,5.1,2.6,1,1 43 | 62,Male,0.6,0.1,160,42,110,4.9,2.6,1.1,2 44 | 42,Male,6.8,3.2,630,25,47,6.1,2.3,0.6,2 45 | 50,Male,2.6,1.2,415,407,576,6.4,3.2,1,1 46 | 85,Female,1,0.3,208,17,15,7,3.6,1,2 47 | 
35,Male,1.8,0.6,275,48,178,6.5,3.2,0.9,2 48 | 21,Male,3.9,1.8,150,36,27,6.8,3.9,1.34,1 49 | 40,Male,1.1,0.3,230,1630,960,4.9,2.8,1.3,1 50 | 32,Female,0.6,0.1,176,39,28,6,3,1,1 51 | 55,Male,18.4,8.8,206,64,178,6.2,1.8,0.4,1 52 | 45,Female,0.7,0.2,170,21,14,5.7,2.5,0.7,1 53 | 34,Female,0.6,0.1,161,15,19,6.6,3.4,1,1 54 | 38,Male,3.1,1.6,253,80,406,6.8,3.9,1.3,1 55 | 38,Male,1.1,0.3,198,86,150,6.3,3.5,1.2,1 56 | 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1 57 | 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1 58 | 33,Male,0.8,0.2,198,26,23,8,4,1,2 59 | 48,Female,0.9,0.2,175,24,54,5.5,2.7,0.9,2 60 | 51,Male,0.8,0.2,367,42,18,5.2,2,0.6,1 61 | 64,Male,1.1,0.5,145,20,24,5.5,3.2,1.39,2 62 | 31,Female,0.8,0.2,158,21,16,6,3,1,1 63 | 58,Male,1,0.5,158,37,43,7.2,3.6,1,1 64 | 58,Male,1,0.5,158,37,43,7.2,3.6,1,1 65 | 57,Male,0.7,0.2,208,35,97,5.1,2.1,0.7,1 66 | 57,Male,1.3,0.4,259,40,86,6.5,2.5,0.6,1 67 | 57,Male,1.4,0.7,470,62,88,5.6,2.5,0.8,1 68 | 54,Male,2.2,1.2,195,55,95,6,3.7,1.6,1 69 | 37,Male,1.8,0.8,215,53,58,6.4,3.8,1.4,1 70 | 66,Male,0.7,0.2,239,27,26,6.3,3.7,1.4,1 71 | 60,Male,0.8,0.2,215,24,17,6.3,3,0.9,2 72 | 19,Female,0.7,0.2,186,166,397,5.5,3,1.2,1 73 | 75,Female,0.8,0.2,188,20,29,4.4,1.8,0.6,1 74 | 75,Female,0.8,0.2,205,27,24,4.4,2,0.8,1 75 | 52,Male,0.6,0.1,171,22,16,6.6,3.6,1.2,1 76 | 68,Male,0.7,0.1,145,20,22,5.8,2.9,1,1 77 | 29,Female,0.7,0.1,162,52,41,5.2,2.5,0.9,2 78 | 31,Male,0.9,0.2,518,189,17,5.3,2.3,0.7,1 79 | 68,Female,0.6,0.1,1620,95,127,4.6,2.1,0.8,1 80 | 70,Male,1.4,0.6,146,12,24,6.2,3.8,1.58,2 81 | 58,Female,2.8,1.3,670,48,79,4.7,1.6,0.5,1 82 | 58,Female,2.4,1.1,915,60,142,4.7,1.8,0.6,1 83 | 29,Male,1,0.3,75,25,26,5.1,2.9,1.3,1 84 | 49,Male,0.7,0.1,148,14,12,5.4,2.8,1,2 85 | 33,Male,2,1,258,194,152,5.4,3,1.25,1 86 | 32,Male,0.6,0.1,237,45,31,7.5,4.3,1.34,1 87 | 14,Male,1.4,0.5,269,58,45,6.7,3.9,1.4,1 88 | 13,Male,0.6,0.1,320,28,56,7.2,3.6,1,2 89 | 58,Male,0.8,0.2,298,33,59,6.2,3.1,1,1 90 | 18,Male,0.6,0.2,538,33,34,7.5,3.2,0.7,1 91 | 
60,Male,4,1.9,238,119,350,7.1,3.3,0.8,1 92 | 60,Male,5.7,2.8,214,412,850,7.3,3.2,0.78,1 93 | 60,Male,6.8,3.2,308,404,794,6.8,3,0.7,1 94 | 60,Male,8.6,4,298,412,850,7.4,3,0.6,1 95 | 60,Male,5.8,2.7,204,220,400,7,3,0.7,1 96 | 60,Male,5.2,2.4,168,126,202,6.8,2.9,0.7,1 97 | 75,Male,0.9,0.2,282,25,23,4.4,2.2,1,1 98 | 39,Male,3.8,1.5,298,102,630,7.1,3.3,0.8,1 99 | 39,Male,6.6,3,215,190,950,4,1.7,0.7,1 100 | 18,Male,0.6,0.1,265,97,161,5.9,3.1,1.1,1 101 | 18,Male,0.7,0.1,312,308,405,6.9,3.7,1.1,1 102 | 27,Male,0.6,0.2,161,27,28,3.7,1.6,0.76,2 103 | 27,Male,0.7,0.2,243,21,23,5.3,2.3,0.7,2 104 | 17,Male,0.9,0.2,224,36,45,6.9,4.2,1.55,1 105 | 55,Female,0.8,0.2,225,14,23,6.1,3.3,1.2,2 106 | 63,Male,0.5,0.1,170,21,28,5.5,2.5,0.8,1 107 | 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2 108 | 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2 109 | 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2 110 | 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2 111 | 36,Male,0.9,0.1,486,25,34,5.9,2.8,0.9,2 112 | 24,Female,0.7,0.2,188,11,10,5.5,2.3,0.71,2 113 | 48,Male,3.2,1.6,257,33,116,5.7,2.2,0.62,1 114 | 27,Male,1.2,0.4,179,63,39,6.1,3.3,1.1,2 115 | 74,Male,0.6,0.1,272,24,98,5,2,0.6,1 116 | 50,Male,5.8,3,661,181,285,5.7,2.3,0.67,2 117 | 50,Male,7.3,3.6,1580,88,64,5.6,2.3,0.6,2 118 | 48,Male,0.7,0.1,1630,74,149,5.3,2,0.6,1 119 | 32,Male,12.7,6.2,194,2000,2946,5.7,3.3,1.3,1 120 | 32,Male,15.9,7,280,1350,1600,5.6,2.8,1,1 121 | 32,Male,18,8.2,298,1250,1050,5.4,2.6,0.9,1 122 | 32,Male,23,11.3,300,482,275,7.1,3.5,0.9,1 123 | 32,Male,22.7,10.2,290,322,113,6.6,2.8,0.7,1 124 | 58,Male,1.7,0.8,188,60,84,5.9,3.5,1.4,2 125 | 64,Female,0.8,0.2,178,17,18,6.3,3.1,0.9,1 126 | 28,Male,0.6,0.1,177,36,29,6.9,4.1,1.4,2 127 | 60,Male,1.8,0.5,201,45,25,3.9,1.7,0.7,2 128 | 48,Male,5.8,2.5,802,133,88,6,2.8,0.8,1 129 | 64,Male,3,1.4,248,46,40,6.5,3.2,0.9,1 130 | 58,Female,1.7,0.8,1896,61,83,8,3.9,0.95,1 131 | 45,Male,2.8,1.7,263,57,65,5.1,2.3,0.8,1 132 | 45,Male,3.2,1.4,512,50,58,6,2.7,0.8,1 133 | 70,Female,0.7,0.2,237,18,28,5.8,2.5,0.75,2 134 | 
18,Female,0.8,0.2,199,34,31,6.5,3.5,1.16,2 135 | 53,Male,0.9,0.4,238,17,14,6.6,2.9,0.8,1 136 | 18,Male,1.8,0.7,178,35,36,6.8,3.6,1.1,1 137 | 66,Male,11.3,5.6,1110,1250,4929,7,2.4,0.5,1 138 | 46,Female,4.7,2.2,310,62,90,6.4,2.5,0.6,1 139 | 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1 140 | 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1 141 | 15,Male,0.8,0.2,380,25,66,6.1,3.7,1.5,1 142 | 60,Male,0.6,0.1,186,20,21,6.2,3.3,1.1,2 143 | 66,Female,4.2,2.1,159,15,30,7.1,2.2,0.4,1 144 | 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1 145 | 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1 146 | 45,Female,3.5,1.5,189,63,87,5.6,2.9,1,1 147 | 65,Male,0.8,0.2,201,18,22,5.4,2.9,1.1,2 148 | 66,Female,2.9,1.3,168,21,38,5.5,1.8,0.4,1 149 | 65,Male,0.7,0.1,392,20,30,5.3,2.8,1.1,1 150 | 50,Male,0.9,0.2,202,20,26,7.2,4.5,1.66,1 151 | 60,Male,0.8,0.2,286,21,27,7.1,4,1.2,1 152 | 56,Male,1.1,0.5,180,30,42,6.9,3.8,1.2,2 153 | 50,Male,1.6,0.8,218,18,20,5.9,2.9,0.96,1 154 | 46,Female,0.8,0.2,182,20,40,6,2.9,0.9,1 155 | 52,Male,0.6,0.1,178,26,27,6.5,3.6,1.2,2 156 | 34,Male,5.9,2.5,290,45,233,5.6,2.7,0.9,1 157 | 34,Male,8.7,4,298,58,138,5.8,2.4,0.7,1 158 | 32,Male,0.9,0.3,462,70,82,6.2,3.1,1,1 159 | 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1 160 | 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1 161 | 50,Male,1.2,0.4,282,36,32,7.2,3.9,1.1,1 162 | 60,Male,11,4.9,750,140,350,5.5,2.1,0.6,1 163 | 60,Male,11.5,5,1050,99,187,6.2,2.8,0.8,1 164 | 60,Male,5.8,2.7,599,43,66,5.4,1.8,0.5,1 165 | 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1 166 | 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1 167 | 48,Male,4.5,2.3,282,13,74,7,2.4,0.52,1 168 | 55,Male,75,3.6,332,40,66,6.2,2.5,0.6,1 169 | 47,Female,3,1.5,292,64,67,5.6,1.8,0.47,1 170 | 60,Male,22.8,12.6,962,53,41,6.9,3.3,0.9,1 171 | 60,Male,8.9,4,950,33,32,6.8,3.1,0.8,1 172 | 72,Male,1.7,0.8,200,28,37,6.2,3,0.93,1 173 | 44,Female,1.9,0.6,298,378,602,6.6,3.3,1,1 174 | 55,Male,14.1,7.6,750,35,63,5,1.6,0.47,1 175 | 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1 176 | 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1 177 | 
31,Male,0.8,0.2,198,43,31,7.3,4,1.2,1 178 | 55,Male,0.8,0.2,482,112,99,5.7,2.6,0.8,1 179 | 75,Male,14.8,9,1020,71,42,5.3,2.2,0.7,1 180 | 75,Male,10.6,5,562,37,29,5.1,1.8,0.5,1 181 | 75,Male,8,4.6,386,30,25,5.5,1.8,0.48,1 182 | 75,Male,2.8,1.3,250,23,29,2.7,0.9,0.5,1 183 | 75,Male,2.9,1.3,218,33,37,3,1.5,1,1 184 | 65,Male,1.9,0.8,170,36,43,3.8,1.4,0.58,2 185 | 40,Male,0.6,0.1,171,20,17,5.4,2.5,0.8,1 186 | 64,Male,1.1,0.4,201,18,19,6.9,4.1,1.4,1 187 | 38,Male,1.5,0.4,298,60,103,6,3,1,2 188 | 60,Male,3.2,1.8,750,79,145,7.8,3.2,0.69,1 189 | 60,Male,2.1,1,191,114,247,4,1.6,0.6,1 190 | 60,Male,1.9,0.8,614,42,38,4.5,1.8,0.6,1 191 | 48,Female,0.8,0.2,218,32,28,5.2,2.5,0.9,2 192 | 60,Male,6.3,3.2,314,118,114,6.6,3.7,1.27,1 193 | 60,Male,5.8,3,257,107,104,6.6,3.5,1.12,1 194 | 60,Male,2.3,0.6,272,79,51,6.6,3.5,1.1,1 195 | 49,Male,1.3,0.4,206,30,25,6,3.1,1.06,2 196 | 49,Male,2,0.6,209,48,32,5.7,3,1.1,2 197 | 60,Male,2.4,1,1124,30,54,5.2,1.9,0.5,1 198 | 60,Male,2,1.1,664,52,104,6,2.1,0.53,1 199 | 26,Female,0.6,0.2,142,12,32,5.7,2.4,0.75,1 200 | 41,Male,0.9,0.2,169,22,18,6.1,3,0.9,2 201 | 7,Female,27.2,11.8,1420,790,1050,6.1,2,0.4,1 202 | 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1 203 | 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1 204 | 38,Female,0.8,0.2,145,19,23,6.1,3.1,1.03,2 205 | 21,Male,1,0.3,142,27,21,6.4,3.5,1.2,2 206 | 21,Male,0.7,0.2,135,27,26,6.4,3.3,1,2 207 | 45,Male,2.5,1.2,163,28,22,7.6,4,1.1,1 208 | 40,Male,3.6,1.8,285,50,60,7,2.9,0.7,1 209 | 40,Male,3.9,1.7,350,950,1500,6.7,3.8,1.3,1 210 | 70,Female,0.9,0.3,220,53,95,6.1,2.8,0.68,1 211 | 45,Female,0.9,0.3,189,23,33,6.6,3.9,,1 212 | 28,Male,0.8,0.3,190,20,14,4.1,2.4,1.4,1 213 | 42,Male,2.7,1.3,219,60,180,7,3.2,0.8,1 214 | 22,Male,2.7,1,160,82,127,5.5,3.1,1.2,2 215 | 8,Female,0.9,0.2,401,25,58,7.5,3.4,0.8,1 216 | 38,Male,1.7,1,180,18,34,7.2,3.6,1,1 217 | 66,Male,0.6,0.2,100,17,148,5,3.3,1.9,2 218 | 55,Male,0.9,0.2,116,36,16,6.2,3.2,1,2 219 | 49,Male,1.1,0.5,159,30,31,7,4.3,1.5,1 220 | 6,Male,0.6,0.1,289,38,30,4.8,2,0.7,2 221 | 
37,Male,0.8,0.2,125,41,39,6.4,3.4,1.1,1 222 | 37,Male,0.8,0.2,147,27,46,5,2.5,1,1 223 | 47,Male,0.9,0.2,192,38,24,7.3,4.3,1.4,1 224 | 47,Male,0.9,0.2,265,40,28,8,4,1,1 225 | 50,Male,1.1,0.3,175,20,19,7.1,4.5,1.7,2 226 | 70,Male,1.7,0.5,400,56,44,5.7,3.1,1.1,1 227 | 26,Male,0.6,0.2,120,45,51,7.9,4,1,1 228 | 26,Male,1.3,0.4,173,38,62,8,4,1,1 229 | 68,Female,0.7,0.2,186,18,15,6.4,3.8,1.4,1 230 | 65,Female,1,0.3,202,26,13,5.3,2.6,0.9,2 231 | 46,Male,0.6,0.2,290,26,21,6,3,1,1 232 | 61,Male,1.5,0.6,196,61,85,6.7,3.8,1.3,2 233 | 61,Male,0.8,0.1,282,85,231,8.5,4.3,1,1 234 | 50,Male,2.7,1.6,157,149,156,7.9,3.1,0.6,1 235 | 33,Male,2,1.4,2110,48,89,6.2,3,0.9,1 236 | 40,Female,0.9,0.2,285,32,27,7.7,3.5,0.8,1 237 | 60,Male,1.5,0.6,360,230,298,4.5,2,0.8,1 238 | 22,Male,0.8,0.2,300,57,40,7.9,3.8,0.9,2 239 | 35,Female,0.9,0.3,158,20,16,8,4,1,1 240 | 35,Female,0.9,0.2,190,40,35,7.3,4.7,1.8,2 241 | 40,Male,0.9,0.3,196,69,48,6.8,3.1,0.8,1 242 | 48,Male,0.7,0.2,165,32,30,8,4,1,2 243 | 51,Male,0.8,0.2,230,24,46,6.5,3.1,,1 244 | 29,Female,0.8,0.2,205,30,23,8.2,4.1,1,1 245 | 28,Female,0.9,0.2,316,25,23,8.5,5.5,1.8,1 246 | 54,Male,0.8,0.2,218,20,19,6.3,2.5,0.6,1 247 | 54,Male,0.9,0.2,290,15,18,6.1,2.8,0.8,1 248 | 55,Male,1.8,9,272,22,79,6.1,2.7,0.7,1 249 | 55,Male,0.9,0.2,190,25,28,5.9,2.7,0.8,1 250 | 40,Male,0.7,0.1,202,37,29,5,2.6,1,1 251 | 33,Male,1.2,0.3,498,28,25,7,3,0.7,1 252 | 33,Male,2.1,1.3,480,38,22,6.5,3,0.8,1 253 | 33,Male,0.9,0.8,680,37,40,5.9,2.6,0.8,1 254 | 65,Male,1.1,0.3,258,48,40,7,3.9,1.2,2 255 | 35,Female,0.6,0.2,180,12,15,5.2,2.7,,2 256 | 38,Female,0.7,0.1,152,90,21,7.1,4.2,1.4,2 257 | 38,Male,1.7,0.7,859,89,48,6,3,1,1 258 | 50,Male,0.9,0.3,901,23,17,6.2,3.5,1.2,1 259 | 44,Male,0.8,0.2,335,148,86,5.6,3,1.1,1 260 | 36,Male,0.8,0.2,182,31,34,6.4,3.8,1.4,2 261 | 42,Male,30.5,14.2,285,65,130,5.2,2.1,0.6,1 262 | 42,Male,16.4,8.9,245,56,87,5.4,2,0.5,1 263 | 33,Male,1.5,7,505,205,140,7.5,3.9,1,1 264 | 18,Male,0.8,0.2,228,55,54,6.9,4,1.3,1 265 | 
38,Female,0.8,0.2,185,25,21,7,3,0.7,1 266 | 38,Male,0.8,0.2,247,55,92,7.4,4.3,1.38,2 267 | 4,Male,0.9,0.2,348,30,34,8,4,1,2 268 | 62,Male,1.2,0.4,195,38,54,6.3,3.8,1.5,1 269 | 43,Female,0.9,0.3,140,12,29,7.4,3.5,1.8,1 270 | 40,Male,14.5,6.4,358,50,75,5.7,2.1,0.5,1 271 | 26,Male,0.6,0.1,110,15,20,2.8,1.6,1.3,1 272 | 37,Male,0.7,0.2,235,96,54,9.5,4.9,1,1 273 | 4,Male,0.8,0.2,460,152,231,6.5,3.2,0.9,2 274 | 21,Male,18.5,9.5,380,390,500,8.2,4.1,1,1 275 | 30,Male,0.7,0.2,262,15,18,9.6,4.7,1.2,1 276 | 33,Male,1.8,0.8,196,25,22,8,4,1,1 277 | 26,Male,1.9,0.8,180,22,19,8.2,4.1,1,2 278 | 35,Male,0.9,0.2,190,25,20,6.4,3.6,1.2,2 279 | 60,Male,2,0.8,190,45,40,6,2.8,0.8,1 280 | 45,Male,2.2,0.8,209,25,20,8,4,1,1 281 | 48,Female,1,1.4,144,18,14,8.3,4.2,1,1 282 | 58,Male,0.8,0.2,123,56,48,6,3,1,1 283 | 50,Male,0.7,0.2,192,18,15,7.4,4.2,1.3,2 284 | 50,Male,0.7,0.2,188,12,14,7,3.4,0.9,1 285 | 18,Male,1.3,0.7,316,10,21,6,2.1,0.5,2 286 | 18,Male,0.9,0.3,300,30,48,8,4,1,1 287 | 13,Male,1.5,0.5,575,29,24,7.9,3.9,0.9,1 288 | 34,Female,0.8,0.2,192,15,12,8.6,4.7,1.2,1 289 | 43,Male,1.3,0.6,155,15,20,8,4,1,2 290 | 50,Female,1,0.5,239,16,39,7.5,3.7,0.9,1 291 | 57,Male,4.5,2.3,315,120,105,7,4,1.3,1 292 | 45,Female,1,0.3,250,48,44,8.6,4.3,1,1 293 | 60,Male,0.7,0.2,174,32,14,7.8,4.2,1.1,2 294 | 45,Male,0.6,0.2,245,22,24,7.1,3.4,0.9,1 295 | 23,Male,1.1,0.5,191,37,41,7.7,4.3,1.2,2 296 | 22,Male,2.4,1,340,25,21,8.3,4.5,1.1,1 297 | 22,Male,0.6,0.2,202,78,41,8,3.9,0.9,1 298 | 74,Female,0.9,0.3,234,16,19,7.9,4,1,1 299 | 25,Female,0.9,0.3,159,24,25,6.9,4.4,1.7,2 300 | 31,Female,1.1,0.3,190,26,15,7.9,3.8,0.9,1 301 | 24,Female,0.9,0.2,195,40,35,7.4,4.1,1.2,2 302 | 58,Male,0.8,0.2,180,32,25,8.2,4.4,1.1,2 303 | 51,Female,0.9,0.2,280,21,30,6.7,3.2,0.8,1 304 | 50,Female,1.7,0.6,430,28,32,6.8,3.5,1,1 305 | 50,Male,0.7,0.2,206,18,17,8.4,4.2,1,2 306 | 55,Female,0.8,0.2,155,21,17,6.9,3.8,1.4,1 307 | 54,Female,1.4,0.7,195,36,16,7.9,3.7,0.9,2 308 | 48,Male,1.6,1,588,74,113,7.3,2.4,0.4,1 309 | 
30,Male,0.8,0.2,174,21,47,4.6,2.3,1,1 310 | 45,Female,0.8,0.2,165,22,18,8.2,4.1,1,1 311 | 48,Female,1.1,0.7,527,178,250,8,4.2,1.1,1 312 | 51,Male,0.8,0.2,175,48,22,8.1,4.6,1.3,1 313 | 54,Female,23.2,12.6,574,43,47,7.2,3.5,0.9,1 314 | 27,Male,1.3,0.6,106,25,54,8.5,4.8,,2 315 | 30,Female,0.8,0.2,158,25,22,7.9,4.5,1.3,2 316 | 26,Male,2,0.9,195,24,65,7.8,4.3,1.2,1 317 | 22,Male,0.9,0.3,179,18,21,6.7,3.7,1.2,2 318 | 44,Male,0.9,0.2,182,29,82,7.1,3.7,1,2 319 | 35,Male,0.7,0.2,198,42,30,6.8,3.4,1,1 320 | 38,Male,3.7,2.2,216,179,232,7.8,4.5,1.3,1 321 | 14,Male,0.9,0.3,310,21,16,8.1,4.2,1,2 322 | 30,Female,0.7,0.2,63,31,27,5.8,3.4,1.4,1 323 | 30,Female,0.8,0.2,198,30,58,5.2,2.8,1.1,1 324 | 36,Male,1.7,0.5,205,36,34,7.1,3.9,1.2,1 325 | 12,Male,0.8,0.2,302,47,67,6.7,3.5,1.1,2 326 | 60,Male,2.6,1.2,171,42,37,5.4,2.7,1,1 327 | 42,Male,0.8,0.2,158,27,23,6.7,3.1,0.8,2 328 | 36,Female,1.2,0.4,358,160,90,8.3,4.4,1.1,2 329 | 24,Male,3.3,1.6,174,11,33,7.6,3.9,1,2 330 | 43,Male,0.8,0.2,192,29,20,6,2.9,0.9,2 331 | 21,Male,0.7,0.2,211,14,23,7.3,4.1,1.2,2 332 | 26,Male,2,0.9,157,54,68,6.1,2.7,0.8,1 333 | 26,Male,1.7,0.6,210,62,56,5.4,2.2,0.6,1 334 | 26,Male,7.1,3.3,258,80,113,6.2,2.9,0.8,1 335 | 36,Female,0.7,0.2,152,21,25,5.9,3.1,1.1,2 336 | 13,Female,0.7,0.2,350,17,24,7.4,4,1.1,1 337 | 13,Female,0.7,0.1,182,24,19,8.9,4.9,1.2,1 338 | 75,Male,6.7,3.6,458,198,143,6.2,3.2,1,1 339 | 75,Male,2.5,1.2,375,85,68,6.4,2.9,0.8,1 340 | 75,Male,1.8,0.8,405,79,50,6.1,2.9,0.9,1 341 | 75,Male,1.4,0.4,215,50,30,5.9,2.6,0.7,1 342 | 75,Male,0.9,0.2,206,44,33,6.2,2.9,0.8,1 343 | 36,Female,0.8,0.2,650,70,138,6.6,3.1,0.8,1 344 | 35,Male,0.8,0.2,198,36,32,7,4,1.3,2 345 | 70,Male,3.1,1.6,198,40,28,5.6,2,0.5,1 346 | 37,Male,0.8,0.2,195,60,40,8.2,5,1.5,2 347 | 60,Male,2.9,1.3,230,32,44,5.6,2,0.5,1 348 | 46,Male,0.6,0.2,115,14,11,6.9,3.4,0.9,1 349 | 38,Male,0.7,0.2,216,349,105,7,3.5,1,1 350 | 70,Male,1.3,0.4,358,19,14,6.1,2.8,0.8,1 351 | 49,Female,0.8,0.2,158,19,15,6.6,3.6,1.2,2 352 | 
37,Male,1.8,0.8,145,62,58,5.7,2.9,1,1 353 | 37,Male,1.3,0.4,195,41,38,5.3,2.1,0.6,1 354 | 26,Female,0.7,0.2,144,36,33,8.2,4.3,1.1,1 355 | 48,Female,1.4,0.8,621,110,176,7.2,3.9,1.1,1 356 | 48,Female,0.8,0.2,150,25,23,7.5,3.9,1,1 357 | 19,Male,1.4,0.8,178,13,26,8,4.6,1.3,2 358 | 33,Male,0.7,0.2,256,21,30,8.5,3.9,0.8,1 359 | 33,Male,2.1,0.7,205,50,38,6.8,3,0.7,1 360 | 37,Male,0.7,0.2,176,28,34,5.6,2.6,0.8,1 361 | 69,Female,0.8,0.2,146,42,70,8.4,4.9,1.4,2 362 | 24,Male,0.7,0.2,218,47,26,6.6,3.3,1,1 363 | 65,Female,0.7,0.2,182,23,28,6.8,2.9,0.7,2 364 | 55,Male,1.1,0.3,215,21,15,6.2,2.9,0.8,2 365 | 42,Female,0.9,0.2,165,26,29,8.5,4.4,1,2 366 | 21,Male,0.8,0.2,183,33,57,6.8,3.5,1,2 367 | 40,Male,0.7,0.2,176,28,43,5.3,2.4,0.8,2 368 | 16,Male,0.7,0.2,418,28,35,7.2,4.1,1.3,2 369 | 60,Male,2.2,1,271,45,52,6.1,2.9,0.9,2 370 | 42,Female,0.8,0.2,182,22,20,7.2,3.9,1.1,1 371 | 58,Female,0.8,0.2,130,24,25,7,4,1.3,1 372 | 54,Female,22.6,11.4,558,30,37,7.8,3.4,0.8,1 373 | 33,Male,0.8,0.2,135,30,29,7.2,4.4,1.5,2 374 | 48,Male,0.7,0.2,326,29,17,8.7,5.5,1.7,1 375 | 25,Female,0.7,0.1,140,32,25,7.6,4.3,1.3,2 376 | 56,Female,0.7,0.1,145,26,23,7,4,1.3,2 377 | 47,Male,3.5,1.6,206,32,31,6.8,3.4,1,1 378 | 33,Male,0.7,0.1,168,35,33,7,3.7,1.1,1 379 | 20,Female,0.6,0.2,202,12,13,6.1,3,0.9,2 380 | 50,Female,0.7,0.1,192,20,41,7.3,3.3,0.8,1 381 | 72,Male,0.7,0.2,185,16,22,7.3,3.7,1,2 382 | 50,Male,1.7,0.8,331,36,53,7.3,3.4,0.9,1 383 | 39,Male,0.6,0.2,188,28,43,8.1,3.3,0.6,1 384 | 58,Female,0.7,0.1,172,27,22,6.7,3.2,0.9,1 385 | 60,Female,1.4,0.7,159,10,12,4.9,2.5,1,2 386 | 34,Male,3.7,2.1,490,115,91,6.5,2.8,0.7,1 387 | 50,Male,0.8,0.2,152,29,30,7.4,4.1,1.3,1 388 | 38,Male,2.7,1.4,105,25,21,7.5,4.2,1.2,2 389 | 51,Male,0.8,0.2,160,34,20,6.9,3.7,1.1,1 390 | 46,Male,0.8,0.2,160,31,40,7.3,3.8,1.1,1 391 | 72,Male,0.6,0.1,102,31,35,6.3,3.2,1,1 392 | 72,Male,0.8,0.2,148,23,35,6,3,1,1 393 | 75,Male,0.9,0.2,162,25,20,6.9,3.7,1.1,1 394 | 41,Male,7.5,4.3,149,94,92,6.3,3.1,0.9,1 395 | 
41,Male,2.7,1.3,580,142,68,8,4,1,1 396 | 48,Female,1,0.3,310,37,56,5.9,2.5,0.7,1 397 | 45,Male,0.8,0.2,140,24,20,6.3,3.2,1,2 398 | 74,Male,1,0.3,175,30,32,6.4,3.4,1.1,1 399 | 78,Male,1,0.3,152,28,70,6.3,3.1,0.9,1 400 | 38,Male,0.8,0.2,208,25,50,7.1,3.7,1,1 401 | 27,Male,1,0.2,205,137,145,6,3,1,1 402 | 66,Female,0.7,0.2,162,24,20,6.4,3.2,1,2 403 | 50,Male,7.3,3.7,92,44,236,6.8,1.6,0.3,1 404 | 42,Female,0.5,0.1,162,155,108,8.1,4,0.9,1 405 | 65,Male,0.7,0.2,199,19,22,6.3,3.6,1.3,2 406 | 22,Male,0.8,0.2,198,20,26,6.8,3.9,1.3,1 407 | 31,Female,0.8,0.2,215,15,21,7.6,4,1.1,1 408 | 45,Male,0.7,0.2,180,18,58,6.7,3.7,1.2,2 409 | 12,Male,1,0.2,719,157,108,7.2,3.7,1,1 410 | 48,Male,2.4,1.1,554,141,73,7.5,3.6,0.9,1 411 | 48,Male,5,2.6,555,284,190,6.5,3.3,1,1 412 | 18,Male,1.4,0.6,215,440,850,5,1.9,0.6,1 413 | 23,Female,2.3,0.8,509,28,44,6.9,2.9,0.7,2 414 | 65,Male,4.9,2.7,190,33,71,7.1,2.9,0.7,1 415 | 48,Male,0.7,0.2,208,15,30,4.6,2.1,0.8,2 416 | 65,Male,1.4,0.6,260,28,24,5.2,2.2,0.7,2 417 | 70,Male,1.3,0.3,690,93,40,3.6,2.7,0.7,1 418 | 70,Male,0.6,0.1,862,76,180,6.3,2.7,0.75,1 419 | 11,Male,0.7,0.1,592,26,29,7.1,4.2,1.4,2 420 | 50,Male,4.2,2.3,450,69,50,7,3,0.7,1 421 | 55,Female,8.2,3.9,1350,52,65,6.7,2.9,0.7,1 422 | 55,Female,10.9,5.1,1350,48,57,6.4,2.3,0.5,1 423 | 26,Male,1,0.3,163,48,71,7.1,3.7,1,2 424 | 41,Male,1.2,0.5,246,34,42,6.9,3.4,0.97,1 425 | 53,Male,1.6,0.9,178,44,59,6.5,3.9,1.5,2 426 | 32,Female,0.7,0.1,240,12,15,7,3,0.7,1 427 | 58,Male,0.4,0.1,100,59,126,4.3,2.5,1.4,1 428 | 45,Male,1.3,0.6,166,49,42,5.6,2.5,0.8,2 429 | 65,Male,0.9,0.2,170,33,66,7,3,0.75,1 430 | 52,Female,0.6,0.1,194,10,12,6.9,3.3,0.9,2 431 | 73,Male,1.9,0.7,1750,102,141,5.5,2,0.5,1 432 | 53,Female,0.7,0.1,182,20,33,4.8,1.9,0.6,1 433 | 47,Female,0.8,0.2,236,10,13,6.7,2.9,0.76,2 434 | 29,Male,0.7,0.2,165,55,87,7.5,4.6,1.58,1 435 | 41,Female,0.9,0.2,201,31,24,7.6,3.8,1,2 436 | 30,Female,0.7,0.2,194,32,36,7.5,3.6,0.92,2 437 | 17,Female,0.5,0.1,206,28,21,7.1,4.5,1.7,2 438 | 
23,Male,1,0.3,212,41,80,6.2,3.1,1,1 439 | 35,Male,1.6,0.7,157,15,44,5.2,2.5,0.9,1 440 | 65,Male,0.8,0.2,162,30,90,3.8,1.4,0.5,1 441 | 42,Female,0.8,0.2,168,25,18,6.2,3.1,1,1 442 | 49,Female,0.8,0.2,198,23,20,7,4.3,1.5,1 443 | 42,Female,2.3,1.1,292,29,39,4.1,1.8,0.7,1 444 | 42,Female,7.4,3.6,298,52,102,4.6,1.9,0.7,1 445 | 42,Female,0.7,0.2,152,35,81,6.2,3.2,1.06,1 446 | 61,Male,0.8,0.2,163,18,19,6.3,2.8,0.8,2 447 | 17,Male,0.9,0.2,279,40,46,7.3,4,1.2,2 448 | 54,Male,0.8,0.2,181,35,20,5.5,2.7,0.96,1 449 | 45,Female,23.3,12.8,1550,425,511,7.7,3.5,0.8,1 450 | 48,Female,0.8,0.2,142,26,25,6,2.6,0.7,1 451 | 48,Female,0.9,0.2,173,26,27,6.2,3.1,1,1 452 | 65,Male,7.9,4.3,282,50,72,6,3,1,1 453 | 35,Male,0.8,0.2,279,20,25,7.2,3.2,0.8,1 454 | 58,Male,0.9,0.2,1100,25,36,7.1,3.5,0.9,1 455 | 46,Male,0.7,0.2,224,40,23,7.1,3,0.7,1 456 | 28,Male,0.6,0.2,159,15,16,7,3.5,1,2 457 | 21,Female,0.6,0.1,186,25,22,6.8,3.4,1,1 458 | 32,Male,0.7,0.2,189,22,43,7.4,3.1,0.7,2 459 | 61,Male,0.8,0.2,192,28,35,6.9,3.4,0.9,2 460 | 26,Male,6.8,3.2,140,37,19,3.6,0.9,0.3,1 461 | 65,Male,1.1,0.5,686,16,46,5.7,1.5,0.35,1 462 | 22,Female,2.2,1,215,159,51,5.5,2.5,0.8,1 463 | 28,Female,0.8,0.2,309,55,23,6.8,4.1,1.51,1 464 | 38,Male,0.7,0.2,110,22,18,6.4,2.5,0.64,1 465 | 25,Male,0.8,0.1,130,23,42,8,4,1,1 466 | 45,Female,0.7,0.2,164,21,53,4.5,1.4,0.45,2 467 | 45,Female,0.6,0.1,270,23,42,5.1,2,0.5,2 468 | 28,Female,0.6,0.1,137,22,16,4.9,1.9,0.6,2 469 | 28,Female,1,0.3,90,18,108,6.8,3.1,0.8,2 470 | 66,Male,1,0.3,190,30,54,5.3,2.1,0.6,1 471 | 66,Male,0.8,0.2,165,22,32,4.4,2,0.8,1 472 | 66,Male,1.1,0.5,167,13,56,7.1,4.1,1.36,1 473 | 49,Female,0.6,0.1,185,17,26,6.6,2.9,0.7,2 474 | 42,Male,0.7,0.2,197,64,33,5.8,2.4,0.7,2 475 | 42,Male,1,0.3,154,38,21,6.8,3.9,1.3,2 476 | 35,Male,2,1.1,226,33,135,6,2.7,0.8,2 477 | 38,Male,2.2,1,310,119,42,7.9,4.1,1,2 478 | 38,Male,0.9,0.3,310,15,25,5.5,2.7,1,1 479 | 55,Male,0.6,0.2,220,24,32,5.1,2.4,0.88,1 480 | 33,Male,7.1,3.7,196,622,497,6.9,3.6,1.09,1 481 | 
33,Male,3.4,1.6,186,779,844,7.3,3.2,0.7,1 482 | 7,Male,0.5,0.1,352,28,51,7.9,4.2,1.1,2 483 | 45,Male,2.3,1.3,282,132,368,7.3,4,1.2,1 484 | 45,Male,1.1,0.4,92,91,188,7.2,3.8,1.11,1 485 | 30,Male,0.8,0.2,182,46,57,7.8,4.3,1.2,2 486 | 62,Male,5,2.1,103,18,40,5,2.1,1.72,1 487 | 22,Female,6.7,3.2,850,154,248,6.2,2.8,0.8,1 488 | 42,Female,0.8,0.2,195,18,15,6.7,3,0.8,1 489 | 32,Male,0.7,0.2,276,102,190,6,2.9,0.93,1 490 | 60,Male,0.7,0.2,171,31,26,7,3.5,1,2 491 | 65,Male,0.8,0.1,146,17,29,5.9,3.2,1.18,2 492 | 53,Female,0.8,0.2,193,96,57,6.7,3.6,1.16,1 493 | 27,Male,1,0.3,180,56,111,6.8,3.9,1.85,2 494 | 35,Female,1,0.3,805,133,103,7.9,3.3,0.7,1 495 | 65,Male,0.7,0.2,265,30,28,5.2,1.8,0.52,2 496 | 25,Male,0.7,0.2,185,196,401,6.5,3.9,1.5,1 497 | 32,Male,0.7,0.2,165,31,29,6.1,3,0.96,2 498 | 24,Male,1,0.2,189,52,31,8,4.8,1.5,1 499 | 67,Male,2.2,1.1,198,42,39,7.2,3,0.7,1 500 | 68,Male,1.8,0.5,151,18,22,6.5,4,1.6,1 501 | 55,Male,3.6,1.6,349,40,70,7.2,2.9,0.6,1 502 | 70,Male,2.7,1.2,365,62,55,6,2.4,0.6,1 503 | 36,Male,2.8,1.5,305,28,76,5.9,2.5,0.7,1 504 | 42,Male,0.8,0.2,127,29,30,4.9,2.7,1.2,1 505 | 53,Male,19.8,10.4,238,39,221,8.1,2.5,0.4,1 506 | 32,Male,30.5,17.1,218,39,79,5.5,2.7,0.9,1 507 | 32,Male,32.6,14.1,219,95,235,5.8,3.1,1.1,1 508 | 56,Male,17.7,8.8,239,43,185,5.6,2.4,0.7,1 509 | 50,Male,0.9,0.3,194,190,73,7.5,3.9,1,1 510 | 46,Male,18.4,8.5,450,119,230,7.5,3.3,0.7,1 511 | 46,Male,20,10,254,140,540,5.4,3,1.2,1 512 | 37,Female,0.8,0.2,205,31,36,9.2,4.6,1,2 513 | 45,Male,2.2,1.6,320,37,48,6.8,3.4,1,1 514 | 56,Male,1,0.3,195,22,28,5.8,2.6,0.8,2 515 | 69,Male,0.9,0.2,215,32,24,6.9,3,0.7,1 516 | 49,Male,1,0.3,230,48,58,8.4,4.2,1,1 517 | 49,Male,3.9,2.1,189,65,181,6.9,3,0.7,1 518 | 60,Male,0.9,0.3,168,16,24,6.7,3,0.8,1 519 | 28,Male,0.9,0.2,215,50,28,8,4,1,1 520 | 45,Male,2.9,1.4,210,74,68,7.2,3.6,1,1 521 | 35,Male,26.3,12.1,108,168,630,9.2,2,0.3,1 522 | 62,Male,1.8,0.9,224,69,155,8.6,4,0.8,1 523 | 55,Male,4.4,2.9,230,14,25,7.1,2.1,0.4,1 524 | 
46,Female,0.8,0.2,185,24,15,7.9,3.7,0.8,1 525 | 50,Male,0.6,0.2,137,15,16,4.8,2.6,1.1,1 526 | 29,Male,0.8,0.2,156,12,15,6.8,3.7,1.1,2 527 | 53,Female,0.9,0.2,210,35,32,8,3.9,0.9,2 528 | 46,Male,9.4,5.2,268,21,63,6.4,2.8,0.8,1 529 | 40,Male,3.5,1.6,298,68,200,7.1,3.4,0.9,1 530 | 45,Male,1.7,0.8,315,12,38,6.3,2.1,0.5,1 531 | 55,Male,3.3,1.5,214,54,152,5.1,1.8,0.5,1 532 | 22,Female,1.1,0.3,138,14,21,7,3.8,1.1,2 533 | 40,Male,30.8,18.3,285,110,186,7.9,2.7,0.5,1 534 | 62,Male,0.7,0.2,162,12,17,8.2,3.2,0.6,2 535 | 46,Female,1.4,0.4,298,509,623,3.6,1,0.3,1 536 | 39,Male,1.6,0.8,230,88,74,8,4,1,2 537 | 60,Male,19.6,9.5,466,46,52,6.1,2,0.4,1 538 | 46,Male,15.8,7.2,227,67,220,6.9,2.6,0.6,1 539 | 10,Female,0.8,0.1,395,25,75,7.6,3.6,0.9,1 540 | 52,Male,1.8,0.8,97,85,78,6.4,2.7,0.7,1 541 | 65,Female,0.7,0.2,406,24,45,7.2,3.5,0.9,2 542 | 42,Male,0.8,0.2,114,21,23,7,3,0.7,2 543 | 42,Male,0.8,0.2,198,29,19,6.6,3,0.8,2 544 | 62,Male,0.7,0.2,173,46,47,7.3,4.1,1.2,2 545 | 40,Male,1.2,0.6,204,23,27,7.6,4,1.1,1 546 | 54,Female,5.5,3.2,350,67,42,7,3.2,0.8,1 547 | 45,Female,0.7,0.2,153,41,42,4.5,2.2,0.9,2 548 | 45,Male,20.2,11.7,188,47,32,5.4,2.3,0.7,1 549 | 50,Female,27.7,10.8,380,39,348,7.1,2.3,0.4,1 550 | 42,Male,11.1,6.1,214,60,186,6.9,2.8,2.8,1 551 | 40,Female,2.1,1,768,74,141,7.8,4.9,1.6,1 552 | 46,Male,3.3,1.5,172,25,41,5.6,2.4,0.7,1 553 | 29,Male,1.2,0.4,160,20,22,6.2,3,0.9,2 554 | 45,Male,0.6,0.1,196,29,30,5.8,2.9,1,1 555 | 46,Male,10.2,4.2,232,58,140,7,2.7,0.6,1 556 | 73,Male,1.8,0.9,220,20,43,6.5,3,0.8,1 557 | 55,Male,0.8,0.2,290,139,87,7,3,0.7,1 558 | 51,Male,0.7,0.1,180,25,27,6.1,3.1,1,1 559 | 51,Male,2.9,1.2,189,80,125,6.2,3.1,1,1 560 | 51,Male,4,2.5,275,382,330,7.5,4,1.1,1 561 | 26,Male,42.8,19.7,390,75,138,7.5,2.6,0.5,1 562 | 66,Male,15.2,7.7,356,321,562,6.5,2.2,0.4,1 563 | 66,Male,16.6,7.6,315,233,384,6.9,2,0.4,1 564 | 66,Male,17.3,8.5,388,173,367,7.8,2.6,0.5,1 565 | 64,Male,1.4,0.5,298,31,83,7.2,2.6,0.5,1 566 | 38,Female,0.6,0.1,165,22,34,5.9,2.9,0.9,2 567 | 
def plotDistributions(listOfSeries, title = 'Distribution Summary - ', prefix = '', excludeZeros = True, showPlot = True):
    '''
    Plot the distributions of a list of numeric series, or of a list of categorical series.

    Numeric input produces a 1x2 subplot figure holding one box plot and one
    histogram per series; categorical input produces a grouped bar chart of
    value counts per series.

    listOfSeries - series that are either all numeric or all categorical
    title        - figure title
    prefix       - text prepended to each numeric trace name
    excludeZeros - when True, zero values are left out of the histograms
    showPlot     - when True, the figure is also rendered with pyo.iplot

    Returns the plotly figure.
    Raises ValueError when the input mixes numeric and categorical series.
    '''
    # dtype.kind covers every integer / unsigned / float width instead of a
    # hand-picked list of dtype names (float32, uint8, ... were rejected before).
    isNumeric = all(series.dtype.kind in 'iuf' for series in listOfSeries)
    isCategorical = all(series.dtype.name in ('object', 'bool', 'category') for series in listOfSeries)

    if not isNumeric and not isCategorical:
        raise ValueError('Inputs seem to contain a mix of numeric and categorical data. Please fix inputs.')

    if isNumeric:
        fig = tools.make_subplots(rows=1, cols=2, subplot_titles=('Boxplot', 'Histogram'), print_grid=False)
        for series in listOfSeries:
            series_box = go.Box(
                y=series,
                name=prefix + ' ' + series.name + '(B)'
            )
            series_hist = go.Histogram(
                x = series[series != 0] if excludeZeros else series,
                name = prefix + ' ' + series.name + '(H)',
                opacity=0.75
            )

            fig.append_trace(series_box,1,1)
            fig.append_trace(series_hist,1,2)

        fig['layout'].update(showlegend=True, title=title, barmode='overlay')
    else:
        bar_list = []
        for series in listOfSeries:
            # Count each distinct value once and reuse the result for both axes.
            counts = series.groupby(series.values).size()
            # NOTE(review): every bar trace is named after the figure title, so
            # several series share one legend label - confirm this is intended.
            bar_list.append(go.Bar(
                x = counts.keys(),
                y = counts.values,
                name = title
            ))
        layout = go.Layout(
            title = title,
            barmode='group'
        )
        fig = go.Figure(data=bar_list, layout=layout)

    if showPlot:
        pyo.iplot(fig)

    return fig



def drawPlots(series, title = 'Distribution Summary - ', excludeZeros = True, by = None, showPlot = True):
    '''
    Draw a box plot and histogram for a numeric series, or a bar plot for a
    categorical one, optionally split by one or more grouping series.

    series       - the series to plot; its name is appended to the title
    title        - title prefix
    excludeZeros - when True, zero values are left out of the histograms
    by           - None, a grouping Series, or a DataFrame whose columns are
                   each used as a grouping series
    showPlot     - when True, the result is also rendered with pyo.iplot

    Returns a plotly figure, or a one-element list holding a bar trace for the
    simple bar-chart cases.
    '''
    title = title + series.name
    # The file does not import pandas, so the DataFrame test is done by type name.
    byIsFrame = type(by).__name__ == 'DataFrame'

    if series.dtype.name in ('object', 'bool', 'category'):
        # Draw Bar Chart
        if by is None:
            counts = series.groupby(series.values).size()
            series_bar = go.Bar(
                x = counts.keys(),
                y = counts.values,
                name = title
            )
            fig = [series_bar]
        else:
            bar_list = []

            def processSeries(bySeries):
                # One bar trace of value counts per distinct grouping value.
                for byValue in bySeries.unique():
                    s = series[bySeries == byValue]
                    s = s.groupby(s.values).size()
                    bar_list.append(go.Bar(
                        x = s.keys(),
                        y = s.values,
                        name = bySeries.name + ' - ' + str(byValue)
                    ))

            if byIsFrame:
                for column in by.columns:
                    processSeries(by[column])
            else:
                processSeries(by)
            layout = go.Layout(
                title = title,
                barmode='group'
            )
            fig = go.Figure(data=bar_list, layout=layout)

    else:
        # Only a Series has .unique(); a DataFrame `by` used to raise
        # AttributeError here before it could reach the per-column handling below.
        if by is not None and not byIsFrame and len(by.unique()) == len(series):
            # This means the input data is already summarized. A simple bar plot will suffice
            series_bar = go.Bar(
                x = by.tolist(),
                y = series.tolist(),
                name = title
            )
            fig = [series_bar]
        else:
            # Draw Box plot and Histogram
            fig = tools.make_subplots(rows=1, cols=2, subplot_titles=('Boxplot', 'Histogram'), print_grid=False)
            series_box = go.Box(
                y=series,
                name=series.name
            )
            series_hist = go.Histogram(
                x = series[series != 0] if excludeZeros else series,
                name = series.name,
                opacity=0.75
            )

            fig.append_trace(series_box,1,1)
            fig.append_trace(series_hist,1,2)
            if by is not None:
                def processSeriesForNumeric(bySeries, fig):
                    series_box = go.Box(
                        y = series,
                        # str() keeps the label working for non-string group values.
                        x = bySeries.apply(lambda x: bySeries.name + ' - ' + str(x)),
                        name = series.name + ' - (' +bySeries.name + ')'
                    )
                    fig.append_trace(series_box,1,1)
                    # NOTE(review): this update happens after the trace was
                    # appended; whether it affects the figure depends on how
                    # append_trace stores the trace - confirm against plotly.
                    series_box.update(x = bySeries)
                    for byValue in bySeries.unique():
                        s = series[bySeries == byValue]
                        series_hist = go.Histogram(
                            x = s[s != 0] if excludeZeros else s,
                            name = series.name + ' - (' + bySeries.name + '=' + str(byValue) + ')',
                            opacity=0.75
                        )
                        fig.append_trace(series_hist,1,2)
                    return fig

                if byIsFrame:
                    for column in by.columns:
                        fig = processSeriesForNumeric(by[column], fig)
                else:
                    fig = processSeriesForNumeric(by, fig)

            fig['layout'].update(showlegend=True, title=title, barmode='overlay')

    if showPlot:
        pyo.iplot(fig)
    return fig


def addToROCPlot(listOfActualsAndProbLists, plotData = None, pos_label = 1):
    '''
    Prepares data for drawing ROC curves. Takes inputs of the form [(label, y_test, y_prob)]

    listOfActualsAndProbLists - iterable of (label, y_test, y_probs) tuples
    plotData                  - existing list of traces to extend in place; a
                                new list is created when omitted
    pos_label                 - label of the positive class, passed to roc_curve

    Returns the list of traces (the same object as plotData when one was given).
    '''
    # A fresh list per call: the previous `plotData = []` default was shared
    # between calls, so traces from earlier plots leaked into later ones.
    if plotData is None:
        plotData = []
    for (label, y_test, y_probs) in listOfActualsAndProbLists:
        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_probs, pos_label = pos_label)
        trace1 = go.Scatter(
            x=fpr,
            y=tpr,
            mode='markers',
            marker=dict(size=4,
                        line=dict(width=1)
                        ),
            name=label,
            text=thresholds
        )
        plotData.append(trace1)
    return plotData


def showROCPlot(plotData, name = None, showPlot = True):
    '''
    Build the ROC curve figure from traces such as those returned by addToROCPlot.

    plotData - list of traces to draw
    name     - optional suffix for the 'ROC Curve' title
    showPlot - when True, the figure is also rendered with pyo.iplot

    Returns the plotly figure.
    '''
    layout = go.Layout(
        title='ROC Curve' + ((' - ' + name) if name is not None else ''),
        showlegend = True,
        hovermode='closest',
        autosize = False,
        width=900,
        height=700,
        xaxis=dict(
            title='False Positive Rate',
            ticklen=5,
            zeroline=False,
            gridwidth=2,
        ),
        yaxis=dict(
            title='True Positive Rate',
            ticklen=5,
            gridwidth=2,
        ),
    )
    fig = go.Figure(data=plotData, layout=layout)

    if showPlot:
        pyo.iplot(fig)
    return fig


def drawBarPlot(df, pivotColumn, barColumns, plotTitle,
                pivotLabel = None, barLabels = None, summaryTotals = None, percentages = False,
                showPlot = True, returnTraces = False):
    '''
    Generic function to draw barplots, needs a dataframe with data and the columns to be used for pivot and bars

    df            - dataframe holding the data
    pivotColumn   - column whose values become the x axis categories
    barColumns    - column name, or list of column names, to draw as bars;
                    non-numeric columns are skipped with a warning
    plotTitle     - figure title (also used as the y axis title)
    pivotLabel    - x axis title; defaults to pivotColumn
    barLabels     - optional trace names, used only when there is exactly one
                    label per entry of barColumns
    summaryTotals - optional values appended in brackets to each x category
    percentages   - when True, each bar is its share of the column total, in percent
    showPlot      - when True, the figure is also rendered with pyo.iplot
    returnTraces  - when True, return the list of bar traces instead of a figure

    Returns the plotly figure, or the list of traces when returnTraces is True.
    '''
    pivotData = df[pivotColumn].tolist()
    if summaryTotals is not None:
        pivotData = [str(value) + ' (' + str(total) + ')' for value, total in zip(pivotData, summaryTotals)]
    if isinstance(barColumns, str):
        barColumns = [barColumns]
    useBarLabels = barLabels is not None and len(barLabels) == len(barColumns)

    traces = []
    # enumerate keeps barLabels aligned with barColumns even when a column is
    # skipped; the old hand-maintained index fell behind after every skip.
    for index, column in enumerate(barColumns):
        # dtype.kind accepts every integer / unsigned / float width; comparing
        # against (int, float) depended on the platform's default int size.
        if df[column].dtype.kind not in 'iuf':
            logging.warning('Ignoring column ' + column + ' because it has a non-numeric dtype')
            continue
        if percentages:
            # Divide the series, not the list: list / number raised TypeError.
            values = list(df[column] * 100 / df[column].sum())
        else:
            values = list(df[column])
        traces.append(go.Bar(
            x=pivotData,
            y=values,
            name= ('% - ' if percentages else '') + (barLabels[index] if useBarLabels else column)
        ))

    if returnTraces:
        return traces

    layout = go.Layout(
        title=plotTitle,
        showlegend = True,
        hovermode='closest',
        autosize = False,
        width=900,
        height=500,
        xaxis=dict(
            title= pivotColumn if pivotLabel is None else pivotLabel,
            ticklen=5,
            zeroline=False,
            gridwidth=2,
        ),
        yaxis=dict(
            title= plotTitle,
            ticklen=5,
            gridwidth=2,
        ),
    )
    fig = go.Figure(data=traces, layout=layout)
    if showPlot:
        pyo.iplot(fig)
    return fig
Python?" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "class Test:\n", 31 | " pass" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "a = Test()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "<__main__.Test at 0x11115a048>" 52 | ] 53 | }, 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "a" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "__main__.Test" 72 | ] 73 | }, 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "type(a)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "type" 92 | ] 93 | }, 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "type(Test)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "__main__.Test" 112 | ] 113 | }, 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "Test" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## What is type in Python?" 
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "__main__.TestWithType" 139 | ] 140 | }, 141 | "execution_count": 6, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "type('TestWithType', (object,), {})" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "type?" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 9, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "test_with_type = type('TestWithType', (object,), {})" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "a1 = test_with_type()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 11, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "<__main__.TestWithType at 0x111173198>" 186 | ] 187 | }, 188 | "execution_count": 11, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "a1" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 12, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "['__class__',\n", 206 | " '__delattr__',\n", 207 | " '__dict__',\n", 208 | " '__dir__',\n", 209 | " '__doc__',\n", 210 | " '__eq__',\n", 211 | " '__format__',\n", 212 | " '__ge__',\n", 213 | " '__getattribute__',\n", 214 | " '__gt__',\n", 215 | " '__hash__',\n", 216 | " '__init__',\n", 217 | " '__init_subclass__',\n", 218 | " '__le__',\n", 219 | " '__lt__',\n", 220 | " '__module__',\n", 221 | " '__ne__',\n", 222 | " '__new__',\n", 223 | " '__reduce__',\n", 224 | " '__reduce_ex__',\n", 225 | " '__repr__',\n", 226 | " '__setattr__',\n", 227 | 
" '__sizeof__',\n", 228 | " '__str__',\n", 229 | " '__subclasshook__',\n", 230 | " '__weakref__']" 231 | ] 232 | }, 233 | "execution_count": 12, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "dir(a1)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 13, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "<__main__.TestWithType at 0x111173208>" 251 | ] 252 | }, 253 | "execution_count": 13, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "type('TestWithType', (object,), {})()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "## Life Cycle involved in a class" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 14, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "class TestClass:\n", 276 | "\n", 277 | " def __new__(cls, *args, **kwargs):\n", 278 | " print('new method called')\n", 279 | " instance = super(TestClass, cls).__new__(cls, *args, **kwargs)\n", 280 | " return instance\n", 281 | "\n", 282 | " def __call__(self, a, b, c):\n", 283 | " print('call method called')\n", 284 | " return a * b * c\n", 285 | "\n", 286 | " def __init__(self):\n", 287 | " super(TestClass, self).__init__()\n", 288 | " print('init method called')" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 15, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | "new method called\n", 301 | "init method called\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "a = TestClass()" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 16, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "call method called\n" 319 | ] 320 | }, 321 | { 
322 | "data": { 323 | "text/plain": [ 324 | "6" 325 | ] 326 | }, 327 | "execution_count": 16, 328 | "metadata": {}, 329 | "output_type": "execute_result" 330 | } 331 | ], 332 | "source": [ 333 | "a(1,2,3)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Decorators" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 17, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "def func1(func2):\n", 350 | " def wrapper():\n", 351 | " return func2().lower()\n", 352 | " return wrapper" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 18, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "@func1\n", 362 | "def get_value():\n", 363 | " return 'aBcDeFgH'" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 19, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "'abcdefgh'" 375 | ] 376 | }, 377 | "execution_count": 19, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "get_value()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 20, 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "'abcdefgh'" 395 | ] 396 | }, 397 | "execution_count": 20, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "func1(get_value)()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 21, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "class MyDecorator: \n", 413 | " def __init__(self, case):\n", 414 | " self.is_lower_case = False if case == 'upper' else True\n", 415 | " \n", 416 | " def __call__(self, func2):\n", 417 | " def wrapper():\n", 418 | " return func2().lower() if self.is_lower_case else func2().upper()\n", 419 | " return wrapper" 420 | ] 421 | }, 422 | { 423 | 
"cell_type": "code", 424 | "execution_count": 25, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "user_input = 'upper'" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 26, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "@MyDecorator(user_input)\n", 438 | "def get_value():\n", 439 | " return 'aBcDeFgH'" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 27, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "data": { 449 | "text/plain": [ 450 | "'ABCDEFGH'" 451 | ] 452 | }, 453 | "execution_count": 27, 454 | "metadata": {}, 455 | "output_type": "execute_result" 456 | } 457 | ], 458 | "source": [ 459 | "get_value()" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 28, 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "name": "stderr", 469 | "output_type": "stream", 470 | "text": [ 471 | " * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)\n", 472 | "127.0.0.1 - - [17/Mar/2018 11:44:46] \"GET /favicon.ico HTTP/1.1\" 404 -\n", 473 | "127.0.0.1 - - [17/Mar/2018 11:44:46] \"GET / HTTP/1.1\" 404 -\n", 474 | "127.0.0.1 - - [17/Mar/2018 11:44:51] \"GET /favicon.ico HTTP/1.1\" 404 -\n", 475 | "127.0.0.1 - - [17/Mar/2018 11:44:51] \"GET / HTTP/1.1\" 404 -\n", 476 | "127.0.0.1 - - [17/Mar/2018 11:44:54] \"GET //hw HTTP/1.1\" 404 -\n", 477 | "127.0.0.1 - - [17/Mar/2018 11:44:59] \"GET //hw HTTP/1.1\" 404 -\n", 478 | "127.0.0.1 - - [17/Mar/2018 11:45:03] \"GET /hw HTTP/1.1\" 200 -\n" 479 | ] 480 | } 481 | ], 482 | "source": [ 483 | "from flask import Flask\n", 484 | "app = Flask(__name__)\n", 485 | "\n", 486 | "@app.route('/hw')\n", 487 | "def hello_world():\n", 488 | " return 'Hello World!'\n", 489 | "\n", 490 | "app.run()" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "## Back to Meta classes" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | 
"source": [ 504 | "### What is type? 'type' defines how a class behaves in Python. \n", 505 | "\n", 506 | "### Got it. Well then - Can I change 'how' a class behaves in Python? - MetaClasses" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": 29, 512 | "metadata": {}, 513 | "outputs": [], 514 | "source": [ 515 | "class MySingletonMeta(type):\n", 516 | " _instances = {}\n", 517 | " \n", 518 | " def __call__(cls, *args, **kwargs):\n", 519 | " if cls not in cls._instances:\n", 520 | " cls._instances[cls] = super(MySingletonMeta, cls).__call__(*args)\n", 521 | " return cls._instances[cls]" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 30, 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [ 530 | "class MySingletonClass(metaclass=MySingletonMeta):\n", 531 | " def __init__(self):\n", 532 | " self.i = 1" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 31, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "a = MySingletonClass()\n", 542 | "b = MySingletonClass()" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 32, 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "True" 554 | ] 555 | }, 556 | "execution_count": 32, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "a is b" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 33, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "data": { 572 | "text/plain": [ 573 | "(__main__.MySingletonClass, 4588486608, __main__.MySingletonClass, 4588486608)" 574 | ] 575 | }, 576 | "execution_count": 33, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "type(a), id(a) , type(b), id(b)" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 34, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 
591 | "from abc import ABCMeta, ABC, abstractmethod" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 35, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "ABCMeta?" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 40, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "class MyAbstractClass(metaclass=ABCMeta):\n", 610 | " def __init__(self):\n", 611 | " pass\n", 612 | "\n", 613 | " @abstractmethod\n", 614 | " def my_abstract_method(self):\n", 615 | " pass" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 41, 621 | "metadata": {}, 622 | "outputs": [ 623 | { 624 | "ename": "TypeError", 625 | "evalue": "Can't instantiate abstract class MyAbstractClass with abstract methods my_abstract_method", 626 | "output_type": "error", 627 | "traceback": [ 628 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 629 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 630 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mMyAbstractClass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 631 | "\u001b[0;31mTypeError\u001b[0m: Can't instantiate abstract class MyAbstractClass with abstract methods my_abstract_method" 632 | ] 633 | } 634 | ], 635 | "source": [ 636 | "MyAbstractClass()" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 46, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [ 645 | "class MyChildClass(MyAbstractClass):\n", 646 | " \n", 647 | " def __init__(self):\n", 648 | " pass\n", 649 | " \n", 650 | " def my_abstract_method(self):\n", 651 | " pass\n", 652 | " \n", 653 | " @staticmethod\n", 654 | " def my_static_method():\n", 655 | " print('I am a static method')\n", 656 | " \n", 657 | " @classmethod\n", 658 | " def my_class_method(cls):\n", 659 | " 
print('Class method called')\n" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 47, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "mcc = MyChildClass()" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 48, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "name": "stdout", 678 | "output_type": "stream", 679 | "text": [ 680 | "I am a static method\n" 681 | ] 682 | } 683 | ], 684 | "source": [ 685 | "mcc.my_static_method()" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 49, 691 | "metadata": {}, 692 | "outputs": [ 693 | { 694 | "name": "stdout", 695 | "output_type": "stream", 696 | "text": [ 697 | "Class method called\n" 698 | ] 699 | } 700 | ], 701 | "source": [ 702 | "MyChildClass.my_class_method()" 703 | ] 704 | }, 705 | { 706 | "cell_type": "markdown", 707 | "metadata": {}, 708 | "source": [ 709 | "### Combine two meta classes" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 50, 715 | "metadata": {}, 716 | "outputs": [], 717 | "source": [ 718 | "class MySingletonABCMeta(ABCMeta):\n", 719 | " _instances = {}\n", 720 | "\n", 721 | " def __call__(cls, *args, **kwargs):\n", 722 | " if cls not in cls._instances:\n", 723 | " cls._instances[cls] = super(MySingletonABCMeta, cls).__call__(*args)\n", 724 | " return cls._instances[cls]" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 51, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "class MyAbstractSingletonClass(metaclass=MySingletonABCMeta):\n", 734 | " def __init__(self):\n", 735 | " pass\n", 736 | "\n", 737 | " @abstractmethod\n", 738 | " def my_abstract_method(self):\n", 739 | " pass" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": 52, 745 | "metadata": {}, 746 | "outputs": [ 747 | { 748 | "ename": "TypeError", 749 | "evalue": "Can't instantiate abstract class MyAbstractSingletonClass with abstract 
methods my_abstract_method", 750 | "output_type": "error", 751 | "traceback": [ 752 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 753 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 754 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mMyAbstractSingletonClass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 755 | "\u001b[0;32m\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_instances\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_instances\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mMySingletonABCMeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_instances\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 756 | "\u001b[0;31mTypeError\u001b[0m: Can't instantiate abstract class MyAbstractSingletonClass 
with abstract methods my_abstract_method" 757 | ] 758 | } 759 | ], 760 | "source": [ 761 | "MyAbstractSingletonClass()" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 53, 767 | "metadata": {}, 768 | "outputs": [], 769 | "source": [ 770 | "class MyAbstractSingletonChild(MyAbstractSingletonClass):\n", 771 | " def __init__(self):\n", 772 | " pass\n", 773 | " \n", 774 | " def my_abstract_method(self):\n", 775 | " pass" 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "execution_count": 54, 781 | "metadata": {}, 782 | "outputs": [], 783 | "source": [ 784 | "a1 = MyAbstractSingletonChild()\n", 785 | "b1 = MyAbstractSingletonChild()" 786 | ] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": 55, 791 | "metadata": {}, 792 | "outputs": [ 793 | { 794 | "data": { 795 | "text/plain": [ 796 | "(__main__.MyAbstractSingletonChild,\n", 797 | " 4589319448,\n", 798 | " __main__.MyAbstractSingletonChild,\n", 799 | " 4589319448)" 800 | ] 801 | }, 802 | "execution_count": 55, 803 | "metadata": {}, 804 | "output_type": "execute_result" 805 | } 806 | ], 807 | "source": [ 808 | "type(a1), id(a1), type(b1), id(b1)" 809 | ] 810 | }, 811 | { 812 | "cell_type": "markdown", 813 | "metadata": {}, 814 | "source": [ 815 | "## Hashable Objects in Python" 816 | ] 817 | }, 818 | { 819 | "cell_type": "markdown", 820 | "metadata": {}, 821 | "source": [ 822 | "### What items are hashable in Python?" 
823 | ] 824 | }, 825 | { 826 | "cell_type": "code", 827 | "execution_count": 60, 828 | "metadata": {}, 829 | "outputs": [ 830 | { 831 | "ename": "TypeError", 832 | "evalue": "unhashable type: 'list'", 833 | "output_type": "error", 834 | "traceback": [ 835 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 836 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 837 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;34m{\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 838 | "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" 839 | ] 840 | } 841 | ], 842 | "source": [ 843 | "a = [1,2]\n", 844 | "{a:1}" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": 61, 850 | "metadata": {}, 851 | "outputs": [ 852 | { 853 | "name": "stdout", 854 | "output_type": "stream", 855 | "text": [ 856 | "None\n" 857 | ] 858 | } 859 | ], 860 | "source": [ 861 | "print(a.__hash__)" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "hash((1,2))" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 62, 876 | "metadata": {}, 877 | "outputs": [], 878 | "source": [ 879 | "from collections import Hashable\n", 880 | "class HashableObject(Hashable):\n", 881 | " __metaclass__ = ABCMeta\n", 882 | "\n", 883 | " def __init__(self):\n", 884 | " pass\n", 885 | "\n", 886 | " def __eq__(self, other):\n", 887 | " return True if isinstance(other, self.__class__) and self.get_key() == other.get_key() else False\n", 888 | "\n", 889 | " def 
class ComparableObject:
    """Base class that gives subclasses attribute-by-attribute equality.

    Two objects compare equal when ``other`` is an instance of this
    object's class (or of a subclass) and both carry identical instance
    dictionaries.
    """

    # Defining __eq__ already makes instances unhashable; stating it
    # explicitly documents that these mutable, value-compared objects must
    # not be used as dict keys or set members.
    __hash__ = None

    def __init__(self):
        pass

    def __eq__(self, other):
        """Return True when *other* is a compatible object with equal attributes.

        Returns ``NotImplemented`` for unrelated types so Python can try the
        reflected comparison; with ordinary objects the ``==`` operator
        still evaluates to ``False``.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Return the logical negation of ``self == other``."""
        return not self == other
class MyBeanMeta(type):
    """Metaclass that pools instances per (class, constructor arguments).

    Calling a class that uses this metaclass twice with the same arguments
    returns the very same object instead of building a new one.
    """

    # Pool shared by every class using this metaclass; the class itself is
    # part of the key, so different classes never collide.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the pooled instance for these arguments, creating it once.

        All arguments must be hashable because they form the pool key.
        Keyword arguments are supported and take part in the key.
        """
        # Kept on purpose: the notebook's recorded output shows this trace.
        print(args)
        key = (cls, args)
        if kwargs:
            # Sort so that keyword order does not create distinct pool entries.
            key += (tuple(sorted(kwargs.items())),)
        if key not in cls._instances:
            cls._instances[key] = super().__call__(*args, **kwargs)
        return cls._instances[key]
def with_generator(count=10000):
    """Sum the integers 0..count-1, pulling them lazily from ``get_data``.

    Returns the total so the work is observable (and cannot be mistaken
    for dead code) when the function is timed.
    """
    result = 0
    for x in get_data(count):
        result += x
    return result


def get_data(count=10000):
    """Yield the integers 0..count-1 one at a time without storing them."""
    i = 0
    while i < count:
        yield i
        i += 1


def without_generator(count=10000):
    """Sum the integers 0..count-1 after materialising them in a list.

    Counterpart of ``with_generator`` for comparing the two approaches.
    """
    result = 0
    for x in get_data_wo_gen(count):
        result += x
    return result


def get_data_wo_gen(count=10000):
    """Return a list holding the integers 0..count-1."""
    # The explicit append loop is kept: building the whole list up front is
    # exactly the cost this demo contrasts with the generator version.
    ret_val = []
    for i in range(0, count):
        ret_val.append(i)

    return ret_val
"a = [i for i in range(1000)]" 1259 | ] 1260 | }, 1261 | { 1262 | "cell_type": "code", 1263 | "execution_count": null, 1264 | "metadata": {}, 1265 | "outputs": [], 1266 | "source": [ 1267 | "sum([i for i in range(1000)])" 1268 | ] 1269 | }, 1270 | { 1271 | "cell_type": "markdown", 1272 | "metadata": {}, 1273 | "source": [ 1274 | "## Use some of the performance optimized data structures\n", 1275 | "\n", 1276 | "https://docs.python.org/3.6/library/collections.html" 1277 | ] 1278 | } 1279 | ], 1280 | "metadata": { 1281 | "kernelspec": { 1282 | "display_name": "Python 3", 1283 | "language": "python", 1284 | "name": "python3" 1285 | }, 1286 | "language_info": { 1287 | "codemirror_mode": { 1288 | "name": "ipython", 1289 | "version": 3 1290 | }, 1291 | "file_extension": ".py", 1292 | "mimetype": "text/x-python", 1293 | "name": "python", 1294 | "nbconvert_exporter": "python", 1295 | "pygments_lexer": "ipython3", 1296 | "version": "3.6.4" 1297 | } 1298 | }, 1299 | "nbformat": 4, 1300 | "nbformat_minor": 2 1301 | } 1302 | -------------------------------------------------------------------------------- /2019/2019-05-18/Link Prediction on Hike's Network.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2019/2019-05-18/Link Prediction on Hike's Network.pptx -------------------------------------------------------------------------------- /2019/2019-05-18/code/code/prepare_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | import lightgbm as lgb 5 | import random 6 | import sys 7 | import time 8 | 9 | train = pd.read_csv("../Data/train.csv") 10 | test = pd.read_csv("../Data/test.csv") 11 | user_features = pd.read_csv("../Data/user_features.csv") 12 | train.shape, test.shape, user_features.shape 13 | 14 | random.seed(2) 15 | ids = 
def create_features(i):
    """Build the pairwise feature table for one training fold or the test set.

    Relies on the module-level ``train``, ``test``, ``user_features``,
    ``folds``, ``node1_counts`` and ``node2_counts`` objects.

    Args:
        i: Fold index into ``folds``. For ``i >= 0`` the rows of that fold
            are featurised and the remaining training rows supply the graph
            and contact statistics. For ``i < 0`` the test rows are
            featurised against the full training set.

    Returns:
        A DataFrame with one row per (node1_id, node2_id) pair of the
        selected rows, carrying the original columns followed by the
        engineered features.
    """
    from itertools import combinations

    def rows_to_dict(frame):
        """Map every index label of *frame* to the value in its first column."""
        return {row[0]: row[1] for row in frame.itertuples()}

    start_time = time.time()
    if i >= 0:
        in_fold = train.index.isin(folds[i])
        df = train[in_fold].reset_index(drop=True)
        graph_df = train[~in_fold].reset_index(drop=True)
    else:
        df = test.copy()
        graph_df = train.copy()

    # Only pairs that actually chatted become graph edges; the same edge
    # list feeds both the directed and the undirected graph.
    chats = graph_df[graph_df.is_chat == 1]
    chat_edges = list(zip(chats.node1_id, chats.node2_id))
    user_graph_directed = nx.DiGraph()
    user_graph_directed.add_edges_from(chat_edges)
    user_graph_undirected = nx.Graph()
    user_graph_undirected.add_edges_from(chat_edges)
    pg_ranks = nx.pagerank(user_graph_directed)
    avg_neighbors = nx.average_neighbor_degree(user_graph_directed)
    avg_neighbors_undirected = nx.average_neighbor_degree(user_graph_undirected)

    # node -> set of partners it appears with, keyed by the node1 / node2 role.
    contact_pairs = graph_df[['node1_id', 'node2_id']]
    node1_contacts = {row[0]: set(row[1]) for row in contact_pairs.groupby('node1_id').aggregate(tuple).itertuples()}
    node2_contacts = {row[0]: set(row[1]) for row in contact_pairs.groupby('node2_id').aggregate(tuple).itertuples()}

    df['num_contacts_from_node1'] = df.node1_id.map(node1_counts)
    df['num_contacts_from_node2'] = df.node2_id.map(node1_counts)

    df['num_contacts_to_node1'] = df.node1_id.map(node2_counts)
    df['num_contacts_to_node2'] = df.node2_id.map(node2_counts)

    df['contacts_from_count'] = df.num_contacts_from_node1 + df.num_contacts_from_node2
    df['contacts_to_count'] = df.num_contacts_to_node1 + df.num_contacts_to_node2

    df['node_count_from_diff_abs'] = (df.num_contacts_from_node1 - df.num_contacts_from_node2).map(abs)
    df['node_count_from_diff'] = df.num_contacts_from_node1 - df.num_contacts_from_node2

    df['node_count_to_from_diff_abs'] = (df.num_contacts_to_node1 - df.num_contacts_to_node2).map(abs)
    df['node_count_to_diff'] = df.num_contacts_to_node1 - df.num_contacts_to_node2

    # In the four helpers below -1 marks "at least one node has no contact
    # list in this role", as opposed to a genuine overlap of zero.
    def common_contacts_from(node1, node2):
        try:
            return len(node1_contacts[node1].intersection(node1_contacts[node2]))
        except KeyError:
            return -1

    def common_contacts_to(node1, node2):
        try:
            return len(node2_contacts[node1].intersection(node2_contacts[node2]))
        except KeyError:
            return -1

    def common_contacts_from_ratio(node1, node2):
        try:
            return len(node1_contacts[node1].intersection(node1_contacts[node2])) / max(1, len(node1_contacts[node1].union(node1_contacts[node2])))
        except KeyError:
            return -1

    def common_contacts_to_ratio(node1, node2):
        try:
            return len(node2_contacts[node1].intersection(node2_contacts[node2])) / max(1, len(node2_contacts[node1].union(node2_contacts[node2])))
        except KeyError:
            return -1

    # The id pairs are reused by every row-wise feature, so build them once.
    # Row order of df does not change until the user-feature merge below.
    pairs = list(zip(df.node1_id, df.node2_id))

    df['common_contacts_from'] = [common_contacts_from(u, v) for u, v in pairs]
    df['common_contacts_to'] = [common_contacts_to(u, v) for u, v in pairs]
    df['common_contacts_from_ratio'] = [common_contacts_from_ratio(u, v) for u, v in pairs]
    df['common_contacts_to_ratio'] = [common_contacts_to_ratio(u, v) for u, v in pairs]

    def reverse_contact_exists(node1, node2):
        """Return 1/0 if node2 lists node1 as a contact, -1 if node2 has no list."""
        try:
            return 1 if node1 in node1_contacts[node2] else 0
        except KeyError:
            return -1

    df['reverse_contact_exists'] = [reverse_contact_exists(u, v) for u, v in pairs]
    graph_df['reverse_contact_exists'] = [
        reverse_contact_exists(u, v) for u, v in zip(graph_df.node1_id, graph_df.node2_id)]
    graph_df['reverse_contact_exists'] = graph_df['reverse_contact_exists'].astype(np.float16)
    # Rows flagged -1 (unknown) are excluded from the per-node aggregates.
    known_reverse = graph_df[graph_df.reverse_contact_exists >= 0][['node1_id', 'reverse_contact_exists']]
    num_contacts_with_reverse_contacts = rows_to_dict(known_reverse.groupby('node1_id').sum())
    mean_contacts_with_reverse_contacts = rows_to_dict(known_reverse.groupby('node1_id').mean())

    df['num_contacts_with_reverse_contacts1'] = df['node1_id'].map(num_contacts_with_reverse_contacts)
    df['num_contacts_with_reverse_contacts2'] = df['node2_id'].map(num_contacts_with_reverse_contacts)

    df['mean_contacts_with_reverse_contacts1'] = df['node1_id'].map(mean_contacts_with_reverse_contacts)
    df['mean_contacts_with_reverse_contacts2'] = df['node2_id'].map(mean_contacts_with_reverse_contacts)

    df['same_node'] = df['node1_id'] == df['node2_id']

    connected_components = list(nx.connected_components(user_graph_undirected))
    connected_components_index = {n: idx for idx, c in enumerate(connected_components) for n in c}
    df['node1_connected_component'] = df.node1_id.map(connected_components_index)
    df['node2_connected_component'] = df.node2_id.map(connected_components_index)
    df['same_connected_component'] = df['node1_connected_component'] == df['node2_connected_component']
    df['node1_connected_component_count'] = df[['node1_id', 'node1_connected_component']].groupby('node1_id').transform('count')
    df['node2_connected_component_count'] = df[['node2_id', 'node2_connected_component']].groupby('node2_id').transform('count')
    df['connected_component_count_diff'] = df['node1_connected_component_count'] - df['node2_connected_component_count']
    graph_df['node1_connected_component'] = graph_df.node1_id.map(connected_components_index)
    graph_df['node2_connected_component'] = graph_df.node2_id.map(connected_components_index)
    connected_component_is_chat_mean1 = rows_to_dict(
        graph_df[['node1_connected_component', 'is_chat']].groupby('node1_connected_component').mean())
    connected_component_is_chat_mean2 = rows_to_dict(
        graph_df[['node2_connected_component', 'is_chat']].groupby('node2_connected_component').mean())
    df['connected_component_is_chat_mean1'] = df['node1_connected_component'].map(connected_component_is_chat_mean1)
    df['connected_component_is_chat_mean2'] = df['node2_connected_component'].map(connected_component_is_chat_mean2)

    clusters = nx.clustering(user_graph_undirected)
    df['node1_cluster_coef'] = df.node1_id.map(clusters)
    df['node2_cluster_coef'] = df.node2_id.map(clusters)
    df['cluster_coef_diff'] = df['node1_cluster_coef'] - df['node2_cluster_coef']

    df['reverse_connection_exists'] = [user_graph_directed.has_edge(v, u) for u, v in pairs]
    graph_df['reverse_connection_exists'] = [
        user_graph_directed.has_edge(v, u) for u, v in zip(graph_df.node1_id, graph_df.node2_id)]
    # NOTE(review): the column is boolean, so this ">= 0" filter keeps every
    # row; it is retained unchanged from the original pipeline.
    reverse_links = graph_df[graph_df.reverse_connection_exists >= 0][['node1_id', 'reverse_connection_exists']]
    num_connections_with_reverse_contacts = rows_to_dict(reverse_links.groupby('node1_id').sum())
    mean_connections_with_reverse_contacts = rows_to_dict(reverse_links.groupby('node1_id').mean())

    df['num_connections_with_reverse_contacts1'] = df['node1_id'].map(num_connections_with_reverse_contacts)
    df['num_connections_with_reverse_contacts2'] = df['node2_id'].map(num_connections_with_reverse_contacts)

    df['mean_connections_with_reverse_contacts1'] = df['node1_id'].map(mean_connections_with_reverse_contacts)
    df['mean_connections_with_reverse_contacts2'] = df['node2_id'].map(mean_connections_with_reverse_contacts)

    df['reversed_conection_to_contact_ratio1'] = df['num_connections_with_reverse_contacts1'] / df['num_contacts_with_reverse_contacts1'].map(lambda x: max(1, x))
    df['reversed_conection_to_contact_ratio2'] = df['num_connections_with_reverse_contacts2'] / df['num_contacts_with_reverse_contacts2'].map(lambda x: max(1, x))

    # Nodes absent from the chat graph get -1 instead of NaN.
    df['page_rank_1'] = df['node1_id'].map(pg_ranks).fillna(-1)
    df['page_rank_2'] = df['node2_id'].map(pg_ranks).fillna(-1)
    df['avg_neighbors_1_directed'] = df['node1_id'].map(avg_neighbors).fillna(-1)
    df['avg_neighbors_2_directed'] = df['node2_id'].map(avg_neighbors).fillna(-1)
    df['avg_neighbors_1_undirected'] = df['node1_id'].map(avg_neighbors_undirected).fillna(-1)
    df['avg_neighbors_2_undirected'] = df['node2_id'].map(avg_neighbors_undirected).fillna(-1)
    df['page_rank_diff'] = df['page_rank_1'] - df['page_rank_2']
    df['avg_neighbors_directed_diff'] = df['avg_neighbors_1_directed'] - df['avg_neighbors_2_directed']
    df['avg_neighbors_undirected_diff'] = df['avg_neighbors_1_undirected'] - df['avg_neighbors_2_undirected']

    # Average the *partner's* contact count per node. The original averaged
    # each column grouped by its own node id, which merely reproduces the
    # column (and made two of the four features identical). The pairing now
    # mirrors the avg_node*_connection_* features further down.
    # NOTE(review): this changes the values of these four features; retrain
    # any model built on the old files.
    df['avg_node2_from_count'] = df[['node1_id', 'num_contacts_from_node2']].groupby('node1_id').transform('mean')
    df['avg_node1_from_count'] = df[['node2_id', 'num_contacts_from_node1']].groupby('node2_id').transform('mean')
    df['avg_node2_to_count'] = df[['node1_id', 'num_contacts_to_node2']].groupby('node1_id').transform('mean')
    df['avg_node1_to_count'] = df[['node2_id', 'num_contacts_to_node1']].groupby('node2_id').transform('mean')

    node1_connections = chats['node1_id'].value_counts().to_dict()
    node2_connections = chats['node2_id'].value_counts().to_dict()

    df['node1_connection_from_count'] = df['node1_id'].map(node1_connections)
    df['node2_connection_from_count'] = df['node2_id'].map(node1_connections)
    df['node1_connection_to_count'] = df['node1_id'].map(node2_connections)
    df['node2_connection_to_count'] = df['node2_id'].map(node2_connections)

    df['node_connection_from_sum'] = df['node1_connection_from_count'] + df['node2_connection_from_count']
    df['node_connection_to_sum'] = df['node1_connection_to_count'] + df['node2_connection_to_count']

    df['node1_connection_from_percentage'] = df['node1_connection_from_count'] / df['num_contacts_from_node1']
    df['node2_connection_from_percentage'] = df['node2_connection_from_count'] / df['num_contacts_from_node2']
    df['node1_connection_to_percentage'] = df['node1_connection_to_count'] / df['num_contacts_to_node1']
    df['node2_connection_to_percentage'] = df['node2_connection_to_count'] / df['num_contacts_to_node2']

    df['node_connection_from_diff'] = df['node1_connection_from_count'] - df['node2_connection_from_count']
    df['node_connection_to_diff'] = df['node1_connection_to_count'] - df['node2_connection_to_count']

    df['avg_node2_connection_from_count'] = df[['node1_id', 'node2_connection_from_count']].groupby('node1_id').transform('mean')
    df['avg_node1_connection_from_count'] = df[['node2_id', 'node1_connection_from_count']].groupby('node2_id').transform('mean')
    df['avg_node2_connection_to_count'] = df[['node1_id', 'node2_connection_to_count']].groupby('node1_id').transform('mean')
    df['avg_node1_connection_to_count'] = df[['node2_id', 'node1_connection_to_count']].groupby('node2_id').transform('mean')

    # The graph helpers fall back to a default when a node is missing from
    # the graph or no path exists. Only graph/lookup errors are caught, so a
    # Ctrl-C during this long-running step is no longer swallowed.
    def get_num_common_neighbors(nodes, g):
        (u, v) = nodes
        try:
            return len(set(g.neighbors(u)).intersection(set(g.neighbors(v))))
        except (nx.NetworkXException, KeyError):
            return 0

    def get_common_neighbors_similarity(nodes, g):
        (u, v) = nodes
        try:
            return len(set(g.neighbors(u)).intersection(set(g.neighbors(v)))) / len(
                set(g.neighbors(u)).union(set(g.neighbors(v))))
        except (nx.NetworkXException, KeyError, ZeroDivisionError):
            return 0

    def get_shortest_path(nodes, g):
        (u, v) = nodes
        try:
            return nx.shortest_path_length(g, u, v)
        except (nx.NetworkXException, KeyError):
            # Large sentinel standing in for "unreachable".
            return 1e5

    df['num_common_neighbors_directed'] = [get_num_common_neighbors(p, user_graph_directed) for p in pairs]
    df['num_common_neighbors_undirected'] = [get_num_common_neighbors(p, user_graph_undirected) for p in pairs]
    df['common_neighbors_similarity_directed'] = [get_common_neighbors_similarity(p, user_graph_directed) for p in pairs]
    df['common_neighbors_similarity_undirected'] = [get_common_neighbors_similarity(p, user_graph_undirected) for p in pairs]
    df['shortest_path_length_directed'] = [get_shortest_path(p, user_graph_directed) for p in pairs]
    df['shortest_path_length_undirected'] = [get_shortest_path(p, user_graph_undirected) for p in pairs]

    # Attach the per-user feature columns once per side: pandas suffixes the
    # clashing names with _x (node1) and _y (node2). Every remaining NaN in
    # the table becomes -1.
    df = df.merge(user_features.rename(columns={'node_id': 'node1_id'}), on='node1_id', how='left').merge(
        user_features.rename(columns={'node_id': 'node2_id'}), on='node2_id', how='left').fillna(-1)

    def cosine_similarity(vec1, vec2):
        """Row-wise cosine similarity of two equally shaped 2-D arrays."""
        vec1_norm = vec1 * vec1
        vec1_norm = np.sqrt(vec1_norm.sum(axis=1))
        vec2_norm = vec2 * vec2
        vec2_norm = np.sqrt(vec2_norm.sum(axis=1))
        cossim = vec1 * vec2 / (vec1_norm * vec2_norm)[:, None]
        return cossim.sum(axis=1)

    x_cols = ['f{}_x'.format(k) for k in range(1, 14)]
    y_cols = ['f{}_y'.format(k) for k in range(1, 14)]

    df['cossim'] = cosine_similarity(df[x_cols].values, df[y_cols].values)

    # Pairwise differences between a hand-picked subset of user features.
    feats = [12, 9, 3, 13, 6]
    for first, second in combinations(feats, 2):
        df["f{}_x_minus_f{}_x".format(first, second)] = (df["f{}_x".format(first)] - df["f{}_x".format(second)]).astype(np.int8)
        df["f{}_y_minus_f{}_y".format(first, second)] = (df["f{}_y".format(first)] - df["f{}_y".format(second)]).astype(np.int8)

    df['f_sum_x'] = df[x_cols].sum(axis=1)
    df['f_min_x'] = df[x_cols].min(axis=1)
    df['f_max_x'] = df[x_cols].max(axis=1)
    df['f_mean_x'] = df[x_cols].mean(axis=1)
    df['f_sum_y'] = df[y_cols].sum(axis=1)
    df['f_min_y'] = df[y_cols].min(axis=1)
    df['f_max_y'] = df[y_cols].max(axis=1)
    df['f_mean_y'] = df[y_cols].mean(axis=1)

    for k in range(1, 14):
        df["f{}_x_minus_f{}_y".format(k, k)] = (df['f{}_x'.format(k)] - df['f{}_y'.format(k)]).astype(np.int8)
    print("Total time taken is:", time.time() - start_time)
    return df
import pandas as pd
import numpy as np
import lightgbm as lgb

# Folds 0-8 form the development set; fold 9 is held out for early stopping.
dev = pd.concat(
    [pd.read_csv("../Data/train_features_fold_{}.csv".format(i)) for i in range(9)]
).reset_index(drop=True)
val = pd.read_csv("../Data/train_features_fold_9.csv")
test = pd.read_csv("../Data/test_features.csv")

# Every column after the first three of the fold files is used as a model input.
# NOTE(review): presumably the first three are node1_id, node2_id and is_chat -
# confirm against train.csv, since the slice is positional.
indep_vars = list(dev.columns)[3:]

params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'auc'},
    'num_leaves': 500,
    'learning_rate': 0.0175,
    'feature_fraction': 0.72,
    'bagging_fraction': 0.75,
    'bagging_freq': 5,
    'verbose': 1,
    'min_data_in_leaf': 100,
    'max_bin': 256,
    'lambda_l1': 0.0025,
    'lambda_l2': 0.0025,
    'min_gain_to_split': 0.05,
    'min_sum_hessian_in_leaf': 12.0,
}

lgb_dev = lgb.Dataset(dev[indep_vars].values.astype(np.float32), dev['is_chat'])
lgb_val = lgb.Dataset(val[indep_vars].values.astype(np.float32), val['is_chat'])

# NOTE(review): early_stopping_rounds / verbose_eval are the keyword form this
# script was written against; newer LightGBM releases expect the
# lgb.early_stopping / lgb.log_evaluation callbacks - confirm against the
# installed version before upgrading.
model = lgb.train(params, lgb_dev, num_boost_round=5000, valid_sets=(lgb_dev, lgb_val),
                  early_stopping_rounds=200, verbose_eval=10)

# Score the test pairs and write the submission file.
test['is_chat'] = model.predict(test[indep_vars].values.astype(np.float32))
test[['id', 'is_chat']].to_csv("./submission.csv", index=False)
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "class Test:\n", 24 | " pass\n", 25 | "\n", 26 | "a = Test()\n", 27 | "a" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "type(a)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "type(Test)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "type(type)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### Classes - Nothing but instances of types. Class technically is a sugar over the native 'type'" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## What is type in Python?" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "type?" 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "TestWithType = type('TestWithType', (object,), {})" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "type(TestWithType)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "ins1 = TestWithType()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "type(ins1)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "type('TestWithType', (object,), {})()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### 'type' is an important native structure used for creating classes." 
class TestClass:
    """Callable demo class that traces the object life cycle.

    Creating an instance prints from ``__new__`` and then ``__init__``;
    calling the instance prints from ``__call__`` and counts the call.
    """

    def __new__(cls, *args, **kwargs):
        """Allocate the instance, announcing that ``__new__`` ran."""
        print('new method called')
        # object.__new__ raises TypeError when handed extra arguments, so
        # only cls is forwarded; the arguments still reach __init__, which
        # lets subclasses define constructors that take parameters.
        instance = super().__new__(cls)
        return instance

    def __call__(self, a, b, c):
        """Return ``a * b * c`` and record that the instance was called."""
        self.call_count += 1
        print('call method called')
        return a * b * c

    def __init__(self):
        """Start the call counter at zero."""
        self.call_count = 0
        super().__init__()
        print('init method called')

    def get_call_count(self):
        """Return how many times the instance has been called."""
        return self.call_count
class MySingletonMeta(type):
    """Metaclass that turns every class using it into a singleton.

    The first call to the class builds the instance; every later call
    returns that same object and ignores its arguments.
    """

    # One slot per class, shared through the metaclass.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the single instance of *cls*, creating it on first use."""
        if cls not in cls._instances:
            # Forward keyword arguments as well: they used to be accepted
            # here but silently dropped before reaching __init__.
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
**kwargs):\n", 280 | " print(\"metaclass init method called\")\n", 281 | " return super(MyMetaClass, self).__init__(*args, **kwargs)\n", 282 | " \n", 283 | " def test_method_1(self):\n", 284 | " print(\"MyMetaClass - Test method 1 called\")" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "class MyClass(metaclass=MyMetaClass):\n", 294 | " def __new__(cls, *args, **kwargs):\n", 295 | " print(\"instance new method called\")\n", 296 | " return super(MyClass, cls).__new__(cls, *args, **kwargs)\n", 297 | " \n", 298 | " def __init__(self, *args, **kwargs):\n", 299 | " print(\"instance init method called\")\n", 300 | " return super(MyClass, self).__init__(*args, **kwargs)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "ins2 = MyClass()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "MyClass._test_attribute" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "MyClass.__mro__" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "MyMetaClass.__mro__" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "## Pattern 1 : Abstract Classes" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "from abc import ABCMeta, ABC, abstractmethod" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | 
"execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "ABCMeta?" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "class MyAbstractClass(metaclass=ABCMeta):\n", 378 | " def __init__(self):\n", 379 | " pass\n", 380 | "\n", 381 | " @abstractmethod\n", 382 | " def my_abstract_method(self):\n", 383 | " pass" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "MyAbstractClass()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "class MyChildClass(MyAbstractClass):\n", 402 | " \n", 403 | " def __init__(self):\n", 404 | " pass\n", 405 | " \n", 406 | " def my_abstract_method(self):\n", 407 | " pass\n" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "mcc = MyChildClass()\n", 417 | "mcc" 418 | ] 419 | }, 420 | { 421 | "cell_type": "markdown", 422 | "metadata": {}, 423 | "source": [ 424 | "## Pattern 2 : Abstract family of singleton classes - Combine two metaclasses" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "class MySingletonABCMeta(ABCMeta):\n", 434 | " _instances = {}\n", 435 | "\n", 436 | " def __call__(cls, *args, **kwargs):\n", 437 | " if cls not in cls._instances:\n", 438 | " cls._instances[cls] = super(MySingletonABCMeta, cls).__call__(*args)\n", 439 | " return cls._instances[cls]" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "class MyAbstractSingletonClass(metaclass=MySingletonABCMeta):\n", 449 | " def __init__(self):\n", 450 
| " pass\n", 451 | "\n", 452 | " @abstractmethod\n", 453 | " def my_abstract_method(self):\n", 454 | " pass" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "MyAbstractSingletonClass()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "class MyAbstractSingletonChild(MyAbstractSingletonClass):\n", 473 | " def __init__(self):\n", 474 | " pass\n", 475 | " \n", 476 | " def my_abstract_method(self):\n", 477 | " pass" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "a1 = MyAbstractSingletonChild()\n", 487 | "b1 = MyAbstractSingletonChild()" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "type(a1), id(a1), type(b1), id(b1)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "## Pattern 3 : Pooled Objects" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": null, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "class MyBeanMeta(type):\n", 513 | " _instances = {}\n", 514 | "\n", 515 | " def __call__(cls, *args):\n", 516 | " print(args)\n", 517 | " key = tuple((cls, args))\n", 518 | " if key not in cls._instances:\n", 519 | " cls._instances[key] = super(MyBeanMeta, cls).__call__(*args)\n", 520 | " return cls._instances[key]" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "class MyBeanClass(metaclass=MyBeanMeta):\n", 530 | " def __init__(self, a ):\n", 531 | " self.a = a" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": {}, 538 | 
"outputs": [], 539 | "source": [ 540 | "bn1 = MyBeanClass(1)\n", 541 | "bn2 = MyBeanClass(2)\n", 542 | "bn3 = MyBeanClass(3)\n", 543 | "bn4 = MyBeanClass(1)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "id(bn1), id(bn2), id(bn3), id(bn4)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "## Pattern 4 : Logging using Metaclasses" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 1, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "import logging\n", 569 | "\n", 570 | "logging.basicConfig(filename='example.log', level=logging.INFO)\n", 571 | "logging.debug('This message should go to the log file')\n", 572 | "logging.info('So should this')\n", 573 | "logging.warning('And this, too')\n", 574 | "\n", 575 | "\n", 576 | "class MyLogSingletonMeta(type):\n", 577 | " logger = logging.getLogger('abc')\n", 578 | "\n", 579 | " _instances = {}\n", 580 | "\n", 581 | " def __call__(cls, *args, **kwargs):\n", 582 | " if cls not in cls._instances:\n", 583 | " instance = super(MyLogSingletonMeta, cls).__call__(*args)\n", 584 | " cls._instances[cls] = instance\n", 585 | "\n", 586 | " instance.__dict__['logger'] = logging.getLogger('abc')\n", 587 | " return cls._instances[cls]\n", 588 | "\n", 589 | "\n", 590 | "class MyLogEnabledClass(metaclass=MyLogSingletonMeta):\n", 591 | " def test_function(self):\n", 592 | " self.logger.info('Inside test_function method of Log Enabled class')\n", 593 | " pass " 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 2, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "name": "stdout", 603 | "output_type": "stream", 604 | "text": [ 605 | "4453879696 4453879696\n" 606 | ] 607 | } 608 | ], 609 | "source": [ 610 | "lec_instance1 = MyLogEnabledClass()\n", 611 | "lec_instance2 = MyLogEnabledClass()\n", 612 | 
"lec_instance1.test_function()\n", 613 | "\n", 614 | "print(id(lec_instance1), id(lec_instance2))\n", 615 | "\n" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 3, 621 | "metadata": {}, 622 | "outputs": [ 623 | { 624 | "name": "stdout", 625 | "output_type": "stream", 626 | "text": [ 627 | "INFO:root:So should this\r\n", 628 | "WARNING:root:And this, too\r\n", 629 | "INFO:abc:Inside test_function method of Log Enabled class\r\n" 630 | ] 631 | } 632 | ], 633 | "source": [ 634 | "!cat example.log" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 4, 640 | "metadata": {}, 641 | "outputs": [], 642 | "source": [ 643 | "class MyLogger: \n", 644 | " def __init__(self, logger=None):\n", 645 | " self.logger = logger\n", 646 | " \n", 647 | " def __call__(self, func):\n", 648 | " def wrapper(*args, **kwargs):\n", 649 | " if self.logger is None:\n", 650 | " print(str(func) + \" is called\")\n", 651 | " else:\n", 652 | " self.logger.info(str(func) + \" is called\")\n", 653 | " return func(*args, **kwargs)\n", 654 | " return wrapper \n", 655 | "\n", 656 | "class MyLoggingMeta(type):\n", 657 | " \n", 658 | " def __new__(cls, name, bases, attrs): \n", 659 | " for item, value in attrs.items():\n", 660 | " if callable(value):\n", 661 | " print(\"Function item :\" + str(item), str(value), type(value))\n", 662 | " attrs[item] = MyLogger()(value)\n", 663 | " else: \n", 664 | " print(str(item), str(value), type(value))\n", 665 | " return super(MyLoggingMeta, cls).__new__(cls, name, bases, attrs)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 5, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "name": "stdout", 675 | "output_type": "stream", 676 | "text": [ 677 | "__module__ __main__ \n", 678 | "__qualname__ MyClass1 \n", 679 | "Function item :test_m1 \n", 680 | "Function item :test_m2 \n" 681 | ] 682 | } 683 | ], 684 | "source": [ 685 | "class MyClass1(metaclass=MyLoggingMeta):\n", 686 | " def 
test_m1(self):\n", 687 | " pass\n", 688 | " \n", 689 | " def test_m2(self):\n", 690 | " pass" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 6, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [ 699 | "a= MyClass1()" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": 7, 705 | "metadata": {}, 706 | "outputs": [ 707 | { 708 | "name": "stdout", 709 | "output_type": "stream", 710 | "text": [ 711 | " is called\n" 712 | ] 713 | } 714 | ], 715 | "source": [ 716 | "a.test_m2()" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 8, 722 | "metadata": {}, 723 | "outputs": [ 724 | { 725 | "name": "stdout", 726 | "output_type": "stream", 727 | "text": [ 728 | " is called\n" 729 | ] 730 | } 731 | ], 732 | "source": [ 733 | "a.test_m1()" 734 | ] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "## Pattern 5 : Sealed classes" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": null, 746 | "metadata": {}, 747 | "outputs": [], 748 | "source": [ 749 | "class MySealedMeta(type):\n", 750 | " \n", 751 | " def __new__(cls, name, bases, attrs):\n", 752 | " all_metaclasses = [type(x) for x in bases]\n", 753 | " if MySealedMeta in all_metaclasses:\n", 754 | " raise TypeError(\"Sealed class cannot be sublcassed\")\n", 755 | " return super(MySealedMeta, cls).__new__(cls, name, bases, attrs)\n", 756 | " " 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": {}, 763 | "outputs": [], 764 | "source": [ 765 | "class MySealedClass(metaclass=MySealedMeta):\n", 766 | " pass" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": null, 772 | "metadata": {}, 773 | "outputs": [], 774 | "source": [ 775 | "class MyChildOfSealed(MySealedClass):\n", 776 | " pass" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": {}, 783 | 
"outputs": [], 784 | "source": [] 785 | } 786 | ], 787 | "metadata": { 788 | "kernelspec": { 789 | "display_name": "Python 3", 790 | "language": "python", 791 | "name": "python3" 792 | }, 793 | "language_info": { 794 | "codemirror_mode": { 795 | "name": "ipython", 796 | "version": 3 797 | }, 798 | "file_extension": ".py", 799 | "mimetype": "text/x-python", 800 | "name": "python", 801 | "nbconvert_exporter": "python", 802 | "pygments_lexer": "ipython3", 803 | "version": "3.7.4" 804 | } 805 | }, 806 | "nbformat": 4, 807 | "nbformat_minor": 2 808 | } 809 | -------------------------------------------------------------------------------- /2020/2020-02-29/MetaProgramming in Python.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2020/2020-02-29/MetaProgramming in Python.pptx -------------------------------------------------------------------------------- /2020/2020-02-29/Python.and.Netflix_Meetup.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2020/2020-02-29/Python.and.Netflix_Meetup.pptx -------------------------------------------------------------------------------- /2020/2020-02-29/Writing your own container in Python.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2020/2020-02-29/Writing your own container in Python.pptx -------------------------------------------------------------------------------- /2023/13- May-2023/Datafication of Indian judicial texts using Natural Language Processing (NLP).pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2023/13- May-2023/Datafication of Indian judicial texts using Natural Language Processing (NLP).pptx -------------------------------------------------------------------------------- /2023/13- May-2023/Meetup_details.txt: -------------------------------------------------------------------------------- 1 | 13 May Meetup details 2 | -------------------------------------------------------------------------------- /2023/readme.txt: -------------------------------------------------------------------------------- 1 | Hydpy - Meetup 2023 2 | -------------------------------------------------------------------------------- /2024/17-Feb-2024/Getting started with asyncio.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/17-Feb-2024/Getting started with asyncio.pptx -------------------------------------------------------------------------------- /2024/17-Feb-2024/HYD MEETUP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/17-Feb-2024/HYD MEETUP.pdf -------------------------------------------------------------------------------- /2024/17-Feb-2024/readme.txt: -------------------------------------------------------------------------------- 1 | PPT for the talks 2 | -------------------------------------------------------------------------------- /2024/April Meetup/Sharding Using Postgres FDW.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/April Meetup/Sharding Using Postgres FDW.pdf -------------------------------------------------------------------------------- /2024/April 
Meetup/readme.md: -------------------------------------------------------------------------------- 1 | PPT for April Month Meetup 2 | -------------------------------------------------------------------------------- /2024/March Meetup /readme.md: -------------------------------------------------------------------------------- 1 | PPT for March month Meetup 2 | -------------------------------------------------------------------------------- /2024/May Meetup/The Guide to building Indic LLMs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/May Meetup/The Guide to building Indic LLMs.pdf -------------------------------------------------------------------------------- /2024/May Meetup/readme.md: -------------------------------------------------------------------------------- 1 | PPT of May Month Talks 2 | -------------------------------------------------------------------------------- /2024/October Meetup/Ensuring Data Quality in Web Scraping with Data Contracts/Data_Quality_With_Contracts.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/October Meetup/Ensuring Data Quality in Web Scraping with Data Contracts/Data_Quality_With_Contracts.pptx -------------------------------------------------------------------------------- /2024/October Meetup/Ensuring Data Quality in Web Scraping with Data Contracts/contracts_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | ETL Pipeline for Cricket Data Extraction, Transformation, and Validation 3 | 4 | Author: Dayananda Challa 5 | Date: 19-10-2023 6 | Version: 1.0 7 | 8 | This script defines an ETL (Extract, Transform, Load) pipeline that scrapes cricket player statistics 9 | from a specified URL, validates the data 
using SODA checks, and transforms the data for further analysis. 10 | 11 | Classes: 12 | 1. DataExtractor: 13 | - Responsible for extracting data from a webpage using Selenium. 14 | - Retrieves player statistics from a table and converts it into a Pandas DataFrame. 15 | 16 | 2. DataTransformer: 17 | - Handles the transformation of the extracted data. 18 | - Calculates additional metrics such as the Century Conversion Rate. 19 | 20 | 3. DataValidator: 21 | - Validates the extracted DataFrame against a set of predefined checks using SODA. 22 | - Ensures the integrity and completeness of the data before transformation. 23 | 24 | 4. ETLPipeline: 25 | - Orchestrates the entire ETL process. 26 | - Manages the flow of data extraction, validation, and transformation. 27 | 28 | Dependencies: 29 | - Selenium: For web scraping dynamic content from web pages. 30 | - Pandas: For data manipulation and analysis. 31 | - PyYAML: For parsing YAML files. 32 | - Dask: For parallel computing with Pandas. 33 | - SODA: For data validation checks. 34 | 35 | Usage: 36 | - Define the URL of the webpage containing the data. 37 | - Instantiate the ETLPipeline with the URL and call the `run()` method to execute the pipeline. 38 | - The final transformed DataFrame will be printed to the console. 39 | 40 | Note: Ensure the required dependencies are installed and the Selenium WebDriver is set up correctly before running the script. 
41 | """ 42 | 43 | import pandas as pd 44 | import yaml 45 | from soda.scan import Scan # Import Soda's Scan functionality 46 | from selenium import webdriver 47 | from selenium.webdriver.common.by import By 48 | from selenium.webdriver.chrome.options import Options 49 | import time 50 | import dask 51 | 52 | # Disable automatic string conversion in Dask to avoid potential issues 53 | dask.config.set({"dataframe.convert-string": False}) 54 | 55 | 56 | # Define the data contract in YAML format for Soda checks 57 | data_contract_yaml = """ 58 | checks: 59 | - name: player_check 60 | type: required 61 | column: Player 62 | - name: format_check 63 | type: required 64 | column: Format 65 | - name: runs_check 66 | type: required 67 | column: Runs 68 | - name: centuries_check 69 | type: optional 70 | column: Centuries 71 | - name: fifties_check 72 | type: optional 73 | column: Fifties 74 | """ 75 | 76 | # Load the Soda checks from the YAML 77 | data_contract = yaml.safe_load(data_contract_yaml) 78 | 79 | class DataExtractor: 80 | """Class for extracting data from a specified URL using Selenium.""" 81 | def __init__(self, url): 82 | """Initialize with the URL to scrape.""" 83 | self.url = url 84 | 85 | def extract(self): 86 | """Extract data from the webpage and return it as a DataFrame.""" 87 | # Set up Selenium WebDriver in headless mode (no GUI) 88 | chrome_options = Options() 89 | chrome_options.add_argument("--headless") # Run Chrome in headless mode 90 | chrome_options.add_argument("--no-sandbox") 91 | chrome_options.add_argument("--disable-dev-shm-usage") 92 | 93 | # Initialize the driver with Chrome options 94 | driver = webdriver.Chrome(options=chrome_options) 95 | driver.get(self.url) 96 | time.sleep(5) # Wait for the page to load completely 97 | 98 | # Find all table rows on the page 99 | rows = driver.find_elements(By.XPATH, '//table//tr') 100 | 101 | data = [] 102 | for row in rows[1:]: # Skip the header 103 | cols = row.find_elements(By.XPATH, './/td') 104 | 
if len(cols) >= 5: # Ensure there are enough columns 105 | data.append({ 106 | 'Player': cols[0].text.strip(), 107 | 'Format': cols[1].text.strip(), 108 | 'Runs': int(cols[2].text.strip().replace('*','').replace(',', '')), 109 | 'Centuries': int(cols[3].text.strip()) if cols[3].text.strip() else 0, 110 | 'Fifties': int(cols[4].text.strip()) if cols[4].text.strip() else 0, 111 | }) 112 | # Close the browser 113 | driver.quit() 114 | # Return the data as a DataFrame 115 | return pd.DataFrame(data) 116 | 117 | class DataTransformer: 118 | """Class for transforming the extracted data.""" 119 | def __init__(self, data): 120 | """Initialize with the DataFrame to transform.""" 121 | self.data = data 122 | 123 | def transform(self): 124 | """Perform data transformations and return the modified DataFrame.""" 125 | # Calculate century conversion rate 126 | self.data['Century Conversion Rate'] = ( 127 | self.data['Centuries'] / (self.data['Centuries'] + self.data['Fifties'].replace(0, pd.NA)) 128 | ).fillna(0).round(4) # Fill NaN values with 0 and round to 4 decimal places 129 | return self.data 130 | 131 | class DataValidator: 132 | """Class for validating data using SODA checks.""" 133 | def __init__(self, data, data_contract): 134 | """Initialize with the DataFrame to validate.""" 135 | self.data = data 136 | self.data_contract = data_contract 137 | 138 | def validate(self): 139 | """Validate the DataFrame against defined checks and print results.""" 140 | # Initialize the SODA scan 141 | scan = Scan() 142 | 143 | # Define SODA checks for data validation 144 | sodacl_checks = """ 145 | checks for df: 146 | - missing_count(Player) = 0 147 | - missing_count(Format) = 0 148 | - missing_count(Runs) = 0 149 | - max(Centuries) >= 0 150 | - min(Fifties) >= 0 151 | """ 152 | 153 | # Add checks to the scan 154 | scan.add_sodacl_yaml_str(sodacl_checks) 155 | # Add the DataFrame to the scan for validation 156 | scan.add_pandas_dataframe("df", self.data) # Pass the DataFrame to Soda 
for validation 157 | 158 | # Execute the scan 159 | scan.execute() 160 | 161 | # Retrieve the results of the scan 162 | scan_result = scan.get_scan_results() 163 | 164 | print(scan_result) 165 | 166 | class ETLPipeline: 167 | """Class representing the ETL pipeline for extracting, validating, and transforming data.""" 168 | def __init__(self, url, data_contract): 169 | """Initialize with the URL to extract data from.""" 170 | self.url = url 171 | self.data_contract = data_contract 172 | self.extractor = DataExtractor(url) 173 | self.transformer = None 174 | self.validator = None 175 | 176 | def run(self): 177 | """Run the ETL pipeline: extract, validate, and transform data.""" 178 | # Extract data from the specified URL 179 | data = self.extractor.extract() 180 | 181 | # Validate the extracted data 182 | self.validator = DataValidator(data, self.data_contract) 183 | self.validator.validate() 184 | 185 | # Transform the validated data 186 | self.transformer = DataTransformer(data) 187 | transformed_data = self.transformer.transform() 188 | 189 | return transformed_data 190 | 191 | if __name__ == "__main__": 192 | # Define the URL to scrape data from 193 | url = "https://www.espncricinfo.com/records/most-runs-in-career-223646" 194 | 195 | # Create an instance of the ETLPipeline with the specified URL 196 | etl_pipeline = ETLPipeline(url, data_contract) 197 | 198 | # Run the ETL pipeline and get the final transformed data 199 | final_data = etl_pipeline.run() 200 | 201 | # Print the final transformed data 202 | print(final_data) 203 | -------------------------------------------------------------------------------- /2024/October Meetup/Ensuring Data Quality in Web Scraping with Data Contracts/without_data_contracts.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import pandas as pd 4 | import yaml 5 | 6 | from selenium import webdriver 7 | from selenium.webdriver.common.by 
import By 8 | from selenium.webdriver.chrome.options import Options 9 | import time 10 | 11 | # Data contract definition 12 | data_contract = { 13 | 'fields': [ 14 | {'name': 'Player', 'required': True}, 15 | {'name': 'Format', 'required': True}, 16 | {'name': 'Runs', 'required': True}, 17 | {'name': 'Centuries', 'required': False}, 18 | {'name': 'Fifties', 'required': False}, 19 | ] 20 | } 21 | 22 | 23 | 24 | class DataExtractor: 25 | def __init__(self, url): 26 | self.url = url 27 | 28 | def extract(self): 29 | # Set up Selenium WebDriver in headless mode 30 | chrome_options = Options() 31 | chrome_options.add_argument("--headless") # Run Chrome in headless mode 32 | chrome_options.add_argument("--no-sandbox") 33 | chrome_options.add_argument("--disable-dev-shm-usage") 34 | 35 | # Initialize the driver with Chrome options 36 | driver = webdriver.Chrome(options=chrome_options) 37 | driver.get(self.url) 38 | time.sleep(5) # Wait for the page to load completely 39 | 40 | # XPath to find the table rows 41 | rows = driver.find_elements(By.XPATH, '//table//tr') 42 | 43 | data = [] 44 | for row in rows[1:]: # Skip the header 45 | cols = row.find_elements(By.XPATH, './/td') 46 | if len(cols) >= 5: # Ensure there are enough columns 47 | data.append({ 48 | 'Player': cols[0].text.strip(), 49 | 'Format': cols[1].text.strip(), 50 | 'Runs': int(cols[2].text.strip().replace('*','').replace(',', '')), 51 | 'Centuries': int(cols[3].text.strip()) if cols[3].text.strip() else 0, 52 | 'Fifties': int(cols[4].text.strip()) if cols[4].text.strip() else 0, 53 | }) 54 | driver.quit() 55 | return pd.DataFrame(data) 56 | 57 | class DataTransformer: 58 | def __init__(self, data): 59 | self.data = data 60 | 61 | def transform(self): 62 | # Calculate century conversion rate 63 | self.data['Century Conversion Rate'] = ( 64 | self.data['Centuries'] / (self.data['Centuries'] + self.data['Fifties'].replace(0, pd.NA)) 65 | ).fillna(0).round(4) 66 | return self.data 67 | 68 | class DataValidator: 
69 | def __init__(self, data, data_contract): 70 | self.data = data 71 | self.data_contract = data_contract 72 | 73 | def validate(self): 74 | for field in self.data_contract['fields']: 75 | if field['required'] and field['name'] not in self.data.columns: 76 | raise ValueError(f"Missing required field: {field['name']}") 77 | if field['name'] in self.data.columns and self.data[field['name']].isnull().any(): 78 | raise ValueError(f"Field {field['name']} contains null values.") 79 | 80 | class ETLPipeline: 81 | def __init__(self, url, data_contract): 82 | self.url = url 83 | self.data_contract = data_contract 84 | self.extractor = DataExtractor(url) 85 | self.transformer = None 86 | self.validator = None 87 | 88 | def run(self): 89 | # Extract 90 | data = self.extractor.extract() 91 | 92 | # Validate 93 | self.validator = DataValidator(data, self.data_contract) 94 | self.validator.validate() 95 | 96 | # Transform 97 | self.transformer = DataTransformer(data) 98 | transformed_data = self.transformer.transform() 99 | 100 | return transformed_data 101 | 102 | if __name__ == "__main__": 103 | url = "https://www.espncricinfo.com/records/most-runs-in-career-223646" 104 | etl_pipeline = ETLPipeline(url, data_contract) 105 | final_data = etl_pipeline.run() 106 | print(final_data) 107 | -------------------------------------------------------------------------------- /2024/October Meetup/Intro to GenAI Architecture Modelling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/October Meetup/Intro to GenAI Architecture Modelling.pdf -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/Pandas Optimization - Advanced Techniques.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": 
"12b3d1e6", 6 | "metadata": {}, 7 | "source": [ 8 | "## Pandas Optimization - Advanced Techniques\n", 9 | "\n", 10 | "- Chunking\n", 11 | "- Indexing\n", 12 | "- Vector Operations\n", 13 | "- Memory Profiling" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 10, 19 | "id": "8f86bf56", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import warnings\n", 24 | "warnings.filterwarnings('ignore')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 11, 30 | "id": "2bcdd8cd", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from memory_profiler import profile\n", 35 | "import pandas as pd\n", 36 | "import numpy as np\n", 37 | "import time\n", 38 | "from memory_profiler import memory_usage" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "2232854f", 44 | "metadata": {}, 45 | "source": [ 46 | "### Chunking\n", 47 | "\n", 48 | "For large datasets, read data in chunks using the chunksize parameter in functions like pd.read_csv(). Process each chunk independently to avoid memory overflow."
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 12, 54 | "id": "e359f773", 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Memory usage with chunking: 98.83 MiB\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "csv_file_path = 'large_file.csv'\n", 67 | "def read_with_chunking():\n", 68 | " # Measure time for reading with chunking\n", 69 | " start_time = time.time()\n", 70 | " chunks = []\n", 71 | " chunk_size = 100000 # Adjust chunk size as needed\n", 72 | "\n", 73 | " for chunk in pd.read_csv(csv_file_path, chunksize=chunk_size):\n", 74 | " chunks.append(chunk)\n", 75 | "\n", 76 | " df_chunked = pd.concat(chunks, ignore_index=True)\n", 77 | " end_time = time.time()\n", 78 | " \n", 79 | " return df_chunked, end_time - start_time\n", 80 | "\n", 81 | "# Measure memory usage\n", 82 | "mem_usage_with_chunking = memory_usage(read_with_chunking)\n", 83 | "print(f\"Memory usage with chunking: {max(mem_usage_with_chunking) - min(mem_usage_with_chunking):.2f} MiB\")" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "6f6d6876", 89 | "metadata": {}, 90 | "source": [ 91 | "### Indexing\n", 92 | "\n", 93 | "Setting an appropriate index can drastically speed up lookups, joins, and group operations." 
94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 20, 99 | "id": "f640445a", 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Time taken without indexing: 0.15232062339782715\n", 107 | "Time taken with indexing: 0.06646132469177246\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "import pandas as pd\n", 113 | "import time\n", 114 | "\n", 115 | "# Create large DataFrames\n", 116 | "df1 = pd.DataFrame({'key': range(10000000), 'value1': range(10000000)})\n", 117 | "df2 = pd.DataFrame({'key': range(10000000), 'value2': range(10000000)})\n", 118 | "\n", 119 | "# Merge without indexing\n", 120 | "start_time = time.time()\n", 121 | "merged_df_no_index = pd.merge(df1, df2, on='key')\n", 122 | "end_time = time.time()\n", 123 | "print(\"Time taken without indexing:\", end_time - start_time)\n", 124 | "\n", 125 | "# Merge with indexing\n", 126 | "df1.set_index('key', inplace=True)\n", 127 | "df2.set_index('key', inplace=True)\n", 128 | "\n", 129 | "start_time = time.time()\n", 130 | "merged_df_with_index = pd.merge(df1, df2, left_index=True, right_index=True)\n", 131 | "end_time = time.time()\n", 132 | "print(\"Time taken with indexing:\", end_time - start_time)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "ece48f16", 138 | "metadata": {}, 139 | "source": [ 140 | "### Vectorization\n", 141 | "\n", 142 | "Vectorized operations allow you to perform computations on entire columns or arrays without explicit loops, which can significantly speed up operations." 
143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "id": "75fe4df8", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "Time taken: 0.9784\n", 156 | "Time taken: 0.0007\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "# Create a DataFrame\n", 162 | "df = pd.DataFrame({\n", 163 | " 'A': np.random.randint(0, 100, size=10000),\n", 164 | " 'B': np.random.randint(0, 100, size=10000)\n", 165 | "})\n", 166 | "\n", 167 | "df['C'] = 0\n", 168 | "start_time = time.time()\n", 169 | "# Use a loop to add the values of columns 'A' and 'B'\n", 170 | "for i in range(len(df)):\n", 171 | " df['C'][i] = df['A'][i] + df['B'][i]\n", 172 | "end_time = time.time()\n", 173 | "\n", 174 | "print(f\"Time taken: {end_time-start_time:.4f}\")\n", 175 | "\n", 176 | "# Vectorized operation: adding two columns\n", 177 | "start_time = time.time()\n", 178 | "df['C'] = df['A'] + df['B']\n", 179 | "end_time = time.time()\n", 180 | "\n", 181 | "print(f\"Time taken: {end_time-start_time:.4f}\")" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 8, 187 | "id": "7eb31a0c", 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Time taken: 4.7028\n", 195 | "Time taken: 0.0089\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "df = pd.DataFrame({\n", 201 | " 'A': np.random.randint(0, 100, size=10000000), # int64\n", 202 | "})\n", 203 | "\n", 204 | "start_time = time.time()\n", 205 | "df['B'] = df['A'].apply(lambda x: x ** 2)\n", 206 | "end_time = time.time()\n", 207 | "\n", 208 | "print(f\"Time taken: {end_time-start_time:.4f}\")\n", 209 | "\n", 210 | "# vectorization\n", 211 | "start_time = time.time()\n", 212 | "df['B'] = df['A'] ** 2 \n", 213 | "end_time = time.time()\n", 214 | "\n", 215 | "print(f\"Time taken: {end_time-start_time:.4f}\")" 216 | ] 217 | }, 218 | { 219 | "cell_type": 
"markdown", 220 | "id": "f6339b0f", 221 | "metadata": {}, 222 | "source": [ 223 | "### Memory Profiling\n", 224 | "\n", 225 | "Use profiling tools like Pandas-Profiling or memory_profiler to identify bottlenecks and memory hogs in your code." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "id": "c1496187", 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "name": "stdout", 236 | "output_type": "stream", 237 | "text": [ 238 | "\n", 239 | "A module that was compiled using NumPy 1.x cannot be run in\n", 240 | "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", 241 | "versions of NumPy, modules must be compiled with NumPy 2.0.\n", 242 | "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", 243 | "\n", 244 | "If you are a user of the module, the easiest solution will be to\n", 245 | "downgrade to 'numpy<2' or try to upgrade the affected module.\n", 246 | "We expect that some modules will need time to support NumPy 2.\n", 247 | "\n", 248 | "Traceback (most recent call last): File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 197, in _run_module_as_main\n", 249 | " return _run_code(code, main_globals, None,\n", 250 | " File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 87, in _run_code\n", 251 | " exec(code, run_globals)\n", 252 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1351, in \n", 253 | " exec_with_profiler(script_filename, prof, args.backend, script_args)\n", 254 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1252, in exec_with_profiler\n", 255 | " exec(compile(f.read(), filename, 'exec'), ns, ns)\n", 256 | " File \"memory.py\", line 1, in \n", 257 | " import pandas as pd\n", 258 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/__init__.py\", line 26, in \n", 259 | " from pandas.compat import (\n", 260 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/__init__.py\", line 27, in \n", 261 | " 
from pandas.compat.pyarrow import (\n", 262 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/pyarrow.py\", line 8, in \n", 263 | " import pyarrow as pa\n", 264 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pyarrow/__init__.py\", line 65, in \n", 265 | " import pyarrow.lib as _lib\n", 266 | "AttributeError: _ARRAY_API not found\n", 267 | "\n", 268 | "A module that was compiled using NumPy 1.x cannot be run in\n", 269 | "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", 270 | "versions of NumPy, modules must be compiled with NumPy 2.0.\n", 271 | "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", 272 | "\n", 273 | "If you are a user of the module, the easiest solution will be to\n", 274 | "downgrade to 'numpy<2' or try to upgrade the affected module.\n", 275 | "We expect that some modules will need time to support NumPy 2.\n", 276 | "\n", 277 | "Traceback (most recent call last): File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 197, in _run_module_as_main\n", 278 | " return _run_code(code, main_globals, None,\n", 279 | " File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 87, in _run_code\n", 280 | " exec(code, run_globals)\n", 281 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1351, in \n", 282 | " exec_with_profiler(script_filename, prof, args.backend, script_args)\n", 283 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1252, in exec_with_profiler\n", 284 | " exec(compile(f.read(), filename, 'exec'), ns, ns)\n", 285 | " File \"memory.py\", line 1, in \n", 286 | " import pandas as pd\n", 287 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/__init__.py\", line 49, in \n", 288 | " from pandas.core.api import (\n", 289 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/api.py\", line 9, in \n", 290 | " from pandas.core.dtypes.dtypes import (\n", 291 | " File 
\"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py\", line 24, in \n", 292 | " from pandas._libs import (\n", 293 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pyarrow/__init__.py\", line 65, in \n", 294 | " import pyarrow.lib as _lib\n", 295 | "AttributeError: _ARRAY_API not found\n", 296 | "\n", 297 | "A module that was compiled using NumPy 1.x cannot be run in\n", 298 | "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", 299 | "versions of NumPy, modules must be compiled with NumPy 2.0.\n", 300 | "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", 301 | "\n", 302 | "If you are a user of the module, the easiest solution will be to\n", 303 | "downgrade to 'numpy<2' or try to upgrade the affected module.\n", 304 | "We expect that some modules will need time to support NumPy 2.\n", 305 | "\n", 306 | "Traceback (most recent call last): File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 197, in _run_module_as_main\n", 307 | " return _run_code(code, main_globals, None,\n", 308 | " File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 87, in _run_code\n", 309 | " exec(code, run_globals)\n", 310 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1351, in \n", 311 | " exec_with_profiler(script_filename, prof, args.backend, script_args)\n", 312 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1252, in exec_with_profiler\n", 313 | " exec(compile(f.read(), filename, 'exec'), ns, ns)\n", 314 | " File \"memory.py\", line 1, in \n", 315 | " import pandas as pd\n", 316 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/__init__.py\", line 49, in \n", 317 | " from pandas.core.api import (\n", 318 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/api.py\", line 28, in \n", 319 | " from pandas.core.arrays import Categorical\n", 320 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/__init__.py\", line 1, in \n", 
321 | " from pandas.core.arrays.arrow import ArrowExtensionArray\n", 322 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/arrow/__init__.py\", line 5, in \n", 323 | " from pandas.core.arrays.arrow.array import ArrowExtensionArray\n", 324 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py\", line 50, in \n", 325 | " from pandas.core import (\n", 326 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/__init__.py\", line 8, in \n", 327 | " from pandas.core.ops.array_ops import (\n", 328 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/array_ops.py\", line 56, in \n", 329 | " from pandas.core.computation import expressions\n", 330 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py\", line 21, in \n", 331 | " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", 332 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/check.py\", line 5, in \n", 333 | " ne = import_optional_dependency(\"numexpr\", errors=\"warn\")\n", 334 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/_optional.py\", line 135, in import_optional_dependency\n", 335 | " module = importlib.import_module(name)\n", 336 | " File \"/opt/anaconda3/lib/python3.9/importlib/__init__.py\", line 127, in import_module\n", 337 | " return _bootstrap._gcd_import(name[level:], package, level)\n", 338 | " File \"/opt/anaconda3/lib/python3.9/site-packages/numexpr/__init__.py\", line 26, in \n", 339 | " from numexpr.interpreter import MAX_THREADS, use_vml, __BLOCK_SIZE1__\n", 340 | "AttributeError: _ARRAY_API not found\n", 341 | "\n", 342 | "A module that was compiled using NumPy 1.x cannot be run in\n", 343 | "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", 344 | "versions of NumPy, modules must be compiled with NumPy 2.0.\n", 345 | "Some module may need to rebuild instead e.g. 
with 'pybind11>=2.12'.\n", 346 | "\n", 347 | "If you are a user of the module, the easiest solution will be to\n", 348 | "downgrade to 'numpy<2' or try to upgrade the affected module.\n", 349 | "We expect that some modules will need time to support NumPy 2.\n", 350 | "\n", 351 | "Traceback (most recent call last): File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 197, in _run_module_as_main\n", 352 | " return _run_code(code, main_globals, None,\n", 353 | " File \"/opt/anaconda3/lib/python3.9/runpy.py\", line 87, in _run_code\n", 354 | " exec(code, run_globals)\n", 355 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1351, in \n", 356 | " exec_with_profiler(script_filename, prof, args.backend, script_args)\n", 357 | " File \"/opt/anaconda3/lib/python3.9/site-packages/memory_profiler.py\", line 1252, in exec_with_profiler\n", 358 | " exec(compile(f.read(), filename, 'exec'), ns, ns)\n", 359 | " File \"memory.py\", line 1, in \n", 360 | " import pandas as pd\n", 361 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/__init__.py\", line 49, in \n", 362 | " from pandas.core.api import (\n", 363 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/api.py\", line 28, in \n", 364 | " from pandas.core.arrays import Categorical\n", 365 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/__init__.py\", line 1, in \n", 366 | " from pandas.core.arrays.arrow import ArrowExtensionArray\n", 367 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/arrow/__init__.py\", line 5, in \n", 368 | " from pandas.core.arrays.arrow.array import ArrowExtensionArray\n", 369 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py\", line 64, in \n", 370 | " from pandas.core.arrays.masked import BaseMaskedArray\n", 371 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py\", line 60, in \n", 372 | " from pandas.core import (\n", 373 | " 
File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py\", line 52, in \n", 374 | " bn = import_optional_dependency(\"bottleneck\", errors=\"warn\")\n", 375 | " File \"/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/_optional.py\", line 135, in import_optional_dependency\n", 376 | " module = importlib.import_module(name)\n", 377 | " File \"/opt/anaconda3/lib/python3.9/importlib/__init__.py\", line 127, in import_module\n", 378 | " return _bootstrap._gcd_import(name[level:], package, level)\n", 379 | " File \"/opt/anaconda3/lib/python3.9/site-packages/bottleneck/__init__.py\", line 2, in \n", 380 | " from .reduce import (\n", 381 | "AttributeError: _ARRAY_API not found\n" 382 | ] 383 | }, 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "Filename: memory.py\r\n", 389 | "\r\n", 390 | "Line # Mem usage Increment Occurrences Line Contents\r\n", 391 | "=============================================================\r\n", 392 | " 4 94.5 MiB 94.5 MiB 1 @profile\r\n", 393 | " 5 def process_data():\r\n", 394 | " 6 125.2 MiB 30.7 MiB 1 df = pd.DataFrame({'a': range(1000000), 'b': range(1000000)})\r\n", 395 | " 7 125.4 MiB 0.2 MiB 1 df['c'] = df['a'] + df['b']\r\n", 396 | " 8 141.0 MiB 15.6 MiB 1 df = df.drop(columns=['a'])\r\n", 397 | " 9 141.0 MiB 0.0 MiB 1 return df\r\n", 398 | "\r\n", 399 | "\r\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "!python -m memory_profiler memory.py" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "1307b134", 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [] 414 | } 415 | ], 416 | "metadata": { 417 | "kernelspec": { 418 | "display_name": "Python 3 (ipykernel)", 419 | "language": "python", 420 | "name": "python3" 421 | }, 422 | "language_info": { 423 | "codemirror_mode": { 424 | "name": "ipython", 425 | "version": 3 426 | }, 427 | "file_extension": ".py", 428 | "mimetype": "text/x-python", 429 | "name": "python", 430 | 
"nbconvert_exporter": "python", 431 | "pygments_lexer": "ipython3", 432 | "version": "3.9.7" 433 | } 434 | }, 435 | "nbformat": 4, 436 | "nbformat_minor": 5 437 | } 438 | -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/Pandas Optimization - Best Practices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8fd6ff88", 6 | "metadata": {}, 7 | "source": [ 8 | "## Pandas Optimization - Best Practices\n", 9 | "\n", 10 | "- Downcast datatypes\n", 11 | "- Faster Lookups\n", 12 | "- Avoid creating copy\n", 13 | "- Filter before merge/join/concat" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 7, 19 | "id": "d323d3f9", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import warnings\n", 24 | "warnings.filterwarnings('ignore')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 8, 30 | "id": "58e7c0b6", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import time\n", 35 | "import pandas as pd\n", 36 | "import numpy as np" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "29338949", 42 | "metadata": {}, 43 | "source": [ 44 | "### Downcasting\n", 45 | "\n", 46 | "Downcasting involves converting columns to more memory-efficient data types. For instance, you can convert float64 to float32 or int64 to int32 when the values fit within the smaller data types, saving memory without losing information." 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 9, 52 | "id": "1d4cd583", 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | "RangeIndex: 1000000 entries, 0 to 999999\n", 61 | "Data columns (total 2 columns):\n", 62 | " # Column Non-Null Count Dtype \n", 63 | "--- ------ -------------- ----- \n", 64 | " 0 A 1000000 non-null int64 \n", 65 | " 1 B 1000000 non-null float64\n", 66 | "dtypes: float64(1), int64(1)\n", 67 | "memory usage: 15.3 MB\n", 68 | "\n", 69 | "\n", 70 | "RangeIndex: 1000000 entries, 0 to 999999\n", 71 | "Data columns (total 2 columns):\n", 72 | " # Column Non-Null Count Dtype \n", 73 | "--- ------ -------------- ----- \n", 74 | " 0 A 1000000 non-null int8 \n", 75 | " 1 B 1000000 non-null float32\n", 76 | "dtypes: float32(1), int8(1)\n", 77 | "memory usage: 4.8 MB\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "df = pd.DataFrame({\n", 83 | " 'A': np.random.randint(0, 100, size=1000000), # int64\n", 84 | " 'B': np.random.rand(1000000) # float64\n", 85 | "})\n", 86 | "\n", 87 | "# memory usage before downcasting\n", 88 | "df.info()\n", 89 | "print()\n", 90 | "\n", 91 | "# Downcast the integer and float columns\n", 92 | "df['A'] = pd.to_numeric(df['A'], downcast='integer') # Downcast to int8\n", 93 | "df['B'] = pd.to_numeric(df['B'], downcast='float') # Downcast to float32\n", 94 | "\n", 95 | "# memory usage after downcasting\n", 96 | "df.info()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "id": "3ef6fc87", 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "\n", 110 | "RangeIndex: 4000 entries, 0 to 3999\n", 111 | "Data columns (total 1 columns):\n", 112 | " # Column Non-Null Count Dtype \n", 113 | "--- ------ -------------- ----- \n", 114 | " 0 city 4000 non-null object\n", 115 | "dtypes: object(1)\n", 116 | "memory usage: 31.4+ KB\n", 
117 | "\n", 118 | "32128\n", 119 | "4260\n", 120 | "\n", 121 | "\n", 122 | "RangeIndex: 4000 entries, 0 to 3999\n", 123 | "Data columns (total 1 columns):\n", 124 | " # Column Non-Null Count Dtype \n", 125 | "--- ------ -------------- ----- \n", 126 | " 0 city 4000 non-null category\n", 127 | "dtypes: category(1)\n", 128 | "memory usage: 4.2 KB\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "df = pd.DataFrame({\n", 134 | " 'city': ['New York', 'Los Angeles', 'New York', 'Chicago'] * 1000\n", 135 | "})\n", 136 | "\n", 137 | "df.info()\n", 138 | "print()\n", 139 | "# Before optimization\n", 140 | "print(df['city'].memory_usage())\n", 141 | "\n", 142 | "# Convert to categorical\n", 143 | "df['city'] = df['city'].astype('category')\n", 144 | "print(df['city'].memory_usage())\n", 145 | "print()\n", 146 | "df.info()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "id": "71dfc3a0", 152 | "metadata": {}, 153 | "source": [ 154 | "### Faster Lookups\n", 155 | "\n", 156 | ".iloc and .loc can be used for efficient and faster lookups" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 10, 162 | "id": "9104b089", 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "Lookup times with .loc:\n", 170 | "Value: 100000, Time: 0.004121 seconds\n", 171 | "Value: 500000, Time: 0.000028 seconds\n", 172 | "Value: 999999, Time: 0.000008 seconds\n", 173 | "\n", 174 | "Lookup times with .iloc:\n", 175 | "Value: 100000, Time: 0.000061 seconds\n", 176 | "Value: 500000, Time: 0.000018 seconds\n", 177 | "Value: 999999, Time: 0.000015 seconds\n", 178 | "\n", 179 | "Lookup times with Boolean indexing:\n", 180 | "Value: 100000, Time: 0.008170 seconds\n", 181 | "Value: 500000, Time: 0.000799 seconds\n", 182 | "Value: 999999, Time: 0.000516 seconds\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "df = pd.DataFrame({'column1': range(1000000),\n", 188 | " 'column2': ['A'] * 
1000000})\n", 189 | "\n", 190 | "# Lookup scenarios\n", 191 | "lookup_values = [100000, 500000, 999999]\n", 192 | "\n", 193 | "# Benchmarking with .loc\n", 194 | "loc_times = []\n", 195 | "for value in lookup_values:\n", 196 | " start_time = time.time()\n", 197 | " result_loc = df.loc[value, 'column1']\n", 198 | " end_time = time.time()\n", 199 | " loc_times.append(end_time - start_time)\n", 200 | "\n", 201 | "# Benchmarking with .iloc\n", 202 | "iloc_times = []\n", 203 | "for value in lookup_values:\n", 204 | " start_time = time.time()\n", 205 | " result_iloc = df.iloc[value, 0]\n", 206 | " end_time = time.time()\n", 207 | " iloc_times.append(end_time - start_time)\n", 208 | "\n", 209 | "# Benchmarking with Boolean indexing\n", 210 | "boolean_times = []\n", 211 | "for value in lookup_values:\n", 212 | " start_time = time.time()\n", 213 | " result_boolean = df[df.index == value]['column1'].values[0]\n", 214 | " end_time = time.time()\n", 215 | " boolean_times.append(end_time - start_time)\n", 216 | "\n", 217 | "print(\"Lookup times with .loc:\")\n", 218 | "for value, time in zip(lookup_values, loc_times):\n", 219 | " print(f\"Value: {value}, Time: {time:.6f} seconds\")\n", 220 | "\n", 221 | "print(\"\\nLookup times with .iloc:\")\n", 222 | "for value, time in zip(lookup_values, iloc_times):\n", 223 | " print(f\"Value: {value}, Time: {time:.6f} seconds\")\n", 224 | "\n", 225 | "print(\"\\nLookup times with Boolean indexing:\")\n", 226 | "for value, time in zip(lookup_values, boolean_times):\n", 227 | " print(f\"Value: {value}, Time: {time:.6f} seconds\")" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "360eb442", 233 | "metadata": {}, 234 | "source": [ 235 | "### Filter\n", 236 | "\n", 237 | "When merging or joining large DataFrames, it’s beneficial to filter them down to only the necessary rows beforehand. This reduces the size of the DataFrames involved in the operation, improving performance." 
238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 13, 243 | "id": "3bc2bb36", 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | " key value1 value2\n", 251 | "0 B 2 5\n", 252 | "1 C 3 6\n", 253 | "2 D 4 7\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "df1 = pd.DataFrame({\n", 259 | " 'key': ['A', 'B', 'C', 'D'],\n", 260 | " 'value1': [1, 2, 3, 4]\n", 261 | "})\n", 262 | "\n", 263 | "df2 = pd.DataFrame({\n", 264 | " 'key': ['B', 'C', 'D', 'E'],\n", 265 | " 'value2': [5, 6, 7, 8]\n", 266 | "})\n", 267 | "\n", 268 | "# Filter DataFrames before merging\n", 269 | "df1_filtered = df1[df1['value1'] > 1] # Keep only rows where value1 > 1\n", 270 | "df2_filtered = df2[df2['key'].isin(df1_filtered['key'])]\n", 271 | "\n", 272 | "# Perform the merge\n", 273 | "merged_df = pd.merge(df1_filtered, df2_filtered, on='key')\n", 274 | "print(merged_df)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "id": "5dba75bc", 280 | "metadata": {}, 281 | "source": [ 282 | "### Avoid creating copy\n", 283 | "\n", 284 | "inplace parameter ensures the changes are incorporated in the same dataframe instead of creating a new one." 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 30, 290 | "id": "495701b8", 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "data": { 295 | "text/html": [ 296 | "
\n", 297 | "\n", 310 | "\n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | "
A
066
129
254
393
485
\n", 340 | "
" 341 | ], 342 | "text/plain": [ 343 | " A\n", 344 | "0 66\n", 345 | "1 29\n", 346 | "2 54\n", 347 | "3 93\n", 348 | "4 85" 349 | ] 350 | }, 351 | "execution_count": 30, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "df = pd.DataFrame({\n", 358 | " 'A': np.random.randint(0, 100, size=100),\n", 359 | " 'B': np.random.randint(0, 100, size=100)\n", 360 | "})\n", 361 | "\n", 362 | "df.drop(columns=['B'], inplace=True)\n", 363 | "df.head()" 364 | ] 365 | } 366 | ], 367 | "metadata": { 368 | "kernelspec": { 369 | "display_name": "Python 3 (ipykernel)", 370 | "language": "python", 371 | "name": "python3" 372 | }, 373 | "language_info": { 374 | "codemirror_mode": { 375 | "name": "ipython", 376 | "version": 3 377 | }, 378 | "file_extension": ".py", 379 | "mimetype": "text/x-python", 380 | "name": "python", 381 | "nbconvert_exporter": "python", 382 | "pygments_lexer": "ipython3", 383 | "version": "3.9.7" 384 | } 385 | }, 386 | "nbformat": 4, 387 | "nbformat_minor": 5 388 | } 389 | -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/Pandas Optimization - Use Case.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2a2b39a3", 6 | "metadata": {}, 7 | "source": [ 8 | "## Use Case\n", 9 | "### Performance Comparison of Reading a Large CSV File with and without Chunking\n", 10 | "\n", 11 | "In this demonstration, we will:\n", 12 | "\n", 13 | "1. Simulate the creation of a large CSV file.\n", 14 | "2. Read the CSV file into a Pandas DataFrame in two different ways:\n", 15 | " - Without chunking\n", 16 | " - With chunking\n", 17 | "3. Compare the performance of both methods." 
18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 30, 23 | "id": "84843a1f", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings('ignore')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 31, 34 | "id": "103e1683", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd\n", 39 | "import numpy as np\n", 40 | "import time\n", 41 | "\n", 42 | "from memory_profiler import memory_usage\n", 43 | "import time" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "a710e4ab", 49 | "metadata": {}, 50 | "source": [ 51 | "### Step 1: Simulating a Large CSV File\n", 52 | "\n", 53 | "First, let's create a large CSV file for testing purposes. We will generate a DataFrame with a significant number of rows and save it as a CSV file." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 24, 59 | "id": "e91a09fb", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# Simulate a large DataFrame\n", 64 | "num_rows = 10**6 # 1 million rows\n", 65 | "df = pd.DataFrame({\n", 66 | " 'A': np.random.rand(num_rows),\n", 67 | " 'B': np.random.randint(1, 100, size=num_rows),\n", 68 | " 'C': np.random.choice(['X', 'Y', 'Z'], size=num_rows)\n", 69 | "})\n", 70 | "\n", 71 | "# Save the DataFrame to a CSV file\n", 72 | "csv_file_path = 'large_file.csv'\n", 73 | "df.to_csv(csv_file_path, index=False)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "0add448a", 79 | "metadata": {}, 80 | "source": [ 81 | "### Step 2: Reading the CSV File Without Chunking\n", 82 | "\n", 83 | "Now, we will read the entire CSV file into a DataFrame without using chunking and measure its memory usage (the function also times the read, although only the memory figure is printed)."
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 28, 89 | "id": "b56dd2ca", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "Memory usage without chunking: 102.02 MiB\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "def read_without_chunking():\n", 102 | " # Measure time for reading without chunking\n", 103 | " start_time = time.time()\n", 104 | " df_full = pd.read_csv(csv_file_path)\n", 105 | " end_time = time.time()\n", 106 | " \n", 107 | " return df_full, end_time - start_time\n", 108 | "\n", 109 | "# Measure memory usage\n", 110 | "mem_usage_no_chunking = memory_usage(read_without_chunking)\n", 111 | "print(f\"Memory usage without chunking: {max(mem_usage_no_chunking) - min(mem_usage_no_chunking):.2f} MiB\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "8ed94930", 117 | "metadata": {}, 118 | "source": [ 119 | "### Step 3: Reading the CSV File With Chunking\n", 120 | "\n", 121 | "Next, we will read the same CSV file using chunking and measure its memory usage (the function also times the read, although only the memory figure is printed)."
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 29, 127 | "id": "acf9a260", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Memory usage with chunking: 35.65 MiB\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "def read_with_chunking():\n", 140 | " # Measure time for reading with chunking\n", 141 | " start_time = time.time()\n", 142 | " chunks = []\n", 143 | " chunk_size = 100000 # Adjust chunk size as needed\n", 144 | "\n", 145 | " for chunk in pd.read_csv(csv_file_path, chunksize=chunk_size):\n", 146 | " chunks.append(chunk)\n", 147 | "\n", 148 | " df_chunked = pd.concat(chunks, ignore_index=True)\n", 149 | " end_time = time.time()\n", 150 | " \n", 151 | " return df_chunked, end_time - start_time\n", 152 | "\n", 153 | "# Measure memory usage\n", 154 | "mem_usage_with_chunking = memory_usage(read_with_chunking)\n", 155 | "print(f\"Memory usage with chunking: {max(mem_usage_with_chunking) - min(mem_usage_with_chunking):.2f} MiB\")" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "id": "cfea5fbd", 161 | "metadata": {}, 162 | "source": [ 163 | "### Summary of Results\n", 164 | "\n", 165 | "1. **Performance Comparison:** In many cases, reading data in chunks can be as fast as reading the entire file at once, especially if the data is large. Chunking can also help manage memory usage, preventing potential crashes due to memory overload.\n", 166 | "\n", 167 | "2. **Flexibility:** Chunking allows for the processing of large datasets without needing to load the entire dataset into memory, making it a valuable technique for data processing in real-world applications." 
168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3 (ipykernel)", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.9.7" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 5 192 | } 193 | -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/Pandas-Optimization.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/October Meetup/PandasOptimization/Pandas-Optimization.pptx -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/README.md: -------------------------------------------------------------------------------- 1 | # Pandas Optimization 2 | This repository explores various optimization techniques for working with Pandas. It covers best practices and advanced concepts, illustrating key optimization strategies with practical examples. Additionally, it includes a demonstration of how to read large CSV files using chunking. 3 | 4 | While there are alternative libraries for data manipulation, such as Polars, Dask, and Spark, this repository focuses on enhancing performance specifically within the context of Pandas. 5 | 6 | ## Recommended Reading Flow 7 | 1. **The Deck**: An overview and summary of the methods. 8 | 2. **Best Practices**: Practical examples of effective best practices for using Pandas. 9 | 3. **Advanced Concepts**: Insights into advanced techniques that significantly improve performance with large datasets. 10 | 4. 
**Use Case**: A detailed example of reading large CSV files through chunking. 11 | -------------------------------------------------------------------------------- /2024/October Meetup/PandasOptimization/memory.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from memory_profiler import profile 3 | 4 | @profile 5 | def process_data(): 6 | df = pd.DataFrame({'a': range(1000000), 'b': range(1000000)}) 7 | df['c'] = df['a'] + df['b'] 8 | df = df.drop(columns=['a']) 9 | return df 10 | 11 | # Run this script with `python -m memory_profiler memory.py` to print the line-by-line memory report 12 | process_data() 13 | -------------------------------------------------------------------------------- /2024/October Meetup/SplitFXM - Oct '24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydPy/HydPy-meetups/7bccf3d72bfac4586f3627d6ada131e836e9dfb5/2024/October Meetup/SplitFXM - Oct '24.pdf -------------------------------------------------------------------------------- /2024/October Meetup/readme.md: -------------------------------------------------------------------------------- 1 | # HydPy October 2024 Meetup - Talk Resources 2 | 3 | Welcome to the **HydPy October 2024 Meetup** repository! This is where speakers have shared their presentation materials and resources from the event.
4 | 5 | ## Speaker Lineup and Resources 6 | | Topic | Speaker | 7 | | ------------------------------------------------------------ | ---------------------- | 8 | | **Pandas Optimization** | Pranali Bose | 9 | | **Intro to GenAI Architecture Modelling** | OM Mishra | 10 | | **Ensuring Data Quality in Web Scraping with Data Contracts** | Dayananada Challa | 11 | | **SplitFXM - Fast and Robust Boundary Value Problem Solver in Python** | Pavan B Govindaraju | 12 | -------------------------------------------------------------------------------- /2024/readme.md: -------------------------------------------------------------------------------- 1 | # HydPy - Meetup 2024 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Hyderabad Python User Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HydPy-meetups 2 | All the code from hands-on sessions will be shared here. 3 | 4 | 5 | ## Past Meetups 6 | - [18th May, 2019](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/261292322/) 7 | - [17th Mar, 2018](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/248632923/) 8 | - [12th Aug, 2017](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/241221757/) 9 | - [8th Jul, 2017](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/241221757/) 10 | - [8th Apr, 2017](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/238343965/) 11 | - [22nd Oct, 2016](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/234853547/) 12 | - [10th Sept, 2016](https://www.meetup.com/Hyderabad-Python-Meetup-Group/events/233703414/) 13 | --------------------------------------------------------------------------------