├── .gitignore ├── LICENSE ├── README.md ├── pandas_accidents.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | *.csv 60 | *.xlsx 61 | *.xls -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Shantnu Tiwari 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or 
"""Summarise the accidents CSV and export London's Sunday accidents for 2000.

Reads ``Accidents7904.csv`` from the working directory, prints a handful of
row counts and writes the London / Sunday / year-2000 subset to
``London_Sundays_2000.xlsx``.
"""
import pandas as pd


# Numeric codes used by the dataset, as interpreted by the original script.
SUNDAY = 1
RAIN = 2
LONDON_POLICE_FORCE = 1


def parse_dates(values):
    """Return *values* as pandas datetimes; unparseable entries become NaT."""
    try:
        # Spelling understood by the pandas release pinned in
        # requirements.txt (0.16.1).
        return pd.to_datetime(values, coerce=True)
    except TypeError:
        # Later pandas releases dropped ``coerce`` in favour of ``errors``.
        return pd.to_datetime(values, errors='coerce')


def london_sundays(frame):
    """Return the rows of *frame* recorded in London on a Sunday.

    Each comparison is parenthesised: ``&`` binds tighter than ``==``, so
    ``a == 1 & (b == 1)`` would compare ``a`` against ``1 & (b == 1)``
    instead of combining two conditions.
    """
    in_london = frame['Police_Force'] == LONDON_POLICE_FORCE
    on_sunday = frame['Day_of_Week'] == SUNDAY
    return frame[in_london & on_sunday]


def accidents_in_year(frame, year):
    """Return the rows of *frame* whose ``Date`` falls within *year*.

    The range is half-open, [1 Jan *year*, 1 Jan *year* + 1), so both the
    first and the last day of the year are included.  Rows whose date cannot
    be parsed are dropped.
    """
    # NOTE(review): the date format of the CSV is not visible here; if it is
    # day-first, the parsed day/month may be swapped (the year is unaffected).
    dates = parse_dates(frame['Date'])
    start = pd.Timestamp('{0}-01-01'.format(year))
    end = pd.Timestamp('{0}-01-01'.format(year + 1))
    return frame[(dates >= start) & (dates < end)]


def main():
    """Run the analysis: print the counts and write the Excel export."""
    # Read the file
    data = pd.read_csv("Accidents7904.csv", low_memory=False)
    # Output the number of rows
    print("Total rows: {0}".format(len(data)))

    print("\nAccidents")
    print("-----------")

    # Build each mask once and combine them for the narrower queries.
    on_sunday = data.Day_of_Week == SUNDAY
    over_twenty_vehicles = data.Number_of_Vehicles > 20
    in_rain = data.Weather_Conditions == RAIN

    print("Accidents which happened on a Sunday: {0}".format(
        len(data[on_sunday])))
    print("Accidents which happened on a Sunday involving > 20 cars: {0}".format(
        len(data[on_sunday & over_twenty_vehicles])))
    print("Accidents which happened on a Sunday involving > 20 cars in the rain: {0}".format(
        len(data[on_sunday & over_twenty_vehicles & in_rain])))

    # Accidents in London on a Sunday
    london_data = london_sundays(data)
    print("\nAccidents in London from 1979-2004 on a Sunday: {0}".format(
        len(london_data)))

    london_data_2000 = accidents_in_year(london_data, 2000)
    print("Accidents in London in the year 2000 on a Sunday: {0}".format(
        len(london_data_2000)))

    # Drop the byte-order-mark prefix from the first header if present.
    # Assigning the result avoids mutating a filtered slice in place.
    london_data_2000 = london_data_2000.rename(
        columns={'\xef\xbb\xbfAccident_Index': 'Accident_Index'})

    # Save to Excel; the context manager writes and closes the workbook even
    # if to_excel raises.
    with pd.ExcelWriter(
            'London_Sundays_2000.xlsx', engine='xlsxwriter') as writer:
        london_data_2000.to_excel(writer, sheet_name='Sheet1')


if __name__ == "__main__":
    main()