├── applications
├── .Rapp.history
├── item_numbers_to_find.csv
├── historical_files
│ ├── suppliers.xls
│ ├── suppliers.xlsx
│ └── suppliers_2012.csv
├── output_files
│ ├── 3output.csv
│ ├── 3output-clinton.csv
│ ├── 2output.csv
│ ├── 2output-clinton.csv
│ └── 1output.csv
├── 3parse_text_file.py
├── customer_category_history.csv
├── 3parse_text_file_skip_first_space.py
├── mysql_server_error_log.txt
├── 2calculate_statistic_by_category.py
└── 1search_for_items_write_found.py
├── database
├── data_for_updating.csv
├── Suppliers.db
├── data_for_updating_mysql.csv
├── output_files
│ ├── 5output.csv
│ └── 5output-clinton.csv
├── supplier_data.csv
├── supplier_data_for_mysql_database.csv
├── 5db_mysql_write_to_file.py
├── 6db_mysql_update_from_csv.py
├── 1db_count_rows.py
├── 2db_insert_rows.py
├── 4db_mysql_load_from_csv.py
└── 3db_update_rows.py
├── excel
├── sales_2013.xlsx
├── sales_2014.xlsx
├── sales_2015.xlsx
├── output_files
│ ├── 2output.xls
│ ├── 3output.xls
│ ├── 4output.xls
│ ├── 5output.xls
│ ├── 6output.xls
│ ├── 7output.xls
│ ├── 8output.xls
│ ├── 9output.xls
│ ├── 10output.xls
│ ├── 11output.xls
│ ├── 13output.xls
│ ├── 14output.xls
│ └── pandas_output.xls
├── pandas_parsing_and_write_keep_dates.py
├── 1excel_introspect_workbook.py
├── pandas_column_by_index.py
├── pandas_column_by_name.py
├── pandas_value_matches_pattern.py
├── pandas_value_meets_condition.py
├── pandas_value_in_set.py
├── pandas_column_by_name_all_worksheets.py
├── 2excel_parsing_and_write.py
├── pandas_value_meets_condition_all_worksheets.py
├── 12excel_introspect_all_workbooks.py
├── pandas_value_meets_condition_set_of_worksheets.py
├── pandas_concat_data_from_multiple_workbooks.py
├── 3excel_parsing_and_write_keep_dates.py
├── 7excel_column_by_index.py
├── 4excel_value_meets_condition.py
├── 8excel_column_by_name.py
├── 6excel_value_matches_pattern.py
├── 13excel_concat_data_from_multiple_workbooks.py
├── 10excel_column_by_name_all_worksheets.py
├── 5excel_value_in_set.py
├── 9excel_value_meets_condition_all_worksheets.py
├── 11excel_value_meets_condition_set_of_worksheets.py
├── 14excel_sum_average_multiple_workbooks.py
└── pandas_sum_average_multiple_workbooks.py
├── letters.txt
├── numbers.txt
├── csv
├── output_files
│ ├── 5output.csv
│ ├── 7output.csv
│ ├── 6output.csv
│ ├── 3output.csv
│ ├── 4output.csv
│ ├── 1output.csv
│ ├── 2output.csv
│ ├── 11output.csv
│ ├── pandas_output.csv
│ ├── 12output.csv
│ └── 9output.csv
├── pandas_parsing_and_write.py
├── pandas_column_by_index.py
├── pandas_column_by_name.py
├── pandas_add_header_row.py
├── sales_march_2014.csv
├── pandas_value_matches_pattern.py
├── sales_february_2014.csv
├── sales_january_2014.csv
├── pandas_value_in_set.py
├── pandas_select_contiguous_rows.py
├── 2csv_reader_parsing_and_write.py
├── pandas_value_meets_condition.py
├── 11csv_reader_select_contiguous_rows.py
├── pandas_concat_rows_from_multiple_files.py
├── 12csv_reader_add_header_row.py
├── 6csv_reader_column_by_index.py
├── 4csv_reader_value_in_set.py
├── supplier_data.csv
├── supplier_data_no_header_row.csv
├── 1csv_simple_parsing_and_write.py
├── 3csv_reader_value_meets_condition.py
├── 8csv_reader_counts_for_multiple_files.py
├── 5csv_reader_value_matches_pattern.py
├── 9csv_reader_concat_rows_from_multiple_files.py
├── 7csv_reader_column_by_name.py
├── supplier_data_unnecessary_header_footer.csv
├── pandas_sum_average_from_multiple_files.py
└── 10csv_reader_sum_average_from_multiple_files.py
├── plots
├── matplotlib_basic_bar.py
├── matplotlib_basic_histogram.py
├── 
matplotlib_basic_scatter.py ├── matplotlib_basic_line.py ├── matplotlib_basic_boxplot.py ├── ggplot_plots.py ├── pandas_plots.py └── seaborn_plots.py ├── README.md ├── statistics ├── wine_quality.py └── customer_churn.py └── first_script.py /applications/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /applications/item_numbers_to_find.csv: -------------------------------------------------------------------------------- 1 | 1234 2 | 2345 3 | 4567 4 | 6789 5 | 7890 6 | -------------------------------------------------------------------------------- /database/data_for_updating.csv: -------------------------------------------------------------------------------- 1 | amount,date,customer 2 | 4.25,5/11/2014,Richard Lucas 3 | 6.75,5/12/2014,Jenny Kim 4 | -------------------------------------------------------------------------------- /database/Suppliers.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/database/Suppliers.db -------------------------------------------------------------------------------- /excel/sales_2013.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/sales_2013.xlsx -------------------------------------------------------------------------------- /excel/sales_2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/sales_2014.xlsx -------------------------------------------------------------------------------- /excel/sales_2015.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/sales_2015.xlsx -------------------------------------------------------------------------------- /letters.txt: -------------------------------------------------------------------------------- 1 | a b 2 | c d 3 | e f 4 | g h 5 | i j 6 | k l 7 | m n 8 | o p 9 | q r 10 | s t 11 | u v 12 | w x 13 | y z -------------------------------------------------------------------------------- /database/data_for_updating_mysql.csv: -------------------------------------------------------------------------------- 1 | Cost,Purchase Date,Supplier Name 2 | 600.00,2014-01-22,Supplier X 3 | 200.00,2014-02-01,Supplier Y 4 | -------------------------------------------------------------------------------- /excel/output_files/2output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/2output.xls -------------------------------------------------------------------------------- /excel/output_files/3output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/3output.xls -------------------------------------------------------------------------------- /excel/output_files/4output.xls: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/4output.xls -------------------------------------------------------------------------------- /excel/output_files/5output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/5output.xls -------------------------------------------------------------------------------- /excel/output_files/6output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/6output.xls -------------------------------------------------------------------------------- /excel/output_files/7output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/7output.xls -------------------------------------------------------------------------------- /excel/output_files/8output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/8output.xls -------------------------------------------------------------------------------- /excel/output_files/9output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/9output.xls -------------------------------------------------------------------------------- /excel/output_files/10output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/10output.xls -------------------------------------------------------------------------------- /excel/output_files/11output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/11output.xls -------------------------------------------------------------------------------- /excel/output_files/13output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/13output.xls -------------------------------------------------------------------------------- /excel/output_files/14output.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/14output.xls -------------------------------------------------------------------------------- /numbers.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 3 4 3 | 5 6 4 | 7 8 5 | 9 10 6 | 11 12 7 | 13 14 8 | 15 16 9 | 17 18 10 | 19 20 11 | 21 22 12 | 23 24 13 | 25 26 -------------------------------------------------------------------------------- /excel/output_files/pandas_output.xls: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/excel/output_files/pandas_output.xls -------------------------------------------------------------------------------- /applications/historical_files/suppliers.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/applications/historical_files/suppliers.xls -------------------------------------------------------------------------------- /applications/historical_files/suppliers.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluesfer2007/foundations-for-analytics-with-python/master/applications/historical_files/suppliers.xlsx -------------------------------------------------------------------------------- /database/output_files/5output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,5467,750.0,2014-01-20 3 | Supplier X,001-1001,5467,750.0,2014-01-20 4 | -------------------------------------------------------------------------------- /database/output_files/5output-clinton.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,5467,750.0,2014-01-20 3 | Supplier X,001-1001,5467,750.0,2014-01-20 4 | -------------------------------------------------------------------------------- /csv/output_files/5output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | -------------------------------------------------------------------------------- /csv/pandas_parsing_and_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import pandas as pd 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | print(data_frame) 10 | data_frame.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /csv/output_files/7output.csv: -------------------------------------------------------------------------------- 1 | Invoice Number,Purchase Date 2 | 001-1001,1/20/14 3 | 001-1001,1/20/14 4 | 001-1001,1/20/14 5 | 001-1001,1/20/14 6 | 50-9501,1/30/14 7 | 50-9501,1/30/14 8 | 50-9505,2/3/14 9 | 50-9505,2/3/14 10 | 920-4803,2/3/14 11 | 920-4804,2/10/14 12 | 920-4805,2/17/14 13 | 920-4806,2/24/14 14 | -------------------------------------------------------------------------------- /csv/pandas_column_by_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | data_frame_column_by_index = data_frame.iloc[:, [0, 3]] 10 | 11 | data_frame_column_by_index.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /csv/output_files/6output.csv: 
-------------------------------------------------------------------------------- 1 | Supplier Name,Cost 2 | Supplier X,$500.00 3 | Supplier X,$500.00 4 | Supplier X,$750.00 5 | Supplier X,$750.00 6 | Supplier Y,$250.00 7 | Supplier Y,$250.00 8 | Supplier Y,$125.00 9 | Supplier Y,$125.00 10 | Supplier Z,$615.00 11 | Supplier Z,$615.00 12 | Supplier Z,$615.00 13 | Supplier Z,$615.00 14 | -------------------------------------------------------------------------------- /csv/output_files/3output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,5467,$750.00,1/20/14 3 | Supplier X,001-1001,5467,$750.00,1/20/14 4 | Supplier Z,920-4803,3321,$615.00,2/3/14 5 | Supplier Z,920-4804,3321,$615.00,2/10/14 6 | Supplier Z,920-4805,3321,$615.00,2/17/14 7 | Supplier Z,920-4806,3321,$615.00,2/24/14 8 | -------------------------------------------------------------------------------- /csv/output_files/4output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | Supplier Y,50-9501,7009,$250.00,1/30/14 7 | Supplier Y,50-9501,7009,$250.00,1/30/14 8 | -------------------------------------------------------------------------------- /csv/pandas_column_by_name.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | data_frame_column_by_name = data_frame.loc[:, ['Invoice Number', 'Purchase Date']] 10 | 11 | data_frame_column_by_name.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /excel/pandas_parsing_and_write_keep_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, sheetname='january_2013') 9 | 10 | writer = pd.ExcelWriter(output_file) 11 | data_frame.to_excel(writer, sheet_name='jan_13_output', index=False) 12 | writer.save() -------------------------------------------------------------------------------- /applications/output_files/3output.csv: -------------------------------------------------------------------------------- 1 | Date,InnoDB: Compressed tables use zlib 1.2.3,InnoDB: Using atomics to ref count buffer pool pages,InnoDB: 5.6.16 started; log sequence number 1234567,/usr/local/mysql/bin/mysqld: Shutdown complete,InnoDB: Completed initialization of buffer pool,InnoDB: IPv6 is available. 
2 | 2014-10-27,0,0,1,1,2,2 3 | 2014-03-07,3,1,1,1,0,0 4 | 2014-02-03,2,2,1,1,0,0 5 | -------------------------------------------------------------------------------- /csv/pandas_add_header_row.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | header_list = ['Supplier Name', 'Invoice Number', \ 9 | 'Part Number', 'Cost', 'Purchase Date'] 10 | data_frame = pd.read_csv(input_file, header=None, names=header_list) 11 | 12 | data_frame.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /applications/output_files/3output-clinton.csv: -------------------------------------------------------------------------------- 1 | Date,InnoDB: Compressed tables use zlib 1.2.3,InnoDB: Using atomics to ref count buffer pool pages,InnoDB: 5.6.16 started; log sequence number 1234567,/usr/local/mysql/bin/mysqld: Shutdown complete,InnoDB: Completed initialization of buffer pool,InnoDB: IPv6 is available. 2 | 2014-10-27,0,0,1,1,2,2 3 | 2014-03-07,3,1,1,1,0,0 4 | 2014-02-03,2,2,1,1,0,0 5 | -------------------------------------------------------------------------------- /csv/sales_march_2014.csv: -------------------------------------------------------------------------------- 1 | Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date 2 | 1234,John Smith,100-0014,"$1,350.00",3/4/14 3 | 8765,Tony Song,100-0015,"$1,167.00",3/8/14 4 | 2345,Mary Harrison,100-0016,"$1,789.00",3/17/14 5 | 6543,Rachel Paz,100-0017,"$2,042.00",3/22/14 6 | 3456,Lucy Gomez,100-0018,"$1,511.00",3/28/14 7 | 4321,Susan Wallace,100-0019,"$2,280.00",3/30/14 8 | -------------------------------------------------------------------------------- /csv/pandas_value_matches_pattern.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | data_frame_value_matches_pattern = data_frame.ix[data_frame['Invoice Number']\ 10 | .str.startswith("001-"), :] 11 | 12 | data_frame_value_matches_pattern.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /csv/sales_february_2014.csv: -------------------------------------------------------------------------------- 1 | Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date 2 | 9876,Daniel Farber,100-0008,"$1,115.00",2/2/14 3 | 8765,Laney Stone,100-0009,"$1,367.00",2/8/14 4 | 7654,Roger Lipney,100-0010,"$2,135.00",2/15/14 5 | 6543,Thomas Haines,100-0011,"$1,346.00",2/17/14 6 | 5432,Anushka Vaz,100-0012,"$1,560.00",2/21/14 7 | 4321,Harriet Cooper,100-0013,"$1,852.00",2/25/14 8 | -------------------------------------------------------------------------------- /csv/sales_january_2014.csv: -------------------------------------------------------------------------------- 1 | Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date 2 | 1234,John Smith,100-0002,"$1,200.00",1/1/14 3 | 2345,Mary Harrison,100-0003,"$1,425.00",1/6/14 4 | 3456,Lucy Gomez,100-0004,"$1,390.00",1/11/14 5 | 4567,Rupert Jones,100-0005,"$1,257.00",1/18/14 6 | 5678,Jenny Walters,100-0006,"$1,725.00",1/24/14 7 | 6789,Samantha Donaldson,100-0007,"$1,995.00",1/31/14 8 | 
-------------------------------------------------------------------------------- /excel/1excel_introspect_workbook.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from xlrd import open_workbook 4 | 5 | input_file = sys.argv[1] 6 | 7 | workbook = open_workbook(input_file) 8 | print('Number of worksheets:', workbook.nsheets) 9 | for worksheet in workbook.sheets(): 10 | print("Worksheet name:", worksheet.name, "\tRows:", \ 11 | worksheet.nrows, "\tColumns:", worksheet.ncols) 12 | -------------------------------------------------------------------------------- /csv/pandas_value_in_set.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | 10 | important_dates = ['1/20/14', '1/30/14'] 11 | data_frame_value_in_set = data_frame.loc[data_frame['Purchase Date']\ 12 | .isin(important_dates), :] 13 | 14 | data_frame_value_in_set.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /csv/pandas_select_contiguous_rows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file, header=None) 9 | 10 | data_frame = data_frame.drop([0,1,2,16,17,18]) 11 | data_frame.columns = data_frame.iloc[0] 12 | data_frame = data_frame.reindex(data_frame.index.drop(3)) 13 | 14 | data_frame.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /excel/pandas_column_by_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, 'january_2013', index_col=None) 9 | 10 | data_frame_column_by_index = data_frame.iloc[:, [1, 4]] 11 | 12 | writer = pd.ExcelWriter(output_file) 13 | data_frame_column_by_index.to_excel(writer, sheet_name='jan_13_output', index=False) 14 | writer.save() -------------------------------------------------------------------------------- /csv/2csv_reader_parsing_and_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | with open(input_file, 'r', newline='') as csv_in_file: 9 | with open(output_file, 'w', newline='') as csv_out_file: 10 | filereader = csv.reader(csv_in_file, delimiter=',') 11 | filewriter = csv.writer(csv_out_file, delimiter=',') 12 | for row_list in filereader: 13 | filewriter.writerow(row_list) -------------------------------------------------------------------------------- /excel/pandas_column_by_name.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, 'january_2013', index_col=None) 9 | 10 | data_frame_column_by_name = data_frame.loc[:, ['Customer ID', 'Purchase Date']] 11 | 12 | writer = pd.ExcelWriter(output_file) 13 
| data_frame_column_by_name.to_excel(writer, sheet_name='jan_13_output', index=False) 14 | writer.save() -------------------------------------------------------------------------------- /applications/output_files/2output.csv: -------------------------------------------------------------------------------- 1 | Customer Name,Category,Total Time (in Days) 2 | Wayne Thompson,Silver,157 3 | Wayne Thompson,Bronze,167 4 | Bruce Johnson,Gold,160 5 | Bruce Johnson,Silver,60 6 | Bruce Johnson,Bronze,77 7 | Annie Lee,Gold,192 8 | Annie Lee,Silver,44 9 | Annie Lee,Bronze,26 10 | Priya Patel,Silver,99 11 | Priya Patel,Gold,54 12 | Mary Yu,Silver,72 13 | Mary Yu,Gold,231 14 | John Smith,Gold,206 15 | John Smith,Silver,39 16 | John Smith,Bronze,70 17 | -------------------------------------------------------------------------------- /applications/output_files/2output-clinton.csv: -------------------------------------------------------------------------------- 1 | Customer Name,Category,Total Time (in Days) 2 | Wayne Thompson,Silver,198 3 | Wayne Thompson,Bronze,167 4 | Bruce Johnson,Gold,201 5 | Bruce Johnson,Silver,60 6 | Bruce Johnson,Bronze,77 7 | Annie Lee,Gold,233 8 | Annie Lee,Silver,44 9 | Annie Lee,Bronze,26 10 | Priya Patel,Silver,99 11 | Priya Patel,Gold,54 12 | Mary Yu,Silver,72 13 | Mary Yu,Gold,272 14 | John Smith,Gold,247 15 | John Smith,Silver,39 16 | John Smith,Bronze,70 17 | -------------------------------------------------------------------------------- /csv/pandas_value_meets_condition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_csv(input_file) 9 | 10 | data_frame['Cost'] = data_frame['Cost'].str.strip('$').astype(float) 11 | data_frame_value_meets_condition = data_frame.loc[(data_frame['Supplier Name']\ 12 | .str.contains('Z')) | (data_frame['Cost'] > 600.0), :] 13 | 14 | data_frame_value_meets_condition.to_csv(output_file, index=False) -------------------------------------------------------------------------------- /excel/pandas_value_matches_pattern.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, 'january_2013', index_col=None) 9 | 10 | data_frame_value_matches_pattern = data_frame[data_frame['Customer Name'].str.startswith("J")] 11 | 12 | writer = pd.ExcelWriter(output_file) 13 | data_frame_value_matches_pattern.to_excel(writer, sheet_name='jan_13_output', index=False) 14 | writer.save() -------------------------------------------------------------------------------- /excel/pandas_value_meets_condition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, 'january_2013', index_col=None) 9 | data_frame_value_meets_condition = \ 10 | data_frame[data_frame['Sale Amount'].astype(float) > 1400.0] 11 | 12 | writer = pd.ExcelWriter(output_file) 13 | data_frame_value_meets_condition.to_excel(writer, sheet_name='jan_13_output', index=False) 14 | writer.save() -------------------------------------------------------------------------------- /excel/pandas_value_in_set.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import string 4 | import sys 5 | 6 | input_file = sys.argv[1] 7 | output_file = sys.argv[2] 8 | 9 | data_frame = pd.read_excel(input_file, 'january_2013', index_col=None) 10 | 11 | important_dates = ['01/24/2013','01/31/2013'] 12 | data_frame_value_in_set = data_frame[data_frame['Purchase Date'].isin(important_dates)] 13 | 14 | writer = pd.ExcelWriter(output_file) 15 | data_frame_value_in_set.to_excel(writer, sheet_name='jan_13_output', index=False) 16 | writer.save() -------------------------------------------------------------------------------- /csv/11csv_reader_select_contiguous_rows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | row_counter = 0 9 | with open(input_file, 'r', newline='') as csv_in_file: 10 | with open(output_file, 'w', newline='') as csv_out_file: 11 | filereader = csv.reader(csv_in_file) 12 | filewriter = csv.writer(csv_out_file) 13 | for row in filereader: 14 | if row_counter >= 3 and row_counter <= 15: 15 | filewriter.writerow([value.strip() for value in row]) 16 | row_counter += 1 -------------------------------------------------------------------------------- /csv/pandas_concat_rows_from_multiple_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | all_files = glob.glob(os.path.join(input_path,'sales_*')) 11 | 12 | all_data_frames = [] 13 | for file in all_files: 14 | data_frame = pd.read_csv(file, index_col=None) 15 | all_data_frames.append(data_frame) 16 | data_frame_concat = pd.concat(all_data_frames, axis=0, ignore_index=True) 17 | 18 | data_frame_concat.to_csv(output_file, index = False) -------------------------------------------------------------------------------- /csv/12csv_reader_add_header_row.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | with open(input_file, 'r', newline='') as csv_in_file: 9 | with open(output_file, 'w', newline='') as csv_out_file: 10 | filereader = csv.reader(csv_in_file) 11 | filewriter = csv.writer(csv_out_file) 12 | header_list = ['Supplier Name', 'Invoice Number', \ 13 | 'Part Number', 'Cost', 'Purchase Date'] 14 | filewriter.writerow(header_list) 15 | for row in filereader: 16 | filewriter.writerow (row) -------------------------------------------------------------------------------- /csv/6csv_reader_column_by_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | my_columns = [0, 3] 9 | 10 | with open(input_file, 'r', newline='') as csv_in_file: 11 | with open(output_file, 'w', newline='') as csv_out_file: 12 | filereader = csv.reader(csv_in_file) 13 | filewriter = csv.writer(csv_out_file) 14 | for row_list in filereader: 15 | row_list_output = [ ] 16 | for index_value in my_columns: 17 | row_list_output.append(row_list[index_value]) 18 | filewriter.writerow(row_list_output) 
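The script above (6csv_reader_column_by_index.py) selects columns by position. When the input has a header row, the same selection can be written against column names with csv.DictReader and csv.DictWriter. This is a minimal sketch for comparison, not a file in the repository; it assumes the supplier_data.csv header shown elsewhere in this listing.

#!/usr/bin/env python3
import csv
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

# Columns to keep, referenced by header name instead of numeric index
my_columns = ['Invoice Number', 'Purchase Date']

with open(input_file, 'r', newline='') as csv_in_file:
    with open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.DictReader(csv_in_file)
        filewriter = csv.DictWriter(csv_out_file, fieldnames=my_columns)
        filewriter.writeheader()
        for row in filereader:
            # Write only the named columns; the rest of the row is ignored
            filewriter.writerow({name: row[name] for name in my_columns})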
-------------------------------------------------------------------------------- /applications/historical_files/suppliers_2012.csv: -------------------------------------------------------------------------------- 1 | Item Number,Description,Supplier,Cost,Date 2 | 1234,Widget 1,Supplier A,"$1,100.00",6/2/2012 3 | ,Widget 1 Service,Supplier A,$600.00,6/3/2012 4 | 2345,Widget 2,Supplier A,"$2,300.00",6/17/2012 5 | ,Widget 2 Maintenance,Supplier A,"$1,000.00",6/30/2012 6 | 3456,Widget 3,Supplier B,$950.00,7/3/2012 7 | 4567,Widget 4,Supplier B,"$1,300.00",7/4/2012 8 | 5678,Widget 5,Supplier B,"$1,050.00",7/11/2012 9 | ,Widget 5 Service,Supplier B,$550.00,7/15/2012 10 | 6789,Widget 6,Supplier C,"$1,175.00",7/23/2012 11 | 7890,Widget 7,Supplier C,"$1,200.00",7/27/2012 12 | -------------------------------------------------------------------------------- /csv/4csv_reader_value_in_set.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | important_dates = ['1/20/14', '1/30/14'] 9 | 10 | with open(input_file, 'r', newline='') as csv_in_file: 11 | with open(output_file, 'w', newline='') as csv_out_file: 12 | filereader = csv.reader(csv_in_file) 13 | filewriter = csv.writer(csv_out_file) 14 | header = next(filereader) 15 | filewriter.writerow(header) 16 | for row_list in filereader: 17 | a_date = row_list[4] 18 | if a_date in important_dates: 19 | filewriter.writerow(row_list) -------------------------------------------------------------------------------- /csv/supplier_data.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | Supplier Y,50-9501,7009,$250.00,1/30/14 7 | Supplier Y,50-9501,7009,$250.00,1/30/14 8 | Supplier Y,50-9505,6650,$125.00,2/3/14 9 | Supplier Y,50-9505,6650,$125.00,2/3/14 10 | Supplier Z,920-4803,3321,$615.00,2/3/14 11 | Supplier Z,920-4804,3321,$615.00,2/10/14 12 | Supplier Z,920-4805,3321,$615.00,2/17/14 13 | Supplier Z,920-4806,3321,$615.00,2/24/14 14 | -------------------------------------------------------------------------------- /csv/supplier_data_no_header_row.csv: -------------------------------------------------------------------------------- 1 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 2 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 3 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 4 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 5 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 6 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 7 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 8 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 9 | Supplier Z,920-4803,3321,$615.00 ,2/3/2014 10 | Supplier Z,920-4804,3321,$615.00 ,2/10/2014 11 | Supplier Z,920-4805,3321,$615.00 ,2/17/2014 12 | Supplier Z,920-4806,3321,$615.00 ,2/24/2014 13 | -------------------------------------------------------------------------------- /csv/1csv_simple_parsing_and_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | input_file = sys.argv[1] 5 | output_file = sys.argv[2] 6 | 7 | with open(input_file, 'r', newline='') as filereader: 8 | with open(output_file, 'w', newline='') as filewriter: 9 | header = 
filereader.readline() 10 | header = header.strip() 11 | header_list = header.split(',') 12 | print(header_list) 13 | filewriter.write(','.join(map(str,header_list))+'\n') 14 | for row in filereader: 15 | row = row.strip() 16 | row_list = row.split(',') 17 | print(row_list) 18 | filewriter.write(','.join(map(str,row_list))+'\n') -------------------------------------------------------------------------------- /csv/output_files/1output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | Supplier Y,50-9501,7009,$250.00,1/30/14 7 | Supplier Y,50-9501,7009,$250.00,1/30/14 8 | Supplier Y,50-9505,6650,$125.00,2/3/14 9 | Supplier Y,50-9505,6650,$125.00,2/3/14 10 | Supplier Z,920-4803,3321,$615.00,2/3/14 11 | Supplier Z,920-4804,3321,$615.00,2/10/14 12 | Supplier Z,920-4805,3321,$615.00,2/17/14 13 | Supplier Z,920-4806,3321,$615.00,2/24/14 14 | -------------------------------------------------------------------------------- /csv/output_files/2output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | Supplier Y,50-9501,7009,$250.00,1/30/14 7 | Supplier Y,50-9501,7009,$250.00,1/30/14 8 | Supplier Y,50-9505,6650,$125.00,2/3/14 9 | Supplier Y,50-9505,6650,$125.00,2/3/14 10 | Supplier Z,920-4803,3321,$615.00,2/3/14 11 | Supplier Z,920-4804,3321,$615.00,2/10/14 12 | Supplier Z,920-4805,3321,$615.00,2/17/14 13 | Supplier Z,920-4806,3321,$615.00,2/24/14 14 | -------------------------------------------------------------------------------- /database/supplier_data.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/14 3 | Supplier X,001-1001,2341,$500.00,1/20/14 4 | Supplier X,001-1001,5467,$750.00,1/20/14 5 | Supplier X,001-1001,5467,$750.00,1/20/14 6 | Supplier Y,50-9501,7009,$250.00,1/30/14 7 | Supplier Y,50-9501,7009,$250.00,1/30/14 8 | Supplier Y,50-9505,6650,$125.00,2/3/14 9 | Supplier Y,50-9505,6650,$125.00,2/3/14 10 | Supplier Z,920-4803,3321,$615.00,2/3/14 11 | Supplier Z,920-4804,3321,$615.00,2/10/14 12 | Supplier Z,920-4805,3321,$615.00,2/17/14 13 | Supplier Z,920-4806,3321,$615.00,2/24/14 14 | -------------------------------------------------------------------------------- /excel/pandas_column_by_name_all_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, sheetname=None, index_col=None) 9 | 10 | column_output = [] 11 | for worksheet_name, data in data_frame.items(): 12 | column_output.append(data.loc[:, ['Customer Name', 'Sale Amount']]) 13 | selected_columns = pd.concat(column_output, axis=0, ignore_index=True) 14 | 15 | writer = pd.ExcelWriter(output_file) 16 | selected_columns.to_excel(writer, sheet_name='selected_columns_all_worksheets', index=False) 17 | writer.save() 
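The pandas scripts in this listing target the pandas versions available when the book was written: read_excel's keyword was then sheetname (now sheet_name), DataFrame.ix (used in csv/pandas_value_matches_pattern.py) has since been removed in favor of .loc/.iloc, and ExcelWriter.save() has been replaced by closing the writer. The following is a minimal sketch of the same all-worksheets column selection under the newer API, assuming openpyxl is installed for .xlsx files; it is offered for comparison and is not a file in the repository.

#!/usr/bin/env python3
import pandas as pd
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

# sheet_name=None returns a dict mapping worksheet name -> DataFrame
all_worksheets = pd.read_excel(input_file, sheet_name=None, index_col=None)

column_output = []
for worksheet_name, data in all_worksheets.items():
    column_output.append(data.loc[:, ['Customer Name', 'Sale Amount']])
selected_columns = pd.concat(column_output, axis=0, ignore_index=True)

# The context manager writes and closes the workbook on exit,
# replacing the older writer.save() call
with pd.ExcelWriter(output_file) as writer:
    selected_columns.to_excel(writer, sheet_name='selected_columns_all_worksheets', index=False)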
-------------------------------------------------------------------------------- /excel/2excel_parsing_and_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from xlrd import open_workbook 4 | from xlwt import Workbook 5 | 6 | input_file = sys.argv[1] 7 | output_file = sys.argv[2] 8 | 9 | output_workbook = Workbook() 10 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 11 | 12 | with open_workbook(input_file) as workbook: 13 | worksheet = workbook.sheet_by_name('january_2013') 14 | for row_index in range(worksheet.nrows): 15 | for column_index in range(worksheet.ncols): 16 | output_worksheet.write(row_index, column_index, worksheet.cell_value(row_index, column_index)) 17 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /csv/output_files/11output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00,1/20/2014 3 | Supplier X,001-1001,2341,$500.00,1/20/2014 4 | Supplier X,001-1001,5467,$750.00,1/20/2014 5 | Supplier X,001-1001,5467,$750.00,1/20/2014 6 | Supplier Y,50-9501,7009,$250.00,1/30/2014 7 | Supplier Y,50-9501,7009,$250.00,1/30/2014 8 | Supplier Y,50-9505,6650,$125.00,2/3/2014 9 | Supplier Y,50-9505,6650,$125.00,2/3/2014 10 | Supplier Z,920-4803,3321,$615.00,2/3/2014 11 | Supplier Z,920-4804,3321,$615.00,2/10/2014 12 | Supplier Z,920-4805,3321,$615.00,2/17/2014 13 | Supplier Z,920-4806,3321,$615.00,2/24/2014 14 | -------------------------------------------------------------------------------- /excel/pandas_value_meets_condition_all_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | data_frame = pd.read_excel(input_file, sheetname=None, index_col=None) 9 | 10 | row_output = [] 11 | for worksheet_name, data in data_frame.items(): 12 | row_output.append(data[data['Sale Amount'].replace('$', '').replace(',', '').astype(float) > 2000.0]) 13 | filtered_rows = pd.concat(row_output, axis=0, ignore_index=True) 14 | 15 | writer = pd.ExcelWriter(output_file) 16 | filtered_rows.to_excel(writer, sheet_name='sale_amount_gt2000', index=False) 17 | writer.save() 18 | -------------------------------------------------------------------------------- /csv/output_files/pandas_output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 3 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 4 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 5 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 6 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 7 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 8 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 9 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 10 | Supplier Z,920-4803,3321,$615.00 ,2/3/2014 11 | Supplier Z,920-4804,3321,$615.00 ,2/10/2014 12 | Supplier Z,920-4805,3321,$615.00 ,2/17/2014 13 | Supplier Z,920-4806,3321,$615.00 ,2/24/2014 14 | -------------------------------------------------------------------------------- /csv/3csv_reader_value_meets_condition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 
import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | with open(input_file, 'r', newline='') as csv_in_file: 9 | with open(output_file, 'w', newline='') as csv_out_file: 10 | filereader = csv.reader(csv_in_file) 11 | filewriter = csv.writer(csv_out_file) 12 | header = next(filereader) 13 | filewriter.writerow(header) 14 | for row_list in filereader: 15 | supplier = str(row_list[0]).strip() 16 | cost = str(row_list[3]).strip('$').replace(',', '') 17 | if supplier == 'Supplier Z' or float(cost) > 600.0: 18 | filewriter.writerow(row_list) -------------------------------------------------------------------------------- /csv/8csv_reader_counts_for_multiple_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | 9 | file_counter = 0 10 | for input_file in glob.glob(os.path.join(input_path,'sales_*')): 11 | row_counter = 1 12 | with open(input_file, 'r', newline='') as csv_in_file: 13 | filereader = csv.reader(csv_in_file) 14 | header = next(filereader) 15 | for row in filereader: 16 | row_counter += 1 17 | print('{0!s}: \t{1:d} rows \t{2:d} columns'.format(\ 18 | os.path.basename(input_file), row_counter, len(header))) 19 | file_counter += 1 20 | print('Number of files: {0:d}'.format(file_counter)) -------------------------------------------------------------------------------- /csv/5csv_reader_value_matches_pattern.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import re 4 | import sys 5 | 6 | input_file = sys.argv[1] 7 | output_file = sys.argv[2] 8 | 9 | pattern = re.compile(r'(?P^001-.*)', re.I) 10 | 11 | with open(input_file, 'r', newline='') as csv_in_file: 12 | with open(output_file, 'w', newline='') as csv_out_file: 13 | filereader = csv.reader(csv_in_file) 14 | filewriter = csv.writer(csv_out_file) 15 | header = next(filereader) 16 | filewriter.writerow(header) 17 | for row_list in filereader: 18 | invoice_number = row_list[1] 19 | if pattern.search(invoice_number): 20 | filewriter.writerow(row_list) -------------------------------------------------------------------------------- /csv/output_files/12output.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 3 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 4 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 5 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 6 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 7 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 8 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 9 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 10 | Supplier Z,920-4803,3321,$615.00 ,2/3/2014 11 | Supplier Z,920-4804,3321,$615.00 ,2/10/2014 12 | Supplier Z,920-4805,3321,$615.00 ,2/17/2014 13 | Supplier Z,920-4806,3321,$615.00 ,2/24/2014 14 | -------------------------------------------------------------------------------- /database/supplier_data_for_mysql_database.csv: -------------------------------------------------------------------------------- 1 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 2 | Supplier X,001-1001,2341,500.00,2014-01-20 3 | Supplier X,001-1001,2341,500.00,2014-01-20 4 | Supplier X,001-1001,5467,750.00,2014-01-20 5 | Supplier X,001-1001,5467,750.00,2014-01-20 6 | Supplier 
Y,50-9501,7009,250.00,2014-01-30 7 | Supplier Y,50-9501,7009,250.00,2014-01-30 8 | Supplier Y,50-9505,6650,125.00,2014-02-03 9 | Supplier Y,50-9505,6650,125.00,2014-02-03 10 | Supplier Z,920-4803,3321,615.00,2014-02-03 11 | Supplier Z,920-4804,3321,615.00,2014-02-10 12 | Supplier Z,920-4805,3321,615.00,2014-02-17 13 | Supplier Z,920-4806,3321,615.00,2014-02-24 14 | -------------------------------------------------------------------------------- /excel/12excel_introspect_all_workbooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import glob 3 | import os 4 | import sys 5 | from xlrd import open_workbook 6 | 7 | input_directory = sys.argv[1] 8 | 9 | workbook_counter = 0 10 | for input_file in glob.glob(os.path.join(input_directory, '*.xls*')): 11 | workbook = open_workbook(input_file) 12 | print('Workbook: {}'.format(os.path.basename(input_file))) 13 | print('Number of worksheets: {}'.format(workbook.nsheets)) 14 | for worksheet in workbook.sheets(): 15 | print('Worksheet name:', worksheet.name, '\tRows:',\ 16 | worksheet.nrows, '\tColumns:', worksheet.ncols) 17 | workbook_counter += 1 18 | print('Number of Excel workbooks: {}'.format(workbook_counter)) -------------------------------------------------------------------------------- /excel/pandas_value_meets_condition_set_of_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | my_sheets = [0,1] 9 | threshold = 1900.0 10 | 11 | data_frame = pd.read_excel(input_file, sheetname=my_sheets, index_col=None) 12 | 13 | row_list = [] 14 | for worksheet_name, data in data_frame.items(): 15 | row_list.append(data[data['Sale Amount'].replace('$', '').replace(',', '').astype(float) > threshold]) 16 | filtered_rows = pd.concat(row_list, axis=0, ignore_index=True) 17 | 18 | writer = pd.ExcelWriter(output_file) 19 | filtered_rows.to_excel(writer, sheet_name='set_of_worksheets', index=False) 20 | writer.save() 21 | -------------------------------------------------------------------------------- /excel/pandas_concat_data_from_multiple_workbooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | all_workbooks = glob.glob(os.path.join(input_path,'*.xls*')) 11 | data_frames = [] 12 | for workbook in all_workbooks: 13 | all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None) 14 | for worksheet_name, data in all_worksheets.items(): 15 | data_frames.append(data) 16 | all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True) 17 | 18 | writer = pd.ExcelWriter(output_file) 19 | all_data_concatenated.to_excel(writer, sheet_name='all_data_all_workbooks', index=False) 20 | writer.save() -------------------------------------------------------------------------------- /plots/matplotlib_basic_bar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import matplotlib.pyplot as plt 3 | plt.style.use('ggplot') 4 | 5 | customers = ['ABC', 'DEF', 'GHI', 'JKL', 'MNO'] 6 | customers_index = range(len(customers)) 7 | sale_amounts = [127, 90, 201, 111, 232] 8 | 9 | fig = plt.figure() 10 | ax1 = fig.add_subplot(1,1,1) 11 | 
ax1.bar(customers_index, sale_amounts, align='center', color='darkblue') 12 | ax1.xaxis.set_ticks_position('bottom') 13 | ax1.yaxis.set_ticks_position('left') 14 | plt.xticks(customers_index, customers, rotation=0, fontsize='small') 15 | 16 | plt.xlabel('Customer Name') 17 | plt.ylabel('Sale Amount') 18 | plt.title('Sale Amount per Customer') 19 | 20 | plt.savefig('bar_plot.png', dpi=400, bbox_inches='tight') 21 | plt.show() -------------------------------------------------------------------------------- /csv/9csv_reader_concat_rows_from_multiple_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | first_file = True 11 | for input_file in glob.glob(os.path.join(input_path,'sales_*')): 12 | print(os.path.basename(input_file)) 13 | with open(input_file, 'r', newline='') as csv_in_file: 14 | with open(output_file, 'a', newline='') as csv_out_file: 15 | filereader = csv.reader(csv_in_file) 16 | filewriter = csv.writer(csv_out_file) 17 | if first_file: 18 | for row in filereader: 19 | filewriter.writerow(row) 20 | first_file = False 21 | else: 22 | header = next(filereader) 23 | for row in filereader: 24 | filewriter.writerow(row) -------------------------------------------------------------------------------- /plots/matplotlib_basic_histogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | plt.style.use('ggplot') 5 | 6 | mu1, mu2, sigma = 100, 130, 15 7 | x1 = mu1 + sigma*np.random.randn(10000) 8 | x2 = mu2 + sigma*np.random.randn(10000) 9 | 10 | fig = plt.figure() 11 | ax1 = fig.add_subplot(1,1,1) 12 | n, bins, patches = ax1.hist(x1, bins=50, normed=False, color='darkgreen') 13 | n, bins, patches = ax1.hist(x2, bins=50, normed=False, color='orange', alpha=0.5) 14 | ax1.xaxis.set_ticks_position('bottom') 15 | ax1.yaxis.set_ticks_position('left') 16 | 17 | plt.xlabel('Bins') 18 | plt.ylabel('Number of Values in Bin') 19 | fig.suptitle('Histograms', fontsize=14, fontweight='bold') 20 | ax1.set_title('Two Frequency Distributions') 21 | 22 | plt.savefig('histogram.png', dpi=400, bbox_inches='tight') 23 | plt.show() -------------------------------------------------------------------------------- /database/5db_mysql_write_to_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import MySQLdb 4 | import sys 5 | 6 | # Path to and name of a CSV output file 7 | output_file = sys.argv[1] 8 | 9 | # Connect to a MySQL database 10 | con = MySQLdb.connect(host='localhost', port=3306, db='my_suppliers', \ 11 | user='root', passwd='my_password') 12 | c = con.cursor() 13 | 14 | # Create a file writer object and write the header row 15 | filewriter = csv.writer(open(output_file, 'w', newline=''), delimiter=',') 16 | header = ['Supplier Name','Invoice Number','Part Number','Cost','Purchase Date'] 17 | filewriter.writerow(header) 18 | 19 | # Query the Suppliers table and write the output to a CSV file 20 | c.execute("""SELECT * 21 | FROM Suppliers 22 | WHERE Cost > 700.0;""") 23 | rows = c.fetchall() 24 | for row in rows: 25 | filewriter.writerow(row) 26 | -------------------------------------------------------------------------------- /csv/7csv_reader_column_by_name.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | output_file = sys.argv[2] 7 | 8 | my_columns = ['Invoice Number', 'Purchase Date'] 9 | my_columns_index = [] 10 | 11 | with open(input_file, 'r', newline='') as csv_in_file: 12 | with open(output_file, 'w', newline='') as csv_out_file: 13 | filereader = csv.reader(csv_in_file) 14 | filewriter = csv.writer(csv_out_file) 15 | header = next(filereader) 16 | for index_value in range(len(header)): 17 | if header[index_value] in my_columns: 18 | my_columns_index.append(index_value) 19 | filewriter.writerow(my_columns) 20 | for row_list in filereader: 21 | row_list_output = [ ] 22 | for index_value in my_columns_index: 23 | row_list_output.append(row_list[index_value]) 24 | filewriter.writerow(row_list_output) -------------------------------------------------------------------------------- /csv/supplier_data_unnecessary_header_footer.csv: -------------------------------------------------------------------------------- 1 | I don't care about this row,,,, 2 | I don't care about this row,,,, 3 | I don't care about this row,,,, 4 | Supplier Name,Invoice Number,Part Number,Cost,Purchase Date 5 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 6 | Supplier X,001-1001,2341,$500.00 ,1/20/2014 7 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 8 | Supplier X,001-1001,5467,$750.00 ,1/20/2014 9 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 10 | Supplier Y,50-9501,7009,$250.00 ,1/30/2014 11 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 12 | Supplier Y,50-9505,6650,$125.00 ,2/3/2014 13 | Supplier Z,920-4803,3321,$615.00 ,2/3/2014 14 | Supplier Z,920-4804,3321,$615.00 ,2/10/2014 15 | Supplier Z,920-4805,3321,$615.00 ,2/17/2014 16 | Supplier Z,920-4806,3321,$615.00 ,2/24/2014 17 | I don't want this row either,,,, 18 | I don't want this row either,,,, 19 | I don't want this row either,,,, 20 | -------------------------------------------------------------------------------- /plots/matplotlib_basic_scatter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | plt.style.use('ggplot') 5 | 6 | x = np.arange(start=1., stop=15., step=1.) 7 | y_linear = x + 5. * np.random.randn(14.) 8 | y_quadratic = x**2 + 10. * np.random.randn(14.) 9 | 10 | fn_linear = np.poly1d(np.polyfit(x, y_linear, deg=1)) 11 | fn_quadratic = np.poly1d(np.polyfit(x, y_quadratic, deg=2)) 12 | 13 | fig = plt.figure() 14 | ax1 = fig.add_subplot(1,1,1) 15 | ax1.plot(x, y_linear, 'bo', x, y_quadratic, 'go', \ 16 | x, fn_linear(x), 'b-', x, fn_quadratic(x), 'g-', linewidth=2.) 
17 | ax1.xaxis.set_ticks_position('bottom') 18 | ax1.yaxis.set_ticks_position('left') 19 | 20 | ax1.set_title('Scatter Plots with Best Fit Lines') 21 | plt.xlabel('x') 22 | plt.ylabel('f(x)') 23 | plt.xlim((min(x)-1., max(x)+1.)) 24 | plt.ylim((min(y_quadratic)-10., max(y_quadratic)+10.)) 25 | 26 | plt.savefig('scatter_plot.png', dpi=400, bbox_inches='tight') 27 | plt.show() -------------------------------------------------------------------------------- /csv/output_files/9output.csv: -------------------------------------------------------------------------------- 1 | Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date 2 | 9876,Daniel Farber,100-0008,"$1,115.00",2/2/14 3 | 8765,Laney Stone,100-0009,"$1,367.00",2/8/14 4 | 7654,Roger Lipney,100-0010,"$2,135.00",2/15/14 5 | 6543,Thomas Haines,100-0011,"$1,346.00",2/17/14 6 | 5432,Anushka Vaz,100-0012,"$1,560.00",2/21/14 7 | 4321,Harriet Cooper,100-0013,"$1,852.00",2/25/14 8 | 1234,John Smith,100-0002,"$1,200.00",1/1/14 9 | 2345,Mary Harrison,100-0003,"$1,425.00",1/6/14 10 | 3456,Lucy Gomez,100-0004,"$1,390.00",1/11/14 11 | 4567,Rupert Jones,100-0005,"$1,257.00",1/18/14 12 | 5678,Jenny Walters,100-0006,"$1,725.00",1/24/14 13 | 6789,Samantha Donaldson,100-0007,"$1,995.00",1/31/14 14 | 1234,John Smith,100-0014,"$1,350.00",3/4/14 15 | 8765,Tony Song,100-0015,"$1,167.00",3/8/14 16 | 2345,Mary Harrison,100-0016,"$1,789.00",3/17/14 17 | 6543,Rachel Paz,100-0017,"$2,042.00",3/22/14 18 | 3456,Lucy Gomez,100-0018,"$1,511.00",3/28/14 19 | 4321,Susan Wallace,100-0019,"$2,280.00",3/30/14 20 | -------------------------------------------------------------------------------- /plots/matplotlib_basic_line.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from numpy.random import randn 3 | import matplotlib.pyplot as plt 4 | plt.style.use('ggplot') 5 | 6 | plot_data1 = randn(50).cumsum() 7 | plot_data2 = randn(50).cumsum() 8 | plot_data3 = randn(50).cumsum() 9 | plot_data4 = randn(50).cumsum() 10 | 11 | fig = plt.figure() 12 | ax1 = fig.add_subplot(1,1,1) 13 | ax1.plot(plot_data1, marker=r'o', color=u'blue', linestyle='-', label='Blue Solid') 14 | ax1.plot(plot_data2, marker=r'+', color=u'red', linestyle='--', label='Red Dashed') 15 | ax1.plot(plot_data3, marker=r'*', color=u'green', linestyle='-.', label='Green Dash Dot') 16 | ax1.plot(plot_data4, marker=r's', color=u'orange', linestyle=':', label='Orange Dotted') 17 | ax1.xaxis.set_ticks_position('bottom') 18 | ax1.yaxis.set_ticks_position('left') 19 | 20 | ax1.set_title('Line Plots: Markers, Colors, and Linestyles') 21 | plt.xlabel('Draw') 22 | plt.ylabel('Random Number') 23 | plt.legend(loc='best') 24 | 25 | plt.savefig('line_plot.png', dpi=400, bbox_inches='tight') 26 | plt.show() -------------------------------------------------------------------------------- /plots/matplotlib_basic_boxplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | plt.style.use('ggplot') 5 | 6 | N = 500 7 | normal = np.random.normal(loc=0.0, scale=1.0, size=N) 8 | lognormal = np.random.lognormal(mean=0.0, sigma=1.0, size=N) 9 | index_value = np.random.random_integers(low=0, high=N-1, size=N) 10 | normal_sample = normal[index_value] 11 | lognormal_sample = lognormal[index_value] 12 | box_plot_data = [normal,normal_sample,lognormal,lognormal_sample] 13 | 14 | fig = plt.figure() 15 | ax1 = fig.add_subplot(1,1,1) 16 | 17 | 
box_labels = ['normal','normal_sample','lognormal','lognormal_sample'] 18 | ax1.boxplot(box_plot_data, notch=False, sym='.', vert=True, whis=1.5, \ 19 | showmeans=True, labels=box_labels) 20 | ax1.xaxis.set_ticks_position('bottom') 21 | ax1.yaxis.set_ticks_position('left') 22 | ax1.set_title('Box Plots: Resampling of Two Distributions') 23 | ax1.set_xlabel('Distribution') 24 | ax1.set_ylabel('Value') 25 | 26 | plt.savefig('box_plot.png', dpi=400, bbox_inches='tight') 27 | plt.show() -------------------------------------------------------------------------------- /database/6db_mysql_update_from_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import MySQLdb 4 | import sys 5 | 6 | # Path to and name of a CSV input file 7 | input_file = sys.argv[1] 8 | 9 | # Connect to a MySQL database 10 | con = MySQLdb.connect(host='localhost', port=3306, db='my_suppliers', \ 11 | user='root', passwd='my_password') 12 | c = con.cursor() 13 | 14 | # Read the CSV file and update the specific rows 15 | file_reader = csv.reader(open(input_file, 'r', newline=''), delimiter=',') 16 | header = next(file_reader, None) 17 | for row in file_reader: 18 | data = [] 19 | for column_index in range(len(header)): 20 | data.append(str(row[column_index]).strip()) 21 | print(data) 22 | c.execute("""UPDATE Suppliers SET Cost=%s, Purchase_Date=%s WHERE Supplier_Name=%s;""", data) 23 | con.commit() 24 | 25 | # Query the Suppliers table 26 | c.execute("SELECT * FROM Suppliers") 27 | rows = c.fetchall() 28 | for row in rows: 29 | output = [] 30 | for column_index in range(len(row)): 31 | output.append(str(row[column_index])) 32 | print(output) 33 | -------------------------------------------------------------------------------- /csv/pandas_sum_average_from_multiple_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | all_files = glob.glob(os.path.join(input_path,'sales_*')) 11 | all_data_frames = [] 12 | for input_file in all_files: 13 | data_frame = pd.read_csv(input_file, index_col=None) 14 | 15 | total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) \ 16 | for value in data_frame.loc[:, 'Sale Amount']]).sum() 17 | 18 | average_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) \ 19 | for value in data_frame.loc[:, 'Sale Amount']]).mean() 20 | 21 | data = {'file_name': os.path.basename(input_file), 22 | 'total_sales': total_sales, 23 | 'average_sales': average_sales} 24 | 25 | all_data_frames.append(pd.DataFrame(data, columns=['file_name', 'total_sales', 'average_sales'])) 26 | 27 | data_frames_concat = pd.concat(all_data_frames, axis=0, ignore_index=True) 28 | 29 | data_frames_concat.to_csv(output_file, index = False) -------------------------------------------------------------------------------- /database/1db_count_rows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sqlite3 3 | 4 | # Create an in-memory SQLite3 database 5 | # Create a table called sales with four attributes 6 | con = sqlite3.connect(':memory:') 7 | query = """CREATE TABLE sales 8 | (customer VARCHAR(20), 9 | product VARCHAR(40), 10 | amount FLOAT, 11 | date DATE);""" 12 | con.execute(query) 13 | con.commit() 14 | 15 | # Insert a few rows of data into the table 16 
| data = [('Richard Lucas', 'Notepad', 2.50, '2014-01-02'), 17 | ('Jenny Kim', 'Binder', 4.15, '2014-01-15'), 18 | ('Svetlana Crow', 'Printer', 155.75, '2014-02-03'), 19 | ('Stephen Randolph', 'Computer', 679.40, '2014-02-20')] 20 | statement = "INSERT INTO sales VALUES(?, ?, ?, ?)" 21 | con.executemany(statement, data) 22 | con.commit() 23 | 24 | # Query the sales table 25 | cursor = con.execute("SELECT * FROM sales") 26 | rows = cursor.fetchall() 27 | 28 | # Count the number of rows in the output 29 | row_counter = 0 30 | for row in rows: 31 | print(row) 32 | row_counter += 1 33 | print('Number of rows: {}'.format(row_counter)) 34 | -------------------------------------------------------------------------------- /plots/ggplot_plots.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from ggplot import * 3 | """ 4 | print(mtcars.head()) 5 | plt1 = ggplot(aes(x='mpg'), data=mtcars) +\ 6 | geom_histogram(fill='darkblue', binwidth=2) +\ 7 | xlim(10, 35) + ylim(0, 10) +\ 8 | xlab("MPG") + ylab("Frequency") +\ 9 | ggtitle("Histogram of MPG") +\ 10 | theme_matplotlib() 11 | print(plt1) 12 | 13 | print(meat.head()) 14 | plt2 = ggplot(aes(x='date', y='beef'), data=meat) +\ 15 | geom_line(color='purple', size=1.5, alpha=0.75) +\ 16 | stat_smooth(colour='blue', size=2.0, span=0.15) +\ 17 | xlab("Year") + ylab("Head of Cattle Slaughtered") +\ 18 | ggtitle("Beef Consumption Over Time") +\ 19 | theme_seaborn() 20 | print(plt2) 21 | """ 22 | print(diamonds.head()) 23 | plt3 = ggplot(diamonds, aes(x='carat', y='price', colour='cut')) +\ 24 | geom_point(alpha=0.5) +\ 25 | scale_color_gradient(low='#05D9F6', high='#5011D1') +\ 26 | xlim(0, 6) + ylim(0, 20000) +\ 27 | xlab("Carat") + ylab("Price") +\ 28 | ggtitle("Diamond Price by Carat and Cut") +\ 29 | theme_gray() 30 | print(plt3) 31 | 32 | ggsave(plt3, "ggplot_plots.png") -------------------------------------------------------------------------------- /csv/10csv_reader_sum_average_from_multiple_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import glob 4 | import os 5 | import string 6 | import sys 7 | 8 | input_path = sys.argv[1] 9 | output_file = sys.argv[2] 10 | 11 | output_header_list = ['file_name', 'total_sales', 'average_sales'] 12 | 13 | csv_out_file = open(output_file, 'a', newline='') 14 | filewriter = csv.writer(csv_out_file) 15 | filewriter.writerow(output_header_list) 16 | 17 | for input_file in glob.glob(os.path.join(input_path,'sales_*')): 18 | with open(input_file, 'r', newline='') as csv_in_file: 19 | filereader = csv.reader(csv_in_file) 20 | output_list = [ ] 21 | output_list.append(os.path.basename(input_file)) 22 | header = next(filereader) 23 | total_sales = 0.0 24 | number_of_sales = 0.0 25 | for row in filereader: 26 | sale_amount = row[3] 27 | total_sales += float(str(sale_amount).strip('$').replace(',','')) 28 | number_of_sales += 1.0 29 | average_sales = '{0:.2f}'.format(total_sales / number_of_sales) 30 | output_list.append(total_sales) 31 | output_list.append(average_sales) 32 | filewriter.writerow(output_list) 33 | csv_out_file.close() 34 | -------------------------------------------------------------------------------- /excel/3excel_parsing_and_write_keep_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | 
from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 12 | 13 | with open_workbook(input_file) as workbook: 14 | worksheet = workbook.sheet_by_name('january_2013') 15 | for row_index in range(worksheet.nrows): 16 | row_list_output = [] 17 | for col_index in range(worksheet.ncols): 18 | if worksheet.cell_type(row_index, col_index) == 3: 19 | date_cell = xldate_as_tuple(worksheet.cell_value\ 20 | (row_index, col_index),workbook.datemode) 21 | date_cell = date(*date_cell[0:3]).strftime\ 22 | ('%m/%d/%Y') 23 | row_list_output.append(date_cell) 24 | output_worksheet.write(row_index, col_index, date_cell) 25 | else: 26 | non_date_cell = worksheet.cell_value\ 27 | (row_index,col_index) 28 | row_list_output.append(non_date_cell) 29 | output_worksheet.write(row_index, col_index,\ 30 | non_date_cell) 31 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /applications/3parse_text_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | input_file = sys.argv[1] 5 | output_file = sys.argv[2] 6 | 7 | messages = {} 8 | notes = [] 9 | with open(input_file, 'r', newline='') as text_file: 10 | for row in text_file: 11 | if '[Note]' in row: 12 | row_list = row.split(' ', 4) 13 | day = row_list[0].strip() 14 | note = row_list[4].strip('\n').strip() 15 | if note not in notes: 16 | notes.append(note) 17 | if day not in messages: 18 | messages[day] = {} 19 | if note not in messages[day]: 20 | messages[day][note] = 1 21 | else: 22 | messages[day][note] += 1 23 | 24 | filewriter = open(output_file, 'w', newline='') 25 | header = ['Date'] 26 | header.extend(notes) 27 | header = ','.join(map(str,header)) + '\n' 28 | print(header) 29 | filewriter.write(header) 30 | for day, day_value in messages.items(): 31 | row_of_output = [] 32 | row_of_output.append(day) 33 | for index in range(len(notes)): 34 | if notes[index] in day_value.keys(): 35 | row_of_output.append(day_value[notes[index]]) 36 | else: 37 | row_of_output.append(0) 38 | output = ','.join(map(str,row_of_output)) + '\n' 39 | print(output) 40 | filewriter.write(output) 41 | filewriter.close() -------------------------------------------------------------------------------- /excel/7excel_column_by_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 12 | 13 | my_columns = [1, 4] 14 | 15 | with open_workbook(input_file) as workbook: 16 | worksheet = workbook.sheet_by_name('january_2013') 17 | data = [] 18 | for row_index in range(worksheet.nrows): 19 | row_list = [] 20 | for column_index in my_columns: 21 | cell_value = worksheet.cell_value(row_index,column_index) 22 | cell_type = worksheet.cell_type(row_index, column_index) 23 | if cell_type == 3: 24 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 25 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 26 | row_list.append(date_cell) 27 | else: 28 | row_list.append(cell_value) 29 | data.append(row_list) 30 | 31 | for list_index, output_list in 
enumerate(data): 32 | for element_index, element in enumerate(output_list): 33 | output_worksheet.write(list_index, element_index, element) 34 | 35 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /database/2db_insert_rows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sqlite3 4 | import sys 5 | 6 | # Path to and name of a CSV input file 7 | input_file = sys.argv[1] 8 | 9 | # Create an in-memory SQLite3 database 10 | # Create a table called Suppliers with five attributes 11 | con = sqlite3.connect('Suppliers.db') 12 | c = con.cursor() 13 | create_table = """CREATE TABLE IF NOT EXISTS Suppliers 14 | (Supplier_Name VARCHAR(20), 15 | Invoice_Number VARCHAR(20), 16 | Part_Number VARCHAR(20), 17 | Cost FLOAT, 18 | Purchase_Date DATE);""" 19 | c.execute(create_table) 20 | con.commit() 21 | 22 | # Read the CSV file 23 | # Insert the data into the Suppliers table 24 | file_reader = csv.reader(open(input_file, 'r'), delimiter=',') 25 | header = next(file_reader, None) 26 | for row in file_reader: 27 | data = [] 28 | for column_index in range(len(header)): 29 | data.append(row[column_index]) 30 | print(data) 31 | c.execute("INSERT INTO Suppliers VALUES (?, ?, ?, ?, ?);", data) 32 | con.commit() 33 | 34 | # Query the Suppliers table 35 | output = c.execute("SELECT * FROM Suppliers") 36 | rows = output.fetchall() 37 | for row in rows: 38 | output = [] 39 | for column_index in range(len(row)): 40 | output.append(str(row[column_index])) 41 | print(output) 42 | -------------------------------------------------------------------------------- /plots/pandas_plots.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.style.use('ggplot') 6 | 7 | fig, axes = plt.subplots(nrows=1, ncols=2) 8 | ax1, ax2 = axes.ravel() 9 | 10 | data_frame = pd.DataFrame(np.random.rand(5, 3), 11 | index=['Customer 1', 'Customer 2', 'Customer 3', 'Customer 4', 'Customer 5'], 12 | columns=pd.Index(['Metric 1', 'Metric 2', 'Metric 3'], name='Metrics')) 13 | 14 | data_frame.plot(kind='bar', ax=ax1, alpha=0.75, title='Bar Plot') 15 | plt.setp(ax1.get_xticklabels(), rotation=45, fontsize=10) 16 | plt.setp(ax1.get_yticklabels(), rotation=0, fontsize=10) 17 | ax1.set_xlabel('Customer') 18 | ax1.set_ylabel('Value') 19 | ax1.xaxis.set_ticks_position('bottom') 20 | ax1.yaxis.set_ticks_position('left') 21 | 22 | colors = dict(boxes='DarkBlue', whiskers='Gray', medians='Red', caps='Black') 23 | data_frame.plot(kind='box', color=colors, sym='r.', ax=ax2, title='Box Plot') 24 | plt.setp(ax2.get_xticklabels(), rotation=45, fontsize=10) 25 | plt.setp(ax2.get_yticklabels(), rotation=0, fontsize=10) 26 | ax2.set_xlabel('Metric') 27 | ax2.set_ylabel('Value') 28 | ax2.xaxis.set_ticks_position('bottom') 29 | ax2.yaxis.set_ticks_position('left') 30 | 31 | plt.savefig('pandas_plots.png', dpi=400, bbox_inches='tight') 32 | plt.show() -------------------------------------------------------------------------------- /applications/customer_category_history.csv: -------------------------------------------------------------------------------- 1 | Customer Name,Category,Price,Date,,,, 2 | John Smith,Bronze,$20.00,1/22/2014,,,, 3 | John Smith,Bronze,$25.00,3/15/2014,,,, 4 | John Smith,Silver,$30.00,4/2/2014,,,, 5 | John Smith,Gold,$40.00,5/11/2014,,,, 6 | John 
Smith,Gold,$45.00,7/13/2014,,,, 7 | Mary Yu,Silver,$30.00,2/3/2014,,,, 8 | Mary Yu,Gold,$40.00,4/16/2014,,,, 9 | Mary Yu,Gold,$45.00,6/23/2014,,,, 10 | Wayne Thompson,Bronze,$20.00,1/13/2014,,,, 11 | Wayne Thompson,Bronze,$25.00,3/24/2014,,,, 12 | Wayne Thompson,Bronze,$30.00,5/21/2014,,,, 13 | Wayne Thompson,Silver,$30.00,6/29/2014,,,, 14 | Bruce Johnson,Bronze,$20.00,2/9/2014,,,, 15 | Bruce Johnson,Bronze,$25.00,3/22/2014,,,, 16 | Bruce Johnson,Silver,$30.00,4/27/2014,,,, 17 | Bruce Johnson,Silver,$35.00,5/8/2014,,,, 18 | Bruce Johnson,Gold,$40.00,6/26/2014,,,, 19 | Bruce Johnson,Gold,$45.00,7/21/2014,,,, 20 | Annie Lee,Bronze,$20.00,3/16/2014,,,, 21 | Annie Lee,Silver,$30.00,4/11/2014,,,, 22 | Annie Lee,Gold,$40.00,5/25/2014,,,, 23 | Annie Lee,Gold,$45.00,7/14/2014,,,, 24 | Annie Lee,Gold,$50.00,7/21/2014,,,, 25 | Priya Patel,Silver,$30.00,1/19/2014,,,, 26 | Priya Patel,Silver,$35.00,2/28/2014,,,, 27 | Priya Patel,Silver,$40.00,3/26/2014,,,, 28 | Priya Patel,Gold,$40.00,4/28/2014,,,, 29 | Priya Patel,Gold,$45.00,5/12/2014,,,, 30 | Priya Patel,Gold,$50.00,6/21/2014,,,, 31 | -------------------------------------------------------------------------------- /database/4db_mysql_load_from_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import MySQLdb 4 | import sys 5 | from datetime import datetime, date 6 | 7 | # Path to and name of a CSV input file 8 | input_file = sys.argv[1] 9 | 10 | # Connect to a MySQL database 11 | con = MySQLdb.connect(host='localhost', port=3306, db='my_suppliers', user='python_training', passwd='python_training') 12 | c = con.cursor() 13 | 14 | # Read the CSV file 15 | # Insert the data into the Suppliers table 16 | file_reader = csv.reader(open(input_file, 'r'), delimiter=',') 17 | header = next(file_reader) 18 | for row in file_reader: 19 | data = [] 20 | for column_index in range(len(header)): 21 | if column_index < 4: 22 | data.append(str(row[column_index]).lstrip('$')\ 23 | .replace(',', '').strip()) 24 | else: 25 | a_date = datetime.date(datetime.strptime(\ 26 | str(row[column_index]), '%m/%d/%Y')) 27 | # %Y: year is 2016; %y: year is 15 28 | a_date = a_date.strftime('%Y-%m-%d') 29 | data.append(a_date) 30 | print(data) 31 | c.execute("""INSERT INTO Suppliers VALUES (%s, %s, %s, %s, %s);""", data) 32 | con.commit() 33 | 34 | # Query the Suppliers table 35 | c.execute("SELECT * FROM Suppliers") 36 | rows = c.fetchall() 37 | for row in rows: 38 | row_list_output = [] 39 | for column_index in range(len(row)): 40 | row_list_output.append(str(row[column_index])) 41 | print(row_list_output) 42 | -------------------------------------------------------------------------------- /excel/4excel_value_meets_condition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 12 | 13 | sale_amount_column_index = 3 14 | with open_workbook(input_file) as workbook: 15 | worksheet = workbook.sheet_by_name('january_2013') 16 | data = [] 17 | header = worksheet.row_values(0) 18 | data.append(header) 19 | for row_index in range(1,worksheet.nrows): 20 | row_list = [] 21 | sale_amount = worksheet.cell_value(row_index, sale_amount_column_index) 22 | if 
sale_amount > 1400.0: 23 | for column_index in range(worksheet.ncols): 24 | cell_value = worksheet.cell_value(row_index,column_index) 25 | cell_type = worksheet.cell_type(row_index, column_index) 26 | if cell_type == 3: 27 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 28 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 29 | row_list.append(date_cell) 30 | else: 31 | row_list.append(cell_value) 32 | if row_list: 33 | data.append(row_list) 34 | 35 | for list_index, output_list in enumerate(data): 36 | for element_index, element in enumerate(output_list): 37 | output_worksheet.write(list_index, element_index, element) 38 | 39 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /excel/8excel_column_by_name.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 12 | 13 | my_columns = ['Customer ID', 'Purchase Date'] 14 | 15 | with open_workbook(input_file) as workbook: 16 | worksheet = workbook.sheet_by_name('january_2013') 17 | data = [my_columns] 18 | header_list = worksheet.row_values(0) 19 | header_index_list = [] 20 | for header_index in range(len(header_list)): 21 | if header_list[header_index] in my_columns: 22 | header_index_list.append(header_index) 23 | for row_index in range(1,worksheet.nrows): 24 | row_list = [] 25 | for column_index in header_index_list: 26 | cell_value = worksheet.cell_value(row_index,column_index) 27 | cell_type = worksheet.cell_type(row_index, column_index) 28 | if cell_type == 3: 29 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 30 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 31 | row_list.append(date_cell) 32 | else: 33 | row_list.append(cell_value) 34 | data.append(row_list) 35 | 36 | for list_index, output_list in enumerate(data): 37 | for element_index, element in enumerate(output_list): 38 | output_worksheet.write(list_index, element_index, element) 39 | 40 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /excel/6excel_value_matches_pattern.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import re 3 | import sys 4 | from datetime import date 5 | from xlrd import open_workbook, xldate_as_tuple 6 | from xlwt import Workbook 7 | 8 | input_file = sys.argv[1] 9 | output_file = sys.argv[2] 10 | 11 | output_workbook = Workbook() 12 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 13 | 14 | pattern = re.compile(r'(?P^J.*)') 15 | 16 | customer_name_column_index = 1 17 | with open_workbook(input_file) as workbook: 18 | worksheet = workbook.sheet_by_name('january_2013') 19 | data = [] 20 | header = worksheet.row_values(0) 21 | data.append(header) 22 | for row_index in range(1, worksheet.nrows): 23 | row_list = [] 24 | if pattern.search(worksheet.cell_value(row_index, customer_name_column_index)): 25 | for column_index in range(worksheet.ncols): 26 | cell_value = worksheet.cell_value(row_index,column_index) 27 | cell_type = worksheet.cell_type(row_index, column_index) 28 | if cell_type == 3: 29 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 30 | 
date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 31 | row_list.append(date_cell) 32 | else: 33 | row_list.append(cell_value) 34 | if row_list: 35 | data.append(row_list) 36 | 37 | for list_index, output_list in enumerate(data): 38 | for element_index, element in enumerate(output_list): 39 | output_worksheet.write(list_index, element_index, element) 40 | 41 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /applications/3parse_text_file_skip_first_space.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import string 3 | import sys 4 | 5 | input_file = sys.argv[1] 6 | #output_file = sys.argv[2] 7 | 8 | messages = {} 9 | notes = [] 10 | with open(input_file, 'rU') as text_file: 11 | for row in text_file: 12 | if '[Note]' in row: 13 | n = 2 14 | groups = row.split(' ') 15 | date_time = ' '.join(groups[:n]) 16 | rest_of_line_string = ' '.join(groups[n:]) 17 | rest_of_line_list = rest_of_line_string.split(' ', 2) 18 | note = rest_of_line_list[2].strip('\n').strip() 19 | row_list = [] 20 | row_list.append(date_time) 21 | row_list.append(note) 22 | print row_list 23 | 24 | day = row_list[0] 25 | note = row_list[1] 26 | if note not in notes: 27 | notes.append(note) 28 | if day not in messages: 29 | messages[day] = {} 30 | if note not in messages[day]: 31 | messages[day][note] = 1 32 | else: 33 | messages[day][note] += 1 34 | 35 | #filewriter = open(output_file, 'wb') 36 | header = ['Date'] 37 | header.extend(notes) 38 | header = ','.join(map(str,header)) + '\n' 39 | print header 40 | #filewriter.write(header) 41 | for day, day_value in messages.items(): 42 | row_of_output = [] 43 | row_of_output.append(day) 44 | for index in range(len(notes)): 45 | if notes[index] in day_value.keys(): 46 | row_of_output.append(day_value[notes[index]]) 47 | else: 48 | row_of_output.append(0) 49 | output = ','.join(map(str,row_of_output)) + '\n' 50 | print output 51 | #filewriter.write(output) 52 | #filewriter.close() -------------------------------------------------------------------------------- /excel/13excel_concat_data_from_multiple_workbooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import glob 3 | import os 4 | import sys 5 | from datetime import date 6 | from xlrd import open_workbook, xldate_as_tuple 7 | from xlwt import Workbook 8 | 9 | input_folder = sys.argv[1] 10 | output_file = sys.argv[2] 11 | 12 | output_workbook = Workbook() 13 | output_worksheet = output_workbook.add_sheet('all_data_all_workbooks') 14 | 15 | data = [] 16 | first_worksheet = True 17 | for input_file in glob.glob(os.path.join(input_folder, '*.xls*')): 18 | print os.path.basename(input_file) 19 | with open_workbook(input_file) as workbook: 20 | for worksheet in workbook.sheets(): 21 | if first_worksheet: 22 | header_row = worksheet.row_values(0) 23 | data.append(header_row) 24 | first_worksheet = False 25 | for row_index in range(1,worksheet.nrows): 26 | row_list = [] 27 | for column_index in range(worksheet.ncols): 28 | cell_value = worksheet.cell_value(row_index,column_index) 29 | cell_type = worksheet.cell_type(row_index, column_index) 30 | if cell_type == 3: 31 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 32 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 33 | row_list.append(date_cell) 34 | else: 35 | row_list.append(cell_value) 36 | data.append(row_list) 37 | 38 | for list_index, output_list in 
enumerate(data): 39 | for element_index, element in enumerate(output_list): 40 | output_worksheet.write(list_index, element_index, element) 41 | 42 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /database/3db_update_rows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sqlite3 4 | import sys 5 | 6 | # Path to and name of a CSV input file 7 | input_file = sys.argv[1] 8 | 9 | # Create an in-memory SQLite3 database 10 | # Create a table called sales with four attributes 11 | con = sqlite3.connect(':memory:') 12 | query = """CREATE TABLE IF NOT EXISTS sales 13 | (customer VARCHAR(20), 14 | product VARCHAR(40), 15 | amount FLOAT, 16 | date DATE);""" 17 | con.execute(query) 18 | con.commit() 19 | 20 | # Insert a few rows of data into the table 21 | data = [('Richard Lucas', 'Notepad', 2.50, '2014-01-02'), 22 | ('Jenny Kim', 'Binder', 4.15, '2014-01-15'), 23 | ('Svetlana Crow', 'Printer', 155.75, '2014-02-03'), 24 | ('Stephen Randolph', 'Computer', 679.40, '2014-02-20')] 25 | for tuple in data: 26 | print(tuple) 27 | statement = "INSERT INTO sales VALUES(?, ?, ?, ?)" 28 | con.executemany(statement, data) 29 | con.commit() 30 | 31 | # Read the CSV file and update the specific rows 32 | file_reader = csv.reader(open(input_file, 'r'), delimiter=',') 33 | header = next(file_reader, None) 34 | for row in file_reader: 35 | data = [] 36 | for column_index in range(len(header)): 37 | data.append(row[column_index]) 38 | print(data) 39 | con.execute("UPDATE sales SET amount=?, date=? WHERE customer=?;", data) 40 | con.commit() 41 | 42 | # Query the sales table 43 | cursor = con.execute("SELECT * FROM sales") 44 | rows = cursor.fetchall() 45 | for row in rows: 46 | output = [] 47 | for column_index in range(len(row)): 48 | output.append(str(row[column_index])) 49 | print(output) -------------------------------------------------------------------------------- /excel/10excel_column_by_name_all_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('selected_columns_all_worksheets') 12 | 13 | my_columns = ['Customer Name', 'Sale Amount'] 14 | 15 | first_worksheet = True 16 | with open_workbook(input_file) as workbook: 17 | data = [my_columns] 18 | index_of_cols_to_keep = [] 19 | for worksheet in workbook.sheets(): 20 | if first_worksheet: 21 | header = worksheet.row_values(0) 22 | for column_index in range(len(header)): 23 | if header[column_index] in my_columns: 24 | index_of_cols_to_keep.append(column_index) 25 | first_worksheet = False 26 | for row_index in range(1, worksheet.nrows): 27 | row_list = [] 28 | for column_index in index_of_cols_to_keep: 29 | cell_value = worksheet.cell_value(row_index, column_index) 30 | cell_type = worksheet.cell_type(row_index, column_index) 31 | if cell_type == 3: 32 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 33 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 34 | row_list.append(date_cell) 35 | else: 36 | row_list.append(cell_value) 37 | data.append(row_list) 38 | 39 | for list_index, output_list in enumerate(data): 40 | for element_index, element in 
enumerate(output_list): 41 | output_worksheet.write(list_index, element_index, element) 42 | 43 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /excel/5excel_value_in_set.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('jan_2013_output') 12 | 13 | important_dates = ['01/24/2013', '01/31/2013'] 14 | 15 | purchase_date_column_index = 4 16 | with open_workbook(input_file) as workbook: 17 | worksheet = workbook.sheet_by_name('january_2013') 18 | data = [] 19 | header = worksheet.row_values(0) 20 | data.append(header) 21 | for row_index in range(1, worksheet.nrows): 22 | purchase_datetime = xldate_as_tuple(worksheet.cell_value(row_index, purchase_date_column_index),workbook.datemode) 23 | purchase_date = date(*purchase_datetime[0:3]).strftime('%m/%d/%Y') 24 | row_list = [] 25 | if purchase_date in important_dates: 26 | for column_index in range(worksheet.ncols): 27 | cell_value = worksheet.cell_value(row_index,column_index) 28 | cell_type = worksheet.cell_type(row_index, column_index) 29 | if cell_type == 3: 30 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 31 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 32 | row_list.append(date_cell) 33 | else: 34 | row_list.append(cell_value) 35 | if row_list: 36 | data.append(row_list) 37 | 38 | for list_index, output_list in enumerate(data): 39 | for element_index, element in enumerate(output_list): 40 | output_worksheet.write(list_index, element_index, element) 41 | 42 | output_workbook.save(output_file) 43 | -------------------------------------------------------------------------------- /excel/9excel_value_meets_condition_all_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('filtered_rows_all_worksheets') 12 | 13 | sales_column_index = 3 14 | threshold = 2000.0 15 | 16 | first_worksheet = True 17 | with open_workbook(input_file) as workbook: 18 | data = [] 19 | for worksheet in workbook.sheets(): 20 | if first_worksheet: 21 | header_row = worksheet.row_values(0) 22 | data.append(header_row) 23 | first_worksheet = False 24 | for row_index in range(1,worksheet.nrows): 25 | row_list = [] 26 | sale_amount = worksheet.cell_value(row_index, sales_column_index) 27 | sale_amount = float(str(sale_amount).replace('$', '').replace(',', '')) 28 | if sale_amount > threshold: 29 | for column_index in range(worksheet.ncols): 30 | cell_value = worksheet.cell_value(row_index,column_index) 31 | cell_type = worksheet.cell_type(row_index, column_index) 32 | if cell_type == 3: 33 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 34 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 35 | row_list.append(date_cell) 36 | else: 37 | row_list.append(cell_value) 38 | if row_list: 39 | data.append(row_list) 40 | 41 | for list_index, output_list in enumerate(data): 42 | for element_index, 
element in enumerate(output_list): 43 | output_worksheet.write(list_index, element_index, element) 44 | 45 | output_workbook.save(output_file) 46 | -------------------------------------------------------------------------------- /excel/11excel_value_meets_condition_set_of_worksheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from datetime import date 4 | from xlrd import open_workbook, xldate_as_tuple 5 | from xlwt import Workbook 6 | 7 | input_file = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | output_workbook = Workbook() 11 | output_worksheet = output_workbook.add_sheet('set_of_worksheets') 12 | 13 | my_sheets = [0,1] 14 | threshold = 1900.0 15 | sales_column_index = 3 16 | 17 | first_worksheet = True 18 | with open_workbook(input_file) as workbook: 19 | data = [] 20 | for sheet_index in range(workbook.nsheets): 21 | if sheet_index in my_sheets: 22 | worksheet = workbook.sheet_by_index(sheet_index) 23 | if first_worksheet: 24 | header_row = worksheet.row_values(0) 25 | data.append(header_row) 26 | first_worksheet = False 27 | for row_index in range(1,worksheet.nrows): 28 | row_list = [] 29 | sale_amount = worksheet.cell_value(row_index, sales_column_index) 30 | if sale_amount > threshold: 31 | for column_index in range(worksheet.ncols): 32 | cell_value = worksheet.cell_value(row_index,column_index) 33 | cell_type = worksheet.cell_type(row_index, column_index) 34 | if cell_type == 3: 35 | date_cell = xldate_as_tuple(cell_value,workbook.datemode) 36 | date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y') 37 | row_list.append(date_cell) 38 | else: 39 | row_list.append(cell_value) 40 | if row_list: 41 | data.append(row_list) 42 | 43 | for list_index, output_list in enumerate(data): 44 | for element_index, element in enumerate(output_list): 45 | output_worksheet.write(list_index, element_index, element) 46 | 47 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /applications/mysql_server_error_log.txt: -------------------------------------------------------------------------------- 1 | 246824 10:40:55 mysqld_safe Starting mysqld daemon with databases from /usr/local/mysql/data 2 | 2014-02-03 10:40:55 98765 [Note] InnoDB: Compressed tables use zlib 1.2.3 3 | 2014-02-03 10:40:55 98765 [Note] InnoDB: Using atomics to ref count buffer pool pages 4 | 2014-02-03 10:40:55 98765 [Note] InnoDB: 5.6.16 started; log sequence number 1234567 5 | 2014-02-03 10:47:18 64208 [Note] InnoDB: Using atomics to ref count buffer pool pages 6 | 2014-02-03 10:47:18 64208 [Note] InnoDB: Compressed tables use zlib 1.2.3 7 | 2014-02-03 10:55:55 64208 [Note] /usr/local/mysql/bin/mysqld: Shutdown complete 8 | 9 | 135791 15:59:29 mysqld_safe Starting mysqld daemon with databases from /usr/local/mysql/data 10 | 2014-03-07 10:40:55 98765 [Note] InnoDB: Compressed tables use zlib 1.2.3 11 | 2014-03-07 10:40:55 98765 [Note] InnoDB: Compressed tables use zlib 1.2.3 12 | 2014-03-07 10:40:55 98765 [Note] InnoDB: 5.6.16 started; log sequence number 1234567 13 | 2014-03-07 10:47:18 64208 [Note] InnoDB: Using atomics to ref count buffer pool pages 14 | 2014-03-07 10:47:18 64208 [Note] InnoDB: Compressed tables use zlib 1.2.3 15 | 2014-03-07 10:55:55 64208 [Note] /usr/local/mysql/bin/mysqld: Shutdown complete 16 | 17 | 124578 15:59:29 mysqld_safe Starting mysqld daemon with databases from /usr/local/mysql/data 18 | 2014-10-27 10:40:55 98765 [Note] InnoDB: Completed 
initialization of buffer pool 19 | 2014-10-27 10:40:55 98765 [Note] InnoDB: IPv6 is available. 20 | 2014-10-27 10:40:55 98765 [Note] InnoDB: 5.6.16 started; log sequence number 1234567 21 | 2014-10-27 10:47:18 64208 [Note] InnoDB: Completed initialization of buffer pool 22 | 2014-10-27 10:47:18 64208 [Note] InnoDB: IPv6 is available. 23 | 2014-10-27 10:55:55 64208 [Note] /usr/local/mysql/bin/mysqld: Shutdown complete 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /applications/output_files/1output.csv: -------------------------------------------------------------------------------- 1 | 1234.0,Widget 1,Supplier A,1100,2013-06-02,suppliers.xls,suppliers_2013 2 | 2345.0,Widget 2,Supplier A,2300,2013-06-17,suppliers.xls,suppliers_2013 3 | 4567.0,Widget 4,Supplier B,1300,2013-07-04,suppliers.xls,suppliers_2013 4 | 6789.0,Widget 6,Supplier C,1175,2013-07-23,suppliers.xls,suppliers_2013 5 | 7890.0,Widget 7,Supplier C,1200,2013-07-27,suppliers.xls,suppliers_2013 6 | 1234.0,Widget 1,Supplier A,1100,2014-06-02,suppliers.xls,suppliers_2014 7 | 2345.0,Widget 2,Supplier A,2300,2014-06-17,suppliers.xls,suppliers_2014 8 | 4567.0,Widget 4,Supplier B,1300,2014-07-04,suppliers.xls,suppliers_2014 9 | 6789.0,Widget 6,Supplier C,1175,2014-07-23,suppliers.xls,suppliers_2014 10 | 7890.0,Widget 7,Supplier C,1200,2014-07-27,suppliers.xls,suppliers_2014 11 | 1234.0,Widget 1,Supplier A,1100,2013-06-02,suppliers.xlsx,suppliers_2013 12 | 2345.0,Widget 2,Supplier A,2300,2013-06-17,suppliers.xlsx,suppliers_2013 13 | 4567.0,Widget 4,Supplier B,1300,2013-07-04,suppliers.xlsx,suppliers_2013 14 | 6789.0,Widget 6,Supplier C,1175,2013-07-23,suppliers.xlsx,suppliers_2013 15 | 7890.0,Widget 7,Supplier C,1200,2013-07-27,suppliers.xlsx,suppliers_2013 16 | 1234.0,Widget 1,Supplier A,1100,2014-06-02,suppliers.xlsx,suppliers_2014 17 | 2345.0,Widget 2,Supplier A,2300,2014-06-17,suppliers.xlsx,suppliers_2014 18 | 4567.0,Widget 4,Supplier B,1300,2014-07-04,suppliers.xlsx,suppliers_2014 19 | 6789.0,Widget 6,Supplier C,1175,2014-07-23,suppliers.xlsx,suppliers_2014 20 | 7890.0,Widget 7,Supplier C,1200,2014-07-27,suppliers.xlsx,suppliers_2014 21 | 1234,Widget 1,Supplier A,1100,6/2/2012,suppliers_2012.csv 22 | 2345,Widget 2,Supplier A,2300,6/17/2012,suppliers_2012.csv 23 | 4567,Widget 4,Supplier B,1300,7/4/2012,suppliers_2012.csv 24 | 6789,Widget 6,Supplier C,1175,7/23/2012,suppliers_2012.csv 25 | 7890,Widget 7,Supplier C,1200,7/27/2012,suppliers_2012.csv 26 | -------------------------------------------------------------------------------- /excel/14excel_sum_average_multiple_workbooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import glob 3 | import os 4 | import sys 5 | from datetime import date 6 | from xlrd import open_workbook, xldate_as_tuple 7 | from xlwt import Workbook 8 | 9 | input_folder = sys.argv[1] 10 | output_file = sys.argv[2] 11 | 12 | output_workbook = Workbook() 13 | output_worksheet = output_workbook.add_sheet('sums_and_averages') 14 | 15 | all_data = [] 16 | sales_column_index = 3 17 | 18 | header = ['workbook', 'worksheet', 'worksheet_total', 'worksheet_average',\ 19 | 'workbook_total', 'workbook_average'] 20 | all_data.append(header) 21 | 22 | for input_file in glob.glob(os.path.join(input_folder, '*.xls*')): 23 | with open_workbook(input_file) as workbook: 24 | list_of_totals = [] 25 | list_of_numbers = [] 26 | workbook_output = [] 27 | for worksheet in workbook.sheets(): 28 | 
total_sales = 0 29 | number_of_sales = 0 30 | worksheet_list = [] 31 | worksheet_list.append(os.path.basename(input_file)) 32 | worksheet_list.append(worksheet.name) 33 | for row_index in range(1,worksheet.nrows): 34 | try: 35 | total_sales += float(str(worksheet.cell_value(row_index,sales_column_index)).strip('$').replace(',','')) 36 | number_of_sales += 1. 37 | except: 38 | total_sales += 0. 39 | number_of_sales += 0. 40 | average_sales = '%.2f' % (total_sales / number_of_sales) 41 | worksheet_list.append(total_sales) 42 | worksheet_list.append(float(average_sales)) 43 | list_of_totals.append(total_sales) 44 | list_of_numbers.append(float(number_of_sales)) 45 | workbook_output.append(worksheet_list) 46 | workbook_total = sum(list_of_totals) 47 | workbook_average = sum(list_of_totals)/sum(list_of_numbers) 48 | for list_element in workbook_output: 49 | list_element.append(workbook_total) 50 | list_element.append(workbook_average) 51 | all_data.extend(workbook_output) 52 | 53 | for list_index, output_list in enumerate(all_data): 54 | for element_index, element in enumerate(output_list): 55 | output_worksheet.write(list_index, element_index, element) 56 | 57 | output_workbook.save(output_file) -------------------------------------------------------------------------------- /excel/pandas_sum_average_multiple_workbooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pandas as pd 3 | import glob 4 | import os 5 | import sys 6 | 7 | input_path = sys.argv[1] 8 | output_file = sys.argv[2] 9 | 10 | all_workbooks = glob.glob(os.path.join(input_path,'*.xls*')) 11 | data_frames = [] 12 | for workbook in all_workbooks: 13 | all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None) 14 | workbook_total_sales = [] 15 | workbook_number_of_sales = [] 16 | worksheet_data_frames = [] 17 | worksheets_data_frame = None 18 | workbook_data_frame = None 19 | for worksheet_name, data in all_worksheets.items(): 20 | total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) for value in data.ix[:, 'Sale Amount']]).sum() 21 | number_of_sales = len(data.loc[:, 'Sale Amount']) 22 | average_sales = pd.DataFrame(total_sales / number_of_sales) 23 | 24 | workbook_total_sales.append(total_sales) 25 | workbook_number_of_sales.append(number_of_sales) 26 | 27 | data = {'workbook': os.path.basename(workbook), 28 | 'worksheet': worksheet_name, 29 | 'worksheet_total': total_sales, 30 | 'worksheet_average': average_sales} 31 | 32 | worksheet_data_frames.append(pd.DataFrame(data, columns=['workbook', 'worksheet', 'worksheet_total', 'worksheet_average'])) 33 | worksheets_data_frame = pd.concat(worksheet_data_frames, axis=0, ignore_index=True) 34 | 35 | workbook_total = pd.DataFrame(workbook_total_sales).sum() 36 | workbook_total_number_of_sales = pd.DataFrame(workbook_number_of_sales).sum() 37 | workbook_average = pd.DataFrame(workbook_total / workbook_total_number_of_sales) 38 | 39 | workbook_stats = {'workbook': os.path.basename(workbook), 40 | 'workbook_total': workbook_total, 41 | 'workbook_average': workbook_average} 42 | 43 | workbook_stats = pd.DataFrame(workbook_stats, columns=['workbook', 'workbook_total', 'workbook_average']) 44 | workbook_data_frame = pd.merge(worksheets_data_frame, workbook_stats, on='workbook', how='left') 45 | data_frames.append(workbook_data_frame) 46 | 47 | all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True) 48 | 49 | writer = pd.ExcelWriter(output_file) 50 | 
all_data_concatenated.to_excel(writer, sheet_name='sums_and_averages', index=False) 51 | writer.save() -------------------------------------------------------------------------------- /applications/2calculate_statistic_by_category.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import sys 4 | from datetime import date, datetime 5 | 6 | def date_diff(date1, date2): 7 | try: 8 | diff = str(datetime.strptime(date1, '%m/%d/%Y') - \ 9 | datetime.strptime(date2, '%m/%d/%Y')).split()[0] 10 | except: 11 | diff = 0 12 | if diff == '0:00:00': 13 | diff = 0 14 | return diff 15 | 16 | input_file = sys.argv[1] 17 | output_file = sys.argv[2] 18 | 19 | packages = {} 20 | previous_name = 'N/A' 21 | previous_package = 'N/A' 22 | previous_package_date = 'N/A' 23 | first_row = True 24 | today = date.today().strftime('%m/%d/%Y') 25 | 26 | with open(input_file, 'r', newline='') as input_csv_file: 27 | filereader = csv.reader(input_csv_file) 28 | header = next(filereader) 29 | for row in filereader: 30 | current_name = row[0] 31 | current_package = row[1] 32 | current_package_date = row[3] 33 | if current_name not in packages: 34 | packages[current_name] = {} 35 | if current_package not in packages[current_name]: 36 | packages[current_name][current_package] = 0 37 | if current_name != previous_name: 38 | if first_row: 39 | first_row = False 40 | else: 41 | diff = date_diff(today, previous_package_date) 42 | if previous_package not in packages[previous_name]: 43 | packages[previous_name][previous_package] = int(diff) 44 | else: 45 | packages[previous_name][previous_package] += int(diff) 46 | else: 47 | diff = date_diff(current_package_date, previous_package_date) 48 | packages[previous_name][previous_package] += int(diff) 49 | previous_name = current_name 50 | previous_package = current_package 51 | previous_package_date = current_package_date 52 | 53 | header = ['Customer Name', 'Category', 'Total Time (in Days)'] 54 | with open(output_file, 'w', newline='') as output_csv_file: 55 | filewriter = csv.writer(output_csv_file) 56 | filewriter.writerow(header) 57 | for customer_name, customer_name_value in packages.items(): 58 | for package_category, package_category_value in packages[customer_name].items(): 59 | row_of_output = [] 60 | print(customer_name, package_category, package_category_value) 61 | row_of_output.append(customer_name) 62 | row_of_output.append(package_category) 63 | row_of_output.append(package_category_value) 64 | filewriter.writerow(row_of_output) -------------------------------------------------------------------------------- /applications/1search_for_items_write_found.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import glob 4 | import os 5 | import sys 6 | from datetime import date 7 | from xlrd import open_workbook, xldate_as_tuple 8 | 9 | item_numbers_file = sys.argv[1] 10 | path_to_folder = sys.argv[2] 11 | output_file = sys.argv[3] 12 | 13 | item_numbers_to_find = [] 14 | with open(item_numbers_file, 'r', newline='') as item_numbers_csv_file: 15 | filereader = csv.reader(item_numbers_csv_file) 16 | for row in filereader: 17 | item_numbers_to_find.append(row[0]) 18 | print(item_numbers_to_find) 19 | 20 | filewriter = csv.writer(open(output_file, 'a', newline='')) 21 | 22 | file_counter = 0 23 | line_counter = 0 24 | count_of_item_numbers = 0 25 | for input_file in glob.glob(os.path.join(path_to_folder, '*.*')): 26 | 
file_counter += 1 27 | if input_file.split('.')[1] == 'csv': 28 | with open(input_file, 'r', newline='') as csv_in_file: 29 | filereader = csv.reader(csv_in_file) 30 | header = next(filereader) 31 | for row in filereader: 32 | row_of_output = [] 33 | for column in range(len(header)): 34 | if column < 3: 35 | cell_value = str(row[column]).strip() 36 | row_of_output.append(cell_value) 37 | elif column == 3: 38 | cell_value = str(row[column]).lstrip('$').replace(',','').split('.')[0].strip() 39 | row_of_output.append(cell_value) 40 | else: 41 | cell_value = str(row[column]).strip() 42 | row_of_output.append(cell_value) 43 | row_of_output.append(os.path.basename(input_file)) 44 | if row[0] in item_numbers_to_find: 45 | filewriter.writerow(row_of_output) 46 | count_of_item_numbers += 1 47 | line_counter += 1 48 | elif input_file.split('.')[1] == 'xls' or input_file.split('.')[1] == 'xlsx': 49 | workbook = open_workbook(input_file) 50 | for worksheet in workbook.sheets(): 51 | try: 52 | header = worksheet.row_values(0) 53 | except IndexError: 54 | pass 55 | for row in range(1, worksheet.nrows): 56 | row_of_output = [] 57 | for column in range(len(header)): 58 | if column < 3: 59 | cell_value = str(worksheet.cell_value(row,column)).strip() 60 | row_of_output.append(cell_value) 61 | elif column == 3: 62 | cell_value = str(worksheet.cell_value(row,column)).split('.')[0].strip() 63 | row_of_output.append(cell_value) 64 | else: 65 | cell_value = xldate_as_tuple(worksheet.cell(row,column).value,workbook.datemode) 66 | cell_value = str(date(*cell_value[0:3])).strip() 67 | row_of_output.append(cell_value) 68 | row_of_output.append(os.path.basename(input_file)) 69 | row_of_output.append(worksheet.name) 70 | if str(worksheet.cell(row,0).value).split('.')[0].strip() in item_numbers_to_find: 71 | filewriter.writerow(row_of_output) 72 | count_of_item_numbers += 1 73 | line_counter += 1 74 | print('Number of files: {}'.format(file_counter)) 75 | print('Number of lines: {}'.format(line_counter)) 76 | print('Number of item numbers: {}'.format(count_of_item_numbers)) -------------------------------------------------------------------------------- /plots/seaborn_plots.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import seaborn as sns 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | 7 | sns.set(color_codes=True) 8 | 9 | 10 | # Simple plot of linear, quadratic, and cubic curves 11 | x = np.linspace(0, 2, 100) 12 | plt.plot(x, x, label='linear') 13 | plt.plot(x, x**2, label='quadratic') 14 | plt.plot(x, x**3, label='cubic') 15 | plt.xlabel('x label') 16 | plt.ylabel('y label') 17 | plt.title("Simple Plot") 18 | plt.legend(loc="best") 19 | plt.show() 20 | 21 | 22 | # Histogram 23 | x = np.random.normal(size=1000) 24 | sns.distplot(x, bins=20, kde=True, rug=False, label="Histogram w/o Density") 25 | sns.axlabel("Value", "Frequency") 26 | plt.title("Histogram of a Random Sample from a Normal Distribution") 27 | plt.legend() 28 | plt.show() 29 | 30 | 31 | # Scatter plot 32 | mean, cov = [5, 10], [(1, .5), (.5, 1)] 33 | data = np.random.multivariate_normal(mean, cov, 200) 34 | data_frame = pd.DataFrame(data, columns=["x", "y"]) 35 | sns.jointplot(x="x", y="y", data=data_frame, kind="reg").set_axis_labels("x", "y") 36 | plt.suptitle("Joint Plot of Two Variables with Bivariate and Univariate Graphs") 37 | plt.show() 38 | 39 | 40 | # Pairwise bivariate 41 | #iris = sns.load_dataset("iris") 42 | #sns.pairplot(iris) 43 
| #plt.show() 44 | 45 | 46 | # Linear regression model 47 | tips = sns.load_dataset("tips") 48 | #sns.lmplot(x="total_bill", y="tip", data=tips) 49 | sns.lmplot(x="size", y="tip", data=tips, x_jitter=.15, ci=None) 50 | #sns.lmplot(x="size", y="tip", data=tips, x_estimator=np.mean, ci=None) 51 | plt.show() 52 | 53 | 54 | # Box plots 55 | sns.boxplot(x="day", y="total_bill", hue="time", data=tips) 56 | #sns.factorplot(x="time", y="total_bill", hue="smoker", 57 | # col="day", data=tips, kind="box", size=4, aspect=.5) 58 | plt.show() 59 | 60 | 61 | # Bar plots 62 | titanic = sns.load_dataset("titanic") 63 | #sns.barplot(x="sex", y="survived", hue="class", data=titanic) 64 | #sns.countplot(y="deck", hue="class", data=titanic, palette="Greens_d") 65 | #plt.show() 66 | 67 | 68 | # Non-linear regression model 69 | anscombe = sns.load_dataset("anscombe") 70 | # polynomial 71 | #sns.lmplot(x="x", y="y", data=anscombe.query("dataset == 'II'"), 72 | # order=2, ci=False, scatter_kws={"s": 80}) 73 | #plt.show() 74 | 75 | 76 | # robust to outliers 77 | #sns.lmplot(x="x", y="y", data=anscombe.query("dataset == 'III'"), 78 | # robust=True, ci=False, scatter_kws={"s": 80}) 79 | #plt.show() 80 | 81 | 82 | # logistic 83 | #tips["big_tip"] = (tips.tip / tips.total_bill) > .15 84 | #sns.lmplot(x="total_bill", y="big_tip", data=tips, logistic=True, y_jitter=.03).set_axis_labels("Total Bill", "Big Tip") 85 | #plt.title("Logistic Regression of Big Tip vs. Total Bill") 86 | #plt.show() 87 | 88 | 89 | # lowess smoother 90 | #sns.lmplot(x="total_bill", y="tip", data=tips, lowess=True) 91 | #plt.show() 92 | 93 | 94 | # Condition on other variables 95 | #sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, 96 | # markers=["o", "x"], palette="Set1") 97 | #sns.lmplot(x="total_bill", y="tip", hue="smoker", 98 | # col="time", row="sex", data=tips) 99 | #plt.show() 100 | 101 | 102 | # Control shape and size of plot 103 | #sns.lmplot(x="total_bill", y="tip", col="day", data=tips, col_wrap=2, size=3) 104 | #sns.lmplot(x="total_bill", y="tip", col="day", data=tips, aspect=.5) 105 | #plt.show() 106 | 107 | 108 | # Plotting regression in other contexts 109 | #sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg") 110 | #sns.pairplot(tips, x_vars=["total_bill", "size"], y_vars=["tip"], 111 | # size=5, aspect=.8, kind="reg") 112 | #sns.pairplot(tips, x_vars=["total_bill", "size"], y_vars=["tip"], 113 | # hue="smoker", size=5, aspect=.8, kind="reg") 114 | #plt.show() 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | foundations-for-analytics-with-python 2 | ======================== 3 | 4 | This repository contains all of the Python scripts, input files, and output files associated with the book, Foundations for Analytics with Python.
5 | 6 | About
7 | My Blog Post: Foundations for Analytics with Python
8 | 9 | Shop
10 | O'Reilly Media
11 | Foundations for Analytics with Python
12 | 13 | Amazon
14 | Foundations for Analytics with Python
15 | 16 | Advance Praise
17 | "This book is a useful learning resource for new Python programmers working with data. The tutorial style and accompanying exercises will help users get their feet wet with the Python language, programming environment, and a number of the most important packages in the ecosystem." - Wes McKinney, Creator of pandas library
18 | 19 | "This is a must read book for anyone who feels limited by spreadsheets and wants to master the basics of coding and automation for business applications. This is also a good primer on programmatic approaches to conducting the most common statistical methods, including correlations, t-tests, and regressions." - Rajiv Krishnamurthy, Manager, Infra Data Science, Facebook
20 | 21 | "Foundations for Analytics with Python is an extremely well-written introduction to Python for analysts, giving clear and practical guidance for the new programmer. It connects principles and best practices effectively, as if Mr. Brownley were sitting next to you, guiding you each step of the way." - Dean Abbott, Co-Founder and Chief Data Scientist at SmarterHQ
22 | 23 | "Data analysis is an essential skill for the modern professional and Clinton's book is the perfect primer to move beyond the pre-defined tools into truly flexible analytics with real code. Even if you haven't written a single line of code before." - Chandika Jayasundara, CEO & Co-Founder, Creately
24 | 25 | "Python is widely used for data analysis -- it is in fact one of the most popular tools/languages for data analysis and data science. Via this book, Clinton is adding to the field in a much needed manner: by teaching the reader to learn how to program as well as automate and scale their data analyses. Everyone today would be well served to learn to code and to apply programming to data analysis. This book serves exactly that purpose: it targets non-coders and teaches them fundamentals of Analytics using Python -- the tool of choice for data scientists today!" - Sameer Chopra, Chief Analytics Officer, GoDaddy
26 | 27 | to download 28 | ======================== 29 | Mac computer:
30 | 1. Open a Terminal window
31 | 2. Navigate to the folder where you want to download the foundations-for-analytics-with-python folder
32 |     For example, to download the foundations-for-analytics-with-python folder onto your Desktop:
33 |         First, type the following and then hit Enter: `cd`
34 |         Second, type the following and then hit Enter: `cd Desktop/`
35 | 3. Finally, to download the foundations-for-analytics-with-python folder, type the following and then hit Enter:
36 |     `git clone https://github.com/cbrownley/foundations-for-analytics-with-python.git`
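Once the repository is downloaded, the scripts (most of which target Python 3) can be run from a Terminal window. Most of them read the paths to their input and output files from command-line arguments (`sys.argv`), so a typical invocation looks like the following, where `some_script.py`, `input_file.csv`, and `output_file.csv` are placeholders rather than specific files from the book:
    `python3 some_script.py input_file.csv output_file.csv`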
37 | 38 | Windows computer:
39 | 1. Go to: https://github.com/cbrownley/foundations-for-analytics-with-python
40 | 2. Click 'Clone or download' and then 'Download ZIP' on the right side of the page
41 | 3. Click on the zipped folder to open it in File Explorer
42 | 4. Click 'Extract all'
43 | 5. Edit the path to save the foundations-for-analytics-with-python folder on your Desktop
44 | 6. Click 'Extract'
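Whichever way you download the repository, note that the scripts also import a number of third-party packages, including pandas, numpy, matplotlib, seaborn, statsmodels, xlrd, and xlwt, as well as the MySQLdb driver for the MySQL examples. Assuming pip for Python 3 is available, one way to install most of these is:
    `pip3 install pandas numpy matplotlib seaborn statsmodels xlrd xlwt`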
45 | -------------------------------------------------------------------------------- /statistics/wine_quality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | import statsmodels.api as sm 7 | import statsmodels.formula.api as smf 8 | from statsmodels.formula.api import ols, glm 9 | 10 | 11 | # Read the data set into a pandas DataFrame 12 | wine = pd.read_csv('winequality-both.csv', sep=',', header=0) 13 | wine.columns = wine.columns.str.replace(' ', '_') 14 | print(wine.head()) 15 | 16 | # Display descriptive statistics for all variables 17 | print(wine.describe()) 18 | 19 | # Identify unique values 20 | print(sorted(wine.quality.unique())) 21 | 22 | # Calculate value frequencies 23 | print(wine.quality.value_counts()) 24 | 25 | # Display descriptive statistics for quality by wine type 26 | print(wine.groupby('type')[['alcohol']].describe().unstack('type')) 27 | 28 | # Calculate specific quantiles 29 | print(wine.groupby('type')[['quality']].quantile([0.25, 0.75]).unstack('type')) 30 | 31 | # Calculate correlation matrix for all variables 32 | print(wine.corr()) 33 | 34 | # Look at relationship between pairs of variables 35 | # Take a "small" sample of red and white wines for plotting 36 | def take_sample(data_frame, replace=False, n=200): 37 | return data_frame.loc[np.random.choice(data_frame.index, replace=replace, size=n)] 38 | reds = wine.loc[wine['type']=='red', :] 39 | whites = wine.loc[wine['type']=='white', :] 40 | reds_sample = take_sample(wine.loc[wine['type']=='red', :]) 41 | whites_sample = take_sample(wine.loc[wine['type']=='white', :]) 42 | wine_sample = pd.concat([reds_sample, whites_sample]) 43 | wine['in_sample'] = np.where(wine.index.isin(wine_sample.index), 1.,0.) 
44 | 45 | reds_sample = reds.ix[np.random.choice(reds.index, 100)] 46 | whites_sample = whites.ix[np.random.choice(whites.index, 100)] 47 | wine_sample = pd.concat([reds_sample, whites_sample], ignore_index=True) 48 | 49 | print(wine['in_sample']) 50 | print(pd.crosstab(wine.in_sample, wine.type, margins=True)) 51 | 52 | sns.set_style("dark") 53 | sns.set_style("darkgrid", {"legend.scatterpoints": 0}) 54 | pg = sns.PairGrid(wine_sample, hue="type", hue_order=["red", "white"], \ 55 | palette=dict(red="red", white="white"), hue_kws={"marker": ["o", "s"]}, vars=['quality', 'alcohol', 'residual_sugar']) 56 | pg.x = wine_sample.ix[wine_sample['type']=='red', 'quality'] 57 | pg = pg.map_diag(plt.hist) 58 | pg.x = wine_sample.ix[wine_sample['type']=='white', 'quality'] 59 | pg = pg.map_diag(plt.hist) 60 | pg = pg.map_offdiag(plt.scatter, edgecolor="black", s=10, alpha=0.25) 61 | #plt.show() 62 | 63 | g = sns.pairplot(wine_sample, kind='reg', plot_kws={"ci": False, "x_jitter": 0.25, "y_jitter": 0.25}, \ 64 | hue='type', diag_kind='hist', diag_kws={"bins": 10, "alpha": 1.0}, palette=dict(red="red", white="white"), \ 65 | markers=["o", "s"], vars=['quality', 'alcohol', 'residual_sugar']) 66 | sns.set_style({'legend.frameon': True,'legend.numpoints': 0,'legend.scatterpoints': 0}) 67 | wine_all_plot = sns.pairplot(wine, kind='reg', hue='type', palette=dict(red="red", white="white"), markers=["o", "s"], vars=['quality', 'alcohol', 'residual_sugar']) 68 | wine_sample_plot = sns.pairplot(wine_sample, kind='reg', hue='type', palette=dict(red="red", white="white"), markers=["o", "s"], vars=['quality', 'alcohol', 'residual_sugar']) 69 | 70 | wine['ln_fixed_acidity'] = np.log(wine.ix[:, 'fixed_acidity']) 71 | sns.distplot(wine.ix[:, 'fixed_acidity']) 72 | sns.distplot(wine.ix[:, 'ln_fixed_acidity']) 73 | print(g) 74 | plt.suptitle('Histograms and Scatter Plots of Quality, Alcohol, and Residual Sugar', fontsize=14, \ 75 | horizontalalignment='center', verticalalignment='top', 76 | x=0.5, y=0.999) 77 | #plt.show() 78 | 79 | # Look at the distribution of quality by wine type 80 | red_wine = wine.ix[wine['type']=='red', 'quality'] 81 | white_wine = wine.ix[wine['type']=='white', 'quality'] 82 | 83 | sns.set_style("dark") 84 | print(sns.distplot(red_wine, \ 85 | norm_hist=True, kde=False, color="red", label="Red wine")) 86 | print(sns.distplot(white_wine, \ 87 | norm_hist=True, kde=False, color="white", label="White wine")) 88 | sns.axlabel("Quality Score", "Density") 89 | plt.title("Distribution of Quality by Wine Type") 90 | plt.legend() 91 | #plt.show() 92 | 93 | # Test whether mean quality is different between red and white wines 94 | print(wine.groupby(['type'])[['quality']].agg(['std', 'mean'])) 95 | tstat, pvalue, df = sm.stats.ttest_ind(red_wine, white_wine) 96 | print('tstat: %.3f pvalue: %.4f' % (tstat, pvalue)) 97 | 98 | # Fit a multivariate linear regression model 99 | #wine_standardized = (wine - wine.mean()) / wine.std() 100 | #formula_all = 'quality ~ alcohol + chlorides + citric_acid + density + fixed_acidity + free_sulfur_dioxide + pH + residual_sugar + sulphates + total_sulfur_dioxide + volatile_acidity' 101 | my_formula = 'quality ~ alcohol + chlorides + citric_acid + density + fixed_acidity + free_sulfur_dioxide + pH + residual_sugar + sulphates + total_sulfur_dioxide + volatile_acidity' 102 | #formula_all = 'quality ~ fixed_acidity + volatile_acidity + citric_acid + residual_sugar + chlorides + free_sulfur_dioxide + total_sulfur_dioxide + density + pH + sulphates + alcohol' 103 | #formula = 
'quality ~ residual_sugar + alcohol' 104 | lm = ols(my_formula, data=wine).fit() 105 | #lm = glm(my_formula, data=wine, family=sm.families.Gaussian()).fit() 106 | #lm = smf.glm(formula_all, data=wine_standardized, family=sm.families.Gaussian()).fit() 107 | print(lm.summary()) 108 | print("\nQuantities you can extract from the result:\n%s" % dir(lm)) 109 | print("\nCoefficients:\n%s" % lm.params) 110 | print("\nCoefficient Std Errors:\n%s" % lm.bse) 111 | print("\nAdj. R-squared:\n%.2f" % lm.rsquared_adj) 112 | print("\nF-statistic: %.1f P-value: %.2f" % (lm.fvalue, lm.f_pvalue)) 113 | print("\nNumber of obs: %d Number of fitted values: %s" % (lm.nobs, len(lm.fittedvalues))) 114 | 115 | # Fit a multivariate linear model with standardized independent variables 116 | dependent_variable = wine['quality'] 117 | independent_variables = wine[wine.columns.difference(['quality', 'type', 'in_sample'])] 118 | independent_variables_standardized = (independent_variables - independent_variables.mean()) / independent_variables.std() 119 | wine_standardized = pd.concat([dependent_variable, independent_variables_standardized], axis=1) 120 | lm_standardized = ols(my_formula, data=wine_standardized).fit() 121 | print(lm_standardized.summary()) 122 | 123 | # Predict quality scores for "new" observations 124 | new_observations = wine.ix[wine.index.isin(range(10)), independent_variables.columns] 125 | y_predicted = lm.predict(new_observations) 126 | y_predicted_rounded = [round(score, 2) for score in y_predicted] 127 | print(y_predicted_rounded) 128 | -------------------------------------------------------------------------------- /statistics/customer_churn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | import statsmodels.api as sm 7 | import statsmodels.formula.api as smf 8 | 9 | # Read the data set into a pandas DataFrame 10 | churn = pd.read_csv('churn.csv', sep=',', header=0) 11 | 12 | churn.columns = [heading.lower() for heading in \ 13 | churn.columns.str.replace(' ', '_').str.replace("\'", "").str.strip('?')] 14 | 15 | churn['churn01'] = np.where(churn['churn'] == 'True.', 1., 0.)
16 | print(churn.head()) 17 | print(churn.describe()) 18 | 19 | 20 | # Calculate descriptive statistics for grouped data 21 | print(churn.groupby(['churn'])[['day_charge', 'eve_charge', 'night_charge', 'intl_charge', 'account_length', 'custserv_calls']].agg(['count', 'mean', 'std'])) 22 | 23 | # Specify different statistics for different variables 24 | print(churn.groupby(['churn']).agg({'day_charge' : ['mean', 'std'], 25 | 'eve_charge' : ['mean', 'std'], 26 | 'night_charge' : ['mean', 'std'], 27 | 'intl_charge' : ['mean', 'std'], 28 | 'account_length' : ['count', 'min', 'max'], 29 | 'custserv_calls' : ['count', 'min', 'max']})) 30 | 31 | # Create total_charges, split it into 5 groups, and 32 | # calculate statistics for each of the groups 33 | churn['total_charges'] = churn['day_charge'] + churn['eve_charge'] + \ 34 | churn['night_charge'] + churn['intl_charge'] 35 | factor_cut = pd.cut(churn.total_charges, 5, precision=2) 36 | def get_stats(group): 37 | return {'min' : group.min(), 'max' : group.max(), 38 | 'count' : group.count(), 'mean' : group.mean(), 39 | 'std' : group.std()} 40 | grouped = churn.custserv_calls.groupby(factor_cut) 41 | print(grouped.apply(get_stats).unstack()) 42 | 43 | # Split account_length into quantiles and 44 | # calculate statistics for each of the quantiles 45 | factor_qcut = pd.qcut(churn.account_length, [0., 0.25, 0.5, 0.75, 1.]) 46 | grouped = churn.custserv_calls.groupby(factor_qcut) 47 | print(grouped.apply(get_stats).unstack()) 48 | 49 | # Create binary/dummy indicator variables for intl_plan and vmail_plan 50 | # and join them with the churn column in a new DataFrame 51 | intl_dummies = pd.get_dummies(churn['intl_plan'], prefix='intl_plan') 52 | vmail_dummies = pd.get_dummies(churn['vmail_plan'], prefix='vmail_plan') 53 | churn_with_dummies = churn[['churn']].join([intl_dummies, vmail_dummies]) 54 | print(churn_with_dummies.head()) 55 | 56 | # Split total_charges into quartiles, create binary indicator variables 57 | # for each of the quartiles, and add them to the churn DataFrame 58 | qcut_names = ['1st_quartile', '2nd_quartile', '3rd_quartile', '4th_quartile'] 59 | total_charges_quartiles = pd.qcut(churn.total_charges, 4, labels=qcut_names) 60 | dummies = pd.get_dummies(total_charges_quartiles, prefix='total_charges') 61 | churn_with_dummies = churn.join(dummies) 62 | print(churn_with_dummies.head()) 63 | 64 | # Create pivot tables 65 | print(churn.pivot_table(['total_charges'], index=['churn', 'custserv_calls'])) 66 | print(churn.pivot_table(['total_charges'], index=['churn'], columns=['custserv_calls'])) 67 | print(churn.pivot_table(['total_charges'], index=['custserv_calls'], columns=['churn'], \ 68 | aggfunc='mean', fill_value='NaN', margins=True)) 69 | 70 | # Fit a logistic regression model 71 | dependent_variable = churn['churn01'] 72 | independent_variables = churn[['account_length', 'custserv_calls', 'total_charges']] 73 | independent_variables_with_constant = sm.add_constant(independent_variables, prepend=True) 74 | logit_model = sm.Logit(dependent_variable, independent_variables_with_constant).fit() 75 | #logit_model = smf.glm(output_variable, input_variables, family=sm.families.Binomial()).fit() 76 | print(logit_model.summary()) 77 | print("\nQuantities you can extract from the result:\n%s" % dir(logit_model)) 78 | print("\nCoefficients:\n%s" % logit_model.params) 79 | print("\nCoefficient Std Errors:\n%s" % logit_model.bse) 80 | #logit_marginal_effects = logit_model.get_margeff(method='dydx', at='overall') 81 | 
#print(logit_marginal_effects.summary()) 82 | 83 | print("\ninvlogit(-7.2205 + 0.0012*mean(account_length) + 0.4443*mean(custserv_calls) + 0.0729*mean(total_charges))") 84 | 85 | def inverse_logit(model_formula): 86 | from math import exp 87 | return (1.0 / (1.0 + exp(-model_formula)))*100.0 88 | 89 | at_means = float(logit_model.params[0]) + \ 90 | float(logit_model.params[1])*float(churn['account_length'].mean()) + \ 91 | float(logit_model.params[2])*float(churn['custserv_calls'].mean()) + \ 92 | float(logit_model.params[3])*float(churn['total_charges'].mean()) 93 | 94 | print(churn['account_length'].mean()) 95 | print(churn['custserv_calls'].mean()) 96 | print(churn['total_charges'].mean()) 97 | print(at_means) 98 | print("Probability of churn when independent variables are at their mean values: %.2f" % inverse_logit(at_means)) 99 | 100 | cust_serv_mean = float(logit_model.params[0]) + \ 101 | float(logit_model.params[1])*float(churn['account_length'].mean()) + \ 102 | float(logit_model.params[2])*float(churn['custserv_calls'].mean()) + \ 103 | float(logit_model.params[3])*float(churn['total_charges'].mean()) 104 | 105 | cust_serv_mean_minus_one = float(logit_model.params[0]) + \ 106 | float(logit_model.params[1])*float(churn['account_length'].mean()) + \ 107 | float(logit_model.params[2])*float(churn['custserv_calls'].mean()-1.0) + \ 108 | float(logit_model.params[3])*float(churn['total_charges'].mean()) 109 | 110 | print(cust_serv_mean) 111 | print(churn['custserv_calls'].mean()-1.0) 112 | print(cust_serv_mean_minus_one) 113 | print("Change in probability of churn when custserv_calls decreases by 1: %.2f" % (inverse_logit(cust_serv_mean) - inverse_logit(cust_serv_mean_minus_one))) 114 | 115 | # Predict churn for "new" observations 116 | new_observations = churn.ix[churn.index.isin(range(10)), independent_variables.columns] 117 | new_observations_with_constant = sm.add_constant(new_observations, prepend=True) 118 | y_predicted = logit_model.predict(new_observations_with_constant) 119 | y_predicted_rounded = [round(score, 2) for score in y_predicted] 120 | print(y_predicted_rounded) 121 | 122 | # Fit a logistic regression model with standardized input variables 123 | output_variable = churn['churn01'] 124 | vars_to_keep = churn[['account_length', 'custserv_calls', 'total_charges']] 125 | inputs_standardized = (vars_to_keep - vars_to_keep.mean()) / vars_to_keep.std() 126 | input_variables = sm.add_constant(inputs_standardized, prepend=False) 127 | logit_model = sm.Logit(output_variable, input_variables).fit() 128 | #logit_model = smf.glm(output_variable, input_variables, family=sm.families.Binomial()).fit() 129 | print(logit_model.summary()) 130 | print(logit_model.params) 131 | print(logit_model.bse) 132 | #logit_marginal_effects = logit_model.get_margeff(method='dydx', at='overall') 133 | #print(logit_marginal_effects.summary()) 134 | 135 | # Predict output value for a new observation based on its mean standardized input values 136 | input_variables = [0., 0., 0., 1.]
137 | predicted_value = logit_model.predict(input_variables) 138 | print("Predicted value: %.5f" % predicted_value) -------------------------------------------------------------------------------- /first_script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from math import exp, log, sqrt 3 | import re 4 | from datetime import date, time, datetime, timedelta 5 | from operator import itemgetter 6 | import sys 7 | import glob 8 | import os 9 | 10 | # Print a simple string 11 | print("Output #1: I'm excited to learn Python.") 12 | 13 | # Add two numbers together 14 | x = 4 15 | y = 5 16 | z = x + y 17 | print("Output #2: Four plus five equals {0:d}.".format(z)) 18 | 19 | # Add two lists together 20 | a = [1, 2, 3, 4] 21 | b = ["first", "second", "third", "fourth"] 22 | c = a + b 23 | print("Output #3: {0}, {1}, {2}".format(a, b, c)) 24 | 25 | # INTEGERS 26 | x = 9 27 | print("Output #4: {0}".format(x)) 28 | print("Output #5: {0}".format(3**4)) 29 | print("Output #6: {0}".format(int(8.3)/int(2.7))) 30 | 31 | # FLOATING-POINT NUMBERS 32 | print("Output #7: {0:.3f}".format(8.3/2.7)) 33 | y = 2.5*4.8 34 | print("Output #8: {0:.1f}".format(y)) 35 | r = 8/float(3) 36 | print("Output #9: {0:.2f}".format(r)) 37 | print("Output #10: {0:.4f}".format(8.0/3)) 38 | 39 | # Some mathematical functions available in the math module 40 | print("Output #11: {0:.4f}".format(exp(3))) 41 | print("Output #12: {0:.2f}".format(log(4))) 42 | print("Output #13: {0:.1f}".format(sqrt(81))) 43 | 44 | # STRINGS 45 | # A string with single quotes, so include a backslash before the single quote 46 | print("Output #14: {0:s}".format('I\'m enjoying learning Python')) 47 | 48 | # A one-line string, but if the string is long and running off the page on the right 49 | # you can use a "\" to separate the long string into smaller strings on separate lines 50 | print("Output #15: {0:s}".format("This is a long string. Without the backslash \ 51 | it would run off of the page on the right in the text editor and be very \ 52 | difficult to read and edit. By using the backslash you can split the long \ 53 | string into smaller strings on separate lines so that the whole string is easy \ 54 | to view in the text editor.")) 55 | 56 | # Use triple single or double quotes if you want the string to span multiple lines 57 | # and you don't want to use the "\" 58 | print("Output #16: {0:s}".format('''You can use triple single quotes 59 | for multi-line comment strings''')) 60 | 61 | print("Output #17: {0:s}".format("""You can also use triple double quotations 62 | for multi-line comment strings""")) 63 | 64 | # Add two strings together 65 | string1 = "This is a " 66 | string2 = "short string."
67 | sentence = string1 + string2 68 | print("Output #18: {0:s}".format(sentence)) 69 | 70 | # Repeat a string four times 71 | print("Output #19: {0:s} {1:s}{2:s}".format("She is", "very "*4, "beautiful.")) 72 | 73 | # Determine the number of characters in a string, including spaces and punctuation 74 | m = len(sentence) 75 | print("Output #20: {0:d}".format(m)) 76 | 77 | # split() 78 | string1 = "My deliverable is due in May" 79 | string1_list1 = string1.split() 80 | string1_list2 = string1.split(" ", 2) 81 | print("Output #21: {0}".format(string1_list1)) 82 | print("Output #22: FIRST PIECE:{0} SECOND PIECE:{1} THIRD PIECE:{2}"\ 83 | .format(string1_list2[0], string1_list2[1], string1_list2[2])) 84 | 85 | string2 = "Your,deliverable,is,due,in,June" 86 | string2_list = string2.split(',') 87 | print("Output #23: {0}".format(string2_list)) 88 | print("Output #24: {0} {1} {2}".format(string2_list[1], string2_list[5], string2_list[-1])) 89 | 90 | # join() 91 | print("Output #25: {0}".format(','.join(string2_list))) 92 | 93 | # strip() 94 | string3 = " Remove unwanted characters from this string\t\t \n" 95 | print("Output #26: string3: {0:s}".format(string3)) 96 | string3_lstrip = string3.lstrip() 97 | print("Output #27: lstrip: {0:s}".format(string3_lstrip)) 98 | string3_rstrip = string3.rstrip() 99 | print("Output #28: rstrip: {0:s}".format(string3_rstrip)) 100 | string3_strip = string3.strip() 101 | print("Output #29: strip: {0:s}".format(string3_strip)) 102 | 103 | string4 = "$$Here's another string that has unwanted characters.__---++" 104 | print("Output #30: {0:s}".format(string4)) 105 | string4 = "$$The unwanted characters have been removed.__---++" 106 | string4_strip = string4.strip('$_-+') 107 | print("Output #31: {0:s}".format(string4_strip)) 108 | 109 | # replace() 110 | string5 = "Let's replace the spaces in this sentence with other characters." 111 | string5_replace = string5.replace(" ", "!@!") 112 | print("Output #32 (with !@!): {0:s}".format(string5_replace)) 113 | string5_replace = string5.replace(" ", ",") 114 | print("Output #33 (with commas): {0:s}".format(string5_replace)) 115 | 116 | # lower(), upper(), capitalize() 117 | string6 = "Here's WHAT Happens WHEN You Use lower." 118 | print("Output #34: {0:s}".format(string6.lower())) 119 | 120 | string7 = "Here's what Happens when You Use UPPER." 121 | print("Output #35: {0:s}".format(string7.upper())) 122 | 123 | string8 = "here's WHAT Happens WHEN you use Capitalize." 124 | print("Output #36: {0:s}".format(string8.capitalize())) 125 | string8_list = string8.split() 126 | print("Output #37 (on each word):") 127 | for word in string8_list: 128 | print("{0:s}".format(word.capitalize())) 129 | 130 | # REGULAR EXPRESSIONS / PATTERN MATCHING 131 | # Count the number of times a pattern appears in a string 132 | string = "The quick brown fox jumps over the lazy dog." 133 | string_list = string.split() 134 | pattern = re.compile(r"The", re.I) 135 | count = 0 136 | for word in string_list: 137 | if pattern.search(word): 138 | count += 1 139 | print("Output #38: {0:d}".format(count)) 140 | 141 | # Print the pattern each time it is found in the string 142 | string = "The quick brown fox jumps over the lazy dog." 
143 | string_list = string.split() 144 | pattern = re.compile(r"(?P<match_word>The)", re.I) 145 | print("Output #39:") 146 | for word in string_list: 147 | if pattern.search(word): 148 | print("{:s}".format(pattern.search(word).group('match_word'))) 149 | 150 | # Substitute the letter "a" for the word "the" in the string 151 | string = "The quick brown fox jumps over the lazy dog." 152 | string_to_find = r"The" 153 | pattern = re.compile(string_to_find, re.I) 154 | print("Output #40: {:s}".format(pattern.sub("a", string))) 155 | 156 | # DATES 157 | # Print today's date, as well as the year, month, and day elements 158 | today = date.today() 159 | print("Output #41: today: {0!s}".format(today)) 160 | print("Output #42: {0!s}".format(today.year)) 161 | print("Output #43: {0!s}".format(today.month)) 162 | print("Output #44: {0!s}".format(today.day)) 163 | current_datetime = datetime.today() 164 | print("Output #45: {0!s}".format(current_datetime)) 165 | 166 | # Calculate a new date using a timedelta 167 | one_day = timedelta(days=-1) 168 | yesterday = today + one_day 169 | print("Output #46: yesterday: {0!s}".format(yesterday)) 170 | eight_hours = timedelta(hours=-8) 171 | print("Output #47: {0!s} {1!s}".format(eight_hours.days, eight_hours.seconds)) 172 | 173 | # Calculate the amount of time between two dates and grab the first element, the number of days 174 | date_diff = today - yesterday 175 | print("Output #48: {0!s}".format(date_diff)) 176 | print("Output #49: {0!s}".format(str(date_diff).split()[0])) 177 | 178 | # Create a string with a specific format from a date object 179 | print("Output #50: {:s}".format(today.strftime('%m/%d/%Y'))) 180 | print("Output #51: {:s}".format(today.strftime('%b %d, %Y'))) 181 | print("Output #52: {:s}".format(today.strftime('%Y-%m-%d'))) 182 | print("Output #53: {:s}".format(today.strftime('%B %d, %Y'))) 183 | 184 | # Create a datetime object with a specific format 185 | # from a string representing a date 186 | date1 = today.strftime('%m/%d/%Y') 187 | date2 = today.strftime('%b %d, %Y') 188 | date3 = today.strftime('%Y-%m-%d') 189 | date4 = today.strftime('%B %d, %Y') 190 | 191 | # Two datetime objects and two date objects 192 | # based on the four strings that have different date formats 193 | print("Output #54: {!s}".format(datetime.strptime(date1, '%m/%d/%Y'))) 194 | print("Output #55: {!s}".format(datetime.strptime(date2, '%b %d, %Y'))) 195 | 196 | # Show the date portion only 197 | print("Output #56: {!s}".format(datetime.date(datetime.strptime\ 198 | (date3, '%Y-%m-%d')))) 199 | print("Output #57: {!s}".format(datetime.date(datetime.strptime\ 200 | (date4, '%B %d, %Y')))) 201 | 202 | # LISTS 203 | # Use square brackets to create a list 204 | # len() counts the number of elements in a list 205 | # max() and min() find the maximum and minimum numbers in numeric lists 206 | # count() counts the number of times a value appears in a list 207 | a_list = [1, 2, 3] 208 | print("Output #58: {}".format(a_list)) 209 | print("Output #59: a_list has {} elements.".format(len(a_list))) 210 | print("Output #60: the maximum value in a_list is {}.".format(max(a_list))) 211 | print("Output #61: the minimum value in a_list is {}.".format(min(a_list))) 212 | another_list = ['printer', 5, ['star', 'circle', 9]] 213 | print("Output #62: {}".format(another_list)) 214 | print("Output #63: another_list also has {} elements.".format(len(another_list))) 215 | print("Output #64: 5 is in another_list {} time.".format(another_list.count(5))) 216 | 217 | # Use list indices to access specific
values in a list 218 | # [0] is the first value; [-1] is the last value 219 | print("Output #65: {}".format(a_list[0])) 220 | print("Output #66: {}".format(a_list[1])) 221 | print("Output #67: {}".format(a_list[2])) 222 | print("Output #68: {}".format(a_list[-1])) 223 | print("Output #69: {}".format(a_list[-2])) 224 | print("Output #70: {}".format(a_list[-3])) 225 | print("Output #71: {}".format(another_list[2])) 226 | print("Output #72: {}".format(another_list[-1])) 227 | 228 | # Use list slices to access a subset of list values 229 | # Do not include the starting index to start from the beginning 230 | # Do not include the ending index to go all of the way to the end 231 | print("Output #73: {}".format(a_list[0:2])) 232 | print("Output #74: {}".format(another_list[:2])) 233 | print("Output #75: {}".format(a_list[1:3])) 234 | print("Output #76: {}".format(another_list[1:])) 235 | 236 | # Use [:] to make a copy of a list 237 | a_new_list = a_list[:] 238 | print("Output #77: {}".format(a_new_list)) 239 | 240 | # Use + to add two or more lists together 241 | a_longer_list = a_list + another_list # to add lists together 242 | print("Output #78: {}".format(a_longer_list)) 243 | 244 | # Use 'in' and 'not in' to check whether specific values are or are not in a list 245 | a = 2 in a_list 246 | print("Output #79: {}".format(a)) 247 | if 2 in a_list: 248 | print("Output #80: 2 is in {}.".format(a_list)) 249 | b = 6 not in a_list 250 | print("Output #81: {}".format(b)) 251 | if 6 not in a_list: 252 | print("Output #82: 6 is not in {}.".format(a_list)) 253 | 254 | # Use append() to add additional values to the end of the list 255 | # Use remove() to remove specific values from the list 256 | # Use pop() to remove values from the end of the list 257 | a_list.append(4) 258 | a_list.append(5) 259 | a_list.append(6) 260 | print("Output #83: {}".format(a_list)) 261 | a_list.remove(5) 262 | print("Output #84: {}".format(a_list)) 263 | a_list.pop() 264 | a_list.pop() 265 | print("Output #85: {}".format(a_list)) 266 | 267 | # Use reverse() to reverse a list, in-place, meaning it changes the list 268 | # To reverse a list without changing the original list, make a copy first 269 | a_list.reverse() 270 | print("Output #86: {}".format(a_list)) 271 | a_list_copy = a_list[:] 272 | a_list_copy.reverse() 273 | print("Output #87: {}".format(a_list_copy)) 274 | 275 | # Use sort() to sort a list, in-place, meaning it changes the list 276 | # To sort a list without changing the original list, make a copy first 277 | unordered_list = [3, 5, 1, 7, 2, 8, 4, 9, 0, 6] 278 | print("Output #88: {}".format(unordered_list)) 279 | list_copy = unordered_list[:] 280 | list_copy.sort() 281 | print("Output #89: {}".format(list_copy)) 282 | print("Output #90: {}".format(unordered_list)) 283 | 284 | # Use sorted() to sort a collection of lists by a position in the lists 285 | my_lists = [[1,2,3,4], [4,3,2,1], [2,4,1,3]] 286 | my_lists_sorted_by_index_3 = sorted(my_lists, key=lambda index_value: index_value[3]) 287 | print("Output #91: {}".format(my_lists_sorted_by_index_3)) 288 | 289 | # Use itemgetter() to sort a collection of lists by two index positions 290 | my_lists = [[123,2,2,444], [22,6,6,444], [354,4,4,678], [236,5,5,678], \ 291 | [578,1,1,290], [461,1,1,290]] 292 | my_lists_sorted_by_index_3_and_0 = sorted(my_lists, key=itemgetter(3,0)) 293 | print("Output #92: {}".format(my_lists_sorted_by_index_3_and_0)) 294 | 295 | # TUPLES 296 | # Use parentheses to create a tuple 297 | my_tuple = ('x', 'y', 'z') 298 | print("Output 
#93: {}".format(my_tuple)) 299 | print("Output #94: my_tuple has {} elements".format(len(my_tuple))) 300 | print("Output #95: {}".format(my_tuple[1])) 301 | longer_tuple = my_tuple + my_tuple 302 | print("Output #96: {}".format(longer_tuple)) 303 | 304 | # Unpack tuples with the left-hand side of an assignment operator 305 | one, two, three = my_tuple 306 | print("Output #97: {0} {1} {2}".format(one, two, three)) 307 | var1 = 'red' 308 | var2 = 'robin' 309 | print("Output #98: {} {}".format(var1, var2)) 310 | # Swap values between variables 311 | var1, var2 = var2, var1 312 | print("Output #99: {} {}".format(var1, var2)) 313 | 314 | # Convert tuples to lists and lists to tuples 315 | my_list = [1, 2, 3] 316 | my_tuple = ('x', 'y', 'z') 317 | print("Output #100: {}".format(tuple(my_list))) 318 | print("Output #101: {}".format(list(my_tuple))) 319 | 320 | # DICTIONARIES 321 | # Use curly braces to create a dictionary 322 | # Use a colon between keys and values in each pair 323 | # len() counts the number of key-value pairs in a dictionary 324 | empty_dict = { } 325 | a_dict = {'one':1, 'two':2, 'three':3} 326 | print("Output #102: {}".format(a_dict)) 327 | print("Output #103: a_dict has {!s} elements".format(len(a_dict))) 328 | another_dict = {'x':'printer', 'y':5, 'z':['star', 'circle', 9]} 329 | print("Output #104: {}".format(another_dict)) 330 | print("Output #105: another_dict also has {!s} elements"\ 331 | .format(len(another_dict))) 332 | 333 | # Use keys to access specific values in a dictionary 334 | print("Output #106: {}".format(a_dict['two'])) 335 | print("Output #107: {}".format(another_dict['z'])) 336 | 337 | # Use copy() to make a copy of a dictionary 338 | a_new_dict = a_dict.copy() 339 | print("Output #108: {}".format(a_new_dict)) 340 | 341 | # Use keys(), values(), and items() to access 342 | # a dictionary's keys, values, and key-value pairs, respectively 343 | print("Output #109: {}".format(a_dict.keys())) 344 | a_dict_keys = a_dict.keys() 345 | print("Output #110: {}".format(a_dict_keys)) 346 | print("Output #111: {}".format(a_dict.values())) 347 | print("Output #112: {}".format(a_dict.items())) 348 | 349 | # Use in, not in, and get to test 350 | # whether a key is in a dictionary 351 | if 'y' in another_dict: 352 | print("Output #114: y is a key in another_dict: {}."\ 353 | .format(another_dict.keys())) 354 | 355 | if 'c' not in another_dict: 356 | print("Output #115: c is not a key in another_dict: {}."\ 357 | .format(another_dict.keys())) 358 | 359 | print("Output #116: {!s}".format(a_dict.get('three'))) 360 | print("Output #117: {!s}".format(a_dict.get('four'))) 361 | print("Output #118: {!s}".format(a_dict.get('four', 'Not in dict'))) 362 | 363 | # Use sorted() to sort a dictionary 364 | # To sort a dictionary without changing the original dictionary, 365 | # make a copy first 366 | print("Output #119: " + str(a_dict)) 367 | dict_copy = a_dict.copy() 368 | ordered_dict1 = sorted(dict_copy.items(), key=lambda item: item[0]) 369 | print("Output #120 (order by keys): {}".format(ordered_dict1)) 370 | ordered_dict2 = sorted(dict_copy.items(), key=lambda item: item[1]) 371 | print("Output #121 (order by values): {}".format(ordered_dict2)) 372 | ordered_dict3 = sorted(dict_copy.items(), key=lambda x: x[1], reverse=True) 373 | print("Output #122 (order by values, descending): {}".format(ordered_dict3)) 374 | ordered_dict4 = sorted(dict_copy.items(), key=lambda x: x[1], reverse=False) 375 | print("Output #123 (order by values, ascending): {}".format(ordered_dict4)) 376 | 377 | 
# CONTROL FLOW 378 | # if-else statement 379 | x = 5 380 | if x > 4 or x != 9: 381 | print("Output #124: {}".format(x)) 382 | else: 383 | print("Output #125: x is not greater than 4") 384 | 385 | # if-elif-else statement 386 | if x > 6: 387 | print("Output #126: x is greater than six") 388 | elif x > 4 and x == 5: 389 | print("Output #127: {}".format(x*x)) 390 | else: 391 | print("Output #128: x is not greater than 4") 392 | 393 | # for loop 394 | y = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', \ 395 | 'Nov', 'Dec'] 396 | z = ['Annie', 'Betty', 'Claire', 'Daphne', 'Ellie', 'Franchesca', 'Greta', \ 397 | 'Holly', 'Isabel', 'Jenny'] 398 | 399 | print("Output #129:") 400 | for month in y: 401 | print("{!s}".format(month)) 402 | 403 | print("Output #130: (index value: name in list)") 404 | for i in range(len(z)): 405 | print("{0!s}: {1:s}".format(i, z[i])) 406 | 407 | print("Output #131: (access elements in y with z's index values)") 408 | for j in range(len(z)): 409 | if y[j].startswith('J'): 410 | print("{!s}".format(y[j])) 411 | 412 | print("Output #132:") 413 | for key, value in another_dict.items(): 414 | print("{0:s}, {1}".format(key, value)) 415 | 416 | # compact for loops 417 | # list, set, and dictionary comprehensions 418 | # Select specific rows using a list comprehension 419 | my_data = [[1,2,3], [4,5,6], [7,8,9]] 420 | rows_to_keep = [row for row in my_data if row[2] > 5] 421 | print("Output #133 (list comprehension): {}".format(rows_to_keep)) 422 | 423 | # Select a set of unique tuples in a list using a set comprehension 424 | my_data = [(1,2,3), (4,5,6), (7,8,9), (7,8,9)] 425 | set_of_tuples1 = {x for x in my_data} 426 | print("Output #134 (set comprehension): {}".format(set_of_tuples1)) 427 | set_of_tuples2 = set(my_data) 428 | print("Output #135 (set function): {}".format(set_of_tuples2)) 429 | 430 | # Select specific key-value pairs using a dictionary comprehension 431 | my_dictionary = {'customer1': 7, 'customer2': 9, 'customer3': 11} 432 | my_results = {key : value for key, value in my_dictionary.items() if \ 433 | value > 10} 434 | print("Output #136 (dictionary comprehension): {}".format(my_results)) 435 | 436 | # while loop 437 | print("Output #137:") 438 | x = 0 439 | while x < 11: 440 | print("{!s}".format(x)) 441 | x += 1 442 | 443 | # FUNCTIONS 444 | # Calculate the mean of a sequence of numeric values 445 | def getMean(numericValues): 446 | return sum(numericValues)/len(numericValues) if len(numericValues) > 0 \ 447 | else float('nan') 448 | 449 | my_list = [2, 2, 4, 4, 6, 6, 8, 8] 450 | print("Output #138 (mean): {!s}".format(getMean(my_list))) 451 | 452 | #import numpy as np 453 | #print np.mean(my_list) 454 | 455 | # EXCEPTIONS 456 | # Calculate the mean of a sequence of numeric values 457 | def getMean(numericValues): 458 | return sum(numericValues)/len(numericValues) 459 | 460 | my_list2 = [ ] 461 | # Short version 462 | try: 463 | print("Output #139: {}".format(getMean(my_list2))) 464 | except ZeroDivisionError as detail: 465 | print("Output #139 (Error): {}".format(float('nan'))) 466 | print("Output #139 (Error): {}".format(detail)) 467 | 468 | # Long version 469 | try: 470 | result = getMean(my_list2) 471 | except ZeroDivisionError as detail: 472 | print("Output #140 (Error): {}".format(float('nan'))) 473 | print("Output #140 (Error): {}".format(detail)) 474 | else: 475 | print("Output #140 (The mean is): {}".format(result)) 476 | finally: 477 | print("Output #140 (Finally): The finally block is executed every time") 478 | 479 | # 
READ A FILE 480 | # Read a single text file 481 | #input_file = sys.argv[1] 482 | 483 | ## Read a text file (older method) ## 484 | #print("Output #141:") 485 | #filereader = open(input_file, 'r', newline='') 486 | #for row in filereader: 487 | # print("{}".format(row.strip())) 488 | #filereader.close() 489 | 490 | ## Read a text file (newer method) ## 491 | #print("Output #142:") 492 | #with open(input_file, 'r', newline='') as filereader: 493 | # for row in filereader: 494 | # print("{}".format(row.strip())) 495 | 496 | #print("Output #143:") 497 | # READ MULTIPLE FILES 498 | # Read multiple text files 499 | #inputPath = sys.argv[1] 500 | #for input_file in glob.glob(os.path.join(inputPath,'*.txt')): 501 | # with open(input_file, 'r', newline='') as filereader: 502 | # for row in filereader: 503 | # print("{}".format(row.strip())) 504 | 505 | # WRITE TO A FILE 506 | # Write to a text file 507 | #my_letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] 508 | #max_index = len(my_letters) 509 | #output_file = sys.argv[1] 510 | #filewriter = open(output_file, 'w') 511 | #for index_value in range(len(my_letters)): 512 | # if index_value < (max_index-1): 513 | # filewriter.write(my_letters[index_value]+'\t') 514 | # else: 515 | # filewriter.write(my_letters[index_value]+'\n') 516 | #filewriter.close() 517 | #print("Output #144: Output written to file") 518 | 519 | # Write to a CSV file 520 | #my_numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 521 | #max_index = len(my_numbers) 522 | #output_file = sys.argv[1] 523 | #filewriter = open(output_file, 'a') 524 | #for index_value in range(len(my_numbers)): 525 | # if index_value < (max_index-1): 526 | # filewriter.write(str(my_numbers[index_value])+',') 527 | # else: 528 | # filewriter.write(str(my_numbers[index_value])+'\n') 529 | #filewriter.close() 530 | #print("Output #145: Output appended to file") 531 | --------------------------------------------------------------------------------