├── README.md ├── LICENSE └── selectcolumns.py /README.md: -------------------------------------------------------------------------------- 1 | # textutil-select-column 2 | textutil-select-column 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 floydhub 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Select a subset of columns from a delimited text file.

Command-line utility: reads the input file with the ``csv`` module, keeps
only the cells whose zero-based column index was requested, and writes the
result to the output file using the same delimiter.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import codecs
import csv

_TRUE_STRINGS = frozenset(['true', 't', 'yes', 'y', '1', 'on'])
_FALSE_STRINGS = frozenset(['false', 'f', 'no', 'n', '0', 'off'])


def str2bool(val):
    """Convert a command-line string to a bool.

    Args:
        val: ``None``, a ``bool``, or a string such as ``'yes'`` / ``'off'``.
            Strings are matched case-insensitively, ignoring surrounding
            whitespace.

    Returns:
        ``False`` for ``None``, the value itself for a ``bool``, otherwise
        ``True``/``False`` according to the recognised spellings.

    Raises:
        argparse.ArgumentTypeError: if the string is not a recognised
            boolean spelling. argparse turns this into a clean usage error
            instead of silently treating the value as ``None``.
    """
    if val is None:
        return False
    if isinstance(val, bool):
        return val
    normalized = val.lower().strip()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got {!r}'.format(val))


def _parse_cols(spec):
    """Turn a comma separated string of indices into a list of ints."""
    return [int(part) for part in spec.split(',')]


def _select_columns(row, wanted):
    """Return the cells of ``row`` whose index is in ``wanted``, in row order."""
    return [cell for idx, cell in enumerate(row) if idx in wanted]


def main(argv=None):
    """Select the specified columns from the input file.

    Args:
        argv: optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Selected columns are written in the order they appear in the input,
    regardless of the order given to ``--cols``. Invalid arguments
    terminate the program through ``parser.error``.
    """
    # Parse command line args
    parser = argparse.ArgumentParser(description='Selects the given columns')

    parser.add_argument(
        '-i', '--input', required=True,
        help='Path to input file')
    parser.add_argument(
        '-c', '--cols', required=True, type=str,
        help='Comma separated list of columns indices to select')
    # Optional so that the tab default is actually reachable.
    parser.add_argument(
        '-d', '--delimiter', required=False, default='\t',
        help='Column delimiter')
    parser.add_argument(
        '-header', '--hasheader', required=False, type=str2bool,
        default=False, help='File has header row?')
    parser.add_argument('-o', '--output', required=True, help='Path to output file')

    args = parser.parse_args(argv)

    # Unescape the delimiter (e.g. the two characters "\t" become a tab)
    args.delimiter = codecs.decode(args.delimiter, "unicode_escape")
    if len(args.delimiter) != 1:
        parser.error('delimiter must be exactly one character')

    # Parse cols into list of ints
    try:
        args.cols = _parse_cols(args.cols)
    except ValueError:
        parser.error('cols must be a comma separated list of integers')
    # Set membership keeps the per-cell check O(1).
    wanted = frozenset(args.cols)

    print("\nArguments:")
    for name, value in vars(args).items():
        print("{}={}".format(name, value))

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for both readers and writers.
    with open(args.input, 'r', newline='') as inputfile, \
            open(args.output, 'w', newline='') as outputfile:
        reader = csv.reader(inputfile, delimiter=args.delimiter)
        writer = csv.writer(outputfile, delimiter=args.delimiter)

        # If has header, apply the same column selection to it
        if args.hasheader:
            headers = next(reader, None)
            if headers:
                writer.writerow(_select_columns(headers, wanted))

        print("\nProcessing input")
        writer.writerows(_select_columns(row, wanted) for row in reader)

    print("\nDone. Bye!")


if __name__ == '__main__':
    main()