├── .gitignore
├── tests
    ├── __init__.py
    ├── help_tests.py
    └── processor_tests.py
├── MANIFEST.in
├── requirements.txt
├── fixtures
    ├── sample.tsv
    ├── sample.csv
    ├── sample.psv
    └── au.csv
├── .travis.yml
├── makefile
├── setup.py
├── LICENSE
├── csvfilter
    └── __init__.py
├── README.rst
└── bin
    └── csvfilter


/.gitignore:
--------------------------------------------------------------------------------
1 | dist/


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | include LICENSE
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | envoy==0.0.2
2 | nose==1.3.4
3 | 


--------------------------------------------------------------------------------
/fixtures/sample.tsv:
--------------------------------------------------------------------------------
1 | This	is	a	test
2 | And	so	is	this
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - 2.6
4 |   - 2.7
5 | install:
6 | script:
7 |   - pip install -r requirements.txt
8 |   - ./setup.py develop
9 |   - nosetests


--------------------------------------------------------------------------------
/fixtures/sample.csv:
--------------------------------------------------------------------------------
1 | "1","Terry, Smith","Flat 1, Mansion House, London N12 4RT"
2 | "2","Doe, John","73 Woodside Road, Manchester M3 2ER"
3 | "3","Partridge, Alan","Lynton Travel Tavern, Norwich"


--------------------------------------------------------------------------------
/fixtures/sample.psv:
--------------------------------------------------------------------------------
1 | "1"|"Terry, Smith"|"Flat 1, Mansion House, London N12 4RT"
2 | "2"|"Doe, John"|"73 Woodside Road, Manchester M3 2ER"
3 | "3"|"Partridge, Alan"|"Lynton Travel Tavern, Norwich"


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | install: clean
 2 | 	pip install -r requirements.txt
 3 | 	python setup.py develop
 4 | 
 5 | clean:
 6 | 	find . -name "*.pyc" -delete
 7 | 	-rm -rf *.egg-info dist
 8 | 
 9 | release:
10 | 	python setup.py sdist upload
11 | 	git push --tags
12 | 


--------------------------------------------------------------------------------
/tests/help_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import envoy
 3 | 
 4 | from csvfilter import VERSION
 5 | 
 6 | 
 7 | class HelpOutputTests(unittest.TestCase):
 8 | 
 9 |     def setUp(self):
10 |         self.r = envoy.run('csvfilter -h')
11 | 
12 |     def test_help_output(self):
13 |         self.assertEqual(0, self.r.status_code)
14 | 
15 |     def test_version_is_present(self):
16 |         self.assertTrue(VERSION in self.r.std_out)
17 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from setuptools import setup, find_packages
 3 | from csvfilter import VERSION
 4 | 
 5 | 
 6 | setup(name='csvfilter',
 7 |       version=VERSION,
 8 |       url='https://github.com/codeinthehole/csvfilter',
 9 |       author="David Winterbottom",
10 |       author_email="david.winterbottom@gmail.com",
11 |       description="A command-line utility and Python API for manipulating CSV data, eg plucking columns and reordering them.  It's a bit like the unix utility 'cut'",
12 |       license='MIT',
13 |       long_description=open('README.rst').read(),
14 |       packages=find_packages(exclude=["tests*"]),
15 |       scripts=['bin/csvfilter'])
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 csvfilter David Winterbottom <david.winterbottom@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/csvfilter/__init__.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import sys
 3 | 
 4 | # Avoid issue with fields larger than max size
 5 | csv.field_size_limit(sys.maxsize)
 6 | 
 7 | VERSION = '0.3.2'
 8 | 
 9 | 
class Processor(object):
    """Filter the columns of CSV rows read from an iterable of lines.

    ``fields`` is an iterable of zero-based column indexes.  By default the
    listed columns are plucked in the order given; with ``invert=True`` they
    are dropped instead.  When ``fields`` is empty or ``None`` every row is
    passed through unchanged.  ``delimiter`` and ``quotechar`` are handed to
    ``csv.reader``.  ``skip`` is the number of leading rows (CSV records) to
    discard.
    """

    def __init__(self, fields=None, invert=False, delimiter=',',
            quotechar='"', skip=0):
        # Materialise the indexes: a one-shot iterable (such as the result of
        # map() on Python 3) would otherwise be exhausted by the first row.
        self.fields = list(fields) if fields is not None else None
        self.invert = invert
        self.delimiter = delimiter
        self.quotechar = quotechar
        self.skip = skip
        self.validators = []

    def add_validator(self, f):
        """Register ``f``, a callable taking an output row and returning a
        truthy value when the row should be kept."""
        self.validators.append(f)

    def process(self, file_handle):
        """Yield the filtered rows parsed from ``file_handle``.

        ``file_handle`` is any iterable of lines accepted by ``csv.reader``.
        Rows whose filtered output is empty are dropped without consulting
        the validators; remaining rows are yielded only if every validator
        accepts them.
        """
        reader = csv.reader(file_handle, delimiter=self.delimiter,
            quotechar=self.quotechar)
        fields = self.fields
        # Set membership keeps the per-cell test cheap when dropping columns.
        dropped = set(fields) if fields and self.invert else None
        # Count records rather than using reader.line_num: line_num counts
        # physical lines, so a quoted field spanning several lines would
        # throw the skip count off.
        for row_num, row in enumerate(reader, 1):
            if row_num <= self.skip:
                continue
            if not fields:
                output = row
            elif self.invert:
                output = [e for i, e in enumerate(row) if i not in dropped]
            else:
                # Ignore indexes (positive or negative) the row is too short
                # to satisfy instead of raising IndexError.
                output = [row[i] for i in fields
                          if -len(row) <= i < len(row)]
            # Empty rows (e.g. blank input lines) are never emitted, so drop
            # them before validation; validators may index into the row.
            if not output:
                continue
            if self.is_valid(output):
                yield output

    def is_valid(self, row):
        """Return True when every registered validator accepts ``row``."""
        return all(validator(row) for validator in self.validators)
48 | 


--------------------------------------------------------------------------------
/fixtures/au.csv:
--------------------------------------------------------------------------------
 1 | "Pcode","Locality","State","Comments","DeliveryOffice","PresortIndicator","ParcelZone","BSPnumber","BSPname","Category","Lat","Long"
 2 | "02,00","AUSTRALIAN NATIONAL UNIVERSITY","ACT","PO Boxes","AUSTRALIAN NATIONAL UNI LPO x","150","N2 ","019","CANBERRA","Post Office Boxes                                 ","-35.277272","149.117136"
 3 | "0221","BARTON","ACT","LVR Special Mailing",,"150","N2 ","019","CANBERRA","LVR                                               ","-35.201372","149.095065"
 4 | "0800","DARWIN","NT",,"DARWIN DELIVERY CENTRE        ","085","NT1","001","DARWIN","Delivery Area                                     ","-12.801028","130.955789"
 5 | "0801","DARWIN","NT","GPO Boxes","DARWIN GPO DELIVERY ANNEXE    ","085","NT1","001","DARWIN","Post Office Boxes                                 ","-12.801028","130.955789"
 6 | "0804","PARAP","NT","PO Boxes","PARAP LPO                     ","085","NT1","001","DARWIN","Post Office Boxes                                 ","-12.432181","130.84331"
 7 | "0810","ALAWA","NT",,"DARWIN DELIVERY CENTRE        ","085","NT1","001","DARWIN","Delivery Area                                     ","-12.378451","130.877014"
 8 | "0810","BRINKIN","NT",,"DARWIN DELIVERY CENTRE        ","085","NT1","001","DARWIN","Delivery Area                                     ","-12.378451","130.877014"
 9 | "0810","CASUARINA","NT",,"DARWIN DELIVERY CENTRE        ","085","NT1","001","DARWIN","Delivery Area                                     ","-12.378451","130.877014"
10 | "0810","COCONUT GROVE","NT",,"DARWIN DELIVERY CENTRE        ","085","NT1","001","DARWIN","Delivery Area                                     ","-12.378451","130.877014"
11 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ================================
 2 | csvfilter - Simple CSV filtering
 3 | ================================
 4 | 
 5 | A simple wrapper around Python's CSV module to provide a command-line tool for
 6 | filtering columns from a CSV file.  This is useful as standard tools like awk
 7 | can't easily handle the quoting and escaping used in CSV files.  
 8 | 
 9 | Basically, it's a bit like ``cut`` but for CSVs.
10 | 
11 | Install
12 | -------
13 | 
14 | From PyPI::
15 | 
16 |     pip install csvfilter
17 | 
18 | Use
19 | ---
20 | 
21 | Pluck fields 1, 3 and 5 from ``in.csv``::
22 | 
23 |     csvfilter -f 1,3,5 in.csv > out.csv
24 | 
25 | Pluck all fields apart from column 2 from STDIN::
26 | 
27 |     cat in.csv | csvfilter -f 2 -i > out.csv
28 | 
29 | Convert pipe-separated file to comma-separated (by default, output is 
30 | comma-separated)::
31 | 
32 |     csvfilter -d"|" in.psv > out.csv 
33 | 
34 | Skip that pesky header row::
35 | 
36 |     cat in.csv | csvfilter --skip=1
37 | 
38 | As you can see, CSV data can be supplied through STDIN or by running ``csvfilter`` directly on a
39 | file.
40 | 
41 | Help is in the usual place::
42 | 
43 |     $ csvfilter --help
44 | 
45 |     Usage: csvfilter [options] [inputfile]
46 | 
47 |     Source: https://github.com/codeinthehole/csvfilter/
48 | 
49 |     Options:
50 |     -h, --help            show this help message and exit
51 |     -f FIELDS, --fields=FIELDS
52 |                             Specify which fields to pluck
53 |     -s SKIP, --skip=SKIP  Number of rows to skip
54 |     -d DELIMITER, --delimiter=DELIMITER
55 |                             Delimiter of incoming CSV data
56 |     -q QUOTECHAR, --quotechar=QUOTECHAR
57 |                             Quotechar of incoming CSV data
58 | 
59 |     -i, --inverse         Invert the filter - ie drop the selected fields
60 |     --out-delimiter=OUT_DELIMITER
61 |                             Delimiter to use for output
62 |     --out-quotechar=OUT_QUOTECHAR
63 |                             Quote character to use for output
64 | 
65 | Report issues
66 | -------------
67 | 
68 | Use the `Github issue tracker`_ or, better still...
69 | 
70 | .. _`Github issue tracker`: https://github.com/codeinthehole/csvfilter/issues
71 | 
72 | Contribute
73 | ----------
74 | 
75 | After cloning, install the testing requirements::
76 | 
77 |     make 
78 | 
79 | Run the tests with::
80 | 
81 |     nosetests
82 | 
83 | and, if it helps, use the fixture files to test your amendments::
84 | 
85 |     cat fixtures/au.csv | csvfilter -f 3,1,2 -s 1
86 |     csvfilter fixtures/au.csv -f 1,2 -i
87 | 
88 | Have fun.
89 | 


--------------------------------------------------------------------------------
/bin/csvfilter:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | from optparse import OptionParser
 4 | import csv
 5 | 
 6 | from csvfilter import Processor, VERSION
 7 | 
 8 | 
 9 | def main(options, args):
10 |     infile, outfile = get_files(options, args)
11 |     processor = get_processor(options, args)
12 |     writer = get_writer(outfile, options)
13 |     pump(processor, infile, writer)
14 | 
15 | 
def get_files(options, args):
    """Return the ``(input, output)`` file objects for this run.

    Input is the file named by the first positional argument, opened for
    reading, or standard input when no argument was given.  Output is always
    standard output.
    """
    if len(args) > 0:
        source = open(args[0], 'r')
    else:
        source = sys.stdin
    return source, sys.stdout
19 | 
20 | 
def get_processor(options, args):
    """Build a ``Processor`` from the parsed command-line options.

    ``options.fields`` is a comma-separated string of column indexes, or a
    falsy value when no field filter was requested.
    """
    if options.fields:
        # Build a real list: on Python 3 map() returns a one-shot iterator
        # that would be exhausted after the first row was processed.
        fields = [int(index) for index in options.fields.split(',')]
    else:
        fields = None
    return Processor(fields=fields,
                     skip=options.skip,
                     delimiter=options.delimiter,
                     quotechar=options.quotechar,
                     invert=options.inverse)
28 | 
29 | 
def get_writer(outfile, options):
    """Return a ``csv.writer`` on ``outfile`` using the output delimiter and
    quote character taken from ``options``."""
    dialect_args = {
        'delimiter': options.out_delimiter,
        'quotechar': options.out_quotechar,
    }
    return csv.writer(outfile, **dialect_args)
33 | 
34 | 
def pump(processor, infile, writer):
    """Write every row that ``processor`` produces from ``infile`` to
    ``writer``, one row at a time."""
    emit = writer.writerow
    for record in processor.process(infile):
        emit(record)
38 | 
39 | 
def process_delimiter(option, opt, value, parser):
    """optparse callback that stores the input delimiter on the parser.

    The two-character text backslash-t is translated into a real tab
    character; any other value is stored unchanged.
    """
    parser.values.delimiter = '\t' if value == '\\t' else value
44 | 
45 | 
if __name__ == '__main__':
    # Only the version/source tail is %-formatted here; the "%prog"
    # placeholder in the first part is left for optparse to expand.
    details = "\n\nVersion: %s\nSource: %s" % (
        VERSION, 'https://github.com/codeinthehole/csvfilter/')
    usage = "Usage: %prog [options] [inputfile]" + details

    parser = OptionParser(usage=usage)

    # Column selection and row skipping.
    parser.add_option(
        '-f', '--fields', dest='fields', default=None,
        help="Specify which fields to pluck")
    parser.add_option(
        '-s', '--skip', dest='skip', default=0, type='int',
        help="Number of rows to skip")

    # Input dialect.  The delimiter goes through a callback so that a
    # literal backslash-t can be given on the command line to mean a tab.
    parser.add_option(
        '-d', '--delimiter', type='string', default=',',
        action='callback', callback=process_delimiter,
        help="Delimiter of incoming CSV data")
    parser.add_option(
        '-q', '--quotechar', dest='quotechar', default='"',
        help="Quotechar of incoming CSV data")

    parser.add_option(
        '-i', '--inverse', dest='inverse', default=False,
        action='store_true',
        help="Invert the filter - ie drop the selected fields")

    # Output dialect.
    parser.add_option(
        '--out-delimiter', dest='out_delimiter', default=',',
        help="Delimiter to use for output")
    parser.add_option(
        '--out-quotechar', dest='out_quotechar', default='"',
        help="Quote character to use for output")

    options, args = parser.parse_args()
    main(options, args)
67 | 


--------------------------------------------------------------------------------
/tests/processor_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from csvfilter import Processor
 4 | 
# Three rows of three single-letter cells, in comma-, pipe- and tab-delimited
# form.  Each list item is one input line; the lists are passed to
# Processor.process in place of an open file.
SAMPLE_CSV = ['a,b,c', 'd,e,f', 'g,h,i']
SAMPLE_PSV = ['a|b|c', 'd|e|f', 'g|h|i']
SAMPLE_TSV = ['a	b	c', 'd	e	f', 'g	h	i']

# A header line plus two data lines with every non-empty cell wrapped in
# double quotes; the last line contains one empty, unquoted cell.
SAMPLE_QUOTED_CSV = [
'"Pcode","Locality","State","Comments","DeliveryOffice","PresortIndicator","ParcelZone","BSPnumber","BSPname","Category","Lat","Long"',
'"0200","AUSTRALIAN NATIONAL UNIVERSITY","ACT","PO Boxes","AUSTRALIAN NATIONAL UNI LPO x","150","N2 ","019","CANBERRA","Post Office Boxes ","-35.277272","149.117136"',
'"0221","BARTON","ACT","LVR Special Mailing",,"150","N2 ","019","CANBERRA","LVR ","-35.201372","149.095065"'
]

# The same data as SAMPLE_QUOTED_CSV but using "|" as the quote character.
SAMPLE_CUSTOM_QUOTED_CSV = [
'|Pcode|,|Locality|,|State|,|Comments|,|DeliveryOffice|,|PresortIndicator|,|ParcelZone|,|BSPnumber|,|BSPname|,|Category|,|Lat|,|Long|',
'|0200|,|AUSTRALIAN NATIONAL UNIVERSITY|,|ACT|,|PO Boxes|,|AUSTRALIAN NATIONAL UNI LPO x|,|150|,|N2 |,|019|,|CANBERRA|,|Post Office Boxes |,|-35.277272|,|149.117136|',
'|0221|,|BARTON|,|ACT|,|LVR Special Mailing|,,|150|,|N2 |,|019|,|CANBERRA|,|LVR |,|-35.201372|,|149.095065|'
]
20 | 
21 | 
class ProcessorTests(unittest.TestCase):
    # Exercises Processor.process against the in-memory sample data defined
    # at the top of this module.

    def test_no_config_does_no_processing(self):
        # A default Processor passes every row through untouched.
        processor = Processor()
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']], rows)

    def test_single_col_plucking(self):
        processor = Processor(fields=[0])
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['a'], ['d'], ['g']], rows)

    def test_validator(self):
        # Only rows accepted by the registered validator are emitted.
        processor = Processor()
        processor.add_validator(lambda row: row[0] == 'a')
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['a', 'b', 'c']], rows)

    def test_multiple_col_plucking(self):
        processor = Processor(fields=[0, 2])
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['a', 'c'], ['d', 'f'], ['g', 'i']], rows)

    def test_multiple_col_plucking_with_reordering(self):
        # Output columns follow the order of the requested indexes.
        processor = Processor(fields=[2, 1])
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['c', 'b'], ['f', 'e'], ['i', 'h']], rows)

    def test_single_col_dropping(self):
        processor = Processor(fields=[1], invert=True)
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['a', 'c'], ['d', 'f'], ['g', 'i']], rows)

    def test_multiple_col_dropping(self):
        processor = Processor(fields=[0,2], invert=True)
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['b'], ['e'], ['h']], rows)

    def test_single_col_plucking_with_skip(self):
        # skip=1 discards the first row before plucking.
        processor = Processor(fields=[0], skip=1)
        rows = list(processor.process(SAMPLE_CSV))
        self.assertEqual([['d'], ['g']], rows)

    def test_pluck_with_pipes(self):
        processor = Processor(fields=[0], delimiter='|')
        rows = list(processor.process(SAMPLE_PSV))
        self.assertEqual([['a'], ['d'], ['g']], rows)

    def test_quoted_pluck(self):
        processor = Processor(fields=[0, 10, 11], skip=1)
        rows = list(processor.process(SAMPLE_QUOTED_CSV))
        expected = [['0200', '-35.277272', '149.117136'], ['0221', '-35.201372', '149.095065']]
        self.assertEqual(expected, rows)

    def test_custom_quoted_pluck(self):
        processor = Processor(fields=[0, 10, 11], quotechar='|', skip=1)
        rows = list(processor.process(SAMPLE_CUSTOM_QUOTED_CSV))
        expected = [['0200', '-35.277272', '149.117136'], ['0221', '-35.201372', '149.095065']]
        self.assertEqual(expected, rows)

    def test_tab_delimited_input(self):
        processor = Processor(delimiter="\t")
        rows = list(processor.process(SAMPLE_TSV))
        self.assertEqual([['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']],
                         rows)
87 | 


--------------------------------------------------------------------------------