├── .gitignore
├── t
    ├── text-simple.txt
    ├── example.sp
    ├── dummy-data.csv
    ├── dirty-data.csv
    ├── Text-Parser-pod-01.t
    ├── Text-Parser-pod-02.t
    ├── Text-Parser-pod-04.t
    ├── Spice-Parser.t
    ├── Text-Parser-csv-02.t
    ├── Text-Parser-pod-03.t
    ├── Text-Parser-csv-03.t
    └── Text-Parser.t
├── .whitesource
├── .travis.yml
├── weaver.ini
├── CONTRIBUTING.md
├── dist.ini
├── Changes
├── README.pod
└── lib
    └── Text
        └── Parser.pm


/.gitignore:
--------------------------------------------------------------------------------
1 | Text-Parser*.tar.gz
2 | Text-Parser*/
3 | 


--------------------------------------------------------------------------------
/t/text-simple.txt:
--------------------------------------------------------------------------------
1 | This is a file with one line
2 | 


--------------------------------------------------------------------------------
/t/example.sp:
--------------------------------------------------------------------------------
1 | Minst net1
2 | + net2 net3
3 | + net4 nmos l=0.09u
4 | + w=0.13u
5 | .END
6 | 
7 | *This line is not read
8 | 


--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 |   "generalSettings": {
3 |     "shouldScanRepo": true
4 |   },
5 |   "checkRunSettings": {
6 |     "vulnerableCheckRunConclusionLevel": "failure"
7 |   }
8 | }


--------------------------------------------------------------------------------
/t/dummy-data.csv:
--------------------------------------------------------------------------------
1 | NAME,DAY_OF_BIRTH,MONTH_OF_BIRTH,YEAR_OF_BIRTH
2 | Balaji,5,9,1981
3 | Elizabeth,16,2,1984
4 | Narayanan,19,5,1986
5 | Hemalatha,2,5,1956
6 | Ramasubramanian,5,2,1951
7 | 


--------------------------------------------------------------------------------
/t/dirty-data.csv:
--------------------------------------------------------------------------------
1 | NAME,DAY_OF_BIRTH,MONTH_OF_BIRTH,YEAR_OF_BIRTH
2 | Balaji,5,9,1981
3 | Elizabeth,16,2,1984
4 | Narayanan,19,5,1986,Aadhavi
5 | Hemalatha,2,5,1956
6 | Ramasubramanian,5,2,1951
7 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: perl
 2 | perl:
 3 |   - "5.28"
 4 |   - "5.26"
 5 |   - "5.24"
 6 |   - "5.22"
 7 |   - "5.20"
 8 |   - "5.18"
 9 |   - "5.16"
10 | script:
11 |   - perl Makefile.PL && make test
12 | notifications:
13 |   recipients:
14 |     - balajiram@cpan.org
15 |   email:
16 |     on_success: change
17 |     on_failure: always
18 | 


--------------------------------------------------------------------------------
/t/Text-Parser-pod-01.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | use Test::More;
 4 | use Test::Output;
 5 | use Test::Exception;
 6 | 
 7 | BEGIN { use_ok 'Text::Parser'; }
 8 | 
 9 | my $parser = Text::Parser->new();
10 | (@ARGV) = qw(t/text-simple.txt);
11 | lives_ok {
12 |     $parser->read(shift @ARGV);
13 |     stdout_is {
14 |         print $parser->get_records, "\n";
15 |     }
16 |     "This is a file with one line\n\n", 'Prints the output to screen correctly';
17 | }
18 | 'No errors in reading this file';
19 | 
20 | done_testing();
21 | 


--------------------------------------------------------------------------------
/weaver.ini:
--------------------------------------------------------------------------------
 1 | [@CorePrep]
 2 |  
 3 | [-SingleEncoding]
 4 |  
 5 | [Name]
 6 | [Version]
 7 |  
 8 | [Region  / prelude]
 9 |  
10 | [Generic / SYNOPSIS]
11 | [Generic / RATIONALE]
12 | [Generic / DESCRIPTION]
13 | [Generic / OVERVIEW]
14 |  
15 | [Collect / ATTRIBUTES]
16 | command = attr
17 |  
18 | [Collect / METHODS]
19 | command = method
20 |  
21 | [Collect / FUNCTIONS]
22 | command = func
23 |  
24 | [Generic / EXAMPLES]
25 | [Leftovers]
26 |  
27 | [Region  / postlude]
28 |  
29 | [Bugs]
30 | [Authors]
31 | [Legal]
32 | ;;[@Default]
33 | [Contributors]
34 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | 
 3 | Welcome! I am glad you consider it worth your time to contribute. Here are some guidelines to follow:
 4 | 
 5 | 1. Open a new issue if it is a bug or even an enhancement, and describe what you are doing and why you think it is important. If it is a major change I would prefer that this is discussed first.
 6 | 2. Make your code changes and send me a pull request.
 7 | 3. Preferably add tests to ensure that the code coverage is high enough. You can use `Devel::Cover` to add tests that cover all your code changes.
 8 | 
 9 | # Please:
10 | 
11 | 1. Be Polite.
12 | 2. Avoid foul language.
13 | 3. Avoid sexist or racist comments.
14 | 4. Be open to the possibility that your change is not accepted (although I would rarely want to do that).
15 | 


--------------------------------------------------------------------------------
/t/Text-Parser-pod-02.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | package Text::Parser::CSV;
 5 | use parent 'Text::Parser';
 6 | 
 7 | sub save_record {
 8 |     my ( $self, $line ) = @_;
 9 |     chomp $line;
10 |     my (@fields) = split /,/, $line;
11 |     $self->SUPER::save_record( \@fields );
12 | }
13 | 
14 | package main;
15 | use Test::More;
16 | use Test::Output;
17 | use Test::Exception;
18 | 
19 | my $csvp = Text::Parser::CSV->new();
20 | lives_ok {
21 |     $csvp->read('t/dummy-data.csv');
22 |     is_deeply(
23 |         [ $csvp->get_records() ],
24 |         [   [qw(NAME DAY_OF_BIRTH MONTH_OF_BIRTH YEAR_OF_BIRTH)],
25 |             [qw(Balaji 5 9 1981)],
26 |             [qw(Elizabeth 16 2 1984)],
27 |             [qw(Narayanan 19 5 1986)],
28 |             [qw(Hemalatha 2 5 1956)],
29 |             [qw(Ramasubramanian 5 2 1951)],
30 |         ],
31 |         'Got the records as expected'
32 |     );
33 | }
34 | 'No issues in reading the CSV file';
35 | 
36 | done_testing();
37 | 


--------------------------------------------------------------------------------
/t/Text-Parser-pod-04.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | use Test::More;
 5 | use Test::Output;
 6 | use Test::Exception;
 7 | use Try::Tiny;
 8 | 
 9 | BEGIN { use_ok 'Text::Parser'; }
10 | 
11 | my $parser = Text::Parser->new();
12 | isa_ok $parser, 'Text::Parser';
13 | lives_ok {
14 |     $parser->read('t/text-simple.txt');
15 | }
16 | 'Parses a text file normally';
17 | is( $parser->filename(), 't/text-simple.txt', 'Last file read' );
18 | is( $parser->filehandle(), undef, 'Filehandle was closed' );
19 | 
20 | open MYFH, "<t/text-simple.txt";
21 | lives_ok {
22 |     $parser->read( \*MYFH );
23 | }
24 | 'Reads the content again';
25 | is( $parser->filename(), undef, 'The last file read is lost' );
26 | close MYFH;
27 | isnt( $parser->filehandle(), undef, 'Retains the last filehandle read' );
28 | throws_ok {
29 |     $parser->read(\*MYFH);
30 | } 'Text::Parser::Exception', 'Trying to read a closed filehandle';
31 | isnt( $parser->filehandle(), undef, 'Retains the last filehandle read' );
32 | 
33 | done_testing();
34 | 


--------------------------------------------------------------------------------
/t/Spice-Parser.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | package SpiceParser;
 5 | use parent 'Text::Parser';
 6 | 
 7 | use constant {
 8 |     SPICE_LINE_CONTD => qr/^[+]\s*/,
 9 |     SPICE_END_FILE   => qr/^\.end/i,
10 | };
11 | 
12 | sub save_record {
13 |     my ( $self, $line ) = @_;
14 |     return $self->__spice_line_contd($line) if $line =~ SPICE_LINE_CONTD;
15 |     return $self->abort_reading() if $line =~ SPICE_END_FILE;
16 |     $self->SUPER::save_record($line);
17 | }
18 | 
19 | sub __spice_line_contd {
20 |     my ( $self, $line ) = @_;
21 |     $line =~ s/^[+]\s*//;
22 |     my $last_rec = $self->pop_record;
23 |     chomp $last_rec;
24 |     $self->SUPER::save_record( $last_rec . ' ' . $line );
25 | }
26 | 
27 | package main;
28 | use Test::More;
29 | use Test::Exception;
30 | 
31 | my $sp = new SpiceParser;
32 | 
33 | lives_ok { $sp->read('t/example.sp'); } 'Works fine';
34 | is( scalar( $sp->get_records() ), 1, '1 record saved' );
35 | is( $sp->lines_parsed(),          5, '5 lines parsed' );
36 | is( $sp->last_record, "Minst net1 net2 net3 net4 nmos l=0.09u w=0.13u\n" );
37 | 
38 | done_testing;
39 | 


--------------------------------------------------------------------------------
/t/Text-Parser-csv-02.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | package Text::Parser::CSV;
 5 | use parent 'Text::Parser';
 6 | use Text::CSV;
 7 | 
 8 | # Note that this approach is still unsafe for embedded newlines
 9 | my $csv = Text::CSV->new({ binary => 1, auto_diag => 1 });
10 | 
11 | sub save_record {
12 |     my ( $self, $line ) = @_;
13 |     my @fields = $csv->parse($line) ? $csv-> fields : ();
14 |     $self->SUPER::save_record( \@fields );
15 | }
16 | 
17 | package main;
18 | use Test::More;
19 | use Test::Output;
20 | use Test::Exception;
21 | 
22 | my $csvp = Text::Parser::CSV->new();
23 | lives_ok {
24 |     $csvp->read('t/dummy-data.csv');
25 |     is_deeply(
26 |         [ $csvp->get_records() ],
27 |         [   [qw(NAME DAY_OF_BIRTH MONTH_OF_BIRTH YEAR_OF_BIRTH)],
28 |             [qw(Balaji 5 9 1981)],
29 |             [qw(Elizabeth 16 2 1984)],
30 |             [qw(Narayanan 19 5 1986)],
31 |             [qw(Hemalatha 2 5 1956)],
32 |             [qw(Ramasubramanian 5 2 1951)],
33 |         ],
34 |         'Got the records as expected'
35 |     );
36 | }
37 | 'No issues in reading the CSV file';
38 | 
39 | done_testing();
40 | 


--------------------------------------------------------------------------------
/t/Text-Parser-pod-03.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | package Text::Parser::CSV;
 5 | use parent 'Text::Parser';
 6 | use Exception::Class ( 'Text::Parser::CSV::Error',
 7 |     'Text::Parser::CSV::TooManyFields' =>
 8 |         { isa => 'Text::Parser::CSV::Error', }, );
 9 | 
10 | sub save_record {
11 |     my ( $self, $line ) = @_;
12 |     chomp $line;
13 |     my (@fields) = split /,/, $line;
14 |     $self->{__csv_header} = \@fields if not scalar( $self->get_records );
15 |     Text::Parser::CSV::TooManyFields->throw(
16 |         error => "Too many fields on line #" . $self->lines_parsed )
17 |         if scalar(@fields) > scalar( @{ $self->{__csv_header} } );
18 |     $self->SUPER::save_record( \@fields );
19 | }
20 | 
21 | package main;
22 | use Test::More;
23 | use Test::Output;
24 | use Test::Exception;
25 | use Try::Tiny;
26 | 
27 | sub test_read_dirty_csv {
28 |     my $csvp = shift;
29 |     try {
30 |         $csvp->read('t/dirty-data.csv');
31 |     } catch {
32 |         isa_ok( $_, 'Text::Parser::CSV::TooManyFields',
33 |             'Correct error type' );
34 |         $_->rethrow() if not $_->isa('Text::Parser::CSV::TooManyFields');
35 |         print STDERR $_->error, "\n";
36 |     };
37 | }
38 | 
39 | my $csvp = Text::Parser::CSV->new();
40 | lives_ok {
41 |     stderr_is {
42 |         test_read_dirty_csv($csvp);
43 |     }
44 |     "Too many fields on line #4\n", 'Displays correct error message';
45 | }
46 | 'Exception caught properly ; no other exceptions';
47 | 
48 | done_testing();
49 | 


--------------------------------------------------------------------------------
/t/Text-Parser-csv-03.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | package Text::Parser::CSV;
 5 | use parent 'Text::Parser';
 6 | use Exception::Class ( 'Text::Parser::CSV::Error',
 7 |     'Text::Parser::CSV::TooManyFields' =>
 8 |         { isa => 'Text::Parser::CSV::Error', }, );
 9 | use Text::CSV;
10 | 
11 | # Note that this approach is still unsafe for embedded newlines
12 | my $csv = Text::CSV->new({ binary => 1, auto_diag => 1 });
13 | 
14 | sub save_record {
15 |     my ( $self, $line ) = @_;
16 |     my @fields = $csv->parse($line) ? $csv-> fields : ();
17 |     $self->{__csv_header} = \@fields if not scalar( $self->get_records );
18 |     Text::Parser::CSV::TooManyFields->throw(
19 |         error => "Too many fields on line #" . $self->lines_parsed )
20 |         if scalar(@fields) > scalar( @{ $self->{__csv_header} } );
21 |     $self->SUPER::save_record( \@fields );
22 | }
23 | 
24 | package main;
25 | use Test::More;
26 | use Test::Output;
27 | use Test::Exception;
28 | use Try::Tiny;
29 | 
30 | sub test_read_dirty_csv {
31 |     my $csvp = shift;
32 |     try {
33 |         $csvp->read('t/dirty-data.csv');
34 |     } catch {
35 |         isa_ok( $_, 'Text::Parser::CSV::TooManyFields',
36 |             'Correct error type' );
37 |         $_->rethrow() if not $_->isa('Text::Parser::CSV::TooManyFields');
38 |         print STDERR $_->error, "\n";
39 |     };
40 | }
41 | 
42 | my $csvp = Text::Parser::CSV->new();
43 | lives_ok {
44 |     stderr_is {
45 |         test_read_dirty_csv($csvp);
46 |     }
47 |     "Too many fields on line #4\n", 'Displays correct error message';
48 | }
49 | 'Exception caught properly ; no other exceptions';
50 | 
51 | done_testing();
52 | 


--------------------------------------------------------------------------------
/dist.ini:
--------------------------------------------------------------------------------
 1 | name    = Text-Parser
 2 | author  = Balaji Ramasubramanian <balajiram@cpan.org>
 3 | license = Perl_5
 4 | copyright_holder = Balaji Ramasubramanian
 5 | copyright_year   = 2018
 6 | 
 7 | 
 8 | ;; Determine the next version from the Git repo
 9 | [Git::NextVersion]
10 | first_version = 0.001       ; this is the default
11 | version_by_branch = 0       ; this is the default
12 | version_regexp  = ^v(.+)$   ; this is the default
13 | [PkgVersion]
14 | use_package = 1
15 | 
16 | ;; All plugins for POD
17 | ;; PodWeaver should come before PodVersion because PodVersion looks for =head1 NAME
18 | ;; Check weaver.ini for order of sections in POD
19 | [Bugtracker]
20 | [Git::Contributors]
21 | remove=balajiram
22 | include_releaser=0
23 | 
24 | [PodWeaver]
25 | [PodVersion]
26 | 
27 | ;; README.pod to be created
28 | [ReadmeAnyFromPod]
29 | type = pod
30 | filename = README.pod
31 | location = root
32 | 
33 | ;; Tests
34 | [PodCoverageTests]
35 | [PodSyntaxTests]
36 | [Test::Kwalitee]
37 | [Test::MinimumVersion]
38 | [Test::CPAN::Changes]
39 | [MetaTests]
40 | 
41 | ;; Prerequisites for Makefile.PL
42 | [MinimumPerl]
43 | [AutoPrereqs]
44 | 
45 | ;; [NextRelease] should precede [@Git] http://dzil.org/tutorial/vcs-git.html
46 | ;; Git related
47 | [NextRelease]
48 | [@Git]
49 | changelog   = Changes             ; this is the default
50 | allow_dirty = dist.ini            ; see Git::Check...
51 | allow_dirty = Changes             ; ... and Git::Commit
52 | allow_dirty = README.pod          ; ... and Git::Commit
53 | commit_msg  = v%v%n%n%c           ; see Git::Commit
54 | tag_format  = v%v                 ; see Git::Tag
55 | tag_message = v%v                 ; see Git::Tag
56 | push_to     = origin              ; see Git::Push
57 | remotes_must_exist = 1
58 | 
59 | ;; Release related
60 | [GithubMeta]
61 | [MetaJSON]
62 | [MetaProvides::Package]
63 | [CheckChangesHasContent]
64 | [@Basic]
65 | 


--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
 1 | Revision history for {{$dist->name}}
 2 | 
 3 | {{$NEXT}}
 4 |   - Skip tests of unreadable files for MSWin32.
 5 |   - A safer CSV example in Example 1
 6 |   - Grammatical/typo fixes in POD.
 7 | 
 8 | 0.752     2019-01-02 20:25:16-08:00 America/Los_Angeles
 9 |   - One test fails for superuser. Fixing that.
10 |   - Known issue in MSWin32 seems related to the same thing. Don't know if it gets fixed.
11 |   - Made POD less verbose.
12 | 
13 | 0.751     2019-01-01 20:30:37-08:00 America/Los_Angeles
14 |   - Documentation looks better. Made links where it makes sense. Re-arranged the information better.
15 | 
16 | 0.750     2019-01-01 19:37:50-08:00 America/Los_Angeles
17 |   - Correct problems in test for Win32 platform compatibility.
18 |   - Improve documentation to give examples of what all the functions do.
19 |   - Changed the spec for filehandle() and read()
20 |   - Added tests to cover the new spec.
21 | 
22 | 0.700     2019-01-01 13:20:43-08:00 America/Los_Angeles
23 |   - Some operating systems can read from STDOUT and some cannot. Same goes for output file handles.
24 |   - Improved documentation.
25 |   - Improved code coverage using Devel::Cover for the first time. We now have 99.5% coverage.
26 |   - Added a new method called abort_reading().
27 | 
28 | 0.502     2018-12-29 08:18:06-08:00 America/Los_Angeles
29 |   - STDOUT and other output files don't uniformly respond to the -r test on all operating systems. This will need more work.
30 | 
31 | 0.501     2018-12-23 20:51:09-08:00 America/Los_Angeles
32 |   - This will be a known issue for V0.500 and V0.501, until fixed:
33 |   - The test for checking that filehandle is read-only is actually not working. Fixing the tests for now to mark the test as TODO.
34 | 
35 | 0.500     2018-12-23 20:14:10-08:00 America/Los_Angeles
36 |   - Renamed this class as Text::Parser after RT#127067 by James Keenan
37 |   - Added the ability to parse text from filehandles, thus the name Text::Parser and not Text::File::Parser
38 |   - Added new method filehandle just to provide interface similar to filename.
39 |   - Indicated in POD that Text::Parser supersedes TextFileParser.
40 | 
41 | 0.204     2018-08-08 20:37:32-07:00 America/Los_Angeles
42 |   - Removed use of indirect method notation in POD
43 | 
44 | 0.203     2018-08-08 17:43:11-07:00 America/Los_Angeles
45 |   - Clearer documentation
46 | 
47 | 0.202     2018-08-08 00:03:52-07:00 America/Los_Angeles
48 |   - Added bugtracking info to POD
49 | 
50 | 0.201     2018-08-07 23:19:45-07:00 America/Los_Angeles
51 |   - Improved documentation
52 |   - Removed the use Role::Tiny from the tests
53 | 
54 | 0.200     2018-08-07 16:31:53-07:00 America/Los_Angeles
55 |   - Changed versioning scheme
56 |   - Updated documentation
57 | 
58 | 0.1821907 2018-08-07 14:37:11-07:00 America/Los_Angeles
59 |   - Removed record_list_pointer method
60 |   - Updated documentation
61 |   - Added tests using a Role::Tiny
62 | 
63 | 0.1821905 2018-08-07 09:22:12-07:00 America/Los_Angeles
64 |   - Added a new method pop_record
65 |   - Updated POD and simplified examples
66 |   - Added more tests
67 | 
68 | 0.1821902 2018-08-06 23:33:10-07:00 America/Los_Angeles
69 |   - Added the first tests
70 |   - Removed method make_sense_of_line. Derived classes override only save_record
71 |   - Updated documentation briefly.
72 | 
73 | 0.1821900 2018-08-06 20:41:42-07:00 America/Los_Angeles
74 |   - Put your comment here
75 | 


--------------------------------------------------------------------------------
/t/Text-Parser.t:
--------------------------------------------------------------------------------
  1 | use strict;
  2 | use warnings;
  3 | 
  4 | use Test::More;
  5 | use Test::Exception;
  6 | 
  7 | BEGIN { use_ok 'Text::Parser'; }
  8 | 
  9 | my $fname = 'text-simple.txt';
 10 | 
 11 | my $pars = Text::Parser->new();
 12 | is( $pars->filename(), undef, 'No filename specified so far' );
 13 | lives_ok { is( $pars->filehandle(), undef, 'No filehandles' ); }
 14 | 'This should not die, just return undef';
 15 | throws_ok { $pars->filehandle('bad argument'); } 'Text::Parser::Exception',
 16 |     'filehandle() will take only a GLOB input';
 17 | throws_ok { $pars->filename( { a => 'b' } ); } 'Text::Parser::Exception',
 18 |     'filename() will take only string as input';
 19 | throws_ok { $pars->filename($fname) } 'Text::Parser::Exception',
 20 |     'No file by this name';
 21 | throws_ok { $pars->read($fname); } 'Text::Parser::Exception',
 22 |     'Throws exception for non-existent file';
 23 | 
 24 | lives_ok { $pars->read(); } 'Returns doing nothing';
 25 | is( $pars->lines_parsed, 0, 'Nothing parsed' );
 26 | is_deeply( [ $pars->get_records ], [], 'No data recorded' );
 27 | is( $pars->last_record, undef, 'No records' );
 28 | is( $pars->pop_record,  undef, 'Nothing on stack' );
 29 | 
 30 | lives_ok { $pars->read(''); } 'Reads no file ; returns doing nothing';
 31 | is( $pars->filename(),     undef, 'No file name still' );
 32 | is( $pars->lines_parsed(), 0,     'Nothing parsed again' );
 33 | 
 34 | SKIP: {
 35 |     skip 'Tests not meant for root user', 2 unless $>;
 36 |     skip 'Tests wont work on MSWin32',    2 unless $^O ne 'MSWin32';
 37 |     open OFILE, ">t/unreadable.txt";
 38 |     print OFILE "This is unreadable\n";
 39 |     close OFILE;
 40 |     chmod 0200, 't/unreadable.txt';
 41 |     throws_ok { $pars->filename('t/unreadable.txt'); }
 42 |     'Text::Parser::Exception', 'This file cannot be read';
 43 |     is( $pars->filename(), undef, 'Still no file has been read so far' );
 44 |     unlink 't/unreadable.txt';
 45 | }
 46 | 
 47 | my $content = "This is a file with one line\n";
 48 | lives_ok { $pars->filename( 't/' . $fname ); } 'Now I can open the file';
 49 | lives_ok { $pars->read; } 'Reads the file now';
 50 | is_deeply( [ $pars->get_records ], [$content], 'Get correct data' );
 51 | is( $pars->lines_parsed, 1,             '1 line parsed' );
 52 | is( $pars->last_record,  $content,      'Worked' );
 53 | is( $pars->pop_record,   $content,      'Popped' );
 54 | is( $pars->lines_parsed, 1,             'Still lines_parsed returns 1' );
 55 | is( $pars->filename(),   't/' . $fname, 'Last file read' );
 56 | 
 57 | open OUTFILE, ">example";
 58 | if ( -r OUTFILE ) {
 59 |     lives_ok { $pars->filehandle( \*OUTFILE ); }
 60 |     'In some systems output file handles are also readable! Your system is one of those. This could be a potential security hole.';
 61 |     is( $pars->filename(), undef, 'Last file read is not available anymore' );
 62 | } else {
 63 |     throws_ok { $pars->filehandle( \*OUTFILE ); } 'Text::Parser::Exception',
 64 |         'Your system is strict and will not read from an output filehandle. This is potentially good for security.';
 65 | }
 66 | print OUTFILE "Simple text";
 67 | close OUTFILE;
 68 | open INFILE, "<example";
 69 | lives_ok {
 70 |     $pars->read( \*INFILE );
 71 |     is_deeply( [ $pars->get_records() ],
 72 |         ['Simple text'], 'Read correct data in file' );
 73 | }
 74 | 'Exercising the ability to read from file handles directly';
 75 | unlink 'example';
 76 | open OUTFILE, ">example";
 77 | if ( -r OUTFILE ) {
 78 |     chmod 0200, 'example';
 79 |     throws_ok { $pars->filehandle( \*OUTFILE ); } 'Text::Parser::Exception',
 80 |         'Now this is not readable'
 81 |         if not -r OUTFILE;
 82 | }
 83 | chmod 0644, 'example';
 84 | close OUTFILE;
 85 | unlink 'example';
 86 | 
 87 | ## Testing the reading from filehandles on STDOUT and STDIN
 88 | if ( -r STDOUT ) {
 89 |     lives_ok { $pars->filehandle( \*STDOUT ); }
 90 |     'Some systems can read from STDOUT. Your system is one of them.';
 91 | } else {
 92 |     throws_ok { $pars->filehandle( \*STDOUT ); } 'Text::Parser::Exception',
 93 |         'Your system is strict and will not read from STDOUT';
 94 | }
 95 | lives_ok { $pars->filehandle( \*STDIN ); } 'No issues in reading from STDIN';
 96 | 
 97 | throws_ok { $pars->read( { a => 'b' } ); } 'Text::Parser::Exception',
 98 |     'Invalid type of argument for read() method';
 99 | 
100 | lives_ok { $pars->read( 't/' . $fname ); }
101 | 'reads the contents of a file without dying';
102 | is( $pars->last_record,  $content, 'Last record is correct' );
103 | is( $pars->lines_parsed, 1,        'Read only one line' );
104 | is_deeply( [ $pars->get_records ], [$content], 'Got correct file content' );
105 | 
106 | my $add = "This record is added";
107 | lives_ok { $pars->save_record(); } 'Add nothing';
108 | is( $pars->last_record, undef, 'Last record is undef' );
109 | lives_ok { $pars->save_record($add); } 'Add another record';
110 | is( $pars->lines_parsed, 1,    'Still only 1 line parsed' );
111 | is( $pars->last_record,  $add, 'Last added record' );
112 | is_deeply(
113 |     [ $pars->get_records ],
114 |     [ $content, undef, $add ],
115 |     'But will contain all elements including an undef'
116 | );
117 | 
118 | is( $pars->pop_record,   $add,     'Popped a record' );
119 | is( $pars->lines_parsed, 1,        'Still only 1 line parsed' );
120 | is( $pars->last_record,  undef,    'There was an undef in between' );
121 | is( $pars->pop_record,   undef,    'Now undef is removed' );
122 | is( $pars->last_record,  $content, 'Now the last record is the one earlier' );
123 | is_deeply( [ $pars->get_records ],
124 |     [$content], 'Got correct content after pop' );
125 | 
126 | done_testing;
127 | 


--------------------------------------------------------------------------------
/README.pod:
--------------------------------------------------------------------------------
  1 | =pod
  2 | 
  3 | =encoding UTF-8
  4 | 
  5 | =head1 NAME
  6 | 
  7 | Text::Parser - Bare text parser, bundles other common "mundane" tasks.
  8 | 
  9 | =head1 VERSION
 10 | 
 11 | version 0.753
 12 | 
 13 | =head1 SYNOPSIS
 14 | 
 15 |     use Text::Parser;
 16 | 
 17 |     my $parser = Text::Parser->new();
 18 |     $parser->read(shift @ARGV);
 19 |     print $parser->get_records, "\n";
 20 | 
 21 | The above code reads the first command-line argument as a string, and assuming it is the name of a text file, it will print the content of the file to C<STDOUT>. If the string is not the name of a text file it will throw an exception and exit.
 22 | 
 23 | =head1 RATIONALE
 24 | 
 25 | A simple text file parser should have to only specify the "grammar" it intends to interpret. Everything else, like C<open>ing a file handle, tracking how many lines have been read, etc., should be "automatic".
 26 | 
 27 | Unfortunately, most programmers write code that calls C<open>, C<close>, etc., and keep track of things that should have been simple features of every text file parser. And if they have to read multiple files, usually, all these calls are repeated.
 28 | 
 29 | This class does all "mundane" operations like C<open>, C<close>, line-count, and storage/deletion/retrieval of records, etc. You don't have to bother with a lot of book-keeping when you write your next parser. Instead, just inherit this class and override one method (C<L<save_record|/save_record>>). And voila! you have a parser. Look at L<these examples|/EXAMPLES> to see how easy this can be.
 30 | 
 31 | =head1 DESCRIPTION
 32 | 
 33 | C<Text::Parser> is a bare-bones text parsing class. It is ignorant of the text format, and cannot recognize any grammars, but derived classes that inherit from it can specify this. They can do this usually by overriding just one of the methods in this class.
 34 | 
 35 | Future versions are expected to include progress-bar support, parsing text from sockets, or a chunk of memory. All these software features are text-format independent and can be re-used in parsing any text format. Derived classes of C<Text::Parser> will be able to take advantage of these features.
 36 | 
 37 | =head1 METHODS
 38 | 
 39 | =head2 new
 40 | 
 41 | Takes no arguments. Constructor.
 42 | 
 43 |     my $parser = Text::Parser->new();
 44 | 
 45 | This C<$parser> variable will be used in examples below.
 46 | 
 47 | =head2 read
 48 | 
 49 | Takes zero or one argument which could be a string containing the name of the file, or a filehandle reference or a C<GLOB> (e.g. C<\*STDIN>). Throws an exception if filename/C<GLOB> provided is either non-existent or cannot be read for any reason.
 50 | 
 51 | B<Note:> Normally if you provide the C<GLOB> of a file opened for write, some Operating Systems allow reading from it too, and some don't. Read the documentation for C<L<filehandle|/filehandle>> for more on this.
 52 | 
 53 |     $parser->read($filename);
 54 | 
 55 |     # The above is equivalent to the following
 56 |     $parser->filename($filename);
 57 |     $parser->read();
 58 | 
 59 |     # You can also read from a previously opened file handle directly
 60 |     $parser->filehandle(\*STDIN);
 61 |     $parser->read();
 62 | 
 63 | Returns once all records have been read or if an exception is thrown for any parsing errors, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method.
 64 | 
 65 | If you provide a string file name as input, the function will handle all C<open> and C<close> operations on files even if any exception is thrown, or if the reading has been aborted. But if you pass a file handle C<GLOB> instead, then the file handle won't be closed and it will be the responsibility of the calling program to close the filehandle.
 66 | 
 67 |     $parser->read('myfile.txt'); # Will handle open, parsing, and closing of file automatically.
 68 | 
 69 |     open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
 70 |     $parser->read(\*MYFH);       # Will not close MYFH and it is the respo
 71 |     close MYFH;
 72 | 
 73 | When you do read a new file or input stream with this method, you will lose all the records stored from the previous read operation. So this means that if you want to read a different file with the same parser object, (unless you don't care about the records from the last file you read) you should use the C<L<get_records|/get_records>> method to retrieve all the read records before parsing a new file. So all those calls to C<read> in the example above were parsing three different files, and each successive call overwrote the records from the previous call.
 74 | 
 75 |     $parser->read($file1);
 76 |     my (@records) = $parser->get_records();
 77 | 
 78 |     $parser->read(\*STDIN);
 79 |     my (@stdin) = $parser->get_records();
 80 | 
 81 | B<Inheritance Recommendation:> To extend the class to other file formats, override C<L<save_record|/save_record>> instead of this one.
 82 | 
 83 | =head3 Future Enhancement
 84 | 
 85 | I<At present the C<read> method takes only two possible inputs argument types, either a file name, or a file handle. In future this may be enhanced to read from sockets, subroutines, or even just a block of memory (a string reference). Suggestions for other forms of input are welcome.>
 86 | 
 87 | =head2 filename
 88 | 
 89 | Takes zero or one string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
 90 | 
 91 |     print "Last read ", $parser->filename, "\n";
 92 | 
 93 | The file name is "persistent" in the object. Meaning, even after you have called C<L<read|/read>> once, it still remembers the file name. So you can do this:
 94 | 
 95 |     $parser->read(shift @ARGV);
 96 |     print $parser->filename(), ":\n", "=" x (length($parser->filename())+1), "\n", $parser->get_records(), "\n";
 97 | 
 98 | But if you do a C<read> with a filehandle as argument, you'll see that the last filename is lost - which makes sense.
 99 | 
100 |     $parser->read(\*MYFH);
101 |     print "Last file name is lost\n" if not defined $parser->filename();
102 | 
103 | =head2 filehandle
104 | 
105 | Takes zero or one C<GLOB> argument and saves it for future a C<L<read|/read>> call. Returns the filehandle last saved, or C<undef> if none was saved. Remember that after a successful C<read> call, filehandles are lost.
106 | 
107 |     my $fh = $parser->filehandle();
108 | 
109 | B<Note:> As such there is a check to ensure one is not supplying a write-only filehandle. For example, if you specify the filehandle of a write-only file or if the file is opened for write and you cannot read from it. The weird thing is that some of the standard filehandles like C<STDOUT> don't behave uniformly across all platforms. On most POSIX platforms, C<STDOUT> is readable. On such platforms you will not get any exceptions if you try to do this:
110 | 
111 |     $parser->filehandle(\*STDOUT);  ## Works on many POSIX platforms
112 |                                     ## Throws exception on others
113 | 
114 | Like in the case of C<L<filename|/filename>> method, if after you C<read> with a filehandle, you call C<read> again, this time with a file name, the last filehandle is lost.
115 | 
116 |     my $lastfh = $parser->filehandle();
117 |     ## Will return STDOUT
118 |     
119 |     $parser->read('another.txt');
120 |     print "No filehandle saved any more\n" if not defined $parser->filehandle();
121 | 
122 | =head2 lines_parsed
123 | 
124 | Takes no arguments. Returns the number of lines last parsed. A line is reckoned when the C<\n> character is encountered.
125 | 
126 |     print $parser->lines_parsed, " lines were parsed\n";
127 | 
128 | The value is auto-updated during the execution of C<L<read|/read>>. See L<this example|/"Example 2 : Error checking"> of how this can be used in derived classes.
129 | 
130 | Again the information in this is "persistent". You can also be assured that every time you call C<read>, the value be auto-reset before parsing.
131 | 
132 | =head2 save_record
133 | 
134 | Takes exactly one argument and that is saved as a record. Additional arguments are ignored. If no arguments are passed, then C<undef> is stored as a record.
135 | 
136 | In an application that uses a text parser, you will most-likely never call this method directly. It is automatically called within C<L<read|/read>> for each line. In this base class C<Text::Parser>, C<save_record> is simply called with a string containing the raw line of text ; i.e. the line of text will not be C<chomp>ed or modified in any way. L<Here|/"Example 1 : A simple CSV Parser"> is a basic example.
137 | 
138 | Derived classes can decide to store records in a different form. A derived class could, for example, store the records in the form of hash references (so that when you use C<L<get_records|/get_records>>, you'd get an array of hashes), or maybe even another array reference (so when you use C<get_records> you'd get an array of arrays). The L<CSV parser example|/"Example 1 : A simple CSV Parser"> does the latter by example.
139 | 
140 | =head2 abort_reading
141 | 
142 | Takes no arguments. Returns C<1>. You will probably never call this method in your main program.
143 | 
144 | This method is usually used only in the derived class. See L<this example|/"Example 3 : Aborting without errors">.
145 | 
146 | =head2 get_records
147 | 
148 | Takes no arguments. Returns an array containing all the records saved by the parser.
149 | 
150 |     foreach my $record ( $parser->get_records ) {
151 |         $i++;
152 |         print "Record: $i: ", $record, "\n";
153 |     }
154 | 
155 | =head2 last_record
156 | 
157 | Takes no arguments and returns the last saved record. Leaves the saved records untouched.
158 | 
159 |     my $last_rec = $parser->last_record;
160 | 
161 | =head2 pop_record
162 | 
163 | Takes no arguments and pops the last saved record.
164 | 
165 |     my $last_rec = $parser->pop_record;
166 |     $uc_last = uc $last_rec;
167 |     $parser->save_record($uc_last);
168 | 
169 | =head1 EXAMPLES
170 | 
171 | The following examples should illustrate the use of inheritance to parse various types of text file formats.
172 | 
173 | =head2 Basic principle
174 | 
175 | Derived classes simply need to override one method : C<L<save_record|/save_record>>. With the help of that any arbitrary file format can be read. C<save_record> should interpret the format of the text and store it in some form by calling C<SUPER::save_record>. The C<main::> program will then use the records and create an appropriate data structure with it.
176 | 
177 | Notice that the creation of a data structure is not the objective of a parser. It is simply concerned with collecting data and arranging it in a form that can be used. That's all. Data structures can be created by a different part of your program using the data collected by your parser.
178 | 
179 | =head2 Example 1 : A simple CSV Parser
180 | 
181 | We will write a parser for a simple CSV file that reads each line and stores the records as array references. This example is oversimplified, and does B<not> handle embedded newlines.
182 | 
183 |     package Text::Parser::CSV;
184 |     use parent 'Text::Parser';
185 |     use Text::CSV;
186 | 
187 |     my $csv;
188 |     sub save_record {
189 |         my ($self, $line) = @_;
190 |         $csv //= Text::CSV->new({ binary => 1, auto_diag => 1});
191 |         $csv->parse($line);
192 |         $self->SUPER::save_record([$csv->fields]);
193 |     }
194 | 
195 | That's it! Now in C<main::> you can write something like this:
196 | 
197 |     use Text::Parser::CSV;
198 |     
199 |     my $csvp = Text::Parser::CSV->new();
200 |     $csvp->read(shift @ARGV);
201 |     foreach my $aref ($csvp->get_records) {
202 |         my (@arr) = @{$aref};
203 |         print "@arr\n";
204 |     }
205 | 
206 | The above program reads the content of a given CSV file and prints the content out in space-separated form.
207 | 
208 | =head2 Example 2 : Error checking
209 | 
210 | It is easy to add any error checks using exceptions. One of the easiest ways to do this is to C<use L<Exception::Class>>. We'll modify the CSV parser above to demonstrate that.
211 | 
212 |     package Text::Parser::CSV;
213 |     use Exception::Class (
214 |         'Text::Parser::CSV::Error', 
215 |         'Text::Parser::CSV::TooManyFields' => {
216 |             isa => 'Text::Parser::CSV::Error',
217 |         },
218 |     );
219 |     
220 |     use parent 'Text::Parser';
221 |     use Text::CSV;
222 | 
223 |     my $csv;
224 |     sub save_record {
225 |         my ($self, $line) = @_;
226 |         $csv //= Text::CSV->new({ binary => 1, auto_diag => 1});
227 |         $csv->parse($line);
228 |         my @fields = $csv->fields;
229 |         $self->{__csv_header} = \@fields if not scalar($self->get_records);
230 |         Text::Parser::CSV::TooManyFields->throw(error => "Too many fields on line #" . $self->lines_parsed)
231 |             if scalar(@fields) > scalar(@{$self->{__csv_header}});
232 |         $self->SUPER::save_record(\@fields);
233 |     }
234 | 
235 | The C<Text::Parser> class will C<close> all filehandles automatically as soon as an exception is thrown from C<save_record>. You can catch the exception in C<main::> as you would normally, by C<use>ing C<L<Try::Tiny>> or other such class.
236 | 
237 | =head2 Example 3 : Aborting without errors
238 | 
239 | We can also abort parsing a text file without throwing an exception. This could be if we got the information we needed. For example:
240 | 
241 |     package Text::Parser::SomeFile;
242 |     use parent 'Text::Parser';
243 | 
244 |     sub save_record {
245 |         my ($self, $line) = @_;
246 |         my ($leading, $rest) = split /\s+/, $line, 2;
247 |         return $self->abort_reading() if $leading eq '**ABORT';
248 |         return $self->SUPER::save_record($line);
249 |     }
250 | 
251 | In this derived class, we have a parser C<Text::Parser::SomeFile> that would save each line as a record, but would abort reading the rest of the file as soon as it reaches a line with C<**ABORT> as the first word. When this parser is given the following file as input:
252 | 
253 |     somefile.txt:
254 | 
255 |     Some text is here.
256 |     More text here.
257 |     **ABORT reading
258 |     This text is not read
259 |     This text is not read
260 |     This text is not read
261 |     This text is not read
262 | 
263 | You can now write a program as follows:
264 | 
265 |     use Text::Parser::SomeFile;
266 | 
267 |     my $par = Text::Parser::SomeFile->new();
268 |     $par->read('somefile.txt');
269 |     print $par->get_records(), "\n";
270 | 
271 | The output will be:
272 | 
273 |     Some text is here.
274 |     More text here.
275 | 
276 | =head1 BUGS
277 | 
278 | Please report any bugs or feature requests on the bugtracker website
279 | L<http://rt.cpan.org/Public/Dist/Display.html?Name=Text-Parser> or by email
280 | to L<bug-text-parser at rt.cpan.org|mailto:bug-text-parser at rt.cpan.org>.
281 | 
282 | When submitting a bug or request, please include a test-file or a
283 | patch to an existing test-file that illustrates the bug or desired
284 | feature.
285 | 
286 | =head1 AUTHOR
287 | 
288 | Balaji Ramasubramanian <balajiram@cpan.org>
289 | 
290 | =head1 COPYRIGHT AND LICENSE
291 | 
292 | This software is copyright (c) 2018 by Balaji Ramasubramanian.
293 | 
294 | This is free software; you can redistribute it and/or modify it under
295 | the same terms as the Perl 5 programming language system itself.
296 | 
297 | =head1 CONTRIBUTOR
298 | 
299 | =for stopwords H.Merijn Brand - Tux
300 | 
301 | H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
302 | 
303 | =cut
304 | 


--------------------------------------------------------------------------------
/lib/Text/Parser.pm:
--------------------------------------------------------------------------------
  1 | use warnings;
  2 | use strict;
  3 | 
  4 | package Text::Parser;
  5 | 
  6 | # ABSTRACT: Bare text parser, bundles other common "mundane" tasks.
  7 | 
  8 | use Exporter 'import';
  9 | our (@EXPORT_OK) = ();
 10 | our (@EXPORT)    = (@EXPORT_OK);
 11 | 
 12 | =head1 SYNOPSIS
 13 | 
 14 |     use Text::Parser;
 15 | 
 16 |     my $parser = Text::Parser->new();
 17 |     $parser->read(shift @ARGV);
 18 |     print $parser->get_records, "\n";
 19 | 
 20 | The above code reads the first command-line argument as a string, and assuming it is the name of a text file, it will print the content of the file to C<STDOUT>. If the string is not the name of a text file it will throw an exception and exit.
 21 | 
 22 | =head1 RATIONALE
 23 | 
 24 | A simple text file parser should have to only specify the "grammar" it intends to interpret. Everything else, like C<open>ing a file handle, tracking how many lines have been read, etc., should be "automatic".
 25 | 
 26 | Unfortunately, most programmers write code that calls C<open>, C<close>, etc., and keep track of things that should have been simple features of every text file parser. And if they have to read multiple files, usually, all these calls are repeated.
 27 | 
 28 | This class does all "mundane" operations like C<open>, C<close>, line-count, and storage/deletion/retrieval of records, etc. You don't have to bother with a lot of book-keeping when you write your next parser. Instead, just inherit this class and override one method (C<L<save_record|/save_record>>). And voila! you have a parser. Look at L<these examples|/EXAMPLES> to see how easy this can be.
 29 | 
 30 | =head1 DESCRIPTION
 31 | 
 32 | C<Text::Parser> is a bare-bones text parsing class. It is ignorant of the text format, and cannot recognize any grammars, but derived classes that inherit from it can specify this. They can do this usually by overriding just one of the methods in this class.
 33 | 
 34 | Future versions are expected to include progress-bar support, parsing text from sockets, or a chunk of memory. All these software features are text-format independent and can be re-used in parsing any text format. Derived classes of C<Text::Parser> will be able to take advantage of these features.
 35 | 
 36 | =cut
 37 | 
 38 | use Exception::Class (
 39 |     'Text::Parser::Exception',
 40 |     'Text::Parser::Exception::ParsingError' => {
 41 |         isa         => 'Text::Parser::Exception',
 42 |         description => 'For all parsing errors',
 43 |         alias       => 'throw_text_parsing_error'
 44 |     },
 45 |     'Text::Parser::Exception::FileNotFound' => {
 46 |         isa         => 'Text::Parser::Exception',
 47 |         description => 'File not found',
 48 |         alias       => 'throw_file_not_found'
 49 |     },
 50 |     'Text::Parser::Exception::FileNotReadable' => {
 51 |         isa         => 'Text::Parser::Exception',
 52 |         description => 'File not readable',
 53 |         alias       => 'throw_file_not_readable'
 54 |     },
 55 |     'Text::Parser::Exception::InvalidFileHandle' => {
 56 |         isa         => 'Text::Parser::Exception',
 57 |         description => 'Bad argument supplied to filehandle()',
 58 |         alias       => 'throw_invalid_filehandle'
 59 |     },
 60 |     'Text::Parser::Exception::InvalidFilename' => {
 61 |         isa         => 'Text::Parser::Exception',
 62 |         description => 'Bad argument supplied to filename()',
 63 |         alias       => 'throw_bad_filename'
 64 |     },
 65 |     'Text::Parser::Exception::FileCantOpen' => {
 66 |         isa         => 'Text::Parser::Exception',
 67 |         description => 'Error opening file',
 68 |         alias       => 'throw_cant_open'
 69 |     },
 70 |     'Text::Parser::Exception::BadReadInput' => {
 71 |         isa => 'Text::Parser::Exception',
 72 |         description =>
 73 |             'The user called read() method with an unsupported type of input',
 74 |         alias => 'throw_bad_input_to_read',
 75 |     },
 76 | );
 77 | 
 78 | use Try::Tiny;
 79 | use Scalar::Util 'openhandle';
 80 | 
 81 | =method new
 82 | 
 83 | Takes no arguments. Constructor.
 84 | 
 85 |     my $parser = Text::Parser->new();
 86 | 
 87 | This C<$parser> variable will be used in examples below.
 88 | 
 89 | =cut
 90 | 
 91 | sub new {
 92 |     my $pkg = shift;
 93 |     bless {}, $pkg;
 94 | }
 95 | 
 96 | =method read
 97 | 
 98 | Takes zero or one argument which could be a string containing the name of the file, or a filehandle reference or a C<GLOB> (e.g. C<\*STDIN>). Throws an exception if filename/C<GLOB> provided is either non-existent or cannot be read for any reason.
 99 | 
100 | B<Note:> Normally if you provide the C<GLOB> of a file opened for write, some Operating Systems allow reading from it too, and some don't. Read the documentation for C<L<filehandle|/filehandle>> for more on this.
101 | 
102 |     $parser->read($filename);
103 | 
104 |     # The above is equivalent to the following
105 |     $parser->filename($filename);
106 |     $parser->read();
107 | 
108 |     # You can also read from a previously opened file handle directly
109 |     $parser->filehandle(\*STDIN);
110 |     $parser->read();
111 | 
112 | Returns once all records have been read or if an exception is thrown for any parsing errors, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method.
113 | 
114 | If you provide a string file name as input, the function will handle all C<open> and C<close> operations on files even if any exception is thrown, or if the reading has been aborted. But if you pass a file handle C<GLOB> instead, then the file handle won't be closed and it will be the responsibility of the calling program to close the filehandle.
115 | 
116 |     $parser->read('myfile.txt'); # Will handle open, parsing, and closing of file automatically.
117 | 
118 |     open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
119 |     $parser->read(\*MYFH);       # Will not close MYFH and it is the respo
120 |     close MYFH;
121 | 
122 | When you do read a new file or input stream with this method, you will lose all the records stored from the previous read operation. So this means that if you want to read a different file with the same parser object, (unless you don't care about the records from the last file you read) you should use the C<L<get_records|/get_records>> method to retrieve all the read records before parsing a new file. So all those calls to C<read> in the example above were parsing three different files, and each successive call overwrote the records from the previous call.
123 | 
124 |     $parser->read($file1);
125 |     my (@records) = $parser->get_records();
126 | 
127 |     $parser->read(\*STDIN);
128 |     my (@stdin) = $parser->get_records();
129 | 
130 | B<Inheritance Recommendation:> To extend the class to other file formats, override C<L<save_record|/save_record>> instead of this one.
131 | 
132 | =head3 Future Enhancement
133 | 
134 | I<At present the C<read> method takes only two possible inputs argument types, either a file name, or a file handle. In future this may be enhanced to read from sockets, subroutines, or even just a block of memory (a string reference). Suggestions for other forms of input are welcome.>
135 | 
136 | =cut
137 | 
138 | sub read {
139 |     my ( $self, $input ) = @_;
140 |     return if not $self->__is_file_known_or_opened($input);
141 |     $self->__store_check_input($input);
142 |     $self->__read_and_close_filehandle();
143 | }
144 | 
145 | sub __store_check_input {
146 |     my ( $self, $input ) = @_;
147 |     return                           if not defined $input;
148 |     return $self->filename($input)   if ref($input) eq '';
149 |     return $self->filehandle($input) if ref($input) eq 'GLOB';
150 |     __throw_bad_input_to_read( ref($input) );
151 | }
152 | 
153 | sub __throw_bad_input_to_read {
154 |     throw_bad_input_to_read error => 'Unexpected ' . shift
155 |         . ' type input to read() ; must be either string filename or GLOB';
156 | }
157 | 
158 | sub __is_file_known_or_opened {
159 |     my ( $self, $fname ) = @_;
160 |     return 0 if not defined $fname and not exists $self->{__filehandle};
161 |     return 0 if defined $fname and not $fname;
162 |     return 1;
163 | }
164 | 
165 | sub __read_and_close_filehandle {
166 |     my $self = shift;
167 |     $self->__init_read_fh;
168 |     $self->__read_file_handle;
169 |     $self->__close_file;
170 | }
171 | 
172 | sub __init_read_fh {
173 |     my $self = shift;
174 |     $self->lines_parsed(0);
175 |     $self->{__bytes_read} = 0;
176 |     delete $self->{__records} if exists $self->{__records};
177 |     delete $self->{__abort_reading};
178 | }
179 | 
180 | sub __read_file_handle {
181 |     my $self = shift;
182 |     my $fh   = $self->filehandle();
183 |     while (<$fh>) {
184 |         last if not $self->__parse_line_and_next($_);
185 |     }
186 | }
187 | 
188 | sub __parse_line_and_next {
189 |     my $self = shift;
190 |     $self->lines_parsed( $self->lines_parsed + 1 );
191 |     $self->__try_to_parse(shift);
192 |     return not exists $self->{__abort_reading};
193 | }
194 | 
195 | sub __try_to_parse {
196 |     my ( $self, $line ) = @_;
197 |     try { $self->save_record($line); }
198 |     catch {
199 |         $self->__close_file;
200 |         $_->rethrow;
201 |     };
202 | }
203 | 
204 | sub __close_file {
205 |     my $self = shift;
206 |     return if not exists $self->{__filename};
207 |     close $self->{__filehandle};
208 |     delete $self->{__filehandle};
209 | }
210 | 
211 | =method filename
212 | 
213 | Takes zero or one string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
214 | 
215 |     print "Last read ", $parser->filename, "\n";
216 | 
217 | The file name is "persistent" in the object. Meaning, even after you have called C<L<read|/read>> once, it still remembers the file name. So you can do this:
218 | 
219 |     $parser->read(shift @ARGV);
220 |     print $parser->filename(), ":\n", "=" x (length($parser->filename())+1), "\n", $parser->get_records(), "\n";
221 | 
222 | But if you do a C<read> with a filehandle as argument, you'll see that the last filename is lost - which makes sense.
223 | 
224 |     $parser->read(\*MYFH);
225 |     print "Last file name is lost\n" if not defined $parser->filename();
226 | 
227 | =cut
228 | 
229 | sub filename {
230 |     my $self = shift;
231 |     $self->__open_file( $self->__is_readable_file(shift) ) if scalar(@_);
232 |     return ( exists $self->{__filename} ) ? $self->{__filename} : undef;
233 | }
234 | 
235 | sub __is_readable_file {
236 |     my ( $self, $fname ) = @_;
237 |     throw_bad_filename( error => "$fname is not a string" )
238 |         if ref($fname) ne '';
239 |     throw_file_not_found( error => "$fname is not a file" )
240 |         if not -f $fname;
241 |     throw_file_not_readable( error => "$fname is not readable" )
242 |         if not -r $fname;
243 |     return $fname;
244 | }
245 | 
246 | sub __open_file {
247 |     my ( $self, $fname ) = @_;
248 |     open my $fh, "<$fname"
249 |         or throw_cant_open( error => "Error while opening file $fname" );
250 |     $self->__close_file if exists $self->{__filehandle};
251 |     $self->{__filename}   = $fname;
252 |     $self->{__filehandle} = $fh;
253 | }
254 | 
255 | =method filehandle
256 | 
257 | Takes zero or one C<GLOB> argument and saves it for future a C<L<read|/read>> call. Returns the filehandle last saved, or C<undef> if none was saved. Remember that after a successful C<read> call, filehandles are lost.
258 | 
259 |     my $fh = $parser->filehandle();
260 | 
261 | B<Note:> As such there is a check to ensure one is not supplying a write-only filehandle. For example, if you specify the filehandle of a write-only file or if the file is opened for write and you cannot read from it. The weird thing is that some of the standard filehandles like C<STDOUT> don't behave uniformly across all platforms. On most POSIX platforms, C<STDOUT> is readable. On such platforms you will not get any exceptions if you try to do this:
262 | 
263 |     $parser->filehandle(\*STDOUT);  ## Works on many POSIX platforms
264 |                                     ## Throws exception on others
265 | 
266 | Like in the case of C<L<filename|/filename>> method, if after you C<read> with a filehandle, you call C<read> again, this time with a file name, the last filehandle is lost.
267 | 
268 |     my $lastfh = $parser->filehandle();
269 |     ## Will return STDOUT
270 |     
271 |     $parser->read('another.txt');
272 |     print "No filehandle saved any more\n" if not defined $parser->filehandle();
273 | 
274 | =cut
275 | 
276 | sub filehandle {
277 |     my ( $self, $fhref ) = @_;
278 |     $self->__save_file_handle($fhref) if $self->__check_file_handle($fhref);
279 |     return ( exists $self->{__filehandle} ) ? $self->{__filehandle} : undef;
280 | }
281 | 
282 | sub __save_file_handle {
283 |     my ( $self, $fhref ) = @_;
284 |     $self->{__filehandle} = $$fhref;
285 |     delete $self->{__filename} if exists $self->{__filename};
286 |     $self->{__size} = ( stat $$fhref )[7];
287 | }
288 | 
289 | sub __check_file_handle {
290 |     my ( $self, $fhref ) = @_;
291 |     return 0 if not defined $fhref;
292 |     throw_invalid_filehandle( error => "$fhref is not a valid filehandle" )
293 |         if ref($fhref) ne 'GLOB';
294 |     throw_file_not_readable( error => "$$fhref is a closed filehandle" )
295 |         if not defined openhandle($fhref);
296 |     throw_file_not_readable(
297 |         error => "The filehandle $$fhref is not readable" )
298 |         if not -r $$fhref;
299 |     return 1;
300 | }
301 | 
302 | =method lines_parsed
303 | 
304 | Takes no arguments. Returns the number of lines last parsed. A line is reckoned when the C<\n> character is encountered.
305 | 
306 |     print $parser->lines_parsed, " lines were parsed\n";
307 | 
308 | The value is auto-updated during the execution of C<L<read|/read>>. See L<this example|/"Example 2 : Error checking"> of how this can be used in derived classes.
309 | 
310 | Again the information in this is "persistent". You can also be assured that every time you call C<read>, the value be auto-reset before parsing.
311 | 
312 | =cut
313 | 
314 | sub lines_parsed {
315 |     my $self = shift;
316 |     return $self->{__current_line} = shift if @_;
317 |     return ( exists $self->{__current_line} ) ? $self->{__current_line} : 0;
318 | }
319 | 
320 | =method save_record
321 | 
322 | Takes exactly one argument and that is saved as a record. Additional arguments are ignored. If no arguments are passed, then C<undef> is stored as a record.
323 | 
324 | In an application that uses a text parser, you will most-likely never call this method directly. It is automatically called within C<L<read|/read>> for each line. In this base class C<Text::Parser>, C<save_record> is simply called with a string containing the raw line of text ; i.e. the line of text will not be C<chomp>ed or modified in any way. L<Here|/"Example 1 : A simple CSV Parser"> is a basic example.
325 | 
326 | Derived classes can decide to store records in a different form. A derived class could, for example, store the records in the form of hash references (so that when you use C<L<get_records|/get_records>>, you'd get an array of hashes), or maybe even another array reference (so when you use C<get_records> you'd get an array of arrays). The L<CSV parser example|/"Example 1 : A simple CSV Parser"> does the latter.
327 | 
328 | =cut
329 | 
330 | sub save_record {
331 |     my $self = shift;
332 |     $self->{__records} = [] if not defined $self->{__records};
333 |     push @{ $self->{__records} }, shift;
334 | }
335 | 
336 | =method abort_reading
337 | 
338 | Takes no arguments. Returns C<1>. You will probably never call this method in your main program.
339 | 
340 | This method is usually used only in the derived class. See L<this example|/"Example 3 : Aborting without errors">.
341 | 
342 | =cut
343 | 
344 | sub abort_reading {
345 |     my $self = shift;
346 |     return $self->{__abort_reading} = 1;
347 | }
348 | 
349 | =method get_records
350 | 
351 | Takes no arguments. Returns an array containing all the records saved by the parser.
352 | 
353 |     foreach my $record ( $parser->get_records ) {
354 |         $i++;
355 |         print "Record: $i: ", $record, "\n";
356 |     }
357 | 
358 | =cut
359 | 
360 | sub get_records {
361 |     my $self = shift;
362 |     return () if not exists $self->{__records};
363 |     return @{ $self->{__records} };
364 | }
365 | 
366 | =method last_record
367 | 
368 | Takes no arguments and returns the last saved record. Leaves the saved records untouched.
369 | 
370 |     my $last_rec = $parser->last_record;
371 | 
372 | =cut
373 | 
374 | sub last_record {
375 |     my $self = shift;
376 |     return undef if not exists $self->{__records};
377 |     my (@record) = @{ $self->{__records} };
378 |     return $record[$#record];
379 | }
380 | 
381 | =method pop_record
382 | 
383 | Takes no arguments and pops the last saved record.
384 | 
385 |     my $last_rec = $parser->pop_record;
386 |     $uc_last = uc $last_rec;
387 |     $parser->save_record($uc_last);
388 | 
389 | =cut
390 | 
391 | sub pop_record {
392 |     my $self = shift;
393 |     return undef if not exists $self->{__records};
394 |     pop @{ $self->{__records} };
395 | }
396 | 
397 | =head1 EXAMPLES
398 | 
399 | The following examples should illustrate the use of inheritance to parse various types of text file formats.
400 | 
401 | =head2 Basic principle
402 | 
403 | Derived classes simply need to override one method : C<L<save_record|/save_record>>. With the help of that any arbitrary file format can be read. C<save_record> should interpret the format of the text and store it in some form by calling C<SUPER::save_record>. The C<main::> program will then use the records and create an appropriate data structure with it.
404 | 
405 | Notice that the creation of a data structure is not the objective of a parser. It is simply concerned with collecting data and arranging it in a form that can be used. That's all. Data structures can be created by a different part of your program using the data collected by your parser.
406 | 
407 | =head2 Example 1 : A simple CSV Parser
408 | 
409 | We will write a parser for a simple CSV file that reads each line and stores the records as array references. This example is oversimplified, and does B<not> handle embedded newlines.
410 | 
411 |     package Text::Parser::CSV;
412 |     use parent 'Text::Parser';
413 |     use Text::CSV;
414 | 
415 |     my $csv;
416 |     sub save_record {
417 |         my ($self, $line) = @_;
418 |         $csv //= Text::CSV->new({ binary => 1, auto_diag => 1});
419 |         $csv->parse($line);
420 |         $self->SUPER::save_record([$csv->fields]);
421 |     }
422 | 
423 | That's it! Now in C<main::> you can write something like this:
424 | 
425 |     use Text::Parser::CSV;
426 |     
427 |     my $csvp = Text::Parser::CSV->new();
428 |     $csvp->read(shift @ARGV);
429 |     foreach my $aref ($csvp->get_records) {
430 |         my (@arr) = @{$aref};
431 |         print "@arr\n";
432 |     }
433 | 
434 | The above program reads the content of a given CSV file and prints the content out in space-separated form.
435 | 
436 | =head2 Example 2 : Error checking
437 | 
438 | It is easy to add any error checks using exceptions. One of the easiest ways to do this is to C<use L<Exception::Class>>. We'll modify the CSV parser above to demonstrate that.
439 | 
440 |     package Text::Parser::CSV;
441 |     use Exception::Class (
442 |         'Text::Parser::CSV::Error', 
443 |         'Text::Parser::CSV::TooManyFields' => {
444 |             isa => 'Text::Parser::CSV::Error',
445 |         },
446 |     );
447 |     
448 |     use parent 'Text::Parser';
449 |     use Text::CSV;
450 | 
451 |     my $csv;
452 |     sub save_record {
453 |         my ($self, $line) = @_;
454 |         $csv //= Text::CSV->new({ binary => 1, auto_diag => 1});
455 |         $csv->parse($line);
456 |         my @fields = $csv->fields;
457 |         $self->{__csv_header} = \@fields if not scalar($self->get_records);
458 |         Text::Parser::CSV::TooManyFields->throw(error => "Too many fields on line #" . $self->lines_parsed)
459 |             if scalar(@fields) > scalar(@{$self->{__csv_header}});
460 |         $self->SUPER::save_record(\@fields);
461 |     }
462 | 
463 | The C<Text::Parser> class will C<close> all filehandles automatically as soon as an exception is thrown from C<save_record>. You can catch the exception in C<main::> as you would normally, by C<use>ing C<L<Try::Tiny>> or other such class.
464 | 
465 | =head2 Example 3 : Aborting without errors
466 | 
467 | We can also abort parsing a text file without throwing an exception. This could be if we got the information we needed. For example:
468 | 
469 |     package Text::Parser::SomeFile;
470 |     use parent 'Text::Parser';
471 | 
472 |     sub save_record {
473 |         my ($self, $line) = @_;
474 |         my ($leading, $rest) = split /\s+/, $line, 2;
475 |         return $self->abort_reading() if $leading eq '**ABORT';
476 |         return $self->SUPER::save_record($line);
477 |     }
478 | 
479 | In this derived class, we have a parser C<Text::Parser::SomeFile> that would save each line as a record, but would abort reading the rest of the file as soon as it reaches a line with C<**ABORT> as the first word. When this parser is given the following file as input:
480 | 
481 |     somefile.txt:
482 | 
483 |     Some text is here.
484 |     More text here.
485 |     **ABORT reading
486 |     This text is not read
487 |     This text is not read
488 |     This text is not read
489 |     This text is not read
490 | 
491 | You can now write a program as follows:
492 | 
493 |     use Text::Parser::SomeFile;
494 | 
495 |     my $par = Text::Parser::SomeFile->new();
496 |     $par->read('somefile.txt');
497 |     print $par->get_records(), "\n";
498 | 
499 | The output will be:
500 | 
501 |     Some text is here.
502 |     More text here.
503 | 
504 | =cut
505 | 
506 | 1;
507 | 


--------------------------------------------------------------------------------