├── .gitignore ├── t ├── text-simple.txt ├── example.sp ├── dummy-data.csv ├── dirty-data.csv ├── Text-Parser-pod-01.t ├── Text-Parser-pod-02.t ├── Text-Parser-pod-04.t ├── Spice-Parser.t ├── Text-Parser-csv-02.t ├── Text-Parser-pod-03.t ├── Text-Parser-csv-03.t └── Text-Parser.t ├── .whitesource ├── .travis.yml ├── weaver.ini ├── CONTRIBUTING.md ├── dist.ini ├── Changes ├── README.pod └── lib └── Text └── Parser.pm /.gitignore: -------------------------------------------------------------------------------- 1 | Text-Parser*.tar.gz 2 | Text-Parser*/ 3 | -------------------------------------------------------------------------------- /t/text-simple.txt: -------------------------------------------------------------------------------- 1 | This is a file with one line 2 | -------------------------------------------------------------------------------- /t/example.sp: -------------------------------------------------------------------------------- 1 | Minst net1 2 | + net2 net3 3 | + net4 nmos l=0.09u 4 | + w=0.13u 5 | .END 6 | 7 | *This line is not read 8 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "generalSettings": { 3 | "shouldScanRepo": true 4 | }, 5 | "checkRunSettings": { 6 | "vulnerableCheckRunConclusionLevel": "failure" 7 | } 8 | } -------------------------------------------------------------------------------- /t/dummy-data.csv: -------------------------------------------------------------------------------- 1 | NAME,DAY_OF_BIRTH,MONTH_OF_BIRTH,YEAR_OF_BIRTH 2 | Balaji,5,9,1981 3 | Elizabeth,16,2,1984 4 | Narayanan,19,5,1986 5 | Hemalatha,2,5,1956 6 | Ramasubramanian,5,2,1951 7 | -------------------------------------------------------------------------------- /t/dirty-data.csv: -------------------------------------------------------------------------------- 1 | NAME,DAY_OF_BIRTH,MONTH_OF_BIRTH,YEAR_OF_BIRTH 2 | Balaji,5,9,1981 3 | Elizabeth,16,2,1984 4 | Narayanan,19,5,1986,Aadhavi 5 | Hemalatha,2,5,1956 6 | Ramasubramanian,5,2,1951 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: perl 2 | perl: 3 | - "5.28" 4 | - "5.26" 5 | - "5.24" 6 | - "5.22" 7 | - "5.20" 8 | - "5.18" 9 | - "5.16" 10 | script: 11 | - perl Makefile.PL && make test 12 | notifications: 13 | recipients: 14 | - balajiram@cpan.org 15 | email: 16 | on_success: change 17 | on_failure: always 18 | -------------------------------------------------------------------------------- /t/Text-Parser-pod-01.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test::More; 4 | use Test::Output; 5 | use Test::Exception; 6 | 7 | BEGIN { use_ok 'Text::Parser'; } 8 | 9 | my $parser = Text::Parser->new(); 10 | (@ARGV) = qw(t/text-simple.txt); 11 | lives_ok { 12 | $parser->read(shift @ARGV); 13 | stdout_is { 14 | print $parser->get_records, "\n"; 15 | } 16 | "This is a file with one line\n\n", 'Prints the output to screen correctly'; 17 | } 18 | 'No errors in reading this file'; 19 | 20 | done_testing(); 21 | -------------------------------------------------------------------------------- /weaver.ini: -------------------------------------------------------------------------------- 1 | [@CorePrep] 2 | 3 | [-SingleEncoding] 4 | 5 | [Name] 6 | [Version] 7 | 8 | [Region / prelude] 9 | 10 | [Generic / SYNOPSIS] 11 | [Generic / RATIONALE] 12 | [Generic / DESCRIPTION] 13 | [Generic / OVERVIEW] 14 | 15 | [Collect / ATTRIBUTES] 16 | command = attr 17 | 18 | [Collect / METHODS] 19 | command = method 20 | 21 | [Collect / FUNCTIONS] 22 | command = func 23 | 24 | [Generic / EXAMPLES] 25 | [Leftovers] 26 | 27 | [Region / postlude] 28 | 29 | [Bugs] 30 | [Authors] 31 | [Legal] 32 | ;;[@Default] 33 | [Contributors] 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | Welcome! I am glad you consider it worth your time to contribute. Here are some guidelines to follow: 4 | 5 | 1. Open a new issue if it is a bug or even an enhancement, and describe what you are doing and why you think it is important. If it is a major change I would prefer that this is discussed first. 6 | 2. Make your code changes and send me a pull request. 7 | 3. Preferably add tests to ensure that the code coverage is high enough. You can use `Devel::Cover` to add tests that cover all your code changes. 8 | 9 | # Please: 10 | 11 | 1. Be Polite. 12 | 2. Avoid foul language. 13 | 3. Avoid sexist or racist comments. 14 | 4. Be open to the possibility that your change is not accepted (although I would rarely want to do that). 15 | -------------------------------------------------------------------------------- /t/Text-Parser-pod-02.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | package Text::Parser::CSV; 5 | use parent 'Text::Parser'; 6 | 7 | sub save_record { 8 | my ( $self, $line ) = @_; 9 | chomp $line; 10 | my (@fields) = split /,/, $line; 11 | $self->SUPER::save_record( \@fields ); 12 | } 13 | 14 | package main; 15 | use Test::More; 16 | use Test::Output; 17 | use Test::Exception; 18 | 19 | my $csvp = Text::Parser::CSV->new(); 20 | lives_ok { 21 | $csvp->read('t/dummy-data.csv'); 22 | is_deeply( 23 | [ $csvp->get_records() ], 24 | [ [qw(NAME DAY_OF_BIRTH MONTH_OF_BIRTH YEAR_OF_BIRTH)], 25 | [qw(Balaji 5 9 1981)], 26 | [qw(Elizabeth 16 2 1984)], 27 | [qw(Narayanan 19 5 1986)], 28 | [qw(Hemalatha 2 5 1956)], 29 | [qw(Ramasubramanian 5 2 1951)], 30 | ], 31 | 'Got the records as expected' 32 | ); 33 | } 34 | 'No issues in reading the CSV file'; 35 | 36 | done_testing(); 37 | -------------------------------------------------------------------------------- /t/Text-Parser-pod-04.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test::More; 5 | use Test::Output; 6 | use Test::Exception; 7 | use Try::Tiny; 8 | 9 | BEGIN { use_ok 'Text::Parser'; } 10 | 11 | my $parser = Text::Parser->new(); 12 | isa_ok $parser, 'Text::Parser'; 13 | lives_ok { 14 | $parser->read('t/text-simple.txt'); 15 | } 16 | 'Parses a text file normally'; 17 | is( $parser->filename(), 't/text-simple.txt', 'Last file read' ); 18 | is( $parser->filehandle(), undef, 'Filehandle was closed' ); 19 | 20 | open MYFH, "read( \*MYFH ); 23 | } 24 | 'Reads the content again'; 25 | is( $parser->filename(), undef, 'The last file read is lost' ); 26 | close MYFH; 27 | isnt( $parser->filehandle(), undef, 'Retains the last filehandle read' ); 28 | throws_ok { 29 | $parser->read(\*MYFH); 30 | } 'Text::Parser::Exception', 'Trying to read a closed filehandle'; 31 | isnt( $parser->filehandle(), undef, 'Retains the last filehandle read' ); 32 | 33 | done_testing(); 34 | -------------------------------------------------------------------------------- /t/Spice-Parser.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | package SpiceParser; 5 | use parent 'Text::Parser'; 6 | 7 | use constant { 8 | SPICE_LINE_CONTD => qr/^[+]\s*/, 9 | SPICE_END_FILE => qr/^\.end/i, 10 | }; 11 | 12 | sub save_record { 13 | my ( $self, $line ) = @_; 14 | return $self->__spice_line_contd($line) if $line =~ SPICE_LINE_CONTD; 15 | return $self->abort_reading() if $line =~ SPICE_END_FILE; 16 | $self->SUPER::save_record($line); 17 | } 18 | 19 | sub __spice_line_contd { 20 | my ( $self, $line ) = @_; 21 | $line =~ s/^[+]\s*//; 22 | my $last_rec = $self->pop_record; 23 | chomp $last_rec; 24 | $self->SUPER::save_record( $last_rec . ' ' . $line ); 25 | } 26 | 27 | package main; 28 | use Test::More; 29 | use Test::Exception; 30 | 31 | my $sp = new SpiceParser; 32 | 33 | lives_ok { $sp->read('t/example.sp'); } 'Works fine'; 34 | is( scalar( $sp->get_records() ), 1, '1 record saved' ); 35 | is( $sp->lines_parsed(), 5, '5 lines parsed' ); 36 | is( $sp->last_record, "Minst net1 net2 net3 net4 nmos l=0.09u w=0.13u\n" ); 37 | 38 | done_testing; 39 | -------------------------------------------------------------------------------- /t/Text-Parser-csv-02.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | package Text::Parser::CSV; 5 | use parent 'Text::Parser'; 6 | use Text::CSV; 7 | 8 | # Note that this approach is still unsafe for embedded newlines 9 | my $csv = Text::CSV->new({ binary => 1, auto_diag => 1 }); 10 | 11 | sub save_record { 12 | my ( $self, $line ) = @_; 13 | my @fields = $csv->parse($line) ? $csv-> fields : (); 14 | $self->SUPER::save_record( \@fields ); 15 | } 16 | 17 | package main; 18 | use Test::More; 19 | use Test::Output; 20 | use Test::Exception; 21 | 22 | my $csvp = Text::Parser::CSV->new(); 23 | lives_ok { 24 | $csvp->read('t/dummy-data.csv'); 25 | is_deeply( 26 | [ $csvp->get_records() ], 27 | [ [qw(NAME DAY_OF_BIRTH MONTH_OF_BIRTH YEAR_OF_BIRTH)], 28 | [qw(Balaji 5 9 1981)], 29 | [qw(Elizabeth 16 2 1984)], 30 | [qw(Narayanan 19 5 1986)], 31 | [qw(Hemalatha 2 5 1956)], 32 | [qw(Ramasubramanian 5 2 1951)], 33 | ], 34 | 'Got the records as expected' 35 | ); 36 | } 37 | 'No issues in reading the CSV file'; 38 | 39 | done_testing(); 40 | -------------------------------------------------------------------------------- /t/Text-Parser-pod-03.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | package Text::Parser::CSV; 5 | use parent 'Text::Parser'; 6 | use Exception::Class ( 'Text::Parser::CSV::Error', 7 | 'Text::Parser::CSV::TooManyFields' => 8 | { isa => 'Text::Parser::CSV::Error', }, ); 9 | 10 | sub save_record { 11 | my ( $self, $line ) = @_; 12 | chomp $line; 13 | my (@fields) = split /,/, $line; 14 | $self->{__csv_header} = \@fields if not scalar( $self->get_records ); 15 | Text::Parser::CSV::TooManyFields->throw( 16 | error => "Too many fields on line #" . $self->lines_parsed ) 17 | if scalar(@fields) > scalar( @{ $self->{__csv_header} } ); 18 | $self->SUPER::save_record( \@fields ); 19 | } 20 | 21 | package main; 22 | use Test::More; 23 | use Test::Output; 24 | use Test::Exception; 25 | use Try::Tiny; 26 | 27 | sub test_read_dirty_csv { 28 | my $csvp = shift; 29 | try { 30 | $csvp->read('t/dirty-data.csv'); 31 | } catch { 32 | isa_ok( $_, 'Text::Parser::CSV::TooManyFields', 33 | 'Correct error type' ); 34 | $_->rethrow() if not $_->isa('Text::Parser::CSV::TooManyFields'); 35 | print STDERR $_->error, "\n"; 36 | }; 37 | } 38 | 39 | my $csvp = Text::Parser::CSV->new(); 40 | lives_ok { 41 | stderr_is { 42 | test_read_dirty_csv($csvp); 43 | } 44 | "Too many fields on line #4\n", 'Displays correct error message'; 45 | } 46 | 'Exception caught properly ; no other exceptions'; 47 | 48 | done_testing(); 49 | -------------------------------------------------------------------------------- /t/Text-Parser-csv-03.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | package Text::Parser::CSV; 5 | use parent 'Text::Parser'; 6 | use Exception::Class ( 'Text::Parser::CSV::Error', 7 | 'Text::Parser::CSV::TooManyFields' => 8 | { isa => 'Text::Parser::CSV::Error', }, ); 9 | use Text::CSV; 10 | 11 | # Note that this approach is still unsafe for embedded newlines 12 | my $csv = Text::CSV->new({ binary => 1, auto_diag => 1 }); 13 | 14 | sub save_record { 15 | my ( $self, $line ) = @_; 16 | my @fields = $csv->parse($line) ? $csv-> fields : (); 17 | $self->{__csv_header} = \@fields if not scalar( $self->get_records ); 18 | Text::Parser::CSV::TooManyFields->throw( 19 | error => "Too many fields on line #" . $self->lines_parsed ) 20 | if scalar(@fields) > scalar( @{ $self->{__csv_header} } ); 21 | $self->SUPER::save_record( \@fields ); 22 | } 23 | 24 | package main; 25 | use Test::More; 26 | use Test::Output; 27 | use Test::Exception; 28 | use Try::Tiny; 29 | 30 | sub test_read_dirty_csv { 31 | my $csvp = shift; 32 | try { 33 | $csvp->read('t/dirty-data.csv'); 34 | } catch { 35 | isa_ok( $_, 'Text::Parser::CSV::TooManyFields', 36 | 'Correct error type' ); 37 | $_->rethrow() if not $_->isa('Text::Parser::CSV::TooManyFields'); 38 | print STDERR $_->error, "\n"; 39 | }; 40 | } 41 | 42 | my $csvp = Text::Parser::CSV->new(); 43 | lives_ok { 44 | stderr_is { 45 | test_read_dirty_csv($csvp); 46 | } 47 | "Too many fields on line #4\n", 'Displays correct error message'; 48 | } 49 | 'Exception caught properly ; no other exceptions'; 50 | 51 | done_testing(); 52 | -------------------------------------------------------------------------------- /dist.ini: -------------------------------------------------------------------------------- 1 | name = Text-Parser 2 | author = Balaji Ramasubramanian 3 | license = Perl_5 4 | copyright_holder = Balaji Ramasubramanian 5 | copyright_year = 2018 6 | 7 | 8 | ;; Determine the next version from the Git repo 9 | [Git::NextVersion] 10 | first_version = 0.001 ; this is the default 11 | version_by_branch = 0 ; this is the default 12 | version_regexp = ^v(.+)$ ; this is the default 13 | [PkgVersion] 14 | use_package = 1 15 | 16 | ;; All plugins for POD 17 | ;; PodWeaver should come before PodVersion because PodVersion looks for =head1 NAME 18 | ;; Check weaver.ini for order of sections in POD 19 | [Bugtracker] 20 | [Git::Contributors] 21 | remove=balajiram 22 | include_releaser=0 23 | 24 | [PodWeaver] 25 | [PodVersion] 26 | 27 | ;; README.pod to be created 28 | [ReadmeAnyFromPod] 29 | type = pod 30 | filename = README.pod 31 | location = root 32 | 33 | ;; Tests 34 | [PodCoverageTests] 35 | [PodSyntaxTests] 36 | [Test::Kwalitee] 37 | [Test::MinimumVersion] 38 | [Test::CPAN::Changes] 39 | [MetaTests] 40 | 41 | ;; Prerequisites for Makefile.PL 42 | [MinimumPerl] 43 | [AutoPrereqs] 44 | 45 | ;; [NextRelease] should precede [@Git] http://dzil.org/tutorial/vcs-git.html 46 | ;; Git related 47 | [NextRelease] 48 | [@Git] 49 | changelog = Changes ; this is the default 50 | allow_dirty = dist.ini ; see Git::Check... 51 | allow_dirty = Changes ; ... and Git::Commit 52 | allow_dirty = README.pod ; ... and Git::Commit 53 | commit_msg = v%v%n%n%c ; see Git::Commit 54 | tag_format = v%v ; see Git::Tag 55 | tag_message = v%v ; see Git::Tag 56 | push_to = origin ; see Git::Push 57 | remotes_must_exist = 1 58 | 59 | ;; Release related 60 | [GithubMeta] 61 | [MetaJSON] 62 | [MetaProvides::Package] 63 | [CheckChangesHasContent] 64 | [@Basic] 65 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | Revision history for {{$dist->name}} 2 | 3 | {{$NEXT}} 4 | - Skip tests of unreadable files for MSWin32. 5 | - A safer CSV example in Example 1 6 | - Grammatical/typo fixes in POD. 7 | 8 | 0.752 2019-01-02 20:25:16-08:00 America/Los_Angeles 9 | - One test fails for superuser. Fixing that. 10 | - Known issue in MSWin32 seems related to the same thing. Don't know if it gets fixed. 11 | - Made POD less verbose. 12 | 13 | 0.751 2019-01-01 20:30:37-08:00 America/Los_Angeles 14 | - Documentation looks better. Made links where it makes sense. Re-arranged the information better. 15 | 16 | 0.750 2019-01-01 19:37:50-08:00 America/Los_Angeles 17 | - Correct problems in test for Win32 platform compatibility. 18 | - Improve documentation to give examples of what all the functions do. 19 | - Changed the spec for filehandle() and read() 20 | - Added tests to cover the new spec. 21 | 22 | 0.700 2019-01-01 13:20:43-08:00 America/Los_Angeles 23 | - Some operating systems can read from STDOUT and some cannot. Same goes for output file handles. 24 | - Improved documentation. 25 | - Improved code coverage using Devel::Cover for the first time. We now have 99.5% coverage. 26 | - Added a new method called abort_reading(). 27 | 28 | 0.502 2018-12-29 08:18:06-08:00 America/Los_Angeles 29 | - STDOUT and other output files don't uniformly respond to the -r test on all operating systems. This will need more work. 30 | 31 | 0.501 2018-12-23 20:51:09-08:00 America/Los_Angeles 32 | - This will be a known issue for V0.500 and V0.501, until fixed: 33 | - The test for checking that filehandle is read-only is actually not working. Fixing the tests for now to mark the test as TODO. 34 | 35 | 0.500 2018-12-23 20:14:10-08:00 America/Los_Angeles 36 | - Renamed this class as Text::Parser after RT#127067 by James Keenan 37 | - Added the ability to parse text from filehandles, thus the name Text::Parser and not Text::File::Parser 38 | - Added new method filehandle just to provide interface similar to filename. 39 | - Indicated in POD that Text::Parser supersedes TextFileParser. 40 | 41 | 0.204 2018-08-08 20:37:32-07:00 America/Los_Angeles 42 | - Removed use of indirect method notation in POD 43 | 44 | 0.203 2018-08-08 17:43:11-07:00 America/Los_Angeles 45 | - Clearer documentation 46 | 47 | 0.202 2018-08-08 00:03:52-07:00 America/Los_Angeles 48 | - Added bugtracking info to POD 49 | 50 | 0.201 2018-08-07 23:19:45-07:00 America/Los_Angeles 51 | - Improved documentation 52 | - Removed the use Role::Tiny from the tests 53 | 54 | 0.200 2018-08-07 16:31:53-07:00 America/Los_Angeles 55 | - Changed versioning scheme 56 | - Updated documentation 57 | 58 | 0.1821907 2018-08-07 14:37:11-07:00 America/Los_Angeles 59 | - Removed record_list_pointer method 60 | - Updated documentation 61 | - Added tests using a Role::Tiny 62 | 63 | 0.1821905 2018-08-07 09:22:12-07:00 America/Los_Angeles 64 | - Added a new method pop_record 65 | - Updated POD and simplified examples 66 | - Added more tests 67 | 68 | 0.1821902 2018-08-06 23:33:10-07:00 America/Los_Angeles 69 | - Added the first tests 70 | - Removed method make_sense_of_line. Derived classes override only save_record 71 | - Updated documentation briefly. 72 | 73 | 0.1821900 2018-08-06 20:41:42-07:00 America/Los_Angeles 74 | - Put your comment here 75 | -------------------------------------------------------------------------------- /t/Text-Parser.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test::More; 5 | use Test::Exception; 6 | 7 | BEGIN { use_ok 'Text::Parser'; } 8 | 9 | my $fname = 'text-simple.txt'; 10 | 11 | my $pars = Text::Parser->new(); 12 | is( $pars->filename(), undef, 'No filename specified so far' ); 13 | lives_ok { is( $pars->filehandle(), undef, 'No filehandles' ); } 14 | 'This should not die, just return undef'; 15 | throws_ok { $pars->filehandle('bad argument'); } 'Text::Parser::Exception', 16 | 'filehandle() will take only a GLOB input'; 17 | throws_ok { $pars->filename( { a => 'b' } ); } 'Text::Parser::Exception', 18 | 'filename() will take only string as input'; 19 | throws_ok { $pars->filename($fname) } 'Text::Parser::Exception', 20 | 'No file by this name'; 21 | throws_ok { $pars->read($fname); } 'Text::Parser::Exception', 22 | 'Throws exception for non-existent file'; 23 | 24 | lives_ok { $pars->read(); } 'Returns doing nothing'; 25 | is( $pars->lines_parsed, 0, 'Nothing parsed' ); 26 | is_deeply( [ $pars->get_records ], [], 'No data recorded' ); 27 | is( $pars->last_record, undef, 'No records' ); 28 | is( $pars->pop_record, undef, 'Nothing on stack' ); 29 | 30 | lives_ok { $pars->read(''); } 'Reads no file ; returns doing nothing'; 31 | is( $pars->filename(), undef, 'No file name still' ); 32 | is( $pars->lines_parsed(), 0, 'Nothing parsed again' ); 33 | 34 | SKIP: { 35 | skip 'Tests not meant for root user', 2 unless $>; 36 | skip 'Tests wont work on MSWin32', 2 unless $^O ne 'MSWin32'; 37 | open OFILE, ">t/unreadable.txt"; 38 | print OFILE "This is unreadable\n"; 39 | close OFILE; 40 | chmod 0200, 't/unreadable.txt'; 41 | throws_ok { $pars->filename('t/unreadable.txt'); } 42 | 'Text::Parser::Exception', 'This file cannot be read'; 43 | is( $pars->filename(), undef, 'Still no file has been read so far' ); 44 | unlink 't/unreadable.txt'; 45 | } 46 | 47 | my $content = "This is a file with one line\n"; 48 | lives_ok { $pars->filename( 't/' . $fname ); } 'Now I can open the file'; 49 | lives_ok { $pars->read; } 'Reads the file now'; 50 | is_deeply( [ $pars->get_records ], [$content], 'Get correct data' ); 51 | is( $pars->lines_parsed, 1, '1 line parsed' ); 52 | is( $pars->last_record, $content, 'Worked' ); 53 | is( $pars->pop_record, $content, 'Popped' ); 54 | is( $pars->lines_parsed, 1, 'Still lines_parsed returns 1' ); 55 | is( $pars->filename(), 't/' . $fname, 'Last file read' ); 56 | 57 | open OUTFILE, ">example"; 58 | if ( -r OUTFILE ) { 59 | lives_ok { $pars->filehandle( \*OUTFILE ); } 60 | 'In some systems output file handles are also readable! Your system is one of those. This could be a potential security hole.'; 61 | is( $pars->filename(), undef, 'Last file read is not available anymore' ); 62 | } else { 63 | throws_ok { $pars->filehandle( \*OUTFILE ); } 'Text::Parser::Exception', 64 | 'Your system is strict and will not read from an output filehandle. This is potentially good for security.'; 65 | } 66 | print OUTFILE "Simple text"; 67 | close OUTFILE; 68 | open INFILE, "read( \*INFILE ); 71 | is_deeply( [ $pars->get_records() ], 72 | ['Simple text'], 'Read correct data in file' ); 73 | } 74 | 'Exercising the ability to read from file handles directly'; 75 | unlink 'example'; 76 | open OUTFILE, ">example"; 77 | if ( -r OUTFILE ) { 78 | chmod 0200, 'example'; 79 | throws_ok { $pars->filehandle( \*OUTFILE ); } 'Text::Parser::Exception', 80 | 'Now this is not readable' 81 | if not -r OUTFILE; 82 | } 83 | chmod 0644, 'example'; 84 | close OUTFILE; 85 | unlink 'example'; 86 | 87 | ## Testing the reading from filehandles on STDOUT and STDIN 88 | if ( -r STDOUT ) { 89 | lives_ok { $pars->filehandle( \*STDOUT ); } 90 | 'Some systems can read from STDOUT. Your system is one of them.'; 91 | } else { 92 | throws_ok { $pars->filehandle( \*STDOUT ); } 'Text::Parser::Exception', 93 | 'Your system is strict and will not read from STDOUT'; 94 | } 95 | lives_ok { $pars->filehandle( \*STDIN ); } 'No issues in reading from STDIN'; 96 | 97 | throws_ok { $pars->read( { a => 'b' } ); } 'Text::Parser::Exception', 98 | 'Invalid type of argument for read() method'; 99 | 100 | lives_ok { $pars->read( 't/' . $fname ); } 101 | 'reads the contents of a file without dying'; 102 | is( $pars->last_record, $content, 'Last record is correct' ); 103 | is( $pars->lines_parsed, 1, 'Read only one line' ); 104 | is_deeply( [ $pars->get_records ], [$content], 'Got correct file content' ); 105 | 106 | my $add = "This record is added"; 107 | lives_ok { $pars->save_record(); } 'Add nothing'; 108 | is( $pars->last_record, undef, 'Last record is undef' ); 109 | lives_ok { $pars->save_record($add); } 'Add another record'; 110 | is( $pars->lines_parsed, 1, 'Still only 1 line parsed' ); 111 | is( $pars->last_record, $add, 'Last added record' ); 112 | is_deeply( 113 | [ $pars->get_records ], 114 | [ $content, undef, $add ], 115 | 'But will contain all elements including an undef' 116 | ); 117 | 118 | is( $pars->pop_record, $add, 'Popped a record' ); 119 | is( $pars->lines_parsed, 1, 'Still only 1 line parsed' ); 120 | is( $pars->last_record, undef, 'There was an undef in between' ); 121 | is( $pars->pop_record, undef, 'Now undef is removed' ); 122 | is( $pars->last_record, $content, 'Now the last record is the one earlier' ); 123 | is_deeply( [ $pars->get_records ], 124 | [$content], 'Got correct content after pop' ); 125 | 126 | done_testing; 127 | -------------------------------------------------------------------------------- /README.pod: -------------------------------------------------------------------------------- 1 | =pod 2 | 3 | =encoding UTF-8 4 | 5 | =head1 NAME 6 | 7 | Text::Parser - Bare text parser, bundles other common "mundane" tasks. 8 | 9 | =head1 VERSION 10 | 11 | version 0.753 12 | 13 | =head1 SYNOPSIS 14 | 15 | use Text::Parser; 16 | 17 | my $parser = Text::Parser->new(); 18 | $parser->read(shift @ARGV); 19 | print $parser->get_records, "\n"; 20 | 21 | The above code reads the first command-line argument as a string, and assuming it is the name of a text file, it will print the content of the file to C. If the string is not the name of a text file it will throw an exception and exit. 22 | 23 | =head1 RATIONALE 24 | 25 | A simple text file parser should have to only specify the "grammar" it intends to interpret. Everything else, like Cing a file handle, tracking how many lines have been read, etc., should be "automatic". 26 | 27 | Unfortunately, most programmers write code that calls C, C, etc., and keep track of things that should have been simple features of every text file parser. And if they have to read multiple files, usually, all these calls are repeated. 28 | 29 | This class does all "mundane" operations like C, C, line-count, and storage/deletion/retrieval of records, etc. You don't have to bother with a lot of book-keeping when you write your next parser. Instead, just inherit this class and override one method (C>). And voila! you have a parser. Look at L to see how easy this can be. 30 | 31 | =head1 DESCRIPTION 32 | 33 | C is a bare-bones text parsing class. It is ignorant of the text format, and cannot recognize any grammars, but derived classes that inherit from it can specify this. They can do this usually by overriding just one of the methods in this class. 34 | 35 | Future versions are expected to include progress-bar support, parsing text from sockets, or a chunk of memory. All these software features are text-format independent and can be re-used in parsing any text format. Derived classes of C will be able to take advantage of these features. 36 | 37 | =head1 METHODS 38 | 39 | =head2 new 40 | 41 | Takes no arguments. Constructor. 42 | 43 | my $parser = Text::Parser->new(); 44 | 45 | This C<$parser> variable will be used in examples below. 46 | 47 | =head2 read 48 | 49 | Takes zero or one argument which could be a string containing the name of the file, or a filehandle reference or a C (e.g. C<\*STDIN>). Throws an exception if filename/C provided is either non-existent or cannot be read for any reason. 50 | 51 | B Normally if you provide the C of a file opened for write, some Operating Systems allow reading from it too, and some don't. Read the documentation for C> for more on this. 52 | 53 | $parser->read($filename); 54 | 55 | # The above is equivalent to the following 56 | $parser->filename($filename); 57 | $parser->read(); 58 | 59 | # You can also read from a previously opened file handle directly 60 | $parser->filehandle(\*STDIN); 61 | $parser->read(); 62 | 63 | Returns once all records have been read or if an exception is thrown for any parsing errors, or if reading has been aborted with the C> method. 64 | 65 | If you provide a string file name as input, the function will handle all C and C operations on files even if any exception is thrown, or if the reading has been aborted. But if you pass a file handle C instead, then the file handle won't be closed and it will be the responsibility of the calling program to close the filehandle. 66 | 67 | $parser->read('myfile.txt'); # Will handle open, parsing, and closing of file automatically. 68 | 69 | open MYFH, "read(\*MYFH); # Will not close MYFH and it is the respo 71 | close MYFH; 72 | 73 | When you do read a new file or input stream with this method, you will lose all the records stored from the previous read operation. So this means that if you want to read a different file with the same parser object, (unless you don't care about the records from the last file you read) you should use the C> method to retrieve all the read records before parsing a new file. So all those calls to C in the example above were parsing three different files, and each successive call overwrote the records from the previous call. 74 | 75 | $parser->read($file1); 76 | my (@records) = $parser->get_records(); 77 | 78 | $parser->read(\*STDIN); 79 | my (@stdin) = $parser->get_records(); 80 | 81 | B To extend the class to other file formats, override C> instead of this one. 82 | 83 | =head3 Future Enhancement 84 | 85 | I method takes only two possible inputs argument types, either a file name, or a file handle. In future this may be enhanced to read from sockets, subroutines, or even just a block of memory (a string reference). Suggestions for other forms of input are welcome.> 86 | 87 | =head2 filename 88 | 89 | Takes zero or one string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C if no file has been opened. 90 | 91 | print "Last read ", $parser->filename, "\n"; 92 | 93 | The file name is "persistent" in the object. Meaning, even after you have called C> once, it still remembers the file name. So you can do this: 94 | 95 | $parser->read(shift @ARGV); 96 | print $parser->filename(), ":\n", "=" x (length($parser->filename())+1), "\n", $parser->get_records(), "\n"; 97 | 98 | But if you do a C with a filehandle as argument, you'll see that the last filename is lost - which makes sense. 99 | 100 | $parser->read(\*MYFH); 101 | print "Last file name is lost\n" if not defined $parser->filename(); 102 | 103 | =head2 filehandle 104 | 105 | Takes zero or one C argument and saves it for future a C> call. Returns the filehandle last saved, or C if none was saved. Remember that after a successful C call, filehandles are lost. 106 | 107 | my $fh = $parser->filehandle(); 108 | 109 | B As such there is a check to ensure one is not supplying a write-only filehandle. For example, if you specify the filehandle of a write-only file or if the file is opened for write and you cannot read from it. The weird thing is that some of the standard filehandles like C don't behave uniformly across all platforms. On most POSIX platforms, C is readable. On such platforms you will not get any exceptions if you try to do this: 110 | 111 | $parser->filehandle(\*STDOUT); ## Works on many POSIX platforms 112 | ## Throws exception on others 113 | 114 | Like in the case of C> method, if after you C with a filehandle, you call C again, this time with a file name, the last filehandle is lost. 115 | 116 | my $lastfh = $parser->filehandle(); 117 | ## Will return STDOUT 118 | 119 | $parser->read('another.txt'); 120 | print "No filehandle saved any more\n" if not defined $parser->filehandle(); 121 | 122 | =head2 lines_parsed 123 | 124 | Takes no arguments. Returns the number of lines last parsed. A line is reckoned when the C<\n> character is encountered. 125 | 126 | print $parser->lines_parsed, " lines were parsed\n"; 127 | 128 | The value is auto-updated during the execution of C>. See L of how this can be used in derived classes. 129 | 130 | Again the information in this is "persistent". You can also be assured that every time you call C, the value be auto-reset before parsing. 131 | 132 | =head2 save_record 133 | 134 | Takes exactly one argument and that is saved as a record. Additional arguments are ignored. If no arguments are passed, then C is stored as a record. 135 | 136 | In an application that uses a text parser, you will most-likely never call this method directly. It is automatically called within C> for each line. In this base class C, C is simply called with a string containing the raw line of text ; i.e. the line of text will not be Ced or modified in any way. L is a basic example. 137 | 138 | Derived classes can decide to store records in a different form. A derived class could, for example, store the records in the form of hash references (so that when you use C>, you'd get an array of hashes), or maybe even another array reference (so when you use C you'd get an array of arrays). The L does the latter by example. 139 | 140 | =head2 abort_reading 141 | 142 | Takes no arguments. Returns C<1>. You will probably never call this method in your main program. 143 | 144 | This method is usually used only in the derived class. See L. 145 | 146 | =head2 get_records 147 | 148 | Takes no arguments. Returns an array containing all the records saved by the parser. 149 | 150 | foreach my $record ( $parser->get_records ) { 151 | $i++; 152 | print "Record: $i: ", $record, "\n"; 153 | } 154 | 155 | =head2 last_record 156 | 157 | Takes no arguments and returns the last saved record. Leaves the saved records untouched. 158 | 159 | my $last_rec = $parser->last_record; 160 | 161 | =head2 pop_record 162 | 163 | Takes no arguments and pops the last saved record. 164 | 165 | my $last_rec = $parser->pop_record; 166 | $uc_last = uc $last_rec; 167 | $parser->save_record($uc_last); 168 | 169 | =head1 EXAMPLES 170 | 171 | The following examples should illustrate the use of inheritance to parse various types of text file formats. 172 | 173 | =head2 Basic principle 174 | 175 | Derived classes simply need to override one method : C>. With the help of that any arbitrary file format can be read. C should interpret the format of the text and store it in some form by calling C. The C program will then use the records and create an appropriate data structure with it. 176 | 177 | Notice that the creation of a data structure is not the objective of a parser. It is simply concerned with collecting data and arranging it in a form that can be used. That's all. Data structures can be created by a different part of your program using the data collected by your parser. 178 | 179 | =head2 Example 1 : A simple CSV Parser 180 | 181 | We will write a parser for a simple CSV file that reads each line and stores the records as array references. This example is oversimplified, and does B handle embedded newlines. 182 | 183 | package Text::Parser::CSV; 184 | use parent 'Text::Parser'; 185 | use Text::CSV; 186 | 187 | my $csv; 188 | sub save_record { 189 | my ($self, $line) = @_; 190 | $csv //= Text::CSV->new({ binary => 1, auto_diag => 1}); 191 | $csv->parse($line); 192 | $self->SUPER::save_record([$csv->fields]); 193 | } 194 | 195 | That's it! Now in C you can write something like this: 196 | 197 | use Text::Parser::CSV; 198 | 199 | my $csvp = Text::Parser::CSV->new(); 200 | $csvp->read(shift @ARGV); 201 | foreach my $aref ($csvp->get_records) { 202 | my (@arr) = @{$aref}; 203 | print "@arr\n"; 204 | } 205 | 206 | The above program reads the content of a given CSV file and prints the content out in space-separated form. 207 | 208 | =head2 Example 2 : Error checking 209 | 210 | It is easy to add any error checks using exceptions. One of the easiest ways to do this is to C>. We'll modify the CSV parser above to demonstrate that. 211 | 212 | package Text::Parser::CSV; 213 | use Exception::Class ( 214 | 'Text::Parser::CSV::Error', 215 | 'Text::Parser::CSV::TooManyFields' => { 216 | isa => 'Text::Parser::CSV::Error', 217 | }, 218 | ); 219 | 220 | use parent 'Text::Parser'; 221 | use Text::CSV; 222 | 223 | my $csv; 224 | sub save_record { 225 | my ($self, $line) = @_; 226 | $csv //= Text::CSV->new({ binary => 1, auto_diag => 1}); 227 | $csv->parse($line); 228 | my @fields = $csv->fields; 229 | $self->{__csv_header} = \@fields if not scalar($self->get_records); 230 | Text::Parser::CSV::TooManyFields->throw(error => "Too many fields on line #" . $self->lines_parsed) 231 | if scalar(@fields) > scalar(@{$self->{__csv_header}}); 232 | $self->SUPER::save_record(\@fields); 233 | } 234 | 235 | The C class will C all filehandles automatically as soon as an exception is thrown from C. You can catch the exception in C as you would normally, by Cing C> or other such class. 236 | 237 | =head2 Example 3 : Aborting without errors 238 | 239 | We can also abort parsing a text file without throwing an exception. This could be if we got the information we needed. For example: 240 | 241 | package Text::Parser::SomeFile; 242 | use parent 'Text::Parser'; 243 | 244 | sub save_record { 245 | my ($self, $line) = @_; 246 | my ($leading, $rest) = split /\s+/, $line, 2; 247 | return $self->abort_reading() if $leading eq '**ABORT'; 248 | return $self->SUPER::save_record($line); 249 | } 250 | 251 | In this derived class, we have a parser C that would save each line as a record, but would abort reading the rest of the file as soon as it reaches a line with C<**ABORT> as the first word. When this parser is given the following file as input: 252 | 253 | somefile.txt: 254 | 255 | Some text is here. 256 | More text here. 257 | **ABORT reading 258 | This text is not read 259 | This text is not read 260 | This text is not read 261 | This text is not read 262 | 263 | You can now write a program as follows: 264 | 265 | use Text::Parser::SomeFile; 266 | 267 | my $par = Text::Parser::SomeFile->new(); 268 | $par->read('somefile.txt'); 269 | print $par->get_records(), "\n"; 270 | 271 | The output will be: 272 | 273 | Some text is here. 274 | More text here. 275 | 276 | =head1 BUGS 277 | 278 | Please report any bugs or feature requests on the bugtracker website 279 | L or by email 280 | to L. 281 | 282 | When submitting a bug or request, please include a test-file or a 283 | patch to an existing test-file that illustrates the bug or desired 284 | feature. 285 | 286 | =head1 AUTHOR 287 | 288 | Balaji Ramasubramanian 289 | 290 | =head1 COPYRIGHT AND LICENSE 291 | 292 | This software is copyright (c) 2018 by Balaji Ramasubramanian. 293 | 294 | This is free software; you can redistribute it and/or modify it under 295 | the same terms as the Perl 5 programming language system itself. 296 | 297 | =head1 CONTRIBUTOR 298 | 299 | =for stopwords H.Merijn Brand - Tux 300 | 301 | H.Merijn Brand - Tux 302 | 303 | =cut 304 | -------------------------------------------------------------------------------- /lib/Text/Parser.pm: -------------------------------------------------------------------------------- 1 | use warnings; 2 | use strict; 3 | 4 | package Text::Parser; 5 | 6 | # ABSTRACT: Bare text parser, bundles other common "mundane" tasks. 7 | 8 | use Exporter 'import'; 9 | our (@EXPORT_OK) = (); 10 | our (@EXPORT) = (@EXPORT_OK); 11 | 12 | =head1 SYNOPSIS 13 | 14 | use Text::Parser; 15 | 16 | my $parser = Text::Parser->new(); 17 | $parser->read(shift @ARGV); 18 | print $parser->get_records, "\n"; 19 | 20 | The above code reads the first command-line argument as a string, and assuming it is the name of a text file, it will print the content of the file to C. If the string is not the name of a text file it will throw an exception and exit. 21 | 22 | =head1 RATIONALE 23 | 24 | A simple text file parser should have to only specify the "grammar" it intends to interpret. Everything else, like Cing a file handle, tracking how many lines have been read, etc., should be "automatic". 25 | 26 | Unfortunately, most programmers write code that calls C, C, etc., and keep track of things that should have been simple features of every text file parser. And if they have to read multiple files, usually, all these calls are repeated. 27 | 28 | This class does all "mundane" operations like C, C, line-count, and storage/deletion/retrieval of records, etc. You don't have to bother with a lot of book-keeping when you write your next parser. Instead, just inherit this class and override one method (C>). And voila! you have a parser. Look at L to see how easy this can be. 29 | 30 | =head1 DESCRIPTION 31 | 32 | C is a bare-bones text parsing class. It is ignorant of the text format, and cannot recognize any grammars, but derived classes that inherit from it can specify this. They can do this usually by overriding just one of the methods in this class. 33 | 34 | Future versions are expected to include progress-bar support, parsing text from sockets, or a chunk of memory. All these software features are text-format independent and can be re-used in parsing any text format. Derived classes of C will be able to take advantage of these features. 35 | 36 | =cut 37 | 38 | use Exception::Class ( 39 | 'Text::Parser::Exception', 40 | 'Text::Parser::Exception::ParsingError' => { 41 | isa => 'Text::Parser::Exception', 42 | description => 'For all parsing errors', 43 | alias => 'throw_text_parsing_error' 44 | }, 45 | 'Text::Parser::Exception::FileNotFound' => { 46 | isa => 'Text::Parser::Exception', 47 | description => 'File not found', 48 | alias => 'throw_file_not_found' 49 | }, 50 | 'Text::Parser::Exception::FileNotReadable' => { 51 | isa => 'Text::Parser::Exception', 52 | description => 'File not readable', 53 | alias => 'throw_file_not_readable' 54 | }, 55 | 'Text::Parser::Exception::InvalidFileHandle' => { 56 | isa => 'Text::Parser::Exception', 57 | description => 'Bad argument supplied to filehandle()', 58 | alias => 'throw_invalid_filehandle' 59 | }, 60 | 'Text::Parser::Exception::InvalidFilename' => { 61 | isa => 'Text::Parser::Exception', 62 | description => 'Bad argument supplied to filename()', 63 | alias => 'throw_bad_filename' 64 | }, 65 | 'Text::Parser::Exception::FileCantOpen' => { 66 | isa => 'Text::Parser::Exception', 67 | description => 'Error opening file', 68 | alias => 'throw_cant_open' 69 | }, 70 | 'Text::Parser::Exception::BadReadInput' => { 71 | isa => 'Text::Parser::Exception', 72 | description => 73 | 'The user called read() method with an unsupported type of input', 74 | alias => 'throw_bad_input_to_read', 75 | }, 76 | ); 77 | 78 | use Try::Tiny; 79 | use Scalar::Util 'openhandle'; 80 | 81 | =method new 82 | 83 | Takes no arguments. Constructor. 84 | 85 | my $parser = Text::Parser->new(); 86 | 87 | This C<$parser> variable will be used in examples below. 88 | 89 | =cut 90 | 91 | sub new { 92 | my $pkg = shift; 93 | bless {}, $pkg; 94 | } 95 | 96 | =method read 97 | 98 | Takes zero or one argument which could be a string containing the name of the file, or a filehandle reference or a C (e.g. C<\*STDIN>). Throws an exception if filename/C provided is either non-existent or cannot be read for any reason. 99 | 100 | B Normally if you provide the C of a file opened for write, some Operating Systems allow reading from it too, and some don't. Read the documentation for C> for more on this. 101 | 102 | $parser->read($filename); 103 | 104 | # The above is equivalent to the following 105 | $parser->filename($filename); 106 | $parser->read(); 107 | 108 | # You can also read from a previously opened file handle directly 109 | $parser->filehandle(\*STDIN); 110 | $parser->read(); 111 | 112 | Returns once all records have been read or if an exception is thrown for any parsing errors, or if reading has been aborted with the C> method. 113 | 114 | If you provide a string file name as input, the function will handle all C and C operations on files even if any exception is thrown, or if the reading has been aborted. But if you pass a file handle C instead, then the file handle won't be closed and it will be the responsibility of the calling program to close the filehandle. 115 | 116 | $parser->read('myfile.txt'); # Will handle open, parsing, and closing of file automatically. 117 | 118 | open MYFH, "read(\*MYFH); # Will not close MYFH and it is the respo 120 | close MYFH; 121 | 122 | When you do read a new file or input stream with this method, you will lose all the records stored from the previous read operation. So this means that if you want to read a different file with the same parser object, (unless you don't care about the records from the last file you read) you should use the C> method to retrieve all the read records before parsing a new file. So all those calls to C in the example above were parsing three different files, and each successive call overwrote the records from the previous call. 123 | 124 | $parser->read($file1); 125 | my (@records) = $parser->get_records(); 126 | 127 | $parser->read(\*STDIN); 128 | my (@stdin) = $parser->get_records(); 129 | 130 | B To extend the class to other file formats, override C> instead of this one. 131 | 132 | =head3 Future Enhancement 133 | 134 | I method takes only two possible inputs argument types, either a file name, or a file handle. In future this may be enhanced to read from sockets, subroutines, or even just a block of memory (a string reference). Suggestions for other forms of input are welcome.> 135 | 136 | =cut 137 | 138 | sub read { 139 | my ( $self, $input ) = @_; 140 | return if not $self->__is_file_known_or_opened($input); 141 | $self->__store_check_input($input); 142 | $self->__read_and_close_filehandle(); 143 | } 144 | 145 | sub __store_check_input { 146 | my ( $self, $input ) = @_; 147 | return if not defined $input; 148 | return $self->filename($input) if ref($input) eq ''; 149 | return $self->filehandle($input) if ref($input) eq 'GLOB'; 150 | __throw_bad_input_to_read( ref($input) ); 151 | } 152 | 153 | sub __throw_bad_input_to_read { 154 | throw_bad_input_to_read error => 'Unexpected ' . shift 155 | . ' type input to read() ; must be either string filename or GLOB'; 156 | } 157 | 158 | sub __is_file_known_or_opened { 159 | my ( $self, $fname ) = @_; 160 | return 0 if not defined $fname and not exists $self->{__filehandle}; 161 | return 0 if defined $fname and not $fname; 162 | return 1; 163 | } 164 | 165 | sub __read_and_close_filehandle { 166 | my $self = shift; 167 | $self->__init_read_fh; 168 | $self->__read_file_handle; 169 | $self->__close_file; 170 | } 171 | 172 | sub __init_read_fh { 173 | my $self = shift; 174 | $self->lines_parsed(0); 175 | $self->{__bytes_read} = 0; 176 | delete $self->{__records} if exists $self->{__records}; 177 | delete $self->{__abort_reading}; 178 | } 179 | 180 | sub __read_file_handle { 181 | my $self = shift; 182 | my $fh = $self->filehandle(); 183 | while (<$fh>) { 184 | last if not $self->__parse_line_and_next($_); 185 | } 186 | } 187 | 188 | sub __parse_line_and_next { 189 | my $self = shift; 190 | $self->lines_parsed( $self->lines_parsed + 1 ); 191 | $self->__try_to_parse(shift); 192 | return not exists $self->{__abort_reading}; 193 | } 194 | 195 | sub __try_to_parse { 196 | my ( $self, $line ) = @_; 197 | try { $self->save_record($line); } 198 | catch { 199 | $self->__close_file; 200 | $_->rethrow; 201 | }; 202 | } 203 | 204 | sub __close_file { 205 | my $self = shift; 206 | return if not exists $self->{__filename}; 207 | close $self->{__filehandle}; 208 | delete $self->{__filehandle}; 209 | } 210 | 211 | =method filename 212 | 213 | Takes zero or one string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C if no file has been opened. 214 | 215 | print "Last read ", $parser->filename, "\n"; 216 | 217 | The file name is "persistent" in the object. Meaning, even after you have called C> once, it still remembers the file name. So you can do this: 218 | 219 | $parser->read(shift @ARGV); 220 | print $parser->filename(), ":\n", "=" x (length($parser->filename())+1), "\n", $parser->get_records(), "\n"; 221 | 222 | But if you do a C with a filehandle as argument, you'll see that the last filename is lost - which makes sense. 223 | 224 | $parser->read(\*MYFH); 225 | print "Last file name is lost\n" if not defined $parser->filename(); 226 | 227 | =cut 228 | 229 | sub filename { 230 | my $self = shift; 231 | $self->__open_file( $self->__is_readable_file(shift) ) if scalar(@_); 232 | return ( exists $self->{__filename} ) ? $self->{__filename} : undef; 233 | } 234 | 235 | sub __is_readable_file { 236 | my ( $self, $fname ) = @_; 237 | throw_bad_filename( error => "$fname is not a string" ) 238 | if ref($fname) ne ''; 239 | throw_file_not_found( error => "$fname is not a file" ) 240 | if not -f $fname; 241 | throw_file_not_readable( error => "$fname is not readable" ) 242 | if not -r $fname; 243 | return $fname; 244 | } 245 | 246 | sub __open_file { 247 | my ( $self, $fname ) = @_; 248 | open my $fh, "<$fname" 249 | or throw_cant_open( error => "Error while opening file $fname" ); 250 | $self->__close_file if exists $self->{__filehandle}; 251 | $self->{__filename} = $fname; 252 | $self->{__filehandle} = $fh; 253 | } 254 | 255 | =method filehandle 256 | 257 | Takes zero or one C argument and saves it for future a C> call. Returns the filehandle last saved, or C if none was saved. Remember that after a successful C call, filehandles are lost. 258 | 259 | my $fh = $parser->filehandle(); 260 | 261 | B As such there is a check to ensure one is not supplying a write-only filehandle. For example, if you specify the filehandle of a write-only file or if the file is opened for write and you cannot read from it. The weird thing is that some of the standard filehandles like C don't behave uniformly across all platforms. On most POSIX platforms, C is readable. On such platforms you will not get any exceptions if you try to do this: 262 | 263 | $parser->filehandle(\*STDOUT); ## Works on many POSIX platforms 264 | ## Throws exception on others 265 | 266 | Like in the case of C> method, if after you C with a filehandle, you call C again, this time with a file name, the last filehandle is lost. 267 | 268 | my $lastfh = $parser->filehandle(); 269 | ## Will return STDOUT 270 | 271 | $parser->read('another.txt'); 272 | print "No filehandle saved any more\n" if not defined $parser->filehandle(); 273 | 274 | =cut 275 | 276 | sub filehandle { 277 | my ( $self, $fhref ) = @_; 278 | $self->__save_file_handle($fhref) if $self->__check_file_handle($fhref); 279 | return ( exists $self->{__filehandle} ) ? $self->{__filehandle} : undef; 280 | } 281 | 282 | sub __save_file_handle { 283 | my ( $self, $fhref ) = @_; 284 | $self->{__filehandle} = $$fhref; 285 | delete $self->{__filename} if exists $self->{__filename}; 286 | $self->{__size} = ( stat $$fhref )[7]; 287 | } 288 | 289 | sub __check_file_handle { 290 | my ( $self, $fhref ) = @_; 291 | return 0 if not defined $fhref; 292 | throw_invalid_filehandle( error => "$fhref is not a valid filehandle" ) 293 | if ref($fhref) ne 'GLOB'; 294 | throw_file_not_readable( error => "$$fhref is a closed filehandle" ) 295 | if not defined openhandle($fhref); 296 | throw_file_not_readable( 297 | error => "The filehandle $$fhref is not readable" ) 298 | if not -r $$fhref; 299 | return 1; 300 | } 301 | 302 | =method lines_parsed 303 | 304 | Takes no arguments. Returns the number of lines last parsed. A line is reckoned when the C<\n> character is encountered. 305 | 306 | print $parser->lines_parsed, " lines were parsed\n"; 307 | 308 | The value is auto-updated during the execution of C>. See L of how this can be used in derived classes. 309 | 310 | Again the information in this is "persistent". You can also be assured that every time you call C, the value be auto-reset before parsing. 311 | 312 | =cut 313 | 314 | sub lines_parsed { 315 | my $self = shift; 316 | return $self->{__current_line} = shift if @_; 317 | return ( exists $self->{__current_line} ) ? $self->{__current_line} : 0; 318 | } 319 | 320 | =method save_record 321 | 322 | Takes exactly one argument and that is saved as a record. Additional arguments are ignored. If no arguments are passed, then C is stored as a record. 323 | 324 | In an application that uses a text parser, you will most-likely never call this method directly. It is automatically called within C> for each line. In this base class C, C is simply called with a string containing the raw line of text ; i.e. the line of text will not be Ced or modified in any way. L is a basic example. 325 | 326 | Derived classes can decide to store records in a different form. A derived class could, for example, store the records in the form of hash references (so that when you use C>, you'd get an array of hashes), or maybe even another array reference (so when you use C you'd get an array of arrays). The L does the latter. 327 | 328 | =cut 329 | 330 | sub save_record { 331 | my $self = shift; 332 | $self->{__records} = [] if not defined $self->{__records}; 333 | push @{ $self->{__records} }, shift; 334 | } 335 | 336 | =method abort_reading 337 | 338 | Takes no arguments. Returns C<1>. You will probably never call this method in your main program. 339 | 340 | This method is usually used only in the derived class. See L. 341 | 342 | =cut 343 | 344 | sub abort_reading { 345 | my $self = shift; 346 | return $self->{__abort_reading} = 1; 347 | } 348 | 349 | =method get_records 350 | 351 | Takes no arguments. Returns an array containing all the records saved by the parser. 352 | 353 | foreach my $record ( $parser->get_records ) { 354 | $i++; 355 | print "Record: $i: ", $record, "\n"; 356 | } 357 | 358 | =cut 359 | 360 | sub get_records { 361 | my $self = shift; 362 | return () if not exists $self->{__records}; 363 | return @{ $self->{__records} }; 364 | } 365 | 366 | =method last_record 367 | 368 | Takes no arguments and returns the last saved record. Leaves the saved records untouched. 369 | 370 | my $last_rec = $parser->last_record; 371 | 372 | =cut 373 | 374 | sub last_record { 375 | my $self = shift; 376 | return undef if not exists $self->{__records}; 377 | my (@record) = @{ $self->{__records} }; 378 | return $record[$#record]; 379 | } 380 | 381 | =method pop_record 382 | 383 | Takes no arguments and pops the last saved record. 384 | 385 | my $last_rec = $parser->pop_record; 386 | $uc_last = uc $last_rec; 387 | $parser->save_record($uc_last); 388 | 389 | =cut 390 | 391 | sub pop_record { 392 | my $self = shift; 393 | return undef if not exists $self->{__records}; 394 | pop @{ $self->{__records} }; 395 | } 396 | 397 | =head1 EXAMPLES 398 | 399 | The following examples should illustrate the use of inheritance to parse various types of text file formats. 400 | 401 | =head2 Basic principle 402 | 403 | Derived classes simply need to override one method : C>. With the help of that any arbitrary file format can be read. C should interpret the format of the text and store it in some form by calling C. The C program will then use the records and create an appropriate data structure with it. 404 | 405 | Notice that the creation of a data structure is not the objective of a parser. It is simply concerned with collecting data and arranging it in a form that can be used. That's all. Data structures can be created by a different part of your program using the data collected by your parser. 406 | 407 | =head2 Example 1 : A simple CSV Parser 408 | 409 | We will write a parser for a simple CSV file that reads each line and stores the records as array references. This example is oversimplified, and does B handle embedded newlines. 410 | 411 | package Text::Parser::CSV; 412 | use parent 'Text::Parser'; 413 | use Text::CSV; 414 | 415 | my $csv; 416 | sub save_record { 417 | my ($self, $line) = @_; 418 | $csv //= Text::CSV->new({ binary => 1, auto_diag => 1}); 419 | $csv->parse($line); 420 | $self->SUPER::save_record([$csv->fields]); 421 | } 422 | 423 | That's it! Now in C you can write something like this: 424 | 425 | use Text::Parser::CSV; 426 | 427 | my $csvp = Text::Parser::CSV->new(); 428 | $csvp->read(shift @ARGV); 429 | foreach my $aref ($csvp->get_records) { 430 | my (@arr) = @{$aref}; 431 | print "@arr\n"; 432 | } 433 | 434 | The above program reads the content of a given CSV file and prints the content out in space-separated form. 435 | 436 | =head2 Example 2 : Error checking 437 | 438 | It is easy to add any error checks using exceptions. One of the easiest ways to do this is to C>. We'll modify the CSV parser above to demonstrate that. 439 | 440 | package Text::Parser::CSV; 441 | use Exception::Class ( 442 | 'Text::Parser::CSV::Error', 443 | 'Text::Parser::CSV::TooManyFields' => { 444 | isa => 'Text::Parser::CSV::Error', 445 | }, 446 | ); 447 | 448 | use parent 'Text::Parser'; 449 | use Text::CSV; 450 | 451 | my $csv; 452 | sub save_record { 453 | my ($self, $line) = @_; 454 | $csv //= Text::CSV->new({ binary => 1, auto_diag => 1}); 455 | $csv->parse($line); 456 | my @fields = $csv->fields; 457 | $self->{__csv_header} = \@fields if not scalar($self->get_records); 458 | Text::Parser::CSV::TooManyFields->throw(error => "Too many fields on line #" . $self->lines_parsed) 459 | if scalar(@fields) > scalar(@{$self->{__csv_header}}); 460 | $self->SUPER::save_record(\@fields); 461 | } 462 | 463 | The C class will C all filehandles automatically as soon as an exception is thrown from C. You can catch the exception in C as you would normally, by Cing C> or other such class. 464 | 465 | =head2 Example 3 : Aborting without errors 466 | 467 | We can also abort parsing a text file without throwing an exception. This could be if we got the information we needed. For example: 468 | 469 | package Text::Parser::SomeFile; 470 | use parent 'Text::Parser'; 471 | 472 | sub save_record { 473 | my ($self, $line) = @_; 474 | my ($leading, $rest) = split /\s+/, $line, 2; 475 | return $self->abort_reading() if $leading eq '**ABORT'; 476 | return $self->SUPER::save_record($line); 477 | } 478 | 479 | In this derived class, we have a parser C that would save each line as a record, but would abort reading the rest of the file as soon as it reaches a line with C<**ABORT> as the first word. When this parser is given the following file as input: 480 | 481 | somefile.txt: 482 | 483 | Some text is here. 484 | More text here. 485 | **ABORT reading 486 | This text is not read 487 | This text is not read 488 | This text is not read 489 | This text is not read 490 | 491 | You can now write a program as follows: 492 | 493 | use Text::Parser::SomeFile; 494 | 495 | my $par = Text::Parser::SomeFile->new(); 496 | $par->read('somefile.txt'); 497 | print $par->get_records(), "\n"; 498 | 499 | The output will be: 500 | 501 | Some text is here. 502 | More text here. 503 | 504 | =cut 505 | 506 | 1; 507 | --------------------------------------------------------------------------------