├── t ├── 00-load.t ├── 02_simple_unit.t └── data │ └── 02_simple_unit │ └── 02_simple_unit.pl ├── bin ├── p526 └── ppi_dump ├── my_missing.pl ├── FAQ ├── README ├── Notes └── mad.txt ├── LICENSE ├── translate_regex.pl ├── lex_label.pl └── lib └── PPIx └── Transform └── Perl5_to_Perl6.pm /t/00-load.t: -------------------------------------------------------------------------------- 1 | #!perl -T 2 | use strict; 3 | use warnings; 4 | use Test::More tests => 1; 5 | use lib 'lib'; 6 | 7 | BEGIN { 8 | use_ok( 'PPIx::Transform::Perl5_to_Perl6' ) 9 | or print "Bail out!\n"; 10 | } 11 | 12 | note( 13 | 'Testing PPIx::Transform::Perl5_to_Perl6' 14 | . " $PPIx::Transform::Perl5_to_Perl6::VERSION, Perl $], $^X" 15 | ); 16 | -------------------------------------------------------------------------------- /bin/p526: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Getopt::Long; 5 | use lib 'lib'; 6 | use PPIx::Transform::Perl5_to_Perl6; 7 | 8 | =begin comments 9 | 10 | TODO 11 | Obviously, many options will be added in the future. 12 | Add a config file option. 13 | 14 | =end comments 15 | 16 | =cut 17 | 18 | my $USAGE = <<'END_OF_USAGE'; 19 | Usage: 20 | p526 [-h|--help] path/to/some_perl_5_program.pl 21 | This program is the main command-line app for the Blue Tiger translator. 22 | It translates Perl 5 source code into Perl 6. 23 | END_OF_USAGE 24 | 25 | GetOptions( 26 | 'help|h' => \( my $opt_help ), 27 | ) or die $USAGE; 28 | 29 | print $USAGE and exit(0) if $opt_help; 30 | 31 | die $USAGE if @ARGV != 1; 32 | my ($script_path) = @ARGV; 33 | 34 | my $PPI_doc = PPI::Document->new($script_path) 35 | or die "Cannot make a PPI::Document from '$script_path': $!"; 36 | 37 | my @warnings; 38 | my $xlate = PPIx::Transform::Perl5_to_Perl6->new( 39 | WARNING_LOG => \@warnings, 40 | ) or die "Failed to create new xlate"; 41 | 42 | $xlate->apply($PPI_doc) 43 | or die "Failed to apply"; 44 | 45 | my $got_code_out = $PPI_doc->serialize; 46 | 47 | print $got_code_out, "\n", join("\n", @warnings), "\n"; 48 | -------------------------------------------------------------------------------- /my_missing.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use 5.010; 3 | 4 | # This code can be run from the command line, 5 | # but is not intended to accomplish anything. 6 | # It is just the data file for lex_label.pl, 7 | # but has its own .pl extension for syntax 8 | # hi-lighting. 9 | 10 | print_it('abc', 42) if 0==1 and check_it('abc') and sqrt($foo); 11 | 12 | ($fruit) = 'apple'; 13 | $fruit = 'apple'; 14 | say $fruit; 15 | 16 | if ( $diet = 'paleo' ) { 17 | $meat = 'Yes'; # `my` gets added here 18 | } 19 | elsif ( $diet = 'vegan' ) { 20 | $meat = 'No'; 21 | } 22 | else { 23 | $meat = 'Maybe'; 24 | } 25 | say $meat; 26 | 27 | $i = 0; 28 | while ($i++<5) { 29 | $j = $i; 30 | } 31 | say $j; 32 | 33 | @array = grep { $_ % 2 == 0 } 1 .. 5; 34 | $array[7] = 8; 35 | 36 | $c = 0; 37 | %hash = map { $_ => ++$c } grep { /a/ } grep /s/, qw( salami baloney ); 38 | %hash = map { $_ => ++$d } grep { /a/ } grep /s/, qw( salami baloney ); # Test redefiniion and $d not being initialized. 39 | 40 | # Bug! Adding this block causes Document->complete to fail!!!! 41 | { 42 | $grain = 'wheat'; 43 | say $grain; 44 | } 45 | 46 | sub foo1 { 47 | if ( $burned_out ) { 48 | $unstable++; 49 | } 50 | if ( !$burned_out ) { 51 | $unstable--; 52 | } 53 | say 'hi!'; 54 | } 55 | -------------------------------------------------------------------------------- /bin/ppi_dump: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use PPI; 5 | use PPI::Dumper; 6 | use Getopt::Long; 7 | Getopt::Long::Configure('bundling'); 8 | 9 | # This program is a development aid for Blue Tiger. 10 | # It parses a piece of Perl code with PPI, then dumps the structure to STDOUT. 11 | 12 | my $USAGE = <<'END_OF_USAGE'; 13 | Usage: ppi_dump [ -hw ] [ -noc ] [ -e 'command' ] [ programfile ] 14 | 15 | -e 'command' 16 | may be used to enter one line of program. If −e is given, ppi_dump 17 | will not look for a filename in the argument list. Multiple −e 18 | commands may be given to build up a multi‐line script. Make sure 19 | to use semicolons where you would in a normal program. 20 | 21 | -w, --whitespace 22 | show whitespace tokens, which are normally suppressed. 23 | 24 | -noc, --nocontent 25 | suppress showing the content of each element. 26 | 27 | -h, --help 28 | prints this usage message and exits normally. 29 | END_OF_USAGE 30 | 31 | GetOptions( 32 | 'e=s@' => \( my $literal_code ), 33 | 'c|content!' => \( my $content = 1 ), 34 | 'w|whitespace' => \( my $whitespace = 0 ), 35 | 'h|help' => \( my $help ), 36 | ) or die $USAGE; 37 | 38 | print $USAGE and exit(0) if $help; 39 | 40 | # The program can either take a filename, or quoted perl code. 41 | my $PPI_doc; 42 | if ($literal_code) { 43 | die $USAGE if @ARGV != 0; 44 | 45 | my $code = join "\n", @{$literal_code}; 46 | $PPI_doc = PPI::Document->new( \$code ) 47 | or die "Could not generate PPI from literal input '$code'"; 48 | } 49 | else { 50 | die $USAGE if @ARGV != 1; 51 | my ( $program_path ) = @ARGV; 52 | 53 | $PPI_doc = PPI::Document->new( $program_path ) 54 | or die "Could not generate PPI from file '$program_path'"; 55 | } 56 | 57 | 58 | my $Dumper = PPI::Dumper->new( 59 | $PPI_doc, 60 | content => $content, 61 | whitespace => $whitespace, 62 | ) or die; 63 | 64 | my @lines = $Dumper->list; 65 | 66 | # This code aligns all the content on the right-hand side. 67 | my $re = qr{ 68 | ^ 69 | ( \s* \S+ ) 70 | (?: [ ]* \t )? 71 | ( \S .* )? 72 | $ 73 | }msx; 74 | my @structs; 75 | my $max_len = 0; 76 | for my $line (@lines) { 77 | my ( $ws_and_class, $content ) = ( $line =~ /$re/ ) or warn; 78 | my $len = length $ws_and_class; 79 | $max_len = $len if $max_len < $len; 80 | push @structs, [ $ws_and_class, $content ]; 81 | } 82 | 83 | for my $struct_aref (@structs) { 84 | my ( $ws_and_class, $content ) = @{$struct_aref}; 85 | if ( not defined $content ) { 86 | print $ws_and_class, "\n"; 87 | } 88 | else { 89 | printf "%-${max_len}s %s\n", $ws_and_class, $content; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /t/02_simple_unit.t: -------------------------------------------------------------------------------- 1 | #!perl -T 2 | use strict; 3 | use warnings; 4 | use Carp; 5 | use Test::More; 6 | use lib 'lib'; 7 | use PPIx::Transform::Perl5_to_Perl6; 8 | 9 | my $USAGE = <<'END_OF_USAGE'; 10 | Usage: 11 | perl t/02_simple_unit.t [n] 12 | or 13 | prove [-v] t/02_simple_unit.t 14 | This test runs all the unit tests contained in 02_simple_unit.pl. 15 | A single test number (1-based) can be given; e.g. to run the third test: 16 | perl t/02_simple_unit.t 3 17 | END_OF_USAGE 18 | 19 | die $USAGE if @ARGV > 1; 20 | my $single_test = @ARGV ? shift : undef; 21 | 22 | my $p5_and_p6_filename = 't/data/02_simple_unit/02_simple_unit.pl'; 23 | 24 | my $perl5_and_perl6_code = do { 25 | open my $fh, '<', $p5_and_p6_filename 26 | or die "Failed to open test data file '$p5_and_p6_filename': $!"; 27 | local $/; 28 | <$fh>; 29 | }; 30 | 31 | my $test_separator_re = qr{ ^ [#] [ ]? -{3,} \s* $ }msx; 32 | 33 | my $test_re = qr{ 34 | \A 35 | \n? 36 | \# \s* Name: [ ]* ([^\n]+?) \n 37 | \# \s* In: [ ]* \n (.+?) \n 38 | \# \s* Out: [ ]* \n (.+?) \n 39 | (?: \# \s* Warn: [ ]* \n (.+?) \n )? 40 | \z 41 | }msx; 42 | 43 | my ( $first_block, @tests ) = split /$test_separator_re/, $perl5_and_perl6_code; 44 | 45 | # Expect the first block to be comments only. 46 | die if $first_block =~ /$test_re/; 47 | 48 | if ($single_test) { 49 | @tests = $tests[$single_test-1]; 50 | } 51 | plan tests => scalar(@tests); 52 | 53 | # Replace each string with a named hash of its components. 54 | for my $test_block (@tests) { 55 | my @fields = ( $test_block =~ /$test_re/ ) 56 | or die "Ack! '$test_block'"; 57 | 58 | # Force existence of any optional captures. 59 | $#fields = 3 if $#fields < 3; 60 | 61 | my %t; 62 | @t{ qw( NAME IN OUT WARN ) } = @fields; 63 | 64 | $t{WARN} = '' if not defined $t{WARN}; 65 | $test_block = \%t; 66 | } 67 | 68 | for my $test_aref (@tests) { 69 | my %t = %{$test_aref}; 70 | 71 | my $PPI_doc = PPI::Document->new( \$t{IN} ) or die; 72 | #require PPI::Dumper; PPI::Dumper->new( $PPI_doc )->print; 73 | 74 | my @warnings; 75 | my $xlate = PPIx::Transform::Perl5_to_Perl6->new( 76 | WARNING_LOG => \@warnings, 77 | ) or die; 78 | 79 | $xlate->apply($PPI_doc) 80 | or die "Failed to apply xlate to PPI_doc: $t{IN}"; 81 | 82 | my $actual_out = $PPI_doc->serialize; 83 | my $actual_warn = join '', @warnings; 84 | chomp $actual_warn; 85 | 86 | my %actual = ( OUT => $actual_out, WARN => $actual_warn ); 87 | my %expected = ( OUT => $t{OUT}, WARN => $t{WARN} ); 88 | 89 | is_deeply( \%actual, \%expected, $t{NAME} ) or do { 90 | note explain( 'Original: ' => $t{IN} ); 91 | note explain( 'Expected: ' => \%expected ); 92 | note explain( 'Actual: ' => \%actual ); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /FAQ: -------------------------------------------------------------------------------- 1 | F.A.Q. - Fervently Anticipated Questions 2 | (This package is too new for anyone to have actually asked yet.) 3 | 4 | Q: Is this the "official" Perl5-to-Perl6 translator? 5 | A: No, this is a "tinkertoy" project of one of the Perl 6 developers. 6 | However, it might still be the best translator for your needs. 7 | 8 | Q: Well, then where *is* the "official" Perl5-to-Perl6 translator? 9 | A: There isn't one (yet?). The one that Larry Wall is working on (called MAD) 10 | does not yet emit Perl 6 output. See Notes/mad.txt for more details. 11 | A: Perlito is a compiler collection that implements a subset of Perl 5 and Perl 6. 12 | If your code's syntax fits into the Perlito subset of Perl 5, then it can emit 13 | a Perl 6 version of your code. YMMV as to whether Perlito or Blue Tiger better 14 | serves your translation needs. Perlito's objectives are quite different 15 | (and much broader) than Blue Tiger; neither is a subset of the other. 16 | See http://www.perlito.org/ . 17 | A: According to Larry at YAPC::NA::2010, the only thing that will be "official" 18 | in the Perl 6 ecosystem is the language specification and its test suite. 19 | Everything else is free to have multiple designs, implementations, etc. 20 | I had asked specifically because he had claimed the translator project for 21 | himself several years ago, and I did not want to step on any toes. 22 | 23 | Q: What is Perl::Modernize? 24 | A: Perl::Modernize is the intended namespace for the whole Blue Tiger project. 25 | Even though all the early work has been focused on 5->6, I intend the 26 | first released module in Perl::Modernize to be a 5->5 translator, that 27 | automatically places my() in the correct positions in code that is not 28 | `use strict` compliant. 29 | See my 5-minute Lightning talk, "Modernizing Perl, Automatically", at 30 | http://www.youtube.com/watch?v=Bohxcat4g-A&t=22m10s (22m10s - 27m07s). 31 | Slides here: 32 | http://s3.datasaw.com/yapc/modernizing_perl_automatically.pdf 33 | 34 | Q: Why is the project named "Blue Tiger"? 35 | A: For a confluence of reasons: 36 | * The spokesbug for Perl 6 is Camelia, a butterfly. 37 | * This is a migration project. 38 | * A migrating butterfly would be good as a project name. 39 | * The best-known migrating butterfly is the Monarch. 40 | * This was (at the time) not expected to be the primary migration tool. 41 | * The Blue Tiger (Tirumala limniace) is a migratory butterfly that is 42 | not Monarch. 43 | * The initial bulk of the project was written in Auburn, home of the Auburn 44 | _Tigers_ football team, whose colors are orange and 45 | _blue_. 46 | * Much of the code was written overlooking a butterfly garden. 47 | 48 | Q: Why didn't you use Perl 6 to write the translator itself? 49 | A: Good question; after all, the new Perl 6 "grammars" are a powerful 50 | object-oriented fusion of Perl 5's regexes and Parse::RecDescent. 51 | However, the few Perl 6 grammars for parsing Perl 5 are all 52 | experimental, and are not mature/stable/complete/API'ed enough to 53 | support this project. Speed is also a big issue during development. 54 | Once something like PPI is written in Perl 6, and Perl 6 55 | implementations get fast enough to not impede the translator 56 | development, then this whole project might be re-written in Perl 6. 57 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Blue Tiger is an assistant for Perl 5 to Perl 6 translation. 2 | 3 | How to try it without installing: 4 | git clone git://github.com/Util/Blue_Tiger.git 5 | cd Blue_Tiger 6 | bin/p526 path/to/program.pl > program_and_warnings.p6 7 | 8 | Initial Author: Bruce Gray 9 | Email: bruce dot gray at acm dot org 10 | IRC: "Util" on freenode/#perl6 11 | 12 | Pre-requisite modules: 13 | PPI 1.204_03 or higher 14 | 15 | Currently handles (with examples): 16 | Mandatory: 17 | Operator changes . -> ~ 18 | Invariant sigils $hash{$key} -> %hash{$key} 19 | Casts @{$arrayref} -> @($arrayref) 20 | Nums with trailing. 42. -> 42.0 21 | KeywordNoSpace if( -> if ( 22 | Bare hash keys $hash{KEY} -> %hash{'KEY'} 23 | map/grep comma @z=map {...} @a -> @z=map {...}, @a 24 | mapish EXPR @z=map !$_, @a -> @z=map { !$_ }, @a 25 | Optional: 26 | Warnings for user review of transforms. 27 | 28 | Planned/TODO: 29 | Mandatory: 30 | for/loop for(;$i>5;$i++) -> loop (;$i>5;$i++) 31 | foreach arrow for my $i (@a) -> for @a -> $i 32 | open open my $fh,... -> my $fh = open... 33 | bareword filehandle open FH, ... -> my $FH = open... 34 | readline via <> <$fh> -> $fh.get (if scalar) 35 | readline func readline $fh -> $fh.get (if scalar) 36 | while readline while (<$fh>){} -> for $fh.lines {} 37 | magic diamond while (<>){} -> for lines() {...} 38 | ARGV/ARGS @ARGV -> @*ARGS 39 | Hash init? => -> P5=> (sometimes) 40 | rand function rand -> P5rand 41 | qq changes "${foo}bar" -> "{$foo}bar" 42 | "\l$foo" -> "{lc $foo}" 43 | "\v" 44 | double-underscore __PACKAGE__ -> $?PACKAGE 45 | indent-i_fiers "$foo_bar-30" -> "$foo_bar\-30" 46 | or -> "{$foo_bar}-30" 47 | syntax collisions '\qq[...]' -> '\\qq[...]' 48 | (especially qq) "@array" -> "@array[]" 49 | "$a.b()" -> "$a.b\()" 50 | "abc{def()}ghi" -> "abc\{def()\}ghi" 51 | or -> qq:!c "abc{def()}ghi" 52 | Unicode "\N{NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE}" 53 | -> "\c[NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE]" 54 | Unicode? Lots! 55 | Optional (and configurable!): 56 | Hashkey short form %hash{'KEY'} -> %hash 57 | Remove parens if ($foo) {} -> if $foo {} 58 | func vs method @z=map {...} @a -> @z=@a.map({...}) 59 | Infinite loop while (1) {...} -> loop {...} 60 | qq un-hack "@{[ $a * 2 ]}" -> "{$a * 2}" 61 | print/say print "Hi!\n" -> say "Hi!"; 62 | Regex improvements qr{ a [ ]+ b }x -> re{ a b } XXXX smart spacing? 63 | Bad slurp for (){} -> for $fh.slurp 64 | pick int(rand 12) -> (^12).pick 65 | pick array $a[rand @a] -> @a.pick 66 | Hash init %h=map{$_=>1}@a -> %h = @a X=> 1 67 | @_ handling sub f{($n)=@_} -> sub f ($n) {} 68 | Divides ($n % 2) == 0 -> $n %% 2 69 | List::Util comma first {...} @a -> first {...}, @a 70 | 71 | -------------------------------------------------------------------------------- /Notes/mad.txt: -------------------------------------------------------------------------------- 1 | Miscellaneous Attribute Decoration (MAD) is a road not taken by Blue Tiger. 2 | MAD is not directly relevant to Blue Tiger, but since I had a lot of notes 3 | from researching this approach (before rejecting it), I am making them 4 | available here. 5 | 6 | If you want to play with MAD, I suggest that you compile your own Perl 5.10.0 7 | source, since that version should be most compatible (no bitrot w.r.t. MAD). 8 | 9 | http://use.perl.org/articles/06/04/20/0832226.shtml 10 | MAD is the process of hanging sufficient information off the op-tree to be 11 | able to recover the source code afterwards. Up until now, the compilation 12 | phase has simply been required to produce bytecode for the run-time 13 | interpreter, and it's usually impossible to figure out by inspecting the 14 | bytecode to figure out what the original Perl source would have looked 15 | like (think: peep-hole optimisations). 16 | 17 | Being able to go from source code to bytecode back to source code is an 18 | important step in getting Perl 6 to run Perl 5 code). 19 | 20 | http://search.cpan.org/~miyagawa/perl/pod/perl5100delta.pod#MAD 21 | MAD, which stands for Miscellaneous Attribute Decoration, is a 22 | still-in-development work leading to a Perl 5 to Perl 6 converter. To 23 | enable it, it's necessary to pass the argument -Dmad to Configure. The 24 | obtained perl isn't binary compatible with a regular perl 5.10, and has 25 | space and speed penalties; moreover not all regression tests still pass 26 | with it. (Larry Wall, Nicholas Clark) 27 | 28 | http://perl5.git.perl.org/metaconfig.git/blob/HEAD:/U/perl/mad.U 29 | Would you like to build with Misc Attribute Decoration? This is 30 | development work leading to a Perl 5 to Perl 6 convertor, which imposes a 31 | space and speed overhead on the interpreter. 32 | 33 | http://search.cpan.org/~rurban/B-C-1.27/perloptree.pod#MAD 34 | MAD stands for "Misc Attributed Data". 35 | 36 | Larry Wall worked on a new MAD compiler backend outside of the B approach, 37 | dumping the internal op tree representation as XML, not as tree of perl B 38 | objects. 39 | 40 | The idea is that all the information needed to recreate the original 41 | source is stored in the op tree. To do this the tokens for the ops are 42 | associated with ops, these madprops are a list of key-value pairs, where 43 | the key is a character as listed at the end of op.h, the value normally is 44 | a string, but it might also be a op, as in the case of a optimized op 45 | ('O'). Special for the whitespace key '_' (whitespace before) and '#' 46 | (whitespace after), which indicate the whitespace or comment before/after 47 | the previous key-value pair. 48 | 49 | Also when things normally compiled out, like a BEGIN block, which normally 50 | do not results in any ops, instead create a NULLOP with madprops used to 51 | recreate the object. 52 | 53 | Is there any documentation on this? 54 | 55 | Why this awful XML and not the rich tree of perl objects? 56 | 57 | Well there's an advantage. The MAD XML can be seen as some kind of XML 58 | Storable/Freeze of the B op tree, and can be therefore converted outside 59 | of the CHECK block, which means you can actually debug the conversion (= 60 | compilation) process. This is not possible within the CHECK block in the B 61 | backends. 62 | 63 | http://www.nntp.perl.org/group/perl.perl5.porters/2006/03/msg110539.html 64 | Larry's Perl 5 to Perl 5 convertor - nntp.perl.org 65 | I've merged all of Larry's Perl 5 to Perl 5 convertor work into blead. 66 | This was somewhat evil as it was against 5.9.2, and rather a lot has happened 67 | since then. It's conditionally compiled in if you Configure with -Dmad 68 | (MAD stands for Misc Attribute Decoration) 69 | 70 | It's not all working quite yet and I'm not sure why. 71 | --snip-- 72 | So basically I'm stuck on how I've fouled up the tokeniser such that I can't 73 | get XML out. It seems that as soon as PL_madskills is non-zero it goes boom. 74 | 75 | http://www.nntp.perl.org/group/perl.perl5.porters/2006/03/msg110560.html 76 | From: Nicholas Clark 77 | Date: March 10, 2006 11:20 78 | Subject: MAD works (was Re: debugging yacc (was Re: Larry's Perl 5 to Perl 5 convertor)) 79 | I started on trying to take the diff from 5.9.2's perly.y to madly.y and 80 | using that to convert blead's perly.y to madly.y, as a random guess. 81 | Larry mailed me directly about 15 minutes after I started to say that 82 | (effectively) this was what was needed. So I kept going. 83 | 84 | After rather a lot more 3 way diff merging than I like, it now works. 85 | [Yesterday toke.c, today madly.y tomorrow the world^W^Wtime off] 86 | [Mmm, you don't know how much you rely on patch until the diffs get too complex 87 | for it to apply. Thanks Larry, for something else that's really rather useful.] 88 | 89 | Anyway, finally, now if you sync blead and build with -Dmad you can do this: 90 | --snip-- 91 | I claim "weekend". Anyone else is welcome to play with p55 and nomad inside 92 | mad/ and send patches to tidy the hard coded directories. 93 | Oh, and patch madly.y to cope with given/when/break, Rafael's require 94 | tokenisation changes, the new array slice deref syntax, and anything else that 95 | changed since the release of 5.9.2. 96 | 97 | http://www.nntp.perl.org/group/perl.perl5.porters/2006/04/msg111890.html 98 | On Mon, Apr 17, 2006 at 04:59:56AM -0700, Yitzchak Scott-Thoennes wrote: 99 | > By "cope with", do you mean do something more than just what perly.y does? 100 | 101 | Well, I'm not completely sure what needs doing. But the pattern I could see 102 | in the code changes was that Larry replaced every op_free with some form of 103 | call to the MAD code to save the OP. My understanding being that every 104 | intermediate OP that is freed is effectively throwing away some information 105 | about the source code, hence to recreate the byte-perfect original source 106 | code (rather than a deparse-equivalent version) that information needs to be 107 | kept. 108 | 109 | Larry's work was up-to-date with (IIRC) released 5.9.2. But since then there 110 | have been several changes to syntax which have also meant changes to the 111 | optree generation, which in turn means that there are new places that call 112 | op_free(). So I don't think that MAD will work properly on recreating any 113 | script that uses any of the new syntax, because not all information will be 114 | preserved. So by "cope with" I meant making whatever changes are needed to 115 | the new op_free() calls to properly record information using MAD, so that 116 | it can be output in the XML dump. 117 | -------------------------------------------------------------------------------- /t/data/02_simple_unit/02_simple_unit.pl: -------------------------------------------------------------------------------- 1 | # This file is not intended to be run by itself! 2 | # It is just the input for 02_simple_unit.t. 3 | # It is in a separate file, and named .pl, so that the code can be edited 4 | # with syntax coloring. 5 | 6 | # Where possible, we name tests according to Damian's rant from the YAPC movie: 7 | # http://www.perl.org/yapc/2002/movies/themovie/script.txt 8 | # DAMIAN: Ok, so you with me? The arrow becomes the dot, the dot becomes 9 | # the underscore, the underscore had no meaning but the dollar sign, at and 10 | # percent all mean what they did before, except when they don't. The arrow 11 | # means a sub, angle brackets are iterators except when they're comparators, 12 | # but they're never globs, except when you're iterating over filenames. 13 | # Every block is a closure, every closure can be prebound, and there are a 14 | # couple of dozen types of context, including hyperoperated. 15 | 16 | #--- 17 | # Name: The arrow becomes the dot. 18 | # In: 19 | $x->method($y); 20 | # Out: 21 | $x.method($y); 22 | #--- 23 | # Name: The dot becomes the tilde. 24 | # In: 25 | $x = $y . $z; 26 | # Out: 27 | $x = $y ~ $z; 28 | #--- 29 | # Name: The underscore had no meaning. 30 | # In: 31 | _private_sub(123_456_789); 32 | # Out: 33 | _private_sub(123_456_789); 34 | #--- 35 | # Name: The bitwise or (|) becomes a specific bitwise or. 36 | # In: 37 | $x = $y | $z; 38 | # Out: 39 | $x = $y +| $z; 40 | # Warn: 41 | At line 1, position 9, op '|' was changed to '+|', but could have been any of ( '+|', '~|', '?|' ). Verify the context! 42 | #--- 43 | # Name: The bitwise and (&) becomes a specific bitwise and. 44 | # In: 45 | $x = $y & $z; 46 | # Out: 47 | $x = $y +& $z; 48 | # Warn: 49 | At line 1, position 9, op '&' was changed to '+&', but could have been any of ( '+&', '~&', '?&' ). Verify the context! 50 | #--- 51 | # Name: The bitwise xor (^) becomes a specific bitwise xor. 52 | # In: 53 | $x = ^ $z; 54 | # Out: 55 | $x = +^ $z; 56 | # Warn: 57 | At line 1, position 6, op '^' was changed to '+^', but could have been any of ( '+^', '~^', '?^' ). Verify the context! 58 | #--- 59 | # Name: The bitwise not (~) becomes a specific bitwise not. 60 | # In: 61 | $x = $y ~ $z; 62 | # Out: 63 | $x = $y +^ $z; 64 | # Warn: 65 | At line 1, position 9, op '~' was changed to '+^', but could have been any of ( '+^', '~^', '?^' ). Verify the context! 66 | #--- 67 | # Name: The bitwise shift left 68 | # In: 69 | $x = $y << $z; 70 | # Out: 71 | $x = $y +< $z; 72 | # Warn: 73 | At line 1, position 9, op '<<' was changed to '+<', but could have been any of ( '+<', '~<' ). Verify the context! 74 | #--- 75 | # Name: The bitwise shift right 76 | # In: 77 | $x = $y >> $z; 78 | # Out: 79 | $x = $y +> $z; 80 | # Warn: 81 | At line 1, position 9, op '>>' was changed to '+>', but could have been any of ( '+>', '~>' ). Verify the context! 82 | #--- 83 | # Name: The bitwise shift left assign 84 | # In: 85 | $x <<= $y; 86 | # Out: 87 | $x +<= $y; 88 | # Warn: 89 | At line 1, position 4, op '<<=' was changed to '+<=', but could have been any of ( '+<=', '~<=' ). Verify the context! 90 | #--- 91 | # Name: The bitwise shift right 92 | # In: 93 | $x >>= $y; 94 | # Out: 95 | $x +>= $y; 96 | # Warn: 97 | At line 1, position 4, op '>>=' was changed to '+>=', but could have been any of ( '+>=', '~>=' ). Verify the context! 98 | #--- 99 | # Name: The concat assign 100 | # In: 101 | $x .= $y; 102 | # Out: 103 | $x ~= $y; 104 | #--- 105 | # Name: Match binding becomes smart match 106 | # In: 107 | $x =~ /re/; 108 | # Out: 109 | $x ~~ /re/; 110 | #--- 111 | # Name: Negated match binding becomes negated smart match 112 | # In: 113 | $x !~ /re/; 114 | # Out: 115 | $x !~~ /re/; 116 | #--- 117 | # Name: Ternary operator 118 | # In: 119 | $foo = ($x) ? $y : $z; 120 | # Out: 121 | $foo = ($x) ?? $y !! $z; 122 | #--- 123 | # Name: Sigils - array sigil is now @ when keyed 124 | # In: 125 | $foo[$bar] 126 | # Out: 127 | @foo[$bar] 128 | #--- 129 | # Name: Sigils - array sigil is still @ when sliced 130 | # In: 131 | @foo[$bar,$baz] 132 | # Out: 133 | @foo[$bar,$baz] 134 | #--- 135 | # Name: Sigils - hash sigil is now % when keyed 136 | # In: 137 | $foo{$bar} 138 | # Out: 139 | %foo{$bar} 140 | #--- 141 | # Name: Sigils - hash sigil is now % when sliced 142 | # In: 143 | @foo{$bar,$baz} 144 | # Out: 145 | %foo{$bar,$baz} 146 | #--- 147 | # Name: Hash - fix bareword keys 148 | # In: 149 | $z{foo} 150 | $z{'foo'} 151 | $z{"foo"} 152 | $z{$foo} 153 | $z{'foo'.$bar} 154 | # Out: 155 | %z{'foo'} 156 | %z{'foo'} 157 | %z{"foo"} 158 | %z{$foo} 159 | %z{'foo'~$bar} 160 | #--- 161 | # Name: Cast: $$foo remains unchanged 162 | # In: 163 | $$foo 164 | # Out: 165 | $$foo 166 | #--- 167 | # Name: Cast: @$foo remains unchanged 168 | # In: 169 | @$foo 170 | # Out: 171 | @$foo 172 | #--- 173 | # Name: Cast: %$foo remains unchanged 174 | # In: 175 | %$foo 176 | # Out: 177 | %$foo 178 | #--- 179 | # Name: Cast: &$foo remains unchanged 180 | # In: 181 | &$foo 182 | # Out: 183 | &$foo 184 | #--- 185 | # Name: Cast: *$foo remains unchanged 186 | # In: 187 | *$foo 188 | # Out: 189 | *$foo 190 | #--- 191 | # Name: Cast: ${$foo} -> $($foo) 192 | # In: 193 | ${$foo} 194 | # Out: 195 | $($foo) 196 | #--- 197 | # Name: Cast: @{$foo} -> @($foo) 198 | # In: 199 | @{$foo} 200 | # Out: 201 | @($foo) 202 | #--- 203 | # Name: Cast: %{$foo} -> %($foo) 204 | # In: 205 | %{$foo} 206 | # Out: 207 | %($foo) 208 | #--- 209 | # Name: Cast: &{$foo} -> &($foo) 210 | # In: 211 | &{$foo} 212 | # Out: 213 | &($foo) 214 | #--- 215 | # Name: Cast: *{$foo} -> *($foo) 216 | # In: 217 | *{$foo} 218 | # Out: 219 | *($foo) 220 | #--- 221 | # Name: Decimal points: 42 -> 42 : No change for integer 222 | # In: 223 | 42 224 | # Out: 225 | 42 226 | #--- 227 | # Name: Decimal points: 42.1 -> 42.1 : No change for proper FP 228 | # In: 229 | 42.1 230 | # Out: 231 | 42.1 232 | #--- 233 | # Name: Decimal points: 42. -> 42.0 : fix trailing 234 | # In: 235 | 42. 236 | # Out: 237 | 42.0 238 | # Warn: 239 | At line 1, position 1, floating point number '42.' was changed to floating point number '42.0'. Consider changing it to integer '42'. 240 | #--- 241 | # Name: Decimal points: 42_555. -> 42_555.0 : fix trailing, even with underscores. 242 | # In: 243 | 42_555. 244 | # Out: 245 | 42_555.0 246 | # Warn: 247 | At line 1, position 1, floating point number '42_555.' was changed to floating point number '42_555.0'. Consider changing it to integer '42_555'. 248 | #--- 249 | # Name: Keyword requires space before condition: if(condition) -> if (condition) 250 | # In: 251 | if($foo) {print} 252 | unless($foo) {print} 253 | elsif($bar) {print} 254 | while($foo) {print} 255 | until($foo) {print} 256 | foreach(@foo) {print} 257 | for(@foo) {print} 258 | for(my $i = 0; $i < 5; $i++) {print} 259 | foreach(my $i = 0; $i < 5; $i++) {print} 260 | given($foo) {print} 261 | when($foo) {print} 262 | # Out: 263 | if $foo {print} 264 | unless $foo {print} 265 | elsif $bar {print} 266 | while $foo {print} 267 | until $foo {print} 268 | for @foo <-> $_ {print} 269 | for @foo <-> $_ {print} 270 | for (my $i = 0; $i < 5; $i++) {print} 271 | foreach (my $i = 0; $i < 5; $i++) {print} 272 | given ($foo) {print} 273 | when ($foo) {print} 274 | #--- 275 | # Name: Keyword requires space before condition: No change when space exists 276 | # In: 277 | if ($foo) {print} 278 | unless ($foo) {print} 279 | elsif ($bar) {print} 280 | while ($foo) {print} 281 | until ($foo) {print} 282 | foreach (@foo) {print} 283 | for (@foo) {print} 284 | for (my $i = 0; $i < 5; $i++) {print} 285 | given ($foo) {print} 286 | when ($foo) {print} 287 | # Out: 288 | if $foo {print} 289 | unless $foo {print} 290 | elsif $bar {print} 291 | while $foo {print} 292 | until $foo {print} 293 | for @foo <-> $_ {print} 294 | for @foo <-> $_ {print} 295 | for (my $i = 0; $i < 5; $i++) {print} 296 | given ($foo) {print} 297 | when ($foo) {print} 298 | #--- 299 | # Name: Keyword requires space before condition: print if($foo) -> print if ($foo) 300 | # In: 301 | print if($foo) 302 | print unless($foo) 303 | print while($foo) 304 | print until($foo) 305 | print foreach(@foo) 306 | print for(@foo) 307 | print when($foo) 308 | # Out: 309 | print if ($foo) 310 | print unless ($foo) 311 | print while ($foo) 312 | print until ($foo) 313 | print foreach (@foo) 314 | print for (@foo) 315 | print when ($foo) 316 | #--- 317 | # Name: Keyword requires space before condition: No change when space exists (postfix) 318 | # In: 319 | print if ($foo) 320 | print unless ($foo) 321 | print while ($foo) 322 | print until ($foo) 323 | print foreach (@foo) 324 | print for (@foo) 325 | print when ($foo) 326 | # Out: 327 | print if ($foo) 328 | print unless ($foo) 329 | print while ($foo) 330 | print until ($foo) 331 | print foreach (@foo) 332 | print for (@foo) 333 | print when ($foo) 334 | #--- 335 | # Name: map and grep BLOCK now need a comma after the block 336 | # In: 337 | print map { $_ * 2 } 0..3; 338 | print grep { $_ > 2 } 0..3; 339 | # Out: 340 | print map { $_ * 2 }, 0..3; 341 | print grep { $_ > 2 }, 0..3; 342 | #--- 343 | # Name: map and grep EXPR now need that expression to be within a block. 344 | # In: 345 | print map $_ * 2, 0..3; 346 | print grep $_ > 2, 0..3; 347 | # Out: 348 | print map { $_ * 2 }, 0..3; 349 | print grep { $_ > 2 }, 0..3; 350 | #--- 351 | # Name: `foreach my $var (LIST)` becomes `for LIST <->` 352 | # In: 353 | foreach my $i (@a) {} 354 | for my $i (@a) {} 355 | # Out: 356 | for @a <-> $i {} 357 | for @a <-> $i {} 358 | #--- 359 | # Name: `foreach (LIST)` becomes `for LIST <-> $_` 360 | # In: 361 | foreach (@a) {} 362 | # Out: 363 | for @a <-> $_ {} 364 | #--- 365 | # Name: `sub foo { my ($a) = @_; }` becomes `sub foo ($a) { }` 366 | # In: 367 | sub foo { my ($a) = @_; } 368 | # Out: 369 | sub foo ($a) { } 370 | #--- 371 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The Artistic License 2.0 2 | 3 | Copyright (c) 2000-2006, The Perl Foundation. 4 | 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | This license establishes the terms under which a given free software 11 | Package may be copied, modified, distributed, and/or redistributed. 12 | The intent is that the Copyright Holder maintains some artistic 13 | control over the development of that Package while still keeping the 14 | Package available as open source and free software. 15 | 16 | You are always permitted to make arrangements wholly outside of this 17 | license directly with the Copyright Holder of a given Package. If the 18 | terms of this license do not permit the full use that you propose to 19 | make of the Package, you should contact the Copyright Holder and seek 20 | a different licensing arrangement. 21 | 22 | Definitions 23 | 24 | "Copyright Holder" means the individual(s) or organization(s) 25 | named in the copyright notice for the entire Package. 26 | 27 | "Contributor" means any party that has contributed code or other 28 | material to the Package, in accordance with the Copyright Holder's 29 | procedures. 30 | 31 | "You" and "your" means any person who would like to copy, 32 | distribute, or modify the Package. 33 | 34 | "Package" means the collection of files distributed by the 35 | Copyright Holder, and derivatives of that collection and/or of 36 | those files. A given Package may consist of either the Standard 37 | Version, or a Modified Version. 38 | 39 | "Distribute" means providing a copy of the Package or making it 40 | accessible to anyone else, or in the case of a company or 41 | organization, to others outside of your company or organization. 42 | 43 | "Distributor Fee" means any fee that you charge for Distributing 44 | this Package or providing support for this Package to another 45 | party. It does not mean licensing fees. 46 | 47 | "Standard Version" refers to the Package if it has not been 48 | modified, or has been modified only in ways explicitly requested 49 | by the Copyright Holder. 50 | 51 | "Modified Version" means the Package, if it has been changed, and 52 | such changes were not explicitly requested by the Copyright 53 | Holder. 54 | 55 | "Original License" means this Artistic License as Distributed with 56 | the Standard Version of the Package, in its current version or as 57 | it may be modified by The Perl Foundation in the future. 58 | 59 | "Source" form means the source code, documentation source, and 60 | configuration files for the Package. 61 | 62 | "Compiled" form means the compiled bytecode, object code, binary, 63 | or any other form resulting from mechanical transformation or 64 | translation of the Source form. 65 | 66 | 67 | Permission for Use and Modification Without Distribution 68 | 69 | (1) You are permitted to use the Standard Version and create and use 70 | Modified Versions for any purpose without restriction, provided that 71 | you do not Distribute the Modified Version. 72 | 73 | 74 | Permissions for Redistribution of the Standard Version 75 | 76 | (2) You may Distribute verbatim copies of the Source form of the 77 | Standard Version of this Package in any medium without restriction, 78 | either gratis or for a Distributor Fee, provided that you duplicate 79 | all of the original copyright notices and associated disclaimers. At 80 | your discretion, such verbatim copies may or may not include a 81 | Compiled form of the Package. 82 | 83 | (3) You may apply any bug fixes, portability changes, and other 84 | modifications made available from the Copyright Holder. The resulting 85 | Package will still be considered the Standard Version, and as such 86 | will be subject to the Original License. 87 | 88 | 89 | Distribution of Modified Versions of the Package as Source 90 | 91 | (4) You may Distribute your Modified Version as Source (either gratis 92 | or for a Distributor Fee, and with or without a Compiled form of the 93 | Modified Version) provided that you clearly document how it differs 94 | from the Standard Version, including, but not limited to, documenting 95 | any non-standard features, executables, or modules, and provided that 96 | you do at least ONE of the following: 97 | 98 | (a) make the Modified Version available to the Copyright Holder 99 | of the Standard Version, under the Original License, so that the 100 | Copyright Holder may include your modifications in the Standard 101 | Version. 102 | 103 | (b) ensure that installation of your Modified Version does not 104 | prevent the user installing or running the Standard Version. In 105 | addition, the Modified Version must bear a name that is different 106 | from the name of the Standard Version. 107 | 108 | (c) allow anyone who receives a copy of the Modified Version to 109 | make the Source form of the Modified Version available to others 110 | under 111 | 112 | (i) the Original License or 113 | 114 | (ii) a license that permits the licensee to freely copy, 115 | modify and redistribute the Modified Version using the same 116 | licensing terms that apply to the copy that the licensee 117 | received, and requires that the Source form of the Modified 118 | Version, and of any works derived from it, be made freely 119 | available in that license fees are prohibited but Distributor 120 | Fees are allowed. 121 | 122 | 123 | Distribution of Compiled Forms of the Standard Version 124 | or Modified Versions without the Source 125 | 126 | (5) You may Distribute Compiled forms of the Standard Version without 127 | the Source, provided that you include complete instructions on how to 128 | get the Source of the Standard Version. Such instructions must be 129 | valid at the time of your distribution. If these instructions, at any 130 | time while you are carrying out such distribution, become invalid, you 131 | must provide new instructions on demand or cease further distribution. 132 | If you provide valid instructions or cease distribution within thirty 133 | days after you become aware that the instructions are invalid, then 134 | you do not forfeit any of your rights under this license. 135 | 136 | (6) You may Distribute a Modified Version in Compiled form without 137 | the Source, provided that you comply with Section 4 with respect to 138 | the Source of the Modified Version. 139 | 140 | 141 | Aggregating or Linking the Package 142 | 143 | (7) You may aggregate the Package (either the Standard Version or 144 | Modified Version) with other packages and Distribute the resulting 145 | aggregation provided that you do not charge a licensing fee for the 146 | Package. Distributor Fees are permitted, and licensing fees for other 147 | components in the aggregation are permitted. The terms of this license 148 | apply to the use and Distribution of the Standard or Modified Versions 149 | as included in the aggregation. 150 | 151 | (8) You are permitted to link Modified and Standard Versions with 152 | other works, to embed the Package in a larger work of your own, or to 153 | build stand-alone binary or bytecode versions of applications that 154 | include the Package, and Distribute the result without restriction, 155 | provided the result does not expose a direct interface to the Package. 156 | 157 | 158 | Items That are Not Considered Part of a Modified Version 159 | 160 | (9) Works (including, but not limited to, modules and scripts) that 161 | merely extend or make use of the Package, do not, by themselves, cause 162 | the Package to be a Modified Version. In addition, such works are not 163 | considered parts of the Package itself, and are not subject to the 164 | terms of this license. 165 | 166 | 167 | General Provisions 168 | 169 | (10) Any use, modification, and distribution of the Standard or 170 | Modified Versions is governed by this Artistic License. By using, 171 | modifying or distributing the Package, you accept this license. Do not 172 | use, modify, or distribute the Package, if you do not accept this 173 | license. 174 | 175 | (11) If your Modified Version has been derived from a Modified 176 | Version made by someone other than you, you are nevertheless required 177 | to ensure that your Modified Version complies with the requirements of 178 | this license. 179 | 180 | (12) This license does not grant you the right to use any trademark, 181 | service mark, tradename, or logo of the Copyright Holder. 182 | 183 | (13) This license includes the non-exclusive, worldwide, 184 | free-of-charge patent license to make, have made, use, offer to sell, 185 | sell, import and otherwise transfer the Package with respect to any 186 | patent claims licensable by the Copyright Holder that are necessarily 187 | infringed by the Package. If you institute patent litigation 188 | (including a cross-claim or counterclaim) against any party alleging 189 | that the Package constitutes direct or contributory patent 190 | infringement, then this Artistic License to you shall terminate on the 191 | date that such litigation is filed. 192 | 193 | (14) Disclaimer of Warranty: 194 | THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS 195 | IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED 196 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR 197 | NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL 198 | LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL 199 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 200 | DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF 201 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 202 | -------------------------------------------------------------------------------- /translate_regex.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use 5.012; # Probably works with lower versions, but not tested on them yet. 5 | use PPIx::Regexp; 6 | use PPIx::Regexp::Dumper; 7 | use Data::Dumper; $Data::Dumper::Useqq = 1; 8 | use Getopt::Long; 9 | 10 | my $USAGE = <<"END_OF_USAGE"; 11 | $0 --elms [file ...] 12 | This program is a stand-alone Perl 5 to Perl 6 regexp translator. 13 | 14 | In this, its first incarnation, it has a special (and temporary) --elms option to 15 | automatically translate the data exported from http://elm.eu.org/elms/browse_elms.html 16 | 17 | If no options are given, then it translates each line as it is typed into the console. 18 | END_OF_USAGE 19 | 20 | GetOptions( 21 | 'elms' => \( my $elms ), 22 | ) or die $USAGE; 23 | 24 | my ( $DUMP_BEFORE, $DUMP_AFTER ) = ( 0, 0 ); 25 | 26 | =begin comments 27 | 28 | This program is a stand-alone Perl 5 to Perl 6 regexp translator. 29 | Quickly written due to this conversation: 30 | http://irclog.perlgeek.de/perl6/2013-08-07#i_7420578 31 | 32 | Written in Perl 5 instead of Perl 6, because Perl 6 does not yet have PPI. 33 | 34 | TODO: 35 | Change . into \N iff /s is in effect. 36 | Change ^ and $ into ^^ and $$ iff /m is in effect. 37 | Move modifiers from end to beginning. 38 | Translate non-alphas into escaped forms. 39 | Convert | to || 40 | Is this needed? 41 | Translate tricks like [ ] and [|] ??? 42 | Warn about $1 becoming $0, etc ? 43 | Add tests. 44 | Integrate with the existing Blue Tiger code. 45 | Handle `use re` /flags mode? Probably not. 46 | Named capture: Translate qr{(?bar)} into rx{ $=[bar] } . See S05 "Named scalar aliasing to subpatterns" 47 | 48 | =end comments 49 | 50 | =cut 51 | 52 | #say translate_regexp( 53 | # "qr{([DEQ].{0,1}[LIM].{2,3}[LIVMF][^P]{2,3}[LMVF].[LMIV].{0,3}[DE])|([DE].{0,1}[LIM].{2,3}[LIVMF][^P]{2,3}[LMVF].[LMIV].{0,3}[DEQ])}" 54 | # "qr{foo}smx" 55 | # 'qr{["]}' 56 | # "qr{[']}" 57 | # "qr{[ ]}" 58 | # "qr{abc{2,}}" 59 | # "qr(abc{2,})" 60 | # "qr{ab[-ce]{2,}}" 61 | # "qr{ab [c-ef-z]{19,}}" # /x changes ' ' from Token::Whitespace to Token::Literal! 62 | # "qr{(?:abc)}" # ?: changes from 'Structure::Capture ( ... )' to 'Structure::Modifier (?: ... )' 63 | # 'qr{(?mi)^(?:[a-z]|\d){1,2}(?=\s)}' # Example from S05 64 | #); 65 | 66 | # From http://elm.eu.org/elms/browse_elms.html , export classes as tsv 67 | if ($elms) { 68 | open my $fh, '<', 'elm_classes.tsv' 69 | or die; 70 | while (<$fh>) { 71 | chomp; 72 | next if /^\s*#/; 73 | next if /^"ELMIdentifier"/; # Header line 74 | my ( $id, $desc, $regex, $instances, $pdb_instances ) = split "\t"; 75 | say 'P5: ', $regex; 76 | say 'P6: ', translate_regexp($regex); 77 | say ''; 78 | } 79 | close $fh or warn; 80 | } 81 | else { 82 | while (<>) { 83 | chomp; 84 | next if /^\s*#/; # Skip commented-out lines. 85 | say translate_regexp($_); 86 | } 87 | } 88 | sub translate_regexp { 89 | die if @_ != 1; 90 | my ($regexp_in_a_string) = @_; 91 | 92 | my $re = PPIx::Regexp->new($regexp_in_a_string) 93 | or die; 94 | 95 | # Dump RE before changes are made. 96 | if ($DUMP_BEFORE) { 97 | say $re->source; 98 | my $d = PPIx::Regexp::Dumper->new($re) or die; 99 | $d->print(); 100 | } 101 | 102 | # qr// becomes re// 103 | { 104 | my $e1 = $re->first_element; 105 | if ( $e1->content eq 'qr' ) { 106 | # S05: So you may use parens as your C delimiters, but only if you interpose whitespace 107 | my $spaces = ( $e1->next_sibling->delimiters eq '()' ) ? 1 : 0; 108 | $e1->{content} = 'rx' . ( ' ' x $spaces ); 109 | } 110 | } 111 | 112 | # {2,5} becomes ** 2..5 113 | # {2,} becomes ** 2..* 114 | # {,5} becomes ** 0..5 115 | if ( my $sq_aref = $re->find( 'Structure::Quantifier' ) ) { 116 | for my $sq ( @{$sq_aref} ) { 117 | warn if $sq->start ->content ne '{'; 118 | warn if $sq->finish->content ne '}'; 119 | $sq->start ->{content} = ' ** '; # The trailing space is important, for {,5} not to become ***..5 120 | $sq->finish->{content} = ' '; 121 | my $tl_aref = $sq->find('Token::Literal') 122 | or warn "Empty Structure::Quantifier!!!"; 123 | # Hmmm. 10,100 is 6 tokens, not 3! 124 | if ($tl_aref) { 125 | for my $i ( 0 .. $#{$tl_aref} ) { 126 | next if $tl_aref->[$i]->content ne ','; 127 | warn "XXX Probable bug in your regexp! FOO{,n} is not defined in Perl 5; translating to 0..n anyway\n"; 128 | $tl_aref->[$i]->{content} = ( $i == 0 ) ? '0..' 129 | : ( $i == $#{$tl_aref} ) ? '..*' 130 | : '..' 131 | ; 132 | last; 133 | } 134 | } 135 | } 136 | } 137 | 138 | # [a-z] becomes <[a..z]> 139 | # [-a-z] becomes <-[a..z]> 140 | if ( my $scc_aref = $re->find( 'Structure::CharClass' ) ) { 141 | for my $scc ( @{$scc_aref} ) { 142 | warn if $scc->start ->content ne '['; 143 | warn if $scc->finish->content ne ']'; 144 | 145 | 146 | # If this is a CharClass with only 1 char (like PBP recommends instead of escaping), 147 | # replace with single-quoted string, or double-quoted if the char is a single-quote. 148 | # XXX Are the extra spaces needed? 149 | if ( $scc->children == 1 and ref $scc->child(0) eq 'PPIx::Regexp::Token::Literal' ) { 150 | my $q = $scc->child(0)->content eq q{'} ? q{"} : q{'}; 151 | $scc->start ->{content} = ' ' . $q ; 152 | $scc->finish->{content} = $q . ' ' ; 153 | next; 154 | } 155 | 156 | 157 | $scc->start ->{content} = '<['; 158 | $scc->finish->{content} = ']>'; 159 | 160 | # Handle negated ranges 161 | if ( $scc->negated() ) { 162 | # The ^ is stored in the type(), for some odd reason. 163 | warn if $scc->type->content ne '^'; 164 | $scc->start->{content} = '<-['; 165 | $scc->type ->{content} = ''; 166 | } 167 | 168 | if ( my $nr_aref = $scc->find( 'Node::Range' ) ) { 169 | for my $nr ( @{$nr_aref} ) { 170 | my $c1 = $nr->child(1); 171 | warn if $c1->content ne '-'; 172 | warn if $c1->class ne 'PPIx::Regexp::Token::Operator'; 173 | $c1->{content} = '..'; 174 | $nr->child( 0)->{content} = ' ' . $nr->child( 0)->{content}; 175 | $nr->child(-1)->{content} = $nr->child(-1)->{content} . ' '; 176 | } 177 | } 178 | } 179 | } 180 | 181 | # (?:abc) becomes [abc] 182 | if ( my $sm_aref = $re->find( 'Structure::Modifier' ) ) { 183 | for my $sm ( @{$sm_aref} ) { 184 | # Handling the simple case for now. Will handle more complex cases later; probably with a different approach. 185 | next if $sm->type->content ne '?:'; 186 | next if %{$sm->type->modifiers}; 187 | 188 | warn if $sm->start ->content ne '('; 189 | warn if $sm->finish->content ne ')'; 190 | $sm->start ->{content} = '['; 191 | $sm->finish->{content} = ']'; 192 | $sm->type ->{content} = ''; 193 | } 194 | } 195 | 196 | # (?=\s) becomes 197 | # (?<=\s) becomes 198 | if ( my $sa_aref = $re->find( 'Structure::Assertion' ) ) { 199 | for my $sa ( @{$sa_aref} ) { 200 | warn if $sa->start ->content ne '('; 201 | warn if $sa->finish->content ne ')'; 202 | 203 | my $type = $sa->type->content; 204 | if ( $type eq '?=' ) { 205 | # Positive look-ahead 206 | $sa->start ->{content} = '<'; 207 | $sa->finish->{content} = '>'; 208 | $sa->type ->{content} = '?before '; 209 | } 210 | elsif ( $type eq '?<=' ) { 211 | # Positive look-behind 212 | $sa->start ->{content} = '<'; 213 | $sa->finish->{content} = '>'; 214 | $sa->type ->{content} = '?after '; 215 | } 216 | else { 217 | warn "Don't yet know how to handle a Structure::Assertion if type '$type':\n ", Dumper $sa; 218 | } 219 | 220 | } 221 | } 222 | 223 | if ($DUMP_AFTER) { 224 | my $d = PPIx::Regexp::Dumper->new($re) or die; 225 | $d->print(); 226 | } 227 | 228 | # Use ->content instead of ->source, in order to reflects the changes we have made to the tree. 229 | return $re->content; 230 | } 231 | __END__ 232 | 233 | Relevant snippents from S05-regex.pod : 234 | 235 | =item * 236 | 237 | The new C<:Perl5>/C<:P5> modifier allows Perl 5 regex syntax to be 238 | used instead. (It does not go so far as to allow you to put your 239 | modifiers at the end.) For instance, 240 | 241 | m:P5/(?mi)^(?:[a-z]|\d){1,2}(?=\s)/ 242 | 243 | is equivalent to the Perl 6 syntax: 244 | 245 | m/ :i ^^ [ <[a..z]> || \d ] ** 1..2 / 246 | (?mi) ^ [<[ a..z ]>|\d] ** 1..2 (?=\s)} 247 | 248 | 249 | =item * 250 | 251 | The Perl 5 C regex constructor is gone. 252 | 253 | =item * 254 | 255 | The Perl 6 equivalents are: 256 | 257 | regex { pattern } # always takes {...} as delimiters 258 | rx / pattern / # can take (almost) any chars as delimiters 259 | 260 | You may not use whitespace or alphanumerics for delimiters. Space is 261 | optional unless needed to distinguish from modifier arguments or 262 | function parens. So you may use parens as your C delimiters, 263 | but only if you interpose whitespace: 264 | 265 | rx ( pattern ) # okay 266 | rx( 1,2,3 ) # tries to call rx function 267 | 268 | (This is true for all quotelike constructs in Perl 6.) 269 | 270 | The C form may be used directly as a pattern anywhere a normal C match can. 271 | The C form is really a method definition, and must be used in such a way that 272 | the grammar class it is to be used in is apparent. 273 | -------------------------------------------------------------------------------- /lex_label.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use 5.010; 5 | use PPI; 6 | use Data::Dumper; $Data::Dumper::Useqq = 1; $Data::Dumper::Sortkeys = 1; 7 | 8 | my $program_path = './my_missing.pl'; 9 | 10 | # List of things we are figuring out: 11 | # Where are the lexical scopes? Where do they start, and end? Which statements are in which scopes? 12 | # Where are the subs? Where do they start, and end? Which statements are in which subs, or in MAINline? 13 | # Where are vars used (via, R, W, etc)? 14 | # If used in a sub, and the sub cannot isolate via my(), doesn't that infect all scopes? 15 | # How are vars used in each statement? Read-only, read-write, write-read, write-only? 16 | # Using the scope locations and RO/RW values, for each variable, determine where `my` can be added (it may be multiple locations). 17 | # Determine all code mods needed, and extract changes to be independent of the memory refaddrs. 18 | # Modify the code: Add the `my` keyword to the declaring statements, and/or insert a new `my` statement above the outermost common scope allowed by RO/RW/Sub. 19 | # Do this in reverse order, walking up the tree or line/rowchar order. 20 | 21 | # XXX Need to better define my data structures! 22 | 23 | # XXX Need to add code to deal with (at least warn on) fully qualified variable names. 24 | # Would work better as False, to allow for 1TBS 25 | # my $BLOCK_IS_AT_SUBLEVEL = 1; # Should the actual ::Block braces appear to be its children's level, instead of it's own level? 26 | my $BLOCK_IS_AT_SUBLEVEL = 0; # Should the actual ::Block braces appear to be its children's level, instead of it's own level? 27 | 28 | # Change these to a single (or couple) of hashes (HoR)??? 29 | # XXX Globals; XXX change to params? 30 | # XXX Merge these into ELEMENTS 31 | my %lex; # For every element in the tree, this holds its lexical scope level. 32 | my %lex2; # Holds the starting point element of each lexical scope level. 33 | my %lex2_flipped; 34 | my %location; # For every element in the tree, this holds its location by refaddr, to make it easier to find without walking later. 35 | my %is_whitespace; 36 | 37 | my @lines_AoA; # For each line of source, refaddrs. 38 | 39 | # Consolidate! 40 | my %symbols; # HoA of '@array' => [ refaddr1, refaddr2, ... ]; 41 | my %symbol_first_seen; # HoA of '@array' => [ refaddr, $line, $rowcol ]; 42 | 43 | # my %containing_statement; # refaddr => [ element, ref] 44 | 45 | my %addr_element; # Every element from the PPI tree, indexed by refaddr for reverse lookup. 46 | 47 | my @DEBUG; 48 | 49 | # XXX Hmmm. I might want a separate array just for $Element itself, to ease dumping? 50 | # XXX Delay this for now 51 | # my @ELEMENTS; # Linearly assigned AoR; Fields are: 52 | # INDEX => $ELEMENTS.index_number, 53 | # ELEMENT => $Element, # pointer to the real PPI::Element , 54 | # REFADDR => $Element->refaddr, 55 | # LOCATION => $Element->location == [ $line, $rowchar, $col, $logical_line, $logical_file_name ] 56 | # IS_WHITESPACE => Bool, 57 | # IS_IN_SUB => Bool, 58 | # LEXICAL_SCOPE => String, # 5.8.7.3 59 | 60 | # Note that @ELEMENTS is indexed on REFADDR via the hash %XXX. 61 | 62 | die if not -e $program_path; 63 | 64 | my $PPI_doc = PPI::Document->new( $program_path ) 65 | or die "Could not generate PPI from file '$program_path'"; 66 | # XXX Add location indexing before anything else, to keep memory addresses from changing? 67 | $PPI_doc->index_locations; 68 | # my $foo = $PPI_doc->complete; # Fails on bare blocks! PPI 1.218 69 | # print Dumper $foo; 70 | # say '!!!'; 71 | 72 | # XXX PPI::Statement::Compound says that do{} if $cond is *not* a compound! How to handle? 73 | 74 | # So far, I see no way that a lexical scope can happen without Struct:Block, parented by either PPI::Statement::Compound or Statement::Sub 75 | # XXX No, I still see no way a lexical scope can happen without Struct:Block, but it won't have a ::Compound or ::Sub parent if it is a `map` or `grep`; it just has a ::Statement parent then. 76 | 77 | use PPI::Dumper; 78 | sub dump_it { 79 | die if @_ != 1; 80 | my ($thing_to_dump) = @_; 81 | my $Dumper = PPI::Dumper->new( 82 | $thing_to_dump, 83 | content => 1, 84 | # whitespace => 0, 85 | whitespace => 1, 86 | memaddr => 1, 87 | ) or die; 88 | 89 | my @lines = $Dumper->list; 90 | 91 | # This code aligns all the content on the right-hand side. 92 | my $re = qr{ 93 | ^ 94 | ( \s* \d+ ) 95 | ( \s+ \S+ ) 96 | (?: [ ]* \t )? 97 | ( \S .* )? 98 | $ 99 | }msx; 100 | 101 | my @structs; 102 | my $max_len = 0; 103 | for my $line (@lines) { 104 | my ( $addr, $ws_and_class, $content ) = ( $line =~ /$re/ ) or warn; 105 | my $len = length $ws_and_class; 106 | $max_len = $len if $max_len < $len; 107 | push @structs, [ $addr, $ws_and_class, $content ]; 108 | } 109 | 110 | for my $struct_aref (@structs) { 111 | my ( $addr, $ws_and_class, $content ) = @{$struct_aref}; 112 | # $addr = sprintf '0x%016x', $addr; 113 | # $addr = sprintf '%016d', $addr; 114 | my $level1 = sprintf '%-15s', ($lex{$addr} // ''); 115 | # my $level2 = $lex2{$addr} ? '*' : ''; 116 | my $level2 = $lex2_flipped{$addr} ? '*' : ''; 117 | if ( not defined $content ) { 118 | # say join ' ', $addr, $ws_and_class; 119 | say join ' ', $addr, $level1, $level2, $ws_and_class; 120 | } 121 | else { 122 | $ws_and_class = sprintf "%-${max_len}s", $ws_and_class; 123 | # say join ' ', $addr, $ws_and_class, $content; 124 | say join ' ', $addr, $level1, $level2, $ws_and_class, $content; 125 | } 126 | } 127 | 128 | 129 | # say for @lines; 130 | say '---'; 131 | } 132 | 133 | sub address_of { 134 | return sprintf '0x%016x', $_[0]->refaddr; 135 | } 136 | 137 | # memaddr 138 | # Should the dumper print the memory addresses of each PDOM element. 139 | # True/false value, off by default. 140 | # refaddr method??? 141 | 142 | if ( 1 == 1 ) { 143 | # XXX 144 | # This code was to explore the parents of ::Block nodes. 145 | # It might be obsolete now. 146 | 147 | my $blocks_aref = $PPI_doc->find( 'PPI::Structure::Block' ) 148 | or return; 149 | 150 | ### dump_it($_) for @{$blocks_aref}; 151 | 152 | 153 | # Start building a hash of what lexical level everything is at. 154 | # XXX What order does find() use? 155 | for my $block ( @{$blocks_aref} ) { 156 | # dump_it($block); 157 | my $top = $block->top; 158 | warn 'ok but not seen yet' if $top->class ne 'PPI::Document'; 159 | # say "\nTop is ", join "\t", $top->class, address_of($top); 160 | 161 | # Walk up the tree until parent isa Compound or Sub 162 | my $p = $block->parent; 163 | while (1) { 164 | my $c = $p->class; 165 | # say "parent class is ", $c, ' ', address_of($p); 166 | last if $c eq 'PPI::Statement::Sub' # sub 167 | or $c eq 'PPI::Statement::Compound' # for while if elsif else 168 | or $c eq 'PPI::Statement'; # map, grep, List::MoreUtils::uniq_by(&@), etc 169 | die if $p == $top; 170 | warn "Parent is more than 1 level up! probably ok but not seen yet"; 171 | $p = $p->parent; 172 | } 173 | my $type = $p->class eq 'PPI::Statement::Compound' ? $p->type : ''; # XXX Ack! elsif/else shows as if, and empty block shows as `continue` (in PPI 1.218)!!! 174 | # say "parent class is ", join "\t", address_of($p), $p->class, $type; 175 | # say $p; 176 | # say "---\n"; 177 | } 178 | } 179 | # XXX What does this look like? 180 | # my $hashref = { 181 | # a => 1, 182 | # }; 183 | 184 | # on start of block 185 | 186 | 187 | # XXX Ack! anon subs are not ::Sub ! 188 | # Taking the code from PPI::Dumper::_dump and/or PPI::Node::find, rather than calling ->find(), so that I can add code into enter/exit points, where the original code has no hooks. 189 | # Needs more code since indent level is always the same, and because lex level does not change on *every* recurse (just *some*). 190 | { 191 | # my $level_start = 1; # Each level starts with 1, not 0. 192 | my $level_start = 0; # Each level starts with 1, not 0. 193 | # my @level = ($level_start); # XXX Global - change to param? 194 | 195 | # Since all the plain statements inbetween, say level 3.6 and 3.7, need to be at level 3, yet the info 196 | # about (6 was the last lexical sublevel we used so far in level 3) must be retained, we use the last 197 | # element of @level to keep that info, and omit that last element from read_level(). 198 | # So, while within 3.7, all the plain PPI nodes secretly have @level == (3.7.0) 199 | # Since increments happen on entry, the only level containing 0 in its publicly-seen read_level() 200 | # is the MAIN level, which will show as just "0". 201 | my @level = ($level_start, $level_start); # XXX Global - change to param? 202 | # my @level = ($level_start); # XXX Global - change to param? 203 | sub next_level { $level[-1]++; } # Increment. 204 | sub push_level { push @level, $level_start; } # Deeper. 205 | sub pop_level { pop @level; } # Less deep. 206 | # sub read_level { join '.', @level } # Turn into 5.2.6.4, etc. 207 | sub read_level { join '.', @level[0 .. ($#level-1)] } # Turn into 5.2.6.4, etc. 208 | 209 | # Note that PPI::Node has a ->scope() method, returning a Boolean on if the node represents a lexical scope boundary. 210 | # This is not as useful as it would seem, since we need the start of scope to show up at the block, and ->scope() 211 | # is enabled for the (e.g.) `sub` or `if` statement that contains the ::Block. 212 | # Also, it looks like PPI::Statement::Sub returns false! XXX File a bug report! 213 | sub is_start_of_scope { return !! ( $_[0]->class eq 'PPI::Structure::Block' ) } 214 | 215 | # Code adapted from recursive _dump() in PPI/Dumper.pm 216 | # XXX Consider using the queuing code from PPI::Node::find() ??? 217 | sub determine_lexical_scope_levels { 218 | die if @_ != 1; 219 | my ($Element) = @_; 220 | 221 | # warn 'ok? Not seen yet' if $Element->top ne 'PPI::Document'; 222 | 223 | 224 | # starting element; Possibly starting lexical scope 225 | my $element_started_scope = is_start_of_scope($Element); 226 | 227 | next_level() if $element_started_scope; 228 | 229 | my $ra = $Element->refaddr; # XXX use hex version? 230 | $addr_element{$ra} = $Element; 231 | die if exists $lex{$ra}; 232 | $lex{$ra} = read_level(); 233 | push @DEBUG, read_level; 234 | 235 | $is_whitespace{$ra} = 1 if $Element->class eq 'PPI::Token::Whitespace'; 236 | 237 | my @loc = @{ $Element->location }; 238 | my ( $line, $rowchar, $col, $logical_line, $logical_file_name ) = @loc; 239 | # $rowchar is the literal horizontal character, and $col is the visual column, taking into account tabbing. 240 | $location{$ra} = [ @loc ]; 241 | push @{ $lines_AoA[ $line ] }, $ra; # For each line of source, refaddrs. 242 | 243 | # { 244 | # my $next_element_number = 1 + $#ELEMENTS; 245 | # # XXX Not testted against $element_started_scope !!! 246 | # push @ELEMENTS, { 247 | # INDEX => $next_element_number, 248 | # ELEMENT => $Element, # pointer to the real PPI::Element , 249 | # REFADDR => $Element->refaddr, 250 | # LOCATION => [ @{ $Element->location } ], 251 | # IS_WHITESPACE => !!($Element->class eq 'PPI::Token::Whitespace'), 252 | # # IS_IN_SUB => Bool, 253 | # LEXICAL_SCOPE => read_level(), 254 | # }; 255 | # } 256 | 257 | if ( $element_started_scope ) { 258 | push_level() if $element_started_scope; 259 | 260 | # XXX Experimental; replace the lex level we just put into %lex, with the new incremented level. 261 | # This makes the ::Block show up at the same lex level as its contents. 262 | # We might not care. Play with enabling and disabling this line! 263 | # $lex{$ra} = read_level() if $BLOCK_IS_AT_SUBLEVEL; 264 | 265 | die if exists $lex2{ read_level() }; 266 | $lex2{ read_level() } = $ra; 267 | } 268 | 269 | if ( $Element->class eq 'PPI::Token::Symbol' ) { 270 | # push @{ $symbols{ $Element->symbol} }, $ra; 271 | my $real_sym = $Element->symbol; # This shows @array when used as $array[3] ! 272 | push @{ $symbols{$real_sym} }, [ $ra, read_level() ]; 273 | 274 | $symbol_first_seen{$real_sym} //= [ $ra, $line, $rowchar ]; 275 | 276 | # XXX Need to track where a simple my() can be used, vs where a statement must be inserted. 277 | # XXX Need to add code to show location found, too? 278 | # Hmmm. $grain is only found in one block, and it is written to on the first occurance. 279 | # OK to add my, or my()??? 280 | # 0.8 32 $grain = 'wheat'; 281 | # 0.8 33 print "$grain"; 282 | # XXX Note that any parent being a sub may change things! 283 | # ++ and -- are read-write. How to say it is OK to be undefined? Do I care? 284 | 285 | # 0 36 sub foo1 { 286 | # 0.9 37 if ( $burned_out ) { 287 | # 0.9.1 38 $unstable++; 288 | # 0.9.1 39 } 289 | # 0.9 40 if ( !$burned_out ) { 290 | # 0.9.2 41 $unstable--; 291 | # 0.9.2 42 } 292 | # 0.9 43 print "hi!\n"; 293 | # 0 44 } 294 | # $unstable occurs in 0.9.1 and 0.9.2, which is more than one level. 295 | # Find lowest common parent, which is 0.9 296 | # 0.9 is one level above the earliest occurance (0.9.1), so locate my() just before statement containing the 0.9.1 block. 297 | 298 | # 0 18 $i = 0; 299 | # 0 19 while ($i++<5) { 300 | # 0.4 20 $j = $i; 301 | # 0 21 } 302 | # 0 22 print "$j\n"; 303 | # $j must be defined at the line preceeding the statement that starts level 0.4, 304 | # so before the `while` on 19. 305 | 306 | # 0.6 !! 307 | # 0.7 28 %hash = map { $_ => ++$c } grep { /a/ } grep /s/, qw( salami baloney ); 308 | # 0 !! 309 | # 0.8 !! 310 | # 0.9 29 %hash = map { $_ => ++$d } grep { /a/ } grep /s/, qw( salami baloney ); # Test redefiniion and $d not being initialized. 311 | # Hmmm. The line contains 0.8 and 0.9, but $d is only in 0.8. 312 | # It is a read-write, though. 313 | # $d must have its my() just before the statement that defineds 0.8. 314 | 315 | # XXX Need to test unknown_sub( $foo ); mechanism to determine r-o vs r-w 316 | } 317 | 318 | # Recurse into our children 319 | if ( $Element->isa('PPI::Node') ) { 320 | for my $child ( @{ $Element->{children} } ) { # XXX Change to the published accessor? Node->children does not include brace tokens for PPI::Structure. Try it and see! 321 | # entering child 322 | determine_lexical_scope_levels( $child ); 323 | # exiting child 324 | } 325 | } 326 | if ( $element_started_scope ) { 327 | pop_level() if $element_started_scope; 328 | } 329 | 330 | # leaving element; Possibly leaving lexical scope 331 | # pop_level() if $element_started_scope; 332 | # $output; 333 | } 334 | 335 | } 336 | determine_lexical_scope_levels($PPI_doc); 337 | %lex2_flipped = reverse %lex2; 338 | # print Dumper \%lex; 339 | # print Dumper \%lex2; 340 | # print Dumper \%lex2_flipped; 341 | # print Dumper \%location; 342 | # print Dumper \@lines_AoA; 343 | print Dumper \%symbols; 344 | # print Dumper [@ELEMENTS[0..3]]; 345 | # print Dumper \@ELEMENTS; 346 | # print Dumper \@DEBUG; 347 | dump_it($PPI_doc); 348 | 349 | # XXX Change from printing refaddr to using an array of Elements, with a single hash xref of refaddr-to-array-index. 350 | # This is so we can stop debugging with refaddrs, since they change from run to run. 351 | 352 | # Print the original file's contents, showing the lexical level beside each line. 353 | if ( 1 == 1 ) { 354 | my @lines = split "\n", $PPI_doc->serialize; 355 | for my $i ( 0 .. $#lines ) { 356 | # say '---'; 357 | my $line = $lines[$i]; 358 | my $num = $i + 1; 359 | 360 | my @element_addresses_in_line = @{ $lines_AoA[$num] }; 361 | 362 | # In most structures, there is whitespace (often newline) following the 363 | # opening brace, but on the same line. We want to discount that whitespace 364 | # when determining a line's lexical level, otherwise a line with an opening 365 | # brace will *always* be in two levels! 366 | my @non_whitespace_element_addresses_in_line = grep { !$is_whitespace{$_} } @element_addresses_in_line; 367 | 368 | my %h; 369 | $h{$_}++ for map { $lex{$_} } @non_whitespace_element_addresses_in_line; 370 | my @levels = sort keys %h; 371 | 372 | push @levels, $lex{ $element_addresses_in_line[0] } if ! @levels; # Caused by a whitespace-only line. 373 | 374 | # Lines can have multiple lexical levels if they are like: 375 | # @a = grep { $_ > 3 } @b; 376 | # We list all but the last level on separate lines (to avoid messing up the indentation), and use !! as a visual marker for such occurances. 377 | while ( @levels > 1 ) { 378 | printf "%-7s\t%7s\n", shift(@levels), '!!'; 379 | } 380 | 381 | die if @levels != 1; 382 | printf "%-7s\t%7d\t%s\n", $levels[0], $num, $line; 383 | } 384 | } 385 | 386 | # Need a structure of first occurrence of each var in each scope it appears in? 387 | for my $symbol ( sort keys %symbols ) { 388 | say "!!! $symbol"; 389 | my $occurances_aref = $symbols{$symbol}; 390 | 391 | for my $occurance_aref ( @{$occurances_aref} ) { 392 | my ( $ra, $level ) = @{$occurance_aref}; 393 | say "$ra, $level"; 394 | } 395 | } 396 | 397 | my @symbols_ordered = sort { 398 | $symbol_first_seen{$a}[1] <=> $symbol_first_seen{$b}[1] # line 399 | or $symbol_first_seen{$a}[2] <=> $symbol_first_seen{$b}[2] # rowchar 400 | } keys %symbol_first_seen; 401 | 402 | # XXX Move the symbol itself into its own hash for easier sorting and unpacking. 403 | for my $sym (@symbols_ordered) { 404 | my ( $ra, $line, $rowchar ) = @{ $symbol_first_seen{$sym} }; 405 | my $Element = $addr_element{$ra}; 406 | my $statement = $Element->statement or die; # First parent Statement object lexically 'above' the Element. (or equal to, if Element is a Statement) 407 | say sprintf "%-15s\t%15s\t%2d\t%2d\t%s", $sym, $ra, $line, $rowchar, $statement->refaddr; 408 | } 409 | 410 | # XXX Everything using refaddr must be calculated and processed to the 411 | # point of being able to remove the refaddrs *before* doing any 412 | # modifications. Build a complete worklist. Undef all the hashes just to be 413 | # sure that they cannot be used by a future maintenance programmer; it 414 | # would be a huge source of difficult bugs! 415 | 416 | # Making two worklists; one for vars that just need a prefixed `my`, and one for 'my' statements that need to be added. 417 | # XXX Custom code to use the new ::Variable type of statements where available? 418 | 419 | 420 | __END__ 421 | -------------------------------------------------------------------------------- /lib/PPIx/Transform/Perl5_to_Perl6.pm: -------------------------------------------------------------------------------- 1 | package PPIx::Transform::Perl5_to_Perl6; 2 | use strict; 3 | use warnings; 4 | use Carp qw( carp croak ); 5 | use Params::Util qw( _INSTANCE _ARRAY _ARRAY0 _STRING ); 6 | use base 'PPI::Transform'; 7 | 8 | BEGIN { 9 | # Before 1.204_03, PPI::Transform silently threw away all params to new(). 10 | { 11 | no warnings 'numeric'; 12 | use PPI::Transform 1.204 (); 13 | } 14 | my $ver = $PPI::Transform::VERSION; 15 | if ( $ver eq '1.204_01' or $ver eq '1.204_02' ) { 16 | die "PPI::Transform version 1.204_03 required--this is only version $ver"; 17 | } 18 | } 19 | 20 | =head1 NAME 21 | 22 | PPIx::Transform::Perl5_to_Perl6 23 | 24 | A class to transform Perl 5 source code into equivalent Perl 6. 25 | 26 | =head1 VERSION 27 | 28 | Version 0.01 29 | 30 | =cut 31 | 32 | our $VERSION = '0.01'; 33 | 34 | =head1 SYNOPSIS 35 | 36 | use PPIx::Transform::Perl5_to_Perl6; 37 | 38 | my $transform = PPIx::Transform::Perl5_to_Perl6->new(); 39 | 40 | # Read from one file and write to another 41 | $transform->file( 'some_perl_5_module.pm' => 'some_perl_5_module.pm6' ); 42 | 43 | # Change a file in place 44 | $transform->file( 'some_perl_5_program.pl' ); 45 | 46 | my $code_in = '$x = $y . $z'; 47 | 48 | my $PPI_doc = PPI::Document->new( \$code_in ) or die; 49 | 50 | my $rc = $transform->apply($PPI_doc) or warn; 51 | 52 | my $code_out = $PPI_doc->serialize; 53 | # $code_out contains '$x = $y ~ $z' 54 | 55 | # Or, run this from the command line: 56 | # bin/p526 path/to/program.pl > program_and_warnings.p6 57 | 58 | =head1 DESCRIPTION 59 | 60 | This class implements a document transform that will translate Perl 5 source 61 | code into equivalent Perl 6. 62 | 63 | =cut 64 | 65 | =begin comments 66 | 67 | 2008-01-19 Bruce Gray 68 | Wrote program. 69 | 70 | This program will translate Perl 5 code into Perl 6. Mostly :) 71 | 72 | Currently handles: 73 | Operator changes 74 | Invariant sigils 75 | Casts 76 | Nums with trailing. 77 | KeywordNoSpace 78 | Bare hash keys 79 | map/grep comma 80 | mapish EXPR changes to {BLOCK} 81 | Warnings for user review of transforms 82 | 83 | =end comments 84 | 85 | =cut 86 | 87 | 88 | # Scaffolding to allow us to work as a PPI::Transform. 89 | my %optional_initializer = map { $_ => 1 } qw( WARNING_LOG ); 90 | sub new { 91 | my $self = shift->SUPER::new(@_); 92 | 93 | my $bad = join "\n\t", 94 | grep { !$optional_initializer{$_} } 95 | sort keys %{$self}; 96 | carp "Unexpected initializer keys are being ignored:\n$bad\n " if $bad; 97 | 98 | return $self; 99 | } 100 | 101 | sub document { 102 | croak 'Wrong number of arguments passed to method' if @_ != 2; 103 | my ( $self, $document ) = @_; 104 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($document, 'PPI::Document'); 105 | 106 | my $change_count = $self->_convert_Perl5_PPI_to_Perl6_PPI($document); 107 | 108 | # XXX Work-around for a bug in PPI. 109 | # PPI::Transform documentation on apply() says: 110 | # Returns true if the transform was applied, 111 | # false if there is an error in the transform process, 112 | # or may die if there is a critical error in the apply handler. 113 | # but if the transform *is* applied without error, and no change happens 114 | # as a result, then apply() incorrectly returns undef. 115 | return 1 if defined $change_count and $change_count == 0; 116 | 117 | return $change_count; 118 | } 119 | 120 | 121 | # Converts the passed document in-place! 122 | # Returns number of changes, 0 if not changes, undef on error. 123 | sub _convert_Perl5_PPI_to_Perl6_PPI { 124 | croak 'Wrong number of arguments passed to method' if @_ != 2; 125 | my ( $self, $PPI_doc ) = @_; 126 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 127 | 128 | $self->_fix_PPI_shift_equals_op_bug($PPI_doc); 129 | 130 | my $change_count = 0; 131 | $change_count += $self->_translate_all_ops($PPI_doc); 132 | $change_count += $self->_change_sigils($PPI_doc); 133 | $change_count += $self->_change_casts($PPI_doc); 134 | $change_count += $self->_change_trailing_fp($PPI_doc); 135 | $change_count += $self->_insert_space_after_keyword($PPI_doc); 136 | $change_count += $self->_clothe_the_bareword_hash_keys($PPI_doc); 137 | $change_count += $self->_add_a_comma_after_mapish_blocks($PPI_doc); 138 | $change_count += $self->_change_mapish_expr_to_block($PPI_doc); 139 | $change_count += $self->_change_foreach_my_lexvar_to_arrow($PPI_doc); 140 | $change_count += $self->_remove_obsolete_pragmas_and_shbang($PPI_doc); 141 | $change_count += $self->_optionally_change_qw_to_arrow_quotes($PPI_doc); 142 | $change_count += $self->_remove_parens_from_conditionals($PPI_doc); 143 | $change_count += $self->_move_sub_params_from_at_to_declaration($PPI_doc); 144 | 145 | return $change_count; 146 | } 147 | 148 | 149 | # PPI Bug: += is a single operator, but <<= is two operators, << and = . 150 | # <<= should be one operator. Same for >>= . 151 | sub _fix_PPI_shift_equals_op_bug { 152 | croak 'Wrong number of arguments passed to method' if @_ != 2; 153 | my ( $self, $PPI_doc ) = @_; 154 | 155 | my $ops_aref = $PPI_doc->find( 'PPI::Token::Operator' ) 156 | or return; 157 | 158 | my @ops_to_delete; 159 | for my $op ( @{$ops_aref} ) { 160 | my $content = $op->content; 161 | 162 | next unless $content eq '<<' 163 | or $content eq '>>'; 164 | 165 | my $sib = $op->next_sibling 166 | or next; 167 | 168 | next unless $sib->class eq 'PPI::Token::Operator' 169 | and $sib->content eq '='; 170 | 171 | $op->add_content('='); 172 | push @ops_to_delete, $sib; 173 | } 174 | 175 | $_->delete or warn for @ops_to_delete; 176 | 177 | return 1; 178 | } 179 | 180 | 181 | sub _get_all { 182 | croak 'Wrong number of arguments passed to sub' if @_ != 2; 183 | my ( $PPI_doc, $classname ) = @_; 184 | croak 'Parameter 1 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 185 | croak 'Parameter 2 must be a classname!' if !_STRING($classname); 186 | 187 | $classname = "PPI::$classname" if $classname !~ m{^PPI::}; 188 | 189 | my $aref = $PPI_doc->find($classname) 190 | or return; 191 | 192 | return @{$aref}; 193 | } 194 | 195 | # Each entry is either a straight translation, 196 | # or a list of possible translations. 197 | my %ops_translation = ( 198 | '.' => '~', 199 | '.=' => '~=', 200 | '->' => '.', 201 | 202 | '=~' => '~~', 203 | '!~' => '!~~', 204 | 205 | # Ternary op 206 | '?' => '??', 207 | ':' => '!!', 208 | 209 | # Bitwise ops 210 | # http://perlcabal.org/syn/S03.html#Changes_to_Perl_5_operators 211 | # Bitwise operators get a data type prefix: +, ~, or ?. 212 | # For example, Perl 5's | becomes either +| or ~| or ?|, depending 213 | # on whether the operands are to be treated as numbers, strings, 214 | # or boolean values. 215 | # In Perl 5, the choice of whether to treat the operation as numeric or 216 | # string depended on the data; In Perl 6, it depends on the operator. 217 | # Note that Perl 6 has a short-circuiting xor operator, (^^). Since Perl 5 218 | # had no equivalent operator, nothing should translate to ^^, but we might 219 | # spot code that could advise the user to inspect for a ^^ opportunity. 220 | # Note that Perl 5 has separate ops for prefix versus infix xor 221 | # (~ versus ^). Perl 6 uses typed ^ for both prefix and infix. 222 | '|' => [ '+|', '~|', '?|' ], # bitwise or ( infix) 223 | '&' => [ '+&', '~&', '?&' ], # bitwise and ( infix) 224 | '^' => [ '+^', '~^', '?^' ], # bitwise xor ( infix) 225 | '~' => [ '+^', '~^', '?^' ], # bitwise not (prefix) 226 | 227 | '<<' => [ '+<', '~<' ], # bitwise shift left 228 | '>>' => [ '+>', '~>' ], # bitwise shift right 229 | '<<=' => [ '+<=', '~<=' ], # bitwise shift assign left 230 | '>>=' => [ '+>=', '~>=' ], # bitwise shift assign right 231 | ); 232 | 233 | # Returns number of changes, 0 if not changes, undef on error. 234 | sub _translate_all_ops { 235 | croak 'Wrong number of arguments passed to method' if @_ != 2; 236 | my ( $self, $PPI_doc ) = @_; 237 | 238 | my $change_count = 0; 239 | for my $op ( _get_all( $PPI_doc, 'Token::Operator' ) ) { 240 | my $p5_op = $op->content; 241 | 242 | my $p6_op = $ops_translation{$p5_op} 243 | or next; 244 | 245 | if ( _STRING($p6_op) ) { 246 | $op->set_content( $p6_op ); 247 | $change_count++; 248 | } 249 | elsif ( _ARRAY($p6_op) ) { 250 | my @p6_ops = @{$p6_op}; 251 | my $default_op = $p6_ops[0]; 252 | 253 | my $possible_ops = join ', ', map { "'$_'" } @p6_ops; 254 | $self->log_warn( 255 | $op, 256 | "op '$p5_op' was" 257 | . " changed to '$default_op', but could have been" 258 | . " any of ( $possible_ops ). Verify the context!\n" 259 | ); 260 | 261 | $op->set_content( $default_op ); 262 | $change_count++; 263 | } 264 | else { 265 | carp "Don't know how to handle xlate of op '$p5_op'" 266 | . " (is the entry in %ops_translation misconfigured?)"; 267 | } 268 | } 269 | 270 | return $change_count; 271 | } 272 | 273 | sub _change_sigils { 274 | croak 'Wrong number of arguments passed to method' if @_ != 2; 275 | my ( $self, $PPI_doc ) = @_; 276 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 277 | 278 | # Easy, since methods raw_type and symbol_type already contain the 279 | # logic to look at subscripts to figure out the real type of the variable. 280 | # Handles $foo[5] -> @foo[5] (array element), 281 | # $foo{$key} -> %foo{$key} (hash element), 282 | # and @foo{'x','y'} -> %foo{'x','y'} (hash slice ). 283 | # 284 | # No change needed for @foo[1,5] (array slice ). 285 | 286 | my $count = 0; 287 | for my $sym ( _get_all( $PPI_doc, 'Token::Symbol' ) ) { 288 | if ( $sym->raw_type ne $sym->symbol_type ) { 289 | $sym->set_content( $sym->symbol() ); 290 | $count++; 291 | } 292 | } 293 | 294 | return $count; 295 | } 296 | 297 | sub _change_casts { 298 | croak 'Wrong number of arguments passed to method' if @_ != 2; 299 | my ( $self, $PPI_doc ) = @_; 300 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 301 | 302 | # PPI mis-parses `% $foo`, so we cannot easily convert to the 303 | # better-written '%($foo)'. See: 304 | # bin/ppi_dump -e '%{$foo};' -e '%$foo;' -e '% $foo;' -e '% {$foo};' 305 | my $count = 0; 306 | for my $cast ( _get_all( $PPI_doc, 'Token::Cast' ) ) { 307 | my $s = $cast->next_sibling; 308 | if ( $s->isa('PPI::Token::Symbol') ) { 309 | # Don't do anything. %$foo is still a valid form in Perl 6. 310 | } 311 | elsif ( $s->isa('PPI::Token::Cast') and $cast eq "\\" ) { 312 | # Two casts in a row, like \% in \%{"$pack\:\:SUBS"} . 313 | # Skip for now. 314 | } 315 | elsif ( $s->isa('PPI::Structure::Block') ) { 316 | # %{...} becomes %(...). Same with @{...} and ${...}. 317 | if ( $s->start->content eq '{' and $s->finish->content eq '}' ) { 318 | $s->start->set_content('('); 319 | $s->finish->set_content(')'); 320 | $count++; 321 | } 322 | } 323 | elsif ( $s->isa('PPI::Structure::List') and $cast eq "\\" ) { 324 | # Don't do anything for now. 325 | # \( $x, $y ) is not the construct we are looking for. 326 | } 327 | else { 328 | $self->log_warn( 329 | $cast, 330 | 'XXX May have mis-parsed a Cast - sibling class', 331 | ' ', $s->class, 332 | ' ', $s->content, 333 | ); 334 | } 335 | } 336 | 337 | return $count; 338 | } 339 | 340 | sub _change_trailing_fp { 341 | croak 'Wrong number of arguments passed to method' if @_ != 2; 342 | my ( $self, $PPI_doc ) = @_; 343 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 344 | 345 | # [S02] Implicit Topical Method Calls 346 | # ...you may no longer write a Num as C<42.> with just a trailing dot. 347 | my $count = 0; 348 | for my $fp ( _get_all( $PPI_doc, 'Token::Number::Float' ) ) { 349 | my $n = $fp->content; 350 | if ( $n =~ m{ \A ([_0-9]+) [.] \z }msx ) { # Could have underscore 351 | my $bare_num = $1; 352 | my $n0 = $n . '0'; 353 | $fp->set_content( $n0 ); 354 | $self->log_warn( 355 | $fp, 356 | "floating point number '$n' was changed to floating point number '$n0'. Consider changing it to integer '$bare_num'.", 357 | ); 358 | $count++; 359 | } 360 | } 361 | 362 | return $count; 363 | } 364 | 365 | sub _insert_space_after_keyword { 366 | croak 'Wrong number of arguments passed to method' if @_ != 2; 367 | my ( $self, $PPI_doc ) = @_; 368 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 369 | 370 | # [S03] Minimal whitespace DWIMmery 371 | # Whitespace is in general required between any keyword and any opening 372 | # bracket that is I introducing a subscript or function arguments. 373 | # Any keyword followed directly by parentheses will be taken as a 374 | # function call instead. 375 | # if $a == 1 { say "yes" } # preferred syntax 376 | # if ($a == 1) { say "yes" } # P5-ish if construct 377 | # if($a,$b,$c) # if function call 378 | # 379 | # [S04] Statement parsing 380 | # Built-in statement-level keywords require whitespace between the 381 | # keyword and the first argument, as well as before any terminating loop. 382 | # In particular, a syntax error will be reported for C-isms such as these: 383 | # if(...) {...} 384 | # while(...) {...} 385 | # for(...) {...} 386 | 387 | my %wanted = ( 388 | if => { 'PPI::Structure::Condition' => 1, }, 389 | unless => { 'PPI::Structure::Condition' => 1, }, 390 | elsif => { 'PPI::Structure::Condition' => 1, }, 391 | while => { 'PPI::Structure::Condition' => 1, }, 392 | until => { 'PPI::Structure::Condition' => 1, }, 393 | given => { 'PPI::Structure::Given' => 1, }, 394 | when => { 'PPI::Structure::When' => 1, 395 | 'PPI::Structure::List' => 1, }, 396 | for => { 'PPI::Structure::List' => 1, 397 | 'PPI::Structure::For' => 1, }, 398 | foreach => { 'PPI::Structure::List' => 1, 399 | 'PPI::Structure::For' => 1, }, 400 | ); 401 | my $count = 0; 402 | for my $keyword ( _get_all( $PPI_doc, 'Token::Word' ) ) { 403 | my $expected_sib_types_href = $wanted{$keyword} 404 | or next; 405 | 406 | my $sib = $keyword->next_sibling 407 | or next; 408 | 409 | my $c = $sib->class 410 | or next; 411 | 412 | if ( $expected_sib_types_href->{$c} ) { 413 | my $space = PPI::Token::Whitespace->new(' '); 414 | $keyword->insert_after($space); 415 | $count++; 416 | } 417 | } 418 | 419 | return $count; 420 | } 421 | 422 | sub _only_schild { 423 | croak 'Wrong number of arguments passed to sub' if @_ != 2; 424 | my ( $element, $class ) = @_; 425 | 426 | my @ss_kids = $element->schildren; 427 | return unless @ss_kids == 1; 428 | my $child = $ss_kids[0]; 429 | return unless $child->isa($class); 430 | return $child; 431 | } 432 | 433 | sub _clothe_the_bareword_hash_keys { 434 | croak 'Wrong number of arguments passed to method' if @_ != 2; 435 | my ( $self, $PPI_doc ) = @_; 436 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 437 | 438 | # Translate all the bareword hash keys into single-quote. 439 | my $count = 0; 440 | for my $subscript ( _get_all( $PPI_doc, 'Structure::Subscript' ) ) { 441 | 442 | # Must have this structure: 443 | # PPI::Token::Symbol '$z' 444 | # PPI::Structure::Subscript { ... } 445 | # PPI::Statement::Expression 446 | # PPI::Token::Word 'foo' 447 | 448 | next unless $subscript->sprevious_sibling->isa('PPI::Token::Symbol'); 449 | 450 | next unless substr( $subscript, 0, 1 ) eq '{' 451 | and substr( $subscript, -1, 1 ) eq '}'; 452 | 453 | my $expression = _only_schild($subscript, 'PPI::Statement::Expression') 454 | or next; 455 | 456 | my $word = _only_schild($expression, 'PPI::Token::Word') 457 | or next; 458 | 459 | my $quoted_word = "'" . $word->content . "'"; 460 | my $quoted_token = PPI::Token::Quote::Single->new($quoted_word); 461 | 462 | # Cannot use replace() because $quoted_token and $word differ in type. 463 | $word->insert_after($quoted_token) or warn; 464 | $word->delete or warn; 465 | 466 | $count++; 467 | } 468 | 469 | return $count; 470 | } 471 | 472 | sub _add_a_comma_after_mapish_blocks { 473 | croak 'Wrong number of arguments passed to method' if @_ != 2; 474 | my ( $self, $PPI_doc ) = @_; 475 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 476 | 477 | my %wanted_words = map { $_ => 1 } qw( map grep ); 478 | 479 | # Add a comma after the block in `map BLOCK` or `grep BLOCK`. 480 | my $count = 0; 481 | for my $word ( _get_all( $PPI_doc, 'Token::Word' ) ) { 482 | # Must have this structure: 483 | # PPI::Token::Word 'map' (or 'grep') 484 | # PPI::Structure::Block { ... } 485 | 486 | next unless $wanted_words{ $word->content }; 487 | 488 | my $sib = $word->snext_sibling; 489 | next unless $sib->isa('PPI::Structure::Block'); 490 | 491 | my $comma = PPI::Token::Operator->new(','); 492 | $sib->insert_after($comma) or warn; 493 | 494 | $count++; 495 | } 496 | return $count; 497 | } 498 | 499 | sub _change_mapish_expr_to_block { 500 | croak 'Wrong number of arguments passed to method' if @_ != 2; 501 | my ( $self, $PPI_doc ) = @_; 502 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 503 | 504 | my %wanted_words = map { $_ => 1 } qw( map grep ); 505 | 506 | # Change `map $_ * 5, @z` to `map { $_ * 5 }, @z` 507 | my $count = 0; 508 | for my $word ( _get_all( $PPI_doc, 'Token::Word' ) ) { 509 | # Must have this structure: 510 | # PPI::Token::Word 'map' 511 | # ... NOT PPI::Structure::Block 512 | # PPI::Token::Operator ',' 513 | # Changing to this new structure: 514 | # PPI::Token::Word 'map' 515 | # PPI::Structure::Block { ... } 516 | # PPI::Statement 517 | # ... from original version 518 | # PPI::Token::Operator ',' 519 | 520 | next unless $wanted_words{ $word->content }; 521 | 522 | my $next_ssib = $word->snext_sibling; 523 | next if $next_ssib->isa('PPI::Structure::Block'); 524 | 525 | # Can't use find() here because we need to search *forward* from $word, 526 | # not *down* within $word. 527 | 528 | my $last_sib = $next_ssib; # Can't be a comma, since `map ,` is invalid 529 | my @elements_to_move = $last_sib; 530 | while ( $last_sib = $last_sib->next_sibling ) { 531 | last if $last_sib->isa('PPI::Token::Operator') 532 | and $last_sib->content eq ','; 533 | push @elements_to_move, $last_sib; 534 | } 535 | next unless $last_sib; 536 | 537 | my $new_block = _make_a_block(); 538 | $next_ssib->insert_before($new_block) or die; 539 | 540 | my $needs_leading_ws = not $elements_to_move[ 0]->isa('PPI::Token::Whitespace'); 541 | my $needs_trailing_ws = not $elements_to_move[-1]->isa('PPI::Token::Whitespace'); 542 | my $s = PPI::Statement->new(); 543 | $s->add_element( PPI::Token::Whitespace->new(' ') ) if $needs_leading_ws; 544 | $_->remove or die for @elements_to_move; 545 | $s->add_element($_) or die for @elements_to_move; 546 | $s->add_element( PPI::Token::Whitespace->new(' ') ) if $needs_trailing_ws; 547 | 548 | $new_block->add_element($s) 549 | or die; 550 | 551 | $count++; 552 | } 553 | return $count; 554 | } 555 | 556 | sub _change_foreach_my_lexvar_to_arrow { 557 | croak 'Wrong number of arguments passed to method' if @_ != 2; 558 | my ( $self, $PPI_doc ) = @_; 559 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 560 | 561 | my %wanted_words = map { $_ => 1 } qw( for foreach ); 562 | 563 | # XXX Need to log info on ro vs rw! 564 | # XXX Need to force foreach to for! 565 | 566 | # Also, named lex vars in `foreach` loops are read-write in Perl 5, but read-only by default in Perl 6. 567 | # Either make my generated code be rw, and log a message to go back and replace with ro as an optimization 568 | # or generate as ro, and log a message that it might not work! 569 | 570 | # XXX Add code to trim the whitespace when parens are removed around something that already has whitespace. 571 | 572 | # Change `for my $i (@a)` to `for @a -> $i` 573 | my $count = 0; 574 | for my $statement ( _get_all( $PPI_doc, 'Statement::Compound' ) ) { 575 | # Must have this structure: 576 | # PPI::Statement::Compound 577 | # PPI::Token::Word 'for' (or foreach) 578 | # PPI::Token::Word 'my' # Optional - and XXX need to warn when encountered? 579 | # PPI::Token::Symbol '$i' # Optional - use $_ when missing 580 | # PPI::Structure::List ( ... ) 581 | # PPI::Statement 582 | # ... 583 | # Changing to this new structure: 584 | # PPI::Statement::Compound 585 | # PPI::Token::Word 'for' (or foreach) 586 | # PPI::Structure::List ... 587 | # PPI::Statement 588 | # ... 589 | # PPI::Token::Operator '->' 590 | # PPI::Token::Symbol '$i' 591 | 592 | my @sc = $statement->schildren; 593 | next unless @sc and $sc[0] and $sc[0]->class() eq 'PPI::Token::Word' 594 | and $wanted_words{ $sc[0]->content }; 595 | next unless @sc >= 4 596 | and $sc[1]->class() eq 'PPI::Token::Word' and $sc[1]->content eq 'my' 597 | and $sc[2]->class() eq 'PPI::Token::Symbol' 598 | and $sc[3]->class() eq 'PPI::Structure::List' 599 | or @sc >= 3 600 | and $sc[1]->class() eq 'PPI::Token::Symbol' 601 | and $sc[2]->class() eq 'PPI::Structure::List' 602 | or @sc >= 2 603 | and $sc[1]->class() eq 'PPI::Structure::List'; 604 | 605 | my @c = $statement->children; 606 | 607 | _eat_optional_whitespace(\@c); # XXX Can this really occur here? 608 | 609 | # Change keyword "foreach" to "for" if needed. 610 | # Keyword is not needed in @c after this point. 611 | { 612 | my $k = shift @c or die; 613 | die unless $k->class eq 'PPI::Token::Word' and $wanted_words{ $k->content }; 614 | 615 | # $k->replace( PPI::Token::Word->new('for') ) if $k->content eq 'foreach'; # XXX The ->replace method has not yet been implemented in PPI 1.215. 616 | if ( $k->content eq 'foreach' ) { 617 | my $new_k = PPI::Token::Word->new('for') or die; 618 | $k->insert_after($new_k) or die; 619 | $k->delete() or die; 620 | } 621 | } 622 | 623 | _eat_optional_whitespace(\@c); 624 | 625 | # Peek at next element. 626 | # Remove `my` if it was there, and register whether it was there, for later use. 627 | my $had_my; 628 | { 629 | die if not @c; 630 | $had_my = ( $c[0]->class() eq 'PPI::Token::Word' 631 | and $c[0]->content eq 'my' ); 632 | if ($had_my) { 633 | my $keyword_my = shift @c or die; 634 | $keyword_my->delete or die; 635 | } 636 | } 637 | 638 | _eat_optional_whitespace(\@c); 639 | 640 | # Peek at next element. 641 | # Remove $VAR if it was there, and register $VAR, or $_ if absent. 642 | my $var; 643 | { 644 | die if not @c; 645 | if ( $c[0]->class() eq 'PPI::Token::Symbol' ) { 646 | $var = shift @c or die; 647 | $var->remove or die; 648 | } 649 | else { 650 | $var = PPI::Token::Magic->new( '$_' ); 651 | } 652 | } 653 | # die unless @c and $c[0]->class eq 'PPI::Token::Symbol'; 654 | # my $var = shift(@c)->remove or die; 655 | 656 | _eat_optional_whitespace(\@c); 657 | 658 | # Remove parens from (LIST) 659 | die unless @c and $c[0]->class eq 'PPI::Structure::List'; 660 | my $sl = $c[0]; 661 | die unless $sl->start ->content eq '(' 662 | and $sl->finish->content eq ')'; 663 | $sl->start ->set_content(''); 664 | $sl->finish->set_content(''); 665 | 666 | $sl->insert_before( PPI::Token::Whitespace->new(' ') ); 667 | $sl->insert_after($_) for reverse ( 668 | PPI::Token::Whitespace->new(' '), 669 | PPI::Token::Operator ->new('<->'), # XXX Fixup with log message. In fact, make it an option. 670 | PPI::Token::Whitespace->new(' '), 671 | $var, 672 | ); 673 | 674 | $count++; 675 | } 676 | return $count; 677 | } 678 | 679 | sub _remove_obsolete_pragmas_and_shbang { 680 | croak 'Wrong number of arguments passed to method' if @_ != 2; 681 | my ( $self, $PPI_doc ) = @_; 682 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 683 | 684 | # PPI::Token::Comment '#!/usr/bin/env perl\n' 685 | # PPI::Statement::Include 686 | # PPI::Token::Word 'use' 687 | # PPI::Token::Word 'strict' 688 | # PPI::Token::Structure ';' 689 | # PPI::Statement::Include 690 | # PPI::Token::Word 'use' 691 | # PPI::Token::Word 'warnings' 692 | # PPI::Token::Structure ';' 693 | 694 | my $count = 0; 695 | 696 | # XXX Improve handling of whitespace removal. 697 | # XXX Add "use v6;" ??? 698 | 699 | # XXX Restructure to look at siblings instead? 700 | # XXX remove_child vs delete??? 701 | my $first_non_ws_element = $PPI_doc->find_first( sub { not $_[1]->isa('PPI::Token::Whitespace') } ) 702 | or return; 703 | if ( $first_non_ws_element->isa('PPI::Token::Comment') ) { 704 | if ( $first_non_ws_element =~ m{ \A \s* [#]!/usr/bin/ (?:perl|env\s+perl) \s* \z }msx ) { 705 | my $ws; 706 | if ( $ws = $first_non_ws_element->next_sibling and $ws->isa('PPI::Token::Whitespace') ) { 707 | $PPI_doc->remove_child($ws); 708 | $count++; 709 | } 710 | $PPI_doc->remove_child($first_non_ws_element); 711 | $count++; 712 | 713 | my $e1; 714 | while ( $e1 = $PPI_doc->find_first(sub {1}) and $e1->isa('PPI::Token::Whitespace') ) { 715 | $PPI_doc->remove_child($e1); 716 | $count++; 717 | } 718 | } 719 | 720 | } 721 | 722 | for my $include ( _get_all( $PPI_doc, 'Statement::Include' ) ) { 723 | # XXX Add code to issue info about the removal 724 | # XXX Add code to give more info on specific warnings and on partial strict. 725 | if ( $include =~ m{ \A \s* use \s+ (?:strict|warnings) \s* [;]? \s* \z }msx ) { 726 | my $ws; 727 | if ( $ws = $include->next_sibling and $ws->isa('PPI::Token::Whitespace') ) { 728 | $PPI_doc->remove_child($ws); 729 | $count++; 730 | } 731 | $PPI_doc->remove_child($include); 732 | $count++; 733 | } 734 | } 735 | 736 | return $count; 737 | } 738 | 739 | # XXX Relies on the fact that PPI parses the whitespace inside the parens as being outside of them. 740 | sub _remove_parens_from_conditionals { 741 | croak 'Wrong number of arguments passed to method' if @_ != 2; 742 | my ( $self, $PPI_doc ) = @_; 743 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 744 | # PPI::Statement::Compound 745 | # PPI::Token::Word 'if' 746 | # PPI::Structure::Condition ( ... ) 747 | # PPI::Statement::Expression $x eq $y 748 | # PPI::Structure::Block { ... } say "Hi" 749 | # PPI::Token::Word 'elsif' 750 | # PPI::Structure::Condition ( ... ) 751 | # PPI::Statement::Expression $x gt $y 752 | # PPI::Structure::Block { ... } say "Ho" 753 | # PPI::Token::Word 'elsif' 754 | # PPI::Structure::Condition ( ... ) 755 | # PPI::Statement::Expression $x le $y 756 | # PPI::Structure::Block { ... } say "He" 757 | 758 | my %wanted = map { $_ => 1 } qw( if elsif unless while until ); 759 | 760 | my $count = 0; 761 | for my $compound ( _get_all( $PPI_doc, 'Statement::Compound' ) ) { 762 | my @sc = $compound->schildren; 763 | 764 | for my $i ( 0+1 .. $#sc-1 ) { 765 | my ( $prior, $this, $next ) = @sc[ $i-1, $i, $i+1 ]; 766 | 767 | next if $this->class ne 'PPI::Structure::Condition'; 768 | 769 | warn if $prior->class ne 'PPI::Token::Word' 770 | or !$wanted{$prior->content}; 771 | 772 | warn if $next->class ne 'PPI::Structure::Block'; 773 | 774 | warn if $this->start ->content ne '(' 775 | or $this->finish->content ne ')'; 776 | 777 | # Remove the parens 778 | $this->start ->set_content(''); 779 | $this->finish->set_content(''); 780 | $count++; 781 | 782 | # Fix the whitespace 783 | my $express; 784 | if ( $express = _only_schild( $this, 'PPI::Statement::Expression' ) 785 | or $express = _only_schild( $this, 'PPI::Statement' ) 786 | ) { 787 | my $sib; 788 | while ( $sib = $express->next_sibling and $sib->class eq 'PPI::Token::Whitespace' ) { 789 | $sib->delete or warn; 790 | } 791 | while ( $sib = $express->previous_sibling and $sib->class eq 'PPI::Token::Whitespace' ) { 792 | $sib->delete or warn; 793 | } 794 | } 795 | else { 796 | warn 'XXX Unexpected PPI below condition'; 797 | } 798 | 799 | # If `while ($a){` , with no space before the brace, insert space 800 | if ( $next->previous_sibling == $this ) { 801 | my $space = PPI::Token::Whitespace->new(' '); 802 | $this->insert_after($space); 803 | } 804 | } 805 | } 806 | 807 | return $count; 808 | } 809 | 810 | # XXX First pass at this optional refactoring. 811 | # XXX Needs to handle lots more variations, and recent Perl 5 sub-signatures. 812 | # XXX Currently fails to inform user about `my ( $first, @rest ) = @_`. 813 | # XXX Currently fails to process `my @args = @_`. 814 | sub _move_sub_params_from_at_to_declaration { 815 | croak 'Wrong number of arguments passed to method' if @_ != 2; 816 | my ( $self, $PPI_doc ) = @_; 817 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 818 | # PPI::Statement::Sub 819 | # PPI::Token::Word 'sub' 820 | # PPI::Token::Word 'abc' 821 | # PPI::Structure::Block { ... } 822 | # PPI::Statement::Variable 823 | # PPI::Token::Word 'my' 824 | # PPI::Structure::List ( ... ) 825 | # PPI::Statement::Expression 826 | # PPI::Token::Symbol '$a' 827 | # PPI::Token::Operator ',' 828 | # PPI::Token::Symbol '$b' 829 | # PPI::Token::Operator ',' 830 | # PPI::Token::Symbol '$c' 831 | # PPI::Token::Operator '=' 832 | # PPI::Token::Magic '@_' 833 | # PPI::Token::Structure ';' 834 | 835 | my $count = 0; 836 | for my $sub ( _get_all( $PPI_doc, 'Statement::Sub' ) ) { 837 | my ( $sub_word, $sub_name, $block, @junk1 ) = $sub->schildren; 838 | 839 | warn if $sub_word->class ne 'PPI::Token::Word' 840 | or $sub_word->content ne 'sub'; 841 | warn if $sub_name->class ne 'PPI::Token::Word'; 842 | warn if $block ->class ne 'PPI::Structure::Block'; 843 | 844 | my ($sv, @junk2) = $block->schildren; 845 | if ( $sv->class ne 'PPI::Statement::Variable' ) { 846 | $self->log_warn( 847 | $sv, 848 | "sub not given a param definition, " 849 | . "because the first statement within the sub was not `my`.\n" 850 | ); 851 | next; 852 | } 853 | 854 | my ( $my_word, $list, $op, $underscore, $semi, @rest ) = $sv->schildren; 855 | warn if $my_word ->class ne 'PPI::Token::Word' 856 | or $my_word ->content ne 'my'; 857 | if ( 858 | $list ->class ne 'PPI::Structure::List' 859 | or $op ->class ne 'PPI::Token::Operator' 860 | or $op ->content ne '=' 861 | or $underscore->class ne 'PPI::Token::Magic' 862 | or $underscore->content ne '@_' 863 | or $semi ->class ne 'PPI::Token::Structure' 864 | or $semi ->content ne ';' 865 | ) { 866 | $self->log_warn( 867 | $sv, 868 | "sub not given a param definition, " 869 | . "because the first statement within the sub was `my`, " 870 | . "but did not have a structure we recognize (yet).\n" 871 | ); 872 | next; 873 | } 874 | 875 | my $space = PPI::Token::Whitespace->new(' '); 876 | $sub_name->insert_after($space); 877 | $list->remove; 878 | # XXX No good way to insert this so that future passes recognize it, unless we add custom types to PPI. 879 | $space->insert_after($list); 880 | my $sib; 881 | while ( $sib = $sv->next_sibling and $sib->class eq 'PPI::Token::Whitespace' ) { 882 | $sib->delete or warn; 883 | } 884 | $sv->delete; 885 | $count++; 886 | } 887 | } 888 | 889 | sub _optionally_change_qw_to_arrow_quotes { 890 | croak 'Wrong number of arguments passed to method' if @_ != 2; 891 | my ( $self, $PPI_doc ) = @_; 892 | croak 'Parameter 2 must be a PPI::Document!' if !_INSTANCE($PPI_doc, 'PPI::Document'); 893 | 894 | # PPI::Statement::Variable 895 | # PPI::Token::Word 'my' 896 | # PPI::Token::Symbol '@aaa' 897 | # PPI::Token::Operator '=' 898 | # PPI::Token::QuoteLike::Words 'qw( a b c d e f g )' 899 | # PPI::Token::Structure ';' 900 | 901 | my $count = 0; 902 | 903 | # qw is valid with any paired delimiters, *except* parens, which would make it a sub call. 904 | for my $qw ( _get_all( $PPI_doc, 'Token::QuoteLike::Words' ) ) { 905 | if ( $qw =~ m{ \A qw\(( .+ )\) \z }msx ) { 906 | $qw->set_content( "<$1>" ); 907 | $count++; 908 | } 909 | } 910 | 911 | return $count; 912 | } 913 | 914 | sub _make_a_block { 915 | croak 'Wrong number of arguments passed to method' if @_; 916 | 917 | # XXX Flaw in PPI: Cannot simply create PPI::Structure::* with ->new(). 918 | # See https://rt.cpan.org/Public/Bug/Display.html?id=31564 919 | my $new_block = PPI::Structure::Block->new( 920 | PPI::Token::Structure->new('{'), 921 | ) or die; 922 | $new_block->{finish} = PPI::Token::Structure->new('}'); 923 | 924 | return $new_block; 925 | } 926 | 927 | sub _eat_optional_whitespace { 928 | my ($elements_aref) = @_; 929 | return unless @{$elements_aref} 930 | and $elements_aref->[0]->class eq 'PPI::Token::Whitespace'; 931 | $elements_aref->[0]->delete or die; 932 | shift @{$elements_aref}; 933 | return; 934 | } 935 | 936 | sub log_warn { 937 | my ( $self, $loc, @message_parts ) = @_; 938 | my $message = join '', @message_parts; 939 | 940 | # $loc indicates the location where the warning or error occurred. 941 | # It could be an object that provides a location method, 942 | # or a hand-constructed location aref, or undef. 943 | my @location 944 | = !$loc ? () 945 | : _ARRAY($loc) ? @{ $loc } 946 | : _INSTANCE($loc, 'PPI::Element') ? @{ $loc->location() } 947 | : croak("Unknown type passed as location object: ".ref($loc)) 948 | ; 949 | 950 | if (@location) { 951 | # Before PPI-1.204_05, ->location() only returned 3 elements. 952 | my ( $line, $rowchar, $col, $logical_line, $logical_file_name ) = @location; 953 | 954 | my $pos = $rowchar eq $col ? $col : "$rowchar/$col"; 955 | 956 | $message = "At line $line, position $pos, " . $message; 957 | } 958 | 959 | my $log_aref = $self->{WARNING_LOG}; 960 | if ( _ARRAY0($log_aref) ) { 961 | push @{$log_aref}, $message; 962 | } 963 | else { 964 | carp $message; 965 | } 966 | 967 | return; 968 | } 969 | 970 | 1; 971 | 972 | =head1 TODO 973 | 974 | Move the rest of the original code slush into place. 975 | 976 | For each Planned/TODO item in the README file, add code to either implement the change or to warn about manual changes needed. 977 | 978 | Need to add an configuration option on how to translate `for` ( -> vs <-> ). 979 | Need to handle adding a informational note when translating for statements into <->, that it may be able to be shortened to -> . 980 | Need to handle adding a informational note when translating for statements into ->, that it may be need to be lengthened to <-> . 981 | 982 | =head1 SUPPORT 983 | 984 | Please submit a ticket to: 985 | https://github.com/Util/Blue_Tiger/issues 986 | 987 | Emails sent directly to the original author might be answered, but the discussion could not then be found and read by others. 988 | 989 | =head1 AUTHOR 990 | 991 | Bruce Gray Ebgray@cpan.orgE 992 | 993 | =head1 COPYRIGHT 994 | 995 | Copyright 2010-2011 Bruce Gray. 996 | 997 | This program is free software; you can redistribute it and/or modify it under 998 | the terms of version 2.0 of the Artistic License. 999 | 1000 | The full text of the license can be found in the LICENSE file included with 1001 | this module. 1002 | 1003 | =cut 1004 | --------------------------------------------------------------------------------