├── _hg.filemap ├── manage.sh ├── regexgen.test.in ├── README.md ├── regexgen.test.out └── regexgen.cpp /_hg.filemap: -------------------------------------------------------------------------------- 1 | include programs/data/regexgen 2 | rename programs/data/regexgen . -------------------------------------------------------------------------------- /manage.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source $DIR/../../../manageUtils.sh 4 | 5 | githubProject regexgen 6 | 7 | BASE=$HGROOT/programs/data/regexgen 8 | 9 | case "$1" in 10 | mirror) 11 | syncHg 12 | ;; 13 | compile) 14 | g++ regexgen.cpp -o regexgen -I/usr/include/qt4/QtCore -I/usr/include/qt4/ -lQtCore 15 | ;; 16 | test) 17 | ./regexgen < regexgen.test.in > /tmp/out 18 | diff /tmp/out regexgen.test.out 19 | ;; 20 | esac 21 | -------------------------------------------------------------------------------- /regexgen.test.in: -------------------------------------------------------------------------------- 1 | 1 2 | abc 3 | 2 4 | [abc] 5 | 3 6 | a(bcd)e 7 | 4 8 | a(bcd)?e 9 | 5 10 | a(bc(de)?gh)?i 11 | 6 12 | [a-c]+ 13 | 7 14 | [01]* 15 | 8 16 | a(bc?)?d 17 | 9 18 | a{2,3} 19 | 10 20 | (x(abc?)?){3} 21 | 11 22 | (?i)muh 23 | 12 24 | x|s(a|b)t|y 25 | 13 26 | (a|b){3} 27 | 14 28 | s(a|b)?t 29 | 15 30 | (^|)(mi|moo)($|) 31 | 16 32 | \w\d 33 | 17 34 | ([abc]+)=\1 35 | 18 36 | ([abc])+=\1 37 | 19 38 | (a(b?))\1\2 39 | 20 40 | (a|b|c)\1 41 | 21 42 | (a|x(c|d)=\2y|e) 43 | 22 44 | ^a\$b\^c$ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Regexgen 2 | ========== 3 | 4 | Regexgen enumerates all strings matching a given input regular expression (up to a finite limit). 5 | 6 | Installation/Usage 7 | ------- 8 | 9 | You can compile the program with gcc and Qt. 
On a standard Linux KDE system this is a single call to: 10 | 11 | g++ regexgen.cpp -o regexgen -I/usr/include/qt4/QtCore -I/usr/include/qt4/ -lQtCore 12 | 13 | My homepage also provides a [Linux 64-bit binary](http://www.benibela.de/tools_en.html#regexgen). 14 | 15 | --- 16 | 17 | The program then reads an arbitrary input regex from stdin, and prints all possible matches. 18 | 19 | In case there are infinitely many matches, you can use command line arguments to set when the enumeration should abort. (e.g. to treat `x+` as `x{1,5}`) 20 | 21 | Use --help to print a list of allowed arguments. 22 | 23 | Examples: 24 | ------- 25 | 26 | Input: 27 | 28 | [ab]{2} 29 | 30 | Output: 31 | 32 | aa 33 | ba 34 | ab 35 | bb 36 | 37 | Input (famous regex, matching all non-prime numbers written in unary notation. The abort criterion is given as a command line argument.): 38 | 39 | ^1?$|^(11+?)\1+$ 40 | 41 | Output: 42 | 43 | 1 44 | 1111 45 | 111111 46 | 11111111 47 | 1111111111 48 | 111111111111 49 | 111111 50 | 111111111 51 | 111111111111 52 | 111111111111111 53 | 111111111111111111 54 | 11111111 55 | 111111111111 56 | 1111111111111111 57 | 11111111111111111111 58 | 111111111111111111111111 59 | 1111111111 60 | 111111111111111 61 | 11111111111111111111 62 | 1111111111111111111111111 63 | 111111111111111111111111111111 64 | 111111111111 65 | 111111111111111111 66 | 111111111111111111111111 67 | 111111111111111111111111111111 68 | 111111111111111111111111111111111111 69 | 70 | 71 | Details 72 | ------ 73 | 74 | It works as follows: 75 | 76 | 1. All ? {} + * | () operators are expanded (to a maximal limit), so that only character classes and backreferences remain. 77 | 78 | e.g. `[a-c]+|t*|([x-z]){2}foo\1|(a|b)(t|u)` becomes `[a-c]|[a-c][a-c]|[a-c][a-c][a-c]|[a-c][a-c][a-c][a-c]||t|tt|tt|ttt|ttt|([x-z][x-z])foo\1|at|au|bt|bu` 79 | 80 | (the | in the latter expression is just notation; the program keeps each alternative subregex in a list) 81 | 82 | 2. 
Backreferences to multiple characters are replaced by backreferences to single characters. 83 | 84 | e.g. the expression above becomes `[a-c]|[a-c][a-c]|[a-c][a-c][a-c]|[a-c][a-c][a-c][a-c]||t|tt|tt|ttt|ttt|([x-z])([x-z])foo\1\2|at|au|bt|bu` 85 | 86 | Now each alternative subregex matches a fixed length string. 87 | 88 | 3. For each of the alternatives, all combinations of picking characters from the classes are printed: 89 | 90 | e.g. the expression above becomes `a|b|c|aa|ba|..|cc|aaa|baa|...|ccc|aaaa|...|cccc||t|tt|tt|ttt|ttt|xxfooxx|yxfooyx|...|zzfoozz|at|au|bt|bu` 91 | 92 | 93 | -------------------------------------------------------------------------------- /regexgen.test.out: -------------------------------------------------------------------------------- 1 | 1 2 | abc 3 | 2 4 | a 5 | b 6 | c 7 | 3 8 | abcde 9 | 4 10 | ae 11 | abcde 12 | 5 13 | ai 14 | abcghi 15 | abcdeghi 16 | 6 17 | a 18 | b 19 | c 20 | aa 21 | ba 22 | ca 23 | ab 24 | bb 25 | cb 26 | ac 27 | bc 28 | cc 29 | aaa 30 | baa 31 | caa 32 | aba 33 | bba 34 | cba 35 | aca 36 | bca 37 | cca 38 | aab 39 | bab 40 | cab 41 | abb 42 | bbb 43 | cbb 44 | acb 45 | bcb 46 | ccb 47 | aac 48 | bac 49 | cac 50 | abc 51 | bbc 52 | cbc 53 | acc 54 | bcc 55 | ccc 56 | aaaa 57 | baaa 58 | caaa 59 | abaa 60 | bbaa 61 | cbaa 62 | acaa 63 | bcaa 64 | ccaa 65 | aaba 66 | baba 67 | caba 68 | abba 69 | bbba 70 | cbba 71 | acba 72 | bcba 73 | ccba 74 | aaca 75 | baca 76 | caca 77 | abca 78 | bbca 79 | cbca 80 | acca 81 | bcca 82 | ccca 83 | aaab 84 | baab 85 | caab 86 | abab 87 | bbab 88 | cbab 89 | acab 90 | bcab 91 | ccab 92 | aabb 93 | babb 94 | cabb 95 | abbb 96 | bbbb 97 | cbbb 98 | acbb 99 | bcbb 100 | ccbb 101 | aacb 102 | bacb 103 | cacb 104 | abcb 105 | bbcb 106 | cbcb 107 | accb 108 | bccb 109 | cccb 110 | aaac 111 | baac 112 | caac 113 | abac 114 | bbac 115 | cbac 116 | acac 117 | bcac 118 | ccac 119 | aabc 120 | babc 121 | cabc 122 | abbc 123 | bbbc 124 | cbbc 125 | acbc 126 | bcbc 127 | ccbc 128 | aacc 129 
| bacc 130 | cacc 131 | abcc 132 | bbcc 133 | cbcc 134 | accc 135 | bccc 136 | cccc 137 | aaaaa 138 | baaaa 139 | caaaa 140 | abaaa 141 | bbaaa 142 | cbaaa 143 | acaaa 144 | bcaaa 145 | ccaaa 146 | aabaa 147 | babaa 148 | cabaa 149 | abbaa 150 | bbbaa 151 | cbbaa 152 | acbaa 153 | bcbaa 154 | ccbaa 155 | aacaa 156 | bacaa 157 | cacaa 158 | abcaa 159 | bbcaa 160 | cbcaa 161 | accaa 162 | bccaa 163 | cccaa 164 | aaaba 165 | baaba 166 | caaba 167 | ababa 168 | bbaba 169 | cbaba 170 | acaba 171 | bcaba 172 | ccaba 173 | aabba 174 | babba 175 | cabba 176 | abbba 177 | bbbba 178 | cbbba 179 | acbba 180 | bcbba 181 | ccbba 182 | aacba 183 | bacba 184 | cacba 185 | abcba 186 | bbcba 187 | cbcba 188 | accba 189 | bccba 190 | cccba 191 | aaaca 192 | baaca 193 | caaca 194 | abaca 195 | bbaca 196 | cbaca 197 | acaca 198 | bcaca 199 | ccaca 200 | aabca 201 | babca 202 | cabca 203 | abbca 204 | bbbca 205 | cbbca 206 | acbca 207 | bcbca 208 | ccbca 209 | aacca 210 | bacca 211 | cacca 212 | abcca 213 | bbcca 214 | cbcca 215 | accca 216 | bccca 217 | cccca 218 | aaaab 219 | baaab 220 | caaab 221 | abaab 222 | bbaab 223 | cbaab 224 | acaab 225 | bcaab 226 | ccaab 227 | aabab 228 | babab 229 | cabab 230 | abbab 231 | bbbab 232 | cbbab 233 | acbab 234 | bcbab 235 | ccbab 236 | aacab 237 | bacab 238 | cacab 239 | abcab 240 | bbcab 241 | cbcab 242 | accab 243 | bccab 244 | cccab 245 | aaabb 246 | baabb 247 | caabb 248 | ababb 249 | bbabb 250 | cbabb 251 | acabb 252 | bcabb 253 | ccabb 254 | aabbb 255 | babbb 256 | cabbb 257 | abbbb 258 | bbbbb 259 | cbbbb 260 | acbbb 261 | bcbbb 262 | ccbbb 263 | aacbb 264 | bacbb 265 | cacbb 266 | abcbb 267 | bbcbb 268 | cbcbb 269 | accbb 270 | bccbb 271 | cccbb 272 | aaacb 273 | baacb 274 | caacb 275 | abacb 276 | bbacb 277 | cbacb 278 | acacb 279 | bcacb 280 | ccacb 281 | aabcb 282 | babcb 283 | cabcb 284 | abbcb 285 | bbbcb 286 | cbbcb 287 | acbcb 288 | bcbcb 289 | ccbcb 290 | aaccb 291 | baccb 292 | caccb 293 | abccb 294 | bbccb 295 | cbccb 296 | 
acccb 297 | bcccb 298 | ccccb 299 | aaaac 300 | baaac 301 | caaac 302 | abaac 303 | bbaac 304 | cbaac 305 | acaac 306 | bcaac 307 | ccaac 308 | aabac 309 | babac 310 | cabac 311 | abbac 312 | bbbac 313 | cbbac 314 | acbac 315 | bcbac 316 | ccbac 317 | aacac 318 | bacac 319 | cacac 320 | abcac 321 | bbcac 322 | cbcac 323 | accac 324 | bccac 325 | cccac 326 | aaabc 327 | baabc 328 | caabc 329 | ababc 330 | bbabc 331 | cbabc 332 | acabc 333 | bcabc 334 | ccabc 335 | aabbc 336 | babbc 337 | cabbc 338 | abbbc 339 | bbbbc 340 | cbbbc 341 | acbbc 342 | bcbbc 343 | ccbbc 344 | aacbc 345 | bacbc 346 | cacbc 347 | abcbc 348 | bbcbc 349 | cbcbc 350 | accbc 351 | bccbc 352 | cccbc 353 | aaacc 354 | baacc 355 | caacc 356 | abacc 357 | bbacc 358 | cbacc 359 | acacc 360 | bcacc 361 | ccacc 362 | aabcc 363 | babcc 364 | cabcc 365 | abbcc 366 | bbbcc 367 | cbbcc 368 | acbcc 369 | bcbcc 370 | ccbcc 371 | aaccc 372 | baccc 373 | caccc 374 | abccc 375 | bbccc 376 | cbccc 377 | acccc 378 | bcccc 379 | ccccc 380 | 7 381 | 382 | 0 383 | 1 384 | 00 385 | 10 386 | 01 387 | 11 388 | 000 389 | 100 390 | 010 391 | 110 392 | 001 393 | 101 394 | 011 395 | 111 396 | 0000 397 | 1000 398 | 0100 399 | 1100 400 | 0010 401 | 1010 402 | 0110 403 | 1110 404 | 0001 405 | 1001 406 | 0101 407 | 1101 408 | 0011 409 | 1011 410 | 0111 411 | 1111 412 | 00000 413 | 10000 414 | 01000 415 | 11000 416 | 00100 417 | 10100 418 | 01100 419 | 11100 420 | 00010 421 | 10010 422 | 01010 423 | 11010 424 | 00110 425 | 10110 426 | 01110 427 | 11110 428 | 00001 429 | 10001 430 | 01001 431 | 11001 432 | 00101 433 | 10101 434 | 01101 435 | 11101 436 | 00011 437 | 10011 438 | 01011 439 | 11011 440 | 00111 441 | 10111 442 | 01111 443 | 11111 444 | 8 445 | ad 446 | abd 447 | abcd 448 | 9 449 | aa 450 | aaa 451 | 10 452 | xxx 453 | xxxab 454 | xxxabc 455 | xxabx 456 | xxabxab 457 | xxabxabc 458 | xxabcx 459 | xxabcxab 460 | xxabcxabc 461 | xabxx 462 | xabxxab 463 | xabxxabc 464 | xabxabx 465 | xabxabxab 466 | xabxabxabc 467 | 
xabxabcx 468 | xabxabcxab 469 | xabxabcxabc 470 | xabcxx 471 | xabcxxab 472 | xabcxxabc 473 | xabcxabx 474 | xabcxabxab 475 | xabcxabxabc 476 | xabcxabcx 477 | xabcxabcxab 478 | xabcxabcxabc 479 | 11 480 | muh 481 | Muh 482 | mUh 483 | MUh 484 | muH 485 | MuH 486 | mUH 487 | MUH 488 | 12 489 | x 490 | sat 491 | sbt 492 | y 493 | 13 494 | aaa 495 | aab 496 | aba 497 | abb 498 | baa 499 | bab 500 | bba 501 | bbb 502 | 14 503 | st 504 | sat 505 | sbt 506 | 15 507 | mi 508 | mi 509 | moo 510 | moo 511 | mi 512 | mi 513 | moo 514 | moo 515 | 16 516 | 00 517 | 10 518 | 20 519 | 30 520 | 40 521 | 50 522 | 60 523 | 70 524 | 80 525 | 90 526 | a0 527 | b0 528 | c0 529 | d0 530 | e0 531 | f0 532 | g0 533 | h0 534 | i0 535 | j0 536 | k0 537 | l0 538 | m0 539 | n0 540 | o0 541 | p0 542 | q0 543 | r0 544 | s0 545 | t0 546 | u0 547 | v0 548 | w0 549 | x0 550 | y0 551 | z0 552 | A0 553 | B0 554 | C0 555 | D0 556 | E0 557 | F0 558 | G0 559 | H0 560 | I0 561 | J0 562 | K0 563 | L0 564 | M0 565 | N0 566 | O0 567 | P0 568 | Q0 569 | R0 570 | S0 571 | T0 572 | U0 573 | V0 574 | W0 575 | X0 576 | Y0 577 | Z0 578 | _0 579 | 01 580 | 11 581 | 21 582 | 31 583 | 41 584 | 51 585 | 61 586 | 71 587 | 81 588 | 91 589 | a1 590 | b1 591 | c1 592 | d1 593 | e1 594 | f1 595 | g1 596 | h1 597 | i1 598 | j1 599 | k1 600 | l1 601 | m1 602 | n1 603 | o1 604 | p1 605 | q1 606 | r1 607 | s1 608 | t1 609 | u1 610 | v1 611 | w1 612 | x1 613 | y1 614 | z1 615 | A1 616 | B1 617 | C1 618 | D1 619 | E1 620 | F1 621 | G1 622 | H1 623 | I1 624 | J1 625 | K1 626 | L1 627 | M1 628 | N1 629 | O1 630 | P1 631 | Q1 632 | R1 633 | S1 634 | T1 635 | U1 636 | V1 637 | W1 638 | X1 639 | Y1 640 | Z1 641 | _1 642 | 02 643 | 12 644 | 22 645 | 32 646 | 42 647 | 52 648 | 62 649 | 72 650 | 82 651 | 92 652 | a2 653 | b2 654 | c2 655 | d2 656 | e2 657 | f2 658 | g2 659 | h2 660 | i2 661 | j2 662 | k2 663 | l2 664 | m2 665 | n2 666 | o2 667 | p2 668 | q2 669 | r2 670 | s2 671 | t2 672 | u2 673 | v2 674 | w2 675 | x2 676 | y2 677 
| z2 678 | A2 679 | B2 680 | C2 681 | D2 682 | E2 683 | F2 684 | G2 685 | H2 686 | I2 687 | J2 688 | K2 689 | L2 690 | M2 691 | N2 692 | O2 693 | P2 694 | Q2 695 | R2 696 | S2 697 | T2 698 | U2 699 | V2 700 | W2 701 | X2 702 | Y2 703 | Z2 704 | _2 705 | 03 706 | 13 707 | 23 708 | 33 709 | 43 710 | 53 711 | 63 712 | 73 713 | 83 714 | 93 715 | a3 716 | b3 717 | c3 718 | d3 719 | e3 720 | f3 721 | g3 722 | h3 723 | i3 724 | j3 725 | k3 726 | l3 727 | m3 728 | n3 729 | o3 730 | p3 731 | q3 732 | r3 733 | s3 734 | t3 735 | u3 736 | v3 737 | w3 738 | x3 739 | y3 740 | z3 741 | A3 742 | B3 743 | C3 744 | D3 745 | E3 746 | F3 747 | G3 748 | H3 749 | I3 750 | J3 751 | K3 752 | L3 753 | M3 754 | N3 755 | O3 756 | P3 757 | Q3 758 | R3 759 | S3 760 | T3 761 | U3 762 | V3 763 | W3 764 | X3 765 | Y3 766 | Z3 767 | _3 768 | 04 769 | 14 770 | 24 771 | 34 772 | 44 773 | 54 774 | 64 775 | 74 776 | 84 777 | 94 778 | a4 779 | b4 780 | c4 781 | d4 782 | e4 783 | f4 784 | g4 785 | h4 786 | i4 787 | j4 788 | k4 789 | l4 790 | m4 791 | n4 792 | o4 793 | p4 794 | q4 795 | r4 796 | s4 797 | t4 798 | u4 799 | v4 800 | w4 801 | x4 802 | y4 803 | z4 804 | A4 805 | B4 806 | C4 807 | D4 808 | E4 809 | F4 810 | G4 811 | H4 812 | I4 813 | J4 814 | K4 815 | L4 816 | M4 817 | N4 818 | O4 819 | P4 820 | Q4 821 | R4 822 | S4 823 | T4 824 | U4 825 | V4 826 | W4 827 | X4 828 | Y4 829 | Z4 830 | _4 831 | 05 832 | 15 833 | 25 834 | 35 835 | 45 836 | 55 837 | 65 838 | 75 839 | 85 840 | 95 841 | a5 842 | b5 843 | c5 844 | d5 845 | e5 846 | f5 847 | g5 848 | h5 849 | i5 850 | j5 851 | k5 852 | l5 853 | m5 854 | n5 855 | o5 856 | p5 857 | q5 858 | r5 859 | s5 860 | t5 861 | u5 862 | v5 863 | w5 864 | x5 865 | y5 866 | z5 867 | A5 868 | B5 869 | C5 870 | D5 871 | E5 872 | F5 873 | G5 874 | H5 875 | I5 876 | J5 877 | K5 878 | L5 879 | M5 880 | N5 881 | O5 882 | P5 883 | Q5 884 | R5 885 | S5 886 | T5 887 | U5 888 | V5 889 | W5 890 | X5 891 | Y5 892 | Z5 893 | _5 894 | 06 895 | 16 896 | 26 897 | 36 898 | 46 899 | 
56 900 | 66 901 | 76 902 | 86 903 | 96 904 | a6 905 | b6 906 | c6 907 | d6 908 | e6 909 | f6 910 | g6 911 | h6 912 | i6 913 | j6 914 | k6 915 | l6 916 | m6 917 | n6 918 | o6 919 | p6 920 | q6 921 | r6 922 | s6 923 | t6 924 | u6 925 | v6 926 | w6 927 | x6 928 | y6 929 | z6 930 | A6 931 | B6 932 | C6 933 | D6 934 | E6 935 | F6 936 | G6 937 | H6 938 | I6 939 | J6 940 | K6 941 | L6 942 | M6 943 | N6 944 | O6 945 | P6 946 | Q6 947 | R6 948 | S6 949 | T6 950 | U6 951 | V6 952 | W6 953 | X6 954 | Y6 955 | Z6 956 | _6 957 | 07 958 | 17 959 | 27 960 | 37 961 | 47 962 | 57 963 | 67 964 | 77 965 | 87 966 | 97 967 | a7 968 | b7 969 | c7 970 | d7 971 | e7 972 | f7 973 | g7 974 | h7 975 | i7 976 | j7 977 | k7 978 | l7 979 | m7 980 | n7 981 | o7 982 | p7 983 | q7 984 | r7 985 | s7 986 | t7 987 | u7 988 | v7 989 | w7 990 | x7 991 | y7 992 | z7 993 | A7 994 | B7 995 | C7 996 | D7 997 | E7 998 | F7 999 | G7 1000 | H7 1001 | I7 1002 | J7 1003 | K7 1004 | L7 1005 | M7 1006 | N7 1007 | O7 1008 | P7 1009 | Q7 1010 | R7 1011 | S7 1012 | T7 1013 | U7 1014 | V7 1015 | W7 1016 | X7 1017 | Y7 1018 | Z7 1019 | _7 1020 | 08 1021 | 18 1022 | 28 1023 | 38 1024 | 48 1025 | 58 1026 | 68 1027 | 78 1028 | 88 1029 | 98 1030 | a8 1031 | b8 1032 | c8 1033 | d8 1034 | e8 1035 | f8 1036 | g8 1037 | h8 1038 | i8 1039 | j8 1040 | k8 1041 | l8 1042 | m8 1043 | n8 1044 | o8 1045 | p8 1046 | q8 1047 | r8 1048 | s8 1049 | t8 1050 | u8 1051 | v8 1052 | w8 1053 | x8 1054 | y8 1055 | z8 1056 | A8 1057 | B8 1058 | C8 1059 | D8 1060 | E8 1061 | F8 1062 | G8 1063 | H8 1064 | I8 1065 | J8 1066 | K8 1067 | L8 1068 | M8 1069 | N8 1070 | O8 1071 | P8 1072 | Q8 1073 | R8 1074 | S8 1075 | T8 1076 | U8 1077 | V8 1078 | W8 1079 | X8 1080 | Y8 1081 | Z8 1082 | _8 1083 | 09 1084 | 19 1085 | 29 1086 | 39 1087 | 49 1088 | 59 1089 | 69 1090 | 79 1091 | 89 1092 | 99 1093 | a9 1094 | b9 1095 | c9 1096 | d9 1097 | e9 1098 | f9 1099 | g9 1100 | h9 1101 | i9 1102 | j9 1103 | k9 1104 | l9 1105 | m9 1106 | n9 1107 | o9 1108 | p9 1109 | 
q9 1110 | r9 1111 | s9 1112 | t9 1113 | u9 1114 | v9 1115 | w9 1116 | x9 1117 | y9 1118 | z9 1119 | A9 1120 | B9 1121 | C9 1122 | D9 1123 | E9 1124 | F9 1125 | G9 1126 | H9 1127 | I9 1128 | J9 1129 | K9 1130 | L9 1131 | M9 1132 | N9 1133 | O9 1134 | P9 1135 | Q9 1136 | R9 1137 | S9 1138 | T9 1139 | U9 1140 | V9 1141 | W9 1142 | X9 1143 | Y9 1144 | Z9 1145 | _9 1146 | 17 1147 | a=a 1148 | b=b 1149 | c=c 1150 | aa=aa 1151 | ba=ba 1152 | ca=ca 1153 | ab=ab 1154 | bb=bb 1155 | cb=cb 1156 | ac=ac 1157 | bc=bc 1158 | cc=cc 1159 | aaa=aaa 1160 | baa=baa 1161 | caa=caa 1162 | aba=aba 1163 | bba=bba 1164 | cba=cba 1165 | aca=aca 1166 | bca=bca 1167 | cca=cca 1168 | aab=aab 1169 | bab=bab 1170 | cab=cab 1171 | abb=abb 1172 | bbb=bbb 1173 | cbb=cbb 1174 | acb=acb 1175 | bcb=bcb 1176 | ccb=ccb 1177 | aac=aac 1178 | bac=bac 1179 | cac=cac 1180 | abc=abc 1181 | bbc=bbc 1182 | cbc=cbc 1183 | acc=acc 1184 | bcc=bcc 1185 | ccc=ccc 1186 | aaaa=aaaa 1187 | baaa=baaa 1188 | caaa=caaa 1189 | abaa=abaa 1190 | bbaa=bbaa 1191 | cbaa=cbaa 1192 | acaa=acaa 1193 | bcaa=bcaa 1194 | ccaa=ccaa 1195 | aaba=aaba 1196 | baba=baba 1197 | caba=caba 1198 | abba=abba 1199 | bbba=bbba 1200 | cbba=cbba 1201 | acba=acba 1202 | bcba=bcba 1203 | ccba=ccba 1204 | aaca=aaca 1205 | baca=baca 1206 | caca=caca 1207 | abca=abca 1208 | bbca=bbca 1209 | cbca=cbca 1210 | acca=acca 1211 | bcca=bcca 1212 | ccca=ccca 1213 | aaab=aaab 1214 | baab=baab 1215 | caab=caab 1216 | abab=abab 1217 | bbab=bbab 1218 | cbab=cbab 1219 | acab=acab 1220 | bcab=bcab 1221 | ccab=ccab 1222 | aabb=aabb 1223 | babb=babb 1224 | cabb=cabb 1225 | abbb=abbb 1226 | bbbb=bbbb 1227 | cbbb=cbbb 1228 | acbb=acbb 1229 | bcbb=bcbb 1230 | ccbb=ccbb 1231 | aacb=aacb 1232 | bacb=bacb 1233 | cacb=cacb 1234 | abcb=abcb 1235 | bbcb=bbcb 1236 | cbcb=cbcb 1237 | accb=accb 1238 | bccb=bccb 1239 | cccb=cccb 1240 | aaac=aaac 1241 | baac=baac 1242 | caac=caac 1243 | abac=abac 1244 | bbac=bbac 1245 | cbac=cbac 1246 | acac=acac 1247 | bcac=bcac 1248 | ccac=ccac 
1249 | aabc=aabc 1250 | babc=babc 1251 | cabc=cabc 1252 | abbc=abbc 1253 | bbbc=bbbc 1254 | cbbc=cbbc 1255 | acbc=acbc 1256 | bcbc=bcbc 1257 | ccbc=ccbc 1258 | aacc=aacc 1259 | bacc=bacc 1260 | cacc=cacc 1261 | abcc=abcc 1262 | bbcc=bbcc 1263 | cbcc=cbcc 1264 | accc=accc 1265 | bccc=bccc 1266 | cccc=cccc 1267 | aaaaa=aaaaa 1268 | baaaa=baaaa 1269 | caaaa=caaaa 1270 | abaaa=abaaa 1271 | bbaaa=bbaaa 1272 | cbaaa=cbaaa 1273 | acaaa=acaaa 1274 | bcaaa=bcaaa 1275 | ccaaa=ccaaa 1276 | aabaa=aabaa 1277 | babaa=babaa 1278 | cabaa=cabaa 1279 | abbaa=abbaa 1280 | bbbaa=bbbaa 1281 | cbbaa=cbbaa 1282 | acbaa=acbaa 1283 | bcbaa=bcbaa 1284 | ccbaa=ccbaa 1285 | aacaa=aacaa 1286 | bacaa=bacaa 1287 | cacaa=cacaa 1288 | abcaa=abcaa 1289 | bbcaa=bbcaa 1290 | cbcaa=cbcaa 1291 | accaa=accaa 1292 | bccaa=bccaa 1293 | cccaa=cccaa 1294 | aaaba=aaaba 1295 | baaba=baaba 1296 | caaba=caaba 1297 | ababa=ababa 1298 | bbaba=bbaba 1299 | cbaba=cbaba 1300 | acaba=acaba 1301 | bcaba=bcaba 1302 | ccaba=ccaba 1303 | aabba=aabba 1304 | babba=babba 1305 | cabba=cabba 1306 | abbba=abbba 1307 | bbbba=bbbba 1308 | cbbba=cbbba 1309 | acbba=acbba 1310 | bcbba=bcbba 1311 | ccbba=ccbba 1312 | aacba=aacba 1313 | bacba=bacba 1314 | cacba=cacba 1315 | abcba=abcba 1316 | bbcba=bbcba 1317 | cbcba=cbcba 1318 | accba=accba 1319 | bccba=bccba 1320 | cccba=cccba 1321 | aaaca=aaaca 1322 | baaca=baaca 1323 | caaca=caaca 1324 | abaca=abaca 1325 | bbaca=bbaca 1326 | cbaca=cbaca 1327 | acaca=acaca 1328 | bcaca=bcaca 1329 | ccaca=ccaca 1330 | aabca=aabca 1331 | babca=babca 1332 | cabca=cabca 1333 | abbca=abbca 1334 | bbbca=bbbca 1335 | cbbca=cbbca 1336 | acbca=acbca 1337 | bcbca=bcbca 1338 | ccbca=ccbca 1339 | aacca=aacca 1340 | bacca=bacca 1341 | cacca=cacca 1342 | abcca=abcca 1343 | bbcca=bbcca 1344 | cbcca=cbcca 1345 | accca=accca 1346 | bccca=bccca 1347 | cccca=cccca 1348 | aaaab=aaaab 1349 | baaab=baaab 1350 | caaab=caaab 1351 | abaab=abaab 1352 | bbaab=bbaab 1353 | cbaab=cbaab 1354 | acaab=acaab 1355 | bcaab=bcaab 
1356 | ccaab=ccaab 1357 | aabab=aabab 1358 | babab=babab 1359 | cabab=cabab 1360 | abbab=abbab 1361 | bbbab=bbbab 1362 | cbbab=cbbab 1363 | acbab=acbab 1364 | bcbab=bcbab 1365 | ccbab=ccbab 1366 | aacab=aacab 1367 | bacab=bacab 1368 | cacab=cacab 1369 | abcab=abcab 1370 | bbcab=bbcab 1371 | cbcab=cbcab 1372 | accab=accab 1373 | bccab=bccab 1374 | cccab=cccab 1375 | aaabb=aaabb 1376 | baabb=baabb 1377 | caabb=caabb 1378 | ababb=ababb 1379 | bbabb=bbabb 1380 | cbabb=cbabb 1381 | acabb=acabb 1382 | bcabb=bcabb 1383 | ccabb=ccabb 1384 | aabbb=aabbb 1385 | babbb=babbb 1386 | cabbb=cabbb 1387 | abbbb=abbbb 1388 | bbbbb=bbbbb 1389 | cbbbb=cbbbb 1390 | acbbb=acbbb 1391 | bcbbb=bcbbb 1392 | ccbbb=ccbbb 1393 | aacbb=aacbb 1394 | bacbb=bacbb 1395 | cacbb=cacbb 1396 | abcbb=abcbb 1397 | bbcbb=bbcbb 1398 | cbcbb=cbcbb 1399 | accbb=accbb 1400 | bccbb=bccbb 1401 | cccbb=cccbb 1402 | aaacb=aaacb 1403 | baacb=baacb 1404 | caacb=caacb 1405 | abacb=abacb 1406 | bbacb=bbacb 1407 | cbacb=cbacb 1408 | acacb=acacb 1409 | bcacb=bcacb 1410 | ccacb=ccacb 1411 | aabcb=aabcb 1412 | babcb=babcb 1413 | cabcb=cabcb 1414 | abbcb=abbcb 1415 | bbbcb=bbbcb 1416 | cbbcb=cbbcb 1417 | acbcb=acbcb 1418 | bcbcb=bcbcb 1419 | ccbcb=ccbcb 1420 | aaccb=aaccb 1421 | baccb=baccb 1422 | caccb=caccb 1423 | abccb=abccb 1424 | bbccb=bbccb 1425 | cbccb=cbccb 1426 | acccb=acccb 1427 | bcccb=bcccb 1428 | ccccb=ccccb 1429 | aaaac=aaaac 1430 | baaac=baaac 1431 | caaac=caaac 1432 | abaac=abaac 1433 | bbaac=bbaac 1434 | cbaac=cbaac 1435 | acaac=acaac 1436 | bcaac=bcaac 1437 | ccaac=ccaac 1438 | aabac=aabac 1439 | babac=babac 1440 | cabac=cabac 1441 | abbac=abbac 1442 | bbbac=bbbac 1443 | cbbac=cbbac 1444 | acbac=acbac 1445 | bcbac=bcbac 1446 | ccbac=ccbac 1447 | aacac=aacac 1448 | bacac=bacac 1449 | cacac=cacac 1450 | abcac=abcac 1451 | bbcac=bbcac 1452 | cbcac=cbcac 1453 | accac=accac 1454 | bccac=bccac 1455 | cccac=cccac 1456 | aaabc=aaabc 1457 | baabc=baabc 1458 | caabc=caabc 1459 | ababc=ababc 1460 | bbabc=bbabc 1461 
| cbabc=cbabc 1462 | acabc=acabc 1463 | bcabc=bcabc 1464 | ccabc=ccabc 1465 | aabbc=aabbc 1466 | babbc=babbc 1467 | cabbc=cabbc 1468 | abbbc=abbbc 1469 | bbbbc=bbbbc 1470 | cbbbc=cbbbc 1471 | acbbc=acbbc 1472 | bcbbc=bcbbc 1473 | ccbbc=ccbbc 1474 | aacbc=aacbc 1475 | bacbc=bacbc 1476 | cacbc=cacbc 1477 | abcbc=abcbc 1478 | bbcbc=bbcbc 1479 | cbcbc=cbcbc 1480 | accbc=accbc 1481 | bccbc=bccbc 1482 | cccbc=cccbc 1483 | aaacc=aaacc 1484 | baacc=baacc 1485 | caacc=caacc 1486 | abacc=abacc 1487 | bbacc=bbacc 1488 | cbacc=cbacc 1489 | acacc=acacc 1490 | bcacc=bcacc 1491 | ccacc=ccacc 1492 | aabcc=aabcc 1493 | babcc=babcc 1494 | cabcc=cabcc 1495 | abbcc=abbcc 1496 | bbbcc=bbbcc 1497 | cbbcc=cbbcc 1498 | acbcc=acbcc 1499 | bcbcc=bcbcc 1500 | ccbcc=ccbcc 1501 | aaccc=aaccc 1502 | baccc=baccc 1503 | caccc=caccc 1504 | abccc=abccc 1505 | bbccc=bbccc 1506 | cbccc=cbccc 1507 | acccc=acccc 1508 | bcccc=bcccc 1509 | ccccc=ccccc 1510 | 18 1511 | a=a 1512 | b=b 1513 | c=c 1514 | aa=a 1515 | ba=a 1516 | ca=a 1517 | ab=b 1518 | bb=b 1519 | cb=b 1520 | ac=c 1521 | bc=c 1522 | cc=c 1523 | aaa=a 1524 | baa=a 1525 | caa=a 1526 | aba=a 1527 | bba=a 1528 | cba=a 1529 | aca=a 1530 | bca=a 1531 | cca=a 1532 | aab=b 1533 | bab=b 1534 | cab=b 1535 | abb=b 1536 | bbb=b 1537 | cbb=b 1538 | acb=b 1539 | bcb=b 1540 | ccb=b 1541 | aac=c 1542 | bac=c 1543 | cac=c 1544 | abc=c 1545 | bbc=c 1546 | cbc=c 1547 | acc=c 1548 | bcc=c 1549 | ccc=c 1550 | aaaa=a 1551 | baaa=a 1552 | caaa=a 1553 | abaa=a 1554 | bbaa=a 1555 | cbaa=a 1556 | acaa=a 1557 | bcaa=a 1558 | ccaa=a 1559 | aaba=a 1560 | baba=a 1561 | caba=a 1562 | abba=a 1563 | bbba=a 1564 | cbba=a 1565 | acba=a 1566 | bcba=a 1567 | ccba=a 1568 | aaca=a 1569 | baca=a 1570 | caca=a 1571 | abca=a 1572 | bbca=a 1573 | cbca=a 1574 | acca=a 1575 | bcca=a 1576 | ccca=a 1577 | aaab=b 1578 | baab=b 1579 | caab=b 1580 | abab=b 1581 | bbab=b 1582 | cbab=b 1583 | acab=b 1584 | bcab=b 1585 | ccab=b 1586 | aabb=b 1587 | babb=b 1588 | cabb=b 1589 | abbb=b 1590 | 
bbbb=b 1591 | cbbb=b 1592 | acbb=b 1593 | bcbb=b 1594 | ccbb=b 1595 | aacb=b 1596 | bacb=b 1597 | cacb=b 1598 | abcb=b 1599 | bbcb=b 1600 | cbcb=b 1601 | accb=b 1602 | bccb=b 1603 | cccb=b 1604 | aaac=c 1605 | baac=c 1606 | caac=c 1607 | abac=c 1608 | bbac=c 1609 | cbac=c 1610 | acac=c 1611 | bcac=c 1612 | ccac=c 1613 | aabc=c 1614 | babc=c 1615 | cabc=c 1616 | abbc=c 1617 | bbbc=c 1618 | cbbc=c 1619 | acbc=c 1620 | bcbc=c 1621 | ccbc=c 1622 | aacc=c 1623 | bacc=c 1624 | cacc=c 1625 | abcc=c 1626 | bbcc=c 1627 | cbcc=c 1628 | accc=c 1629 | bccc=c 1630 | cccc=c 1631 | aaaaa=a 1632 | baaaa=a 1633 | caaaa=a 1634 | abaaa=a 1635 | bbaaa=a 1636 | cbaaa=a 1637 | acaaa=a 1638 | bcaaa=a 1639 | ccaaa=a 1640 | aabaa=a 1641 | babaa=a 1642 | cabaa=a 1643 | abbaa=a 1644 | bbbaa=a 1645 | cbbaa=a 1646 | acbaa=a 1647 | bcbaa=a 1648 | ccbaa=a 1649 | aacaa=a 1650 | bacaa=a 1651 | cacaa=a 1652 | abcaa=a 1653 | bbcaa=a 1654 | cbcaa=a 1655 | accaa=a 1656 | bccaa=a 1657 | cccaa=a 1658 | aaaba=a 1659 | baaba=a 1660 | caaba=a 1661 | ababa=a 1662 | bbaba=a 1663 | cbaba=a 1664 | acaba=a 1665 | bcaba=a 1666 | ccaba=a 1667 | aabba=a 1668 | babba=a 1669 | cabba=a 1670 | abbba=a 1671 | bbbba=a 1672 | cbbba=a 1673 | acbba=a 1674 | bcbba=a 1675 | ccbba=a 1676 | aacba=a 1677 | bacba=a 1678 | cacba=a 1679 | abcba=a 1680 | bbcba=a 1681 | cbcba=a 1682 | accba=a 1683 | bccba=a 1684 | cccba=a 1685 | aaaca=a 1686 | baaca=a 1687 | caaca=a 1688 | abaca=a 1689 | bbaca=a 1690 | cbaca=a 1691 | acaca=a 1692 | bcaca=a 1693 | ccaca=a 1694 | aabca=a 1695 | babca=a 1696 | cabca=a 1697 | abbca=a 1698 | bbbca=a 1699 | cbbca=a 1700 | acbca=a 1701 | bcbca=a 1702 | ccbca=a 1703 | aacca=a 1704 | bacca=a 1705 | cacca=a 1706 | abcca=a 1707 | bbcca=a 1708 | cbcca=a 1709 | accca=a 1710 | bccca=a 1711 | cccca=a 1712 | aaaab=b 1713 | baaab=b 1714 | caaab=b 1715 | abaab=b 1716 | bbaab=b 1717 | cbaab=b 1718 | acaab=b 1719 | bcaab=b 1720 | ccaab=b 1721 | aabab=b 1722 | babab=b 1723 | cabab=b 1724 | abbab=b 1725 | bbbab=b 1726 | 
cbbab=b 1727 | acbab=b 1728 | bcbab=b 1729 | ccbab=b 1730 | aacab=b 1731 | bacab=b 1732 | cacab=b 1733 | abcab=b 1734 | bbcab=b 1735 | cbcab=b 1736 | accab=b 1737 | bccab=b 1738 | cccab=b 1739 | aaabb=b 1740 | baabb=b 1741 | caabb=b 1742 | ababb=b 1743 | bbabb=b 1744 | cbabb=b 1745 | acabb=b 1746 | bcabb=b 1747 | ccabb=b 1748 | aabbb=b 1749 | babbb=b 1750 | cabbb=b 1751 | abbbb=b 1752 | bbbbb=b 1753 | cbbbb=b 1754 | acbbb=b 1755 | bcbbb=b 1756 | ccbbb=b 1757 | aacbb=b 1758 | bacbb=b 1759 | cacbb=b 1760 | abcbb=b 1761 | bbcbb=b 1762 | cbcbb=b 1763 | accbb=b 1764 | bccbb=b 1765 | cccbb=b 1766 | aaacb=b 1767 | baacb=b 1768 | caacb=b 1769 | abacb=b 1770 | bbacb=b 1771 | cbacb=b 1772 | acacb=b 1773 | bcacb=b 1774 | ccacb=b 1775 | aabcb=b 1776 | babcb=b 1777 | cabcb=b 1778 | abbcb=b 1779 | bbbcb=b 1780 | cbbcb=b 1781 | acbcb=b 1782 | bcbcb=b 1783 | ccbcb=b 1784 | aaccb=b 1785 | baccb=b 1786 | caccb=b 1787 | abccb=b 1788 | bbccb=b 1789 | cbccb=b 1790 | acccb=b 1791 | bcccb=b 1792 | ccccb=b 1793 | aaaac=c 1794 | baaac=c 1795 | caaac=c 1796 | abaac=c 1797 | bbaac=c 1798 | cbaac=c 1799 | acaac=c 1800 | bcaac=c 1801 | ccaac=c 1802 | aabac=c 1803 | babac=c 1804 | cabac=c 1805 | abbac=c 1806 | bbbac=c 1807 | cbbac=c 1808 | acbac=c 1809 | bcbac=c 1810 | ccbac=c 1811 | aacac=c 1812 | bacac=c 1813 | cacac=c 1814 | abcac=c 1815 | bbcac=c 1816 | cbcac=c 1817 | accac=c 1818 | bccac=c 1819 | cccac=c 1820 | aaabc=c 1821 | baabc=c 1822 | caabc=c 1823 | ababc=c 1824 | bbabc=c 1825 | cbabc=c 1826 | acabc=c 1827 | bcabc=c 1828 | ccabc=c 1829 | aabbc=c 1830 | babbc=c 1831 | cabbc=c 1832 | abbbc=c 1833 | bbbbc=c 1834 | cbbbc=c 1835 | acbbc=c 1836 | bcbbc=c 1837 | ccbbc=c 1838 | aacbc=c 1839 | bacbc=c 1840 | cacbc=c 1841 | abcbc=c 1842 | bbcbc=c 1843 | cbcbc=c 1844 | accbc=c 1845 | bccbc=c 1846 | cccbc=c 1847 | aaacc=c 1848 | baacc=c 1849 | caacc=c 1850 | abacc=c 1851 | bbacc=c 1852 | cbacc=c 1853 | acacc=c 1854 | bcacc=c 1855 | ccacc=c 1856 | aabcc=c 1857 | babcc=c 1858 | cabcc=c 1859 | 
abbcc=c 1860 | bbbcc=c 1861 | cbbcc=c 1862 | acbcc=c 1863 | bcbcc=c 1864 | ccbcc=c 1865 | aaccc=c 1866 | baccc=c 1867 | caccc=c 1868 | abccc=c 1869 | bbccc=c 1870 | cbccc=c 1871 | acccc=c 1872 | bcccc=c 1873 | ccccc=c 1874 | 19 1875 | aa 1876 | ababb 1877 | 20 1878 | aa 1879 | bb 1880 | cc 1881 | 21 1882 | a 1883 | xc=cy 1884 | xd=dy 1885 | e 1886 | 22 1887 | a$b^c 1888 | -------------------------------------------------------------------------------- /regexgen.cpp: -------------------------------------------------------------------------------- 1 | /* Reverse regular expression engine that calculates strings matching a given regex. 2 | See below "RegEx-Solution-Generator\n" for the help or call it with --help. 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define likely(x) __builtin_expect(!!(x), 1) 15 | #define unlikely(x) __builtin_expect(!!(x), 0) 16 | 17 | 18 | QMap characterClasses; 19 | QString ALL_CHARACTERS = "0-9A-Za-z_+-*/=<>()[]{}!.,;:$%\"'#~&\\\\|@^?"; 20 | bool printProgress = false; 21 | 22 | struct CharBlock { 23 | //set on init 24 | enum Type {CBT_FIXED, CBT_CHOOSE, CBT_BACKTRACK_START, CBT_BACKTRACK_END, CBT_BACKTRACK}; 25 | Type type; 26 | QByteArray slowData; 27 | int backtrack; 28 | 29 | //set when evaluating 30 | const char * data; 31 | int len; 32 | int choosen; 33 | int pos; 34 | 35 | }; 36 | //typedef QList BlockString; 37 | //typedef QList BlockStringList; 38 | #define BlockString QList< CharBlock > 39 | #define BlockStringList QList< BlockString > 40 | 41 | int randUntil(int until){ 42 | return rand() % until; 43 | } 44 | 45 | char charConv(const QChar& c){ 46 | return c.toAscii(); 47 | } 48 | 49 | int hexToInt(const char c){ 50 | if (c >= '0' && c <= '9') return c - '0'; 51 | if (c >= 'a' && c <= 'f') return 10 + c - 'a'; 52 | if (c >= 'A' && c <= 'F') return 10 + c - 'A'; 53 | throw "invalid character"; 54 | } 55 | unsigned char hexCharacter(const 
QChar& c1, const QChar& c2){ 56 | fprintf(stderr, qPrintable(QString(">%1,%2<\n").arg(c1).arg(c2))); 57 | return (hexToInt(c1.toLatin1()) << 4) + hexToInt(c2.toLatin1()); 58 | } 59 | 60 | void removePoint(int pos, QList& points){ //an element at pos was removed: move every stored position behind pos one step down 61 | for (int i = 0; i < points.size(); i++) 62 | if (points[i] > pos) 63 | points[i]--; 64 | } 65 | void insertPoint(int pos, int len, QList& points){ //len elements were inserted behind pos: move every stored position behind pos len steps up 66 | if (len == 0) return; 67 | for (int i = 0; i < points.size(); i++) 68 | if (points[i] > pos) 69 | points[i]+=len; 70 | } 71 | 72 | //Replace all backtrack references to bracket pairs by references to one certain character (drops the CBT_BACKTRACK_START/CBT_BACKTRACK_END markers and expands every reference to one reference per character of the bracket it points to); throws a C string on invalid or recursive references 73 | BlockString purifyBacktracking(const BlockString& strin){ 74 | //--Remove all ids of brackets not referenced by references-- 75 | BlockString str = strin; 76 | QList accessedMatches; 77 | for (int i=str.size()-1;i >= 0;i--) 78 | if (str[i].type == CharBlock::CBT_BACKTRACK && !accessedMatches.contains(str[i].backtrack)) 79 | accessedMatches << str[i].backtrack; 80 | if (accessedMatches.isEmpty()) { 81 | for (int i=str.size()-1;i >= 0;i--) 82 | if (str[i].type == CharBlock::CBT_BACKTRACK_START || str[i].type == CharBlock::CBT_BACKTRACK_END) 83 | str.removeAt(i); 84 | return str; //optimized case, no back tracking used 85 | } 86 | for (int i=str.size()-1;i>=0;i--) 87 | if (str[i].type == CharBlock::CBT_BACKTRACK_START || str[i].type == CharBlock::CBT_BACKTRACK_END) 88 | if (!accessedMatches.contains( str[i].backtrack ) ) 89 | str.removeAt(i); 90 | //--map remaining ids to [0,n]-- 91 | QMap btMap; 92 | foreach (int i, accessedMatches) 93 | btMap.insert(i, btMap.size()); 94 | //--calculate string intervals-- 95 | QList btStart, btEnd; 96 | for (int i=0;i=0;i--) 99 | if (str[i].type == CharBlock::CBT_BACKTRACK_START || str[i].type == CharBlock::CBT_BACKTRACK_END) { 100 | int rid = btMap.value(str[i].backtrack); 101 | if (str[i].type == CharBlock::CBT_BACKTRACK_START && btStart[rid] == -1 ) 102 | btStart[rid] = i; 103 | else if (btEnd[rid] == -1) 104 | btEnd[rid] = i; 
105 | } 106 | 107 | if (btStart.contains(-1)) throw "invalid backtrack index (match bracket not opened)"; 108 | if (btEnd.contains(-1)) throw "invalid backtrack index (match bracket not closed)"; //the closing positions are kept in btEnd, btStart only knows about the opening side 109 | //remove now useless bracket-ids 110 | for (int i=str.size()-1;i>=0;i--) 111 | if (str[i].type == CharBlock::CBT_BACKTRACK_START || str[i].type == CharBlock::CBT_BACKTRACK_END) { 112 | removePoint(i, btStart); 113 | removePoint(i, btEnd); 114 | str.removeAt(i); 115 | } 116 | //--expand all \i backtrack references to the size of the i-th match-- 117 | //remap 118 | for (int i=0;i done; 122 | done.resize(accessedMatches.size()); 123 | //topological sort 124 | //if a bracket referenced by a backtracking reference contains another reference, latter reference must be expanded first 125 | for (int i=0;i stack; 128 | QList processing; 129 | stack.append(i); 130 | while (!stack.isEmpty()) { 131 | int c = stack.last(); 132 | done[c] = true; 133 | processing << c; 134 | QList higherPrio; 135 | for (int j=btStart[c]; j < btEnd[c]; j++) 136 | if (str[j].type == CharBlock::CBT_BACKTRACK && str[j].backtrack >= 0) 137 | if (!higherPrio.contains(str[j].backtrack) && !done[str[j].backtrack]) { 138 | if (processing.contains(str[j].backtrack)) throw "Recursive backtrack reference"; //NOTE(review): done[c] is set in the same step that adds c to processing, so everything in processing is already done and this throw looks unreachable - confirm 139 | higherPrio << str[j].backtrack; 140 | } 141 | if (!higherPrio.isEmpty()) { 142 | foreach (int j, higherPrio) { 143 | stack << j; 144 | } 145 | } else { 146 | stack.removeLast(); 147 | processing.removeLast(); 148 | //copy reference until we have one reference per character 149 | for (int j=btEnd[c]-1; j >= btStart[c]; j--){ 150 | if (str[j].type == CharBlock::CBT_BACKTRACK && str[j].backtrack >= 0) { 151 | int l = btEnd[str[j].backtrack] - btStart[str[j].backtrack]; 152 | if (l==0) { 153 | removePoint(j, btStart); //j is the position of the empty reference in str (i is only the index of the match being sorted) 154 | removePoint(j, btEnd); 155 | str.removeAt(j); 156 | } else { 157 | insertPoint(j, l-1, btStart); 158 | insertPoint(j, l-1, btEnd); 159 | str[j].backtrack = - str[j].backtrack - 1; 
//invert backtrack id to mark processed references 160 | for (int k=1;k= 0; j--){ 170 | if (str[j].type == CharBlock::CBT_BACKTRACK && str[j].backtrack >= 0) { 171 | int l = btEnd[str[j].backtrack] - btStart[str[j].backtrack]; 172 | if (l==0) { 173 | removePoint(j, btStart); 174 | removePoint(j, btEnd); 175 | str.removeAt(j); 176 | } else { 177 | insertPoint(j, l-1, btStart); 178 | insertPoint(j, l-1, btEnd); 179 | str[j].backtrack = - str[j].backtrack - 1; //invert backtrack id to mark processed references 180 | for (int k=1;k replace backtrack by character index 187 | for (int i=0; i cycleKill; 203 | int j = str[i].backtrack; 204 | while (str[j].type == CharBlock::CBT_BACKTRACK) { 205 | Q_ASSERT(!cycleKill.contains(j)); 206 | cycleKill << j; 207 | j = str[j].backtrack; 208 | } 209 | str[i].backtrack = j; 210 | } 211 | 212 | return str; 213 | } 214 | 215 | int64_t countPossibilities(const QList& blocks) { //number of words the blocks can produce: product of the set sizes of all CBT_CHOOSE blocks 216 | int64_t totalPos = 1; 217 | foreach (const CharBlock& b, blocks) 218 | if (b.type == CharBlock::CBT_CHOOSE){ 219 | totalPos = totalPos * b.slowData.length(); 220 | } 221 | return totalPos; 222 | } 223 | 224 | //takes a simplified regex like (\[.*\]|.)* and prints all possibilities to choose characters in the character sets 225 | //(this will generate thousand-millions of equal-length words) 226 | void printPossibilities(QList& blocks, bool randomized, int maxLines, int64_t startTotalPos, int64_t combinedTotalPos){ 227 | char word[blocks.length()+1]; 228 | CharBlock vars[blocks.size()+1]; 229 | CharBlock bts[blocks.size()+1]; 230 | int actualBlockCount = 0; 231 | int actualBackTrackCount = 0; 232 | int64_t totalPos = 1; 233 | for (int i=0;i 0); 243 | if (blocks[i].type == CharBlock::CBT_FIXED) Q_ASSERT(blocks[i].len==1); 244 | else vars[actualBlockCount++] = blocks[i]; 245 | break; 246 | case CharBlock::CBT_BACKTRACK: 247 | bts[actualBackTrackCount++] = blocks[i]; 248 | break; 249 | default: Q_ASSERT(false); 250 | } 251 | 252 | } 253 | // for (int 
i=0;i%s<\n",vars[i].data); 255 | word[blocks.length()]=0; 256 | if (actualBlockCount==0) { 257 | if (unlikely(actualBackTrackCount)) for (int j=0;j 0) totalPos = maxLines; 264 | long int progressNext = printProgress?totalPos / 10:totalPos; 265 | 266 | if (!randomized) { 267 | long int r = 0; 268 | if (printProgress) 269 | fprintf(stderr, " Progress: %li/%li (%li%%) %li/%li (%li%%)\n", //the percentages are computed from the 64 bit counters, so they take the same %li as the counters themselves 270 | r, totalPos, ((long int)(r)*100)/totalPos, 271 | startTotalPos, combinedTotalPos, ((int64_t)(r + startTotalPos)*100)/combinedTotalPos); 272 | while (true) { 273 | int i=0; 274 | for (;i=progressNext)) { 286 | if (printProgress) { 287 | fprintf(stderr, " Progress: %li/%li (%li%%) %li/%li (%li%%)\n", //same argument widths as in the first progress line 288 | r, totalPos, ((long int)(r)*100)/totalPos, 289 | r + startTotalPos, combinedTotalPos, ((int64_t)(r + startTotalPos)*100)/combinedTotalPos); 290 | progressNext = qMin(totalPos, progressNext + totalPos/10); 291 | } 292 | if (maxLines > 0 && r>=maxLines) break; 293 | } 294 | } else { 295 | // printf("%s\n", word); 296 | break; 297 | } 298 | vars[0].choosen+=1; 299 | word[vars[0].pos] = vars[0].data[vars[0].choosen]; 300 | } 301 | } else { 302 | if (maxLines <= 0) maxLines = totalPos; 303 | for (int r=0;r 0); 370 | CharBlock cb; 371 | // qDebug("%s", qPrintable(range)); 372 | if (range.length()==1) { 373 | cb.type = CharBlock::CBT_FIXED; 374 | cb.slowData = range.toLatin1(); 375 | } else { 376 | cb.type = CharBlock::CBT_CHOOSE; 377 | for (int i=(range.startsWith('^')?1:0);i= 'a' && c <= 'z' && !cb.slowData.contains(c-'a'+'A')) cb.slowData += c - 'a'+'A'; 402 | else if (c >= 'A' && c <= 'Z' && !cb.slowData.contains(c-'A'+'a')) cb.slowData += c - 'A'+'a'; 403 | } 404 | if (cb.slowData.size()>1) 405 | cb.type = CharBlock::CBT_CHOOSE; 406 | } 407 | return cb; 408 | } 409 | 410 | 411 | 412 | //add all strings from lists[-1] to list[-2], repeated between minRep and maxRep times; reduces list.size by 2 and increase by maxRep-minRep+1 413 | void multiplyLists(QList& lists, int minRep = 1, int 
maxRep = 1){ 414 | if (lists.size() < 2) return; //nothing to combine; a {0,0} repetition must not return here, it has to drop the repeated list and keep list[-2] unchanged, which the code below does 415 | 416 | const BlockStringList& repeat = lists.takeLast(); 417 | BlockStringList old; 418 | old = lists.takeLast(); 419 | BlockStringList nev; 420 | BlockStringList result; 421 | if (minRep == 0) result << old; 422 | for (int r = 1; r <= maxRep;r++) { 423 | nev.clear(); 424 | for (int i=0;i= minRep) 428 | result << nev; 429 | old = nev; 430 | } 431 | lists.append(result); 432 | } 433 | 434 | void concatLists(QList& lists, bool merged) { //appends the alternatives of lists[-1] to lists[-2]; a pending multiplication is done first when merged is false 435 | if (!merged) multiplyLists(lists); 436 | BlockStringList enums = lists.takeLast(); 437 | lists.last().append(enums); 438 | } 439 | 440 | /* 441 | Regex-Match Generator 442 | 443 | It processes a given regex in three steps: 444 | 1. Expand all ? {} + * | operators 445 | This creates a few new regexes, each matching strings of fixed length 446 | 2. Expand all backtracking operators 447 | Afterwards each character in the string is either a link to another 448 | or a character set 449 | 3. 
Expand all character sets 450 | 451 | */ 452 | 453 | int main(int argc, char* argv[]) 454 | { 455 | //============Parameters (above are some more)=========== 456 | int INFINITY_PLUS = 5; 457 | int INFINITY_STAR = 5; 458 | 459 | bool expandOnly = false; 460 | int maxExpandLines = -1, maxLength = 0; 461 | bool truncateLonger = false; 462 | bool chooseRandomized = false; 463 | 464 | //character classes 465 | characterClasses.insert('d', "0-9"); 466 | characterClasses.insert('w', "0-9a-zA-Z_"); 467 | characterClasses.insert('s', " \t\n\r"); 468 | //escapes 469 | characterClasses.insert('[', "["); 470 | characterClasses.insert(']', "]"); 471 | characterClasses.insert('^', "^"); 472 | characterClasses.insert('\\', "\\"); 473 | 474 | QMap escapes = characterClasses; 475 | characterClasses.insert('-', "-"); 476 | //escapes 477 | escapes.insert('{', "{"); 478 | escapes.insert('}', "}"); 479 | escapes.insert('(', "("); 480 | escapes.insert(')', ")"); 481 | 482 | escapes.insert('+', "+"); 483 | escapes.insert('*', "*"); 484 | escapes.insert('?', "?"); 485 | escapes.insert('.', "."); 486 | escapes.insert('$', "$"); 487 | escapes.insert('^', "^"); 488 | escapes.insert('.', "."); 489 | escapes.insert('|', "|"); 490 | escapes.insert('n', "\n"); 491 | escapes.insert('r', "\r"); 492 | escapes.insert('t', "\t"); 493 | escapes.insert('a', "\x07"); 494 | escapes.insert('e', "\x1B"); 495 | escapes.insert('f', "\x0C"); 496 | escapes.insert('v', "\x0B"); 497 | 498 | escapes.insert('b', ""); //word sepator (ignored) 499 | escapes.insert('B', ""); //word sepator (ignored) 500 | escapes.insert('A', ""); //marker (ignored) 501 | escapes.insert('Z', ""); //marker (ignored) 502 | escapes.insert('z', ""); //marker f(ignored) 503 | 504 | for (int i = 1; i%s<ü\n",cur.toUtf8().data()); 579 | //printf("%i:",cur.length()); 580 | //for (int i=0;i nestedBrackets; 587 | loopCount++; 588 | if (printProgress) fprintf(stderr, "Processing regex %i: %s\n", loopCount, qPrintable(cur)); 589 | //Stack of 
(block-)stringlists 590 | //Each stringlist contains all possible block match for a subterm of the regex (brackets or enums) 591 | //Everything except charsets is expanded 592 | //Lists can be added => Append all strings of list[-2] to list[-1] 593 | //And multiplied => Replace list[-2] by all concatenated pairs of list[-1] and list[-2] 594 | //Only | causes addition, everything else causes multiplication 595 | //The stack will look like this: (+list) (*list) (+list) (*list) ... (+list) (*list) (temporary list) 596 | QList< BlockStringList > lists; 597 | lists << (BlockStringList()) << (BlockStringList() << BlockString()); 598 | 599 | for (int i=0;i= '0' && cur[i] <= '9'){ 623 | int bt = cur[i].toLatin1() - '0'; 624 | if (i+1 < cur.length() && cur[i+1] >= '0' && cur[i+1] <= '9'){ 625 | bt*=10; 626 | bt += cur[i+1].toLatin1() - '0'; 627 | i++; 628 | } 629 | CharBlock cb; 630 | cb.type = CharBlock::CBT_BACKTRACK; 631 | cb.backtrack = bt; 632 | lists.append(BlockStringList() << (BlockString() << cb)); 633 | merged = false; 634 | break; 635 | } else if (cur[i] == 'Q') { // \Q ... 
\E literal quotation 636 | BlockString temp; 637 | for (i++; cur[i] != '\\' || cur[i+1] != 'E'; i++) 638 | temp << createBlock(""+cur[i], caseInsensitive); 639 | i++; 640 | lists.append(BlockStringList() << temp); 641 | merged=false; 642 | break; 643 | } else { 644 | Q_ASSERT(escapes.contains(cur[i])); 645 | sub = escapes.value(cur[i]); 646 | if (sub.isEmpty()) 647 | break; 648 | } 649 | lists.append(BlockStringList() << (BlockString() << createBlock(sub,caseInsensitive))); 650 | merged = false; 651 | break; 652 | } 653 | case '.': 654 | if (!merged) multiplyLists(lists); 655 | lists.append(BlockStringList() << (BlockString() << createBlock(ALL_CHARACTERS, false))); 656 | merged = false; 657 | break; 658 | case '?': 659 | if (merged) break; //ignore lazy 660 | //Q_ASSERT(!merged); //don't be lazy 661 | multiplyLists(lists, 0, 1); 662 | merged = true; 663 | break; 664 | case '{': { 665 | int n = cur.indexOf('}',i); 666 | QString sub = cur.mid(i+1,n-1-i); 667 | i = n; 668 | 669 | QStringList temp = sub.split(','); 670 | Q_ASSERT(temp.size()==2 || temp.size()==1); 671 | multiplyLists(lists, temp.first().toInt(), temp.last().isEmpty()?INFINITY_STAR:temp.last().toInt()); 672 | merged = true; 673 | break; 674 | } 675 | case '+': 676 | Q_ASSERT(!merged); 677 | multiplyLists(lists, 1, INFINITY_PLUS); 678 | merged = true; 679 | break; 680 | case '*': 681 | Q_ASSERT(!merged); 682 | multiplyLists(lists, 0, INFINITY_STAR); 683 | merged = true; 684 | break; 685 | case '(': if (cur[i+1] == '?') { 686 | Q_ASSERT(i + 3< cur.length()); 687 | //special command 688 | i+=2; 689 | if (cur[i] == '#') ; //comment 690 | else if (cur[i] == 'i') caseInsensitive = true; 691 | else if (cur[i] == '-' && cur[i+1] == 'i') caseInsensitive = false; 692 | else throw "Unknown special (? 
bracket command"; 693 | while (cur[i]!=')') i++; 694 | } else { 695 | if (!merged) multiplyLists(lists); 696 | nestingLevel++; 697 | CharBlock cb; 698 | cb.type = CharBlock::CBT_BACKTRACK_START; 699 | cb.backtrack = nestingLevel; 700 | lists.append(BlockStringList()); 701 | lists.append(BlockStringList() << (BlockString() << cb)); 702 | nestedBrackets << nestingLevel; 703 | merged = true; 704 | } 705 | break; 706 | case ')': { 707 | if (!merged) multiplyLists(lists); 708 | concatLists(lists,true); 709 | CharBlock cb; 710 | cb.type = CharBlock::CBT_BACKTRACK_END; 711 | cb.backtrack = nestedBrackets.takeLast(); 712 | for (int i=0;i rbs; 742 | int64_t totalPos = 0, curPos = 0; 743 | for (int i=0;i 0 && bs.size() > maxLength) { 746 | if (!truncateLonger) continue; 747 | else bs.erase(bs.begin() + maxLength, bs.end()); 748 | } 749 | rbs << bs; 750 | totalPos += countPossibilities(bs); 751 | } 752 | for (int i=0;i