├── .cargo
    └── config
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── brotlizpaq
├── build.rs
├── src
    ├── ast.rs
    ├── documentation.rs
    ├── gen_ir.rs
    ├── gen_zpaql.rs
    ├── grammar.lalrpop
    ├── ir.rs
    ├── main.rs
    ├── options.rs
    ├── post_ir.rs
    ├── post_zpaql.rs
    ├── rtok.rs
    ├── template.rs
    ├── tok.rs
    ├── zpaqcfg.rs
    ├── zpaql.rs
    └── zpaqlvm.rs
└── test
    ├── arrays.cfg
    ├── arrays.py
    ├── bmp_j4.cfg
    ├── bro
    ├── bro.sourcelink
    ├── brotli-dict
    ├── brotli-helper
    ├── brotli.cfg
    ├── brotli.ir
    ├── brotli.py
    ├── brotlimfast.cfg
    ├── brotlinohcomp.cfg
    ├── colorpre.cfg
    ├── kodim23.bmp
    ├── kodim23.flif
    ├── kodim23.png
    ├── kodim23.pnm
    ├── kodim23.sub.bmp
    ├── kodim23.sub.pnm
    ├── kodim23.webp
    ├── lz1.cfg
    ├── lz1.ir
    ├── lz1.orig.cfg
    ├── lz1.py
    ├── lzpre
    ├── lzpre.cpp
    ├── mfast.cfg
    ├── min.pnm
    ├── mixedpi2.cfg
    ├── monarch.bmp
    ├── monarch.flif
    ├── monarch.png
    ├── monarch.pnm
    ├── monarch.webp
    ├── peppers.bmp
    ├── peppers.flif
    ├── peppers.png
    ├── peppers.pnm
    ├── peppers.webp
    ├── pi10k.cfg
    ├── pnm.cfg
    ├── pnm.ir
    ├── pnm.py
    ├── pnmstress.cfg
    ├── rafale.bmp
    ├── rafale.flif
    ├── rafale.png
    ├── rafale.pnm
    ├── rafale.webp
    ├── rle
    ├── rle_cm.cfg
    ├── rle_cm.manual.cfg
    ├── rle_cm.py
    ├── rle_model.cfg
    ├── rle_model.ir
    ├── rle_model.py
    ├── simple_rle
    ├── stress.cfg
    ├── stress.py
    ├── subtract_green
    └── testcase


/.cargo/config:
--------------------------------------------------------------------------------
1 | [target.x86_64-pc-windows-gnu]
2 | linker = "x86_64-w64-mingw32-gcc"
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | target
 2 | zpaq
 3 | zpaqd
 4 | zpaqlpy
 5 | zpaqlpydebug
 6 | zpaqlpy-i686.exe
 7 | zpaqlpy.exe
 8 | zpaqlpy-cross.exe
 9 | src/grammar.rs
10 | *.py~
11 | *.py.swp
12 | *.rs~
13 | *.rs.swp
14 | 
15 | test/min.out
16 | test/min.sub.pnm
17 | test/splash.pypredict
18 | test/splash.zpaqlpredict
19 | test/testar.zpaq
20 | test/testcase.origpy
21 | test/testcase.origz
22 | test/testcase.out
23 | test/testcase.predictpy
24 | test/testcase.predictz
25 | test/testcase.simple
26 | test/testcase.xy
27 | test/xy.gen
28 | test/xy.pnm
29 | test/xy.prydict
30 | test/empty.zpaq
31 | test/dict_is_present.tmp
32 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "zpaqlpy"
 3 | version = "0.1.0"
 4 | authors = ["Kai Lüke <kailueke@riseup.net>"]
 5 | build = "build.rs" # LALRPOP preprocessing
 6 | license = "GPL-3.0"
 7 | description = "zpaqlpy compiler"
 8 | readme = "README.md"
 9 | 
10 | [dependencies]
11 | clap = "2.21.2"
12 | flexi_logger = "0.5.2"
13 | log = "*"
14 | regex = "0.2.1"
15 | 
16 | lalrpop-util = "0.15.2"
17 | 
18 | [build-dependencies.lalrpop]
19 | version = "0.15.2"
20 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | zpaqlpydebug: target/debug/zpaqlpy  # copy of the debug build in the top folder (the check, benchmark and brotlitest recipes run ./zpaqlpydebug)
 2 | 	cp target/debug/zpaqlpy zpaqlpydebug
 3 | 
 4 | zpaqlpy: target/release/zpaqlpy  # copy of the release build in the top folder
 5 | 	cp target/release/zpaqlpy zpaqlpy
 6 | 
 7 | target/debug/zpaqlpy: Cargo.toml build.rs src/grammar.lalrpop $(wildcard src/*.rs)  # prerequisites listed so that an existing binary is rebuilt after a source change
 8 | 	# warning: if lalrpop binary is in PATH, it will be used instead of the library call
 9 | 	LALRPOP_LANE_TABLE=enabled cargo +stable build  # or: cargo incremental build # new: CARGO_INCREMENTAL=1 cargo build
10 | 
11 | target/release/zpaqlpy: Cargo.toml build.rs src/grammar.lalrpop $(wildcard src/*.rs)  # prerequisites listed so that an existing binary is rebuilt after a source change
12 | 	# warning: if lalrpop binary is in PATH, it will be used instead of the library call
13 | 	LALRPOP_LANE_TABLE=enabled cargo +stable build --release
14 | 
15 | cargotest:  # only type-checks the crate with "cargo check", no tests are executed
16 | 	cargo +stable check
17 | 
18 | cargolint:  # runs the clippy lints, using the nightly toolchain
19 | 	cargo +nightly clippy
20 | 
21 | clean:  # removes both cargo-built binaries; -f so that a build output which does not exist is not an error
22 | 	rm -f target/debug/zpaqlpy target/release/zpaqlpy
23 | 
24 | check:  # regression run: output of the Python sources is compared (cmp) with the output of ./zpaqlpydebug and ./zpaqd, any difference aborts make
25 | 	# expects zpaqd to be in the top folder
26 | 	test/stress.py hcomp test/testcase test/splash.pypredict
27 | 	./zpaqlpydebug --run-hcomp test/testcase test/stress.py > test/splash.zpaqlpredict
28 | 	cmp test/splash.pypredict test/splash.zpaqlpredict
29 | 	./zpaqd c test/stress.cfg test/testar.zpaq test/min.pnm
30 | 	./zpaqd d test/testar.zpaq test/xy.pnm
31 | 	cmp test/min.pnm test/xy.pnm
32 | 	test/rle c test/testcase test/testcase.out
33 | 	test/rle_cm.py hcomp test/testcase.out test/xy.prydict  # not compared to r rle_cm.manual h min.out xy.manual yet
34 | 	./zpaqlpydebug --run-hcomp test/testcase.out test/rle_cm.py > test/xy.gen
35 | 	cmp test/xy.gen test/xy.prydict
36 | 	cd test && ../zpaqd c rle_cm.cfg testar.zpaq testcase && cd ..
37 | 	./zpaqd d test/testar.zpaq test/testcase.xy
38 | 	cmp test/testcase test/testcase.xy
39 | 	test/rle c test/min.pnm test/min.out
40 | 	./zpaqd r test/rle_cm.manual.cfg p test/min.out test/xy.pnm
41 | 	cmp test/min.pnm test/xy.pnm
42 | 	./zpaqlpydebug test/lz1.py
43 | 	cd test && ../zpaqd c lz1.cfg testar.zpaq rafale.pnm peppers.pnm monarch.pnm kodim23.pnm && cd ..
44 | 	test/arrays.py pcomp test/lz1.py
45 | 	./zpaqlpydebug test/arrays.py
46 | 	./zpaqd r test/arrays.cfg p test/lz1.py
47 | 	./zpaqd r test/arrays.cfg h test/lz1.py > /dev/null
48 | 	test/simple_rle test/testcase test/testcase.simple
49 | 	test/rle_model.py hcomp test/testcase.simple test/testcase.predictpy
50 | 	./zpaqlpydebug --run-hcomp test/testcase.simple test/rle_model.py > test/testcase.predictz
51 | 	cmp test/testcase.predictz test/testcase.predictpy
52 | 	echo | test/rle_model.py --compare test/testcase pcomp test/testcase.simple test/testcase.origpy
53 | 	./zpaqd r test/rle_model.cfg p test/testcase.simple test/testcase.origz
54 | 	cmp test/testcase.origpy test/testcase.origz
55 | 	./zpaqlpydebug test/pnm.py
56 | 	test/subtract_green test/min.pnm test/min.sub.pnm
57 | 	echo | test/pnm.py --compare test/min.pnm pcomp test/min.sub.pnm /dev/null
58 | 
59 | benchmark:  # compiles test/pnm.py, packs the four pnm test images with pnm.cfg and lists the resulting archive
60 | 	RUST_BACKTRACE=1 ./zpaqlpydebug test/pnm.py
61 | 	cd test && ../zpaqd c pnm.cfg testar.zpaq rafale.pnm peppers.pnm monarch.pnm kodim23.pnm && cd .. && ls -l test/testar.zpaq
62 | 
63 | brotlitest:  # compiles test/brotli.py, packs the test files through ./brotlizpaq and lists the resulting archive
64 | 	RUST_BACKTRACE=1 ./zpaqlpydebug test/brotli.py
65 | 	./brotlizpaq c test/testar.zpaq test/testcase test/rafale.pnm test/peppers.pnm test/monarch.pnm test/kodim23.pnm && ls -l test/testar.zpaq
66 | 	# run on input: ../zpaqd r brotli.cfg p pre.dict.br pre.out
67 | 	# debug: ../zpaqd t brotli.cfg p `od -A none -t x1 -v pre.dict.br | tr -d '\n'`
68 | 	# use: ./zpaq x test/testar.zpaq -to various
69 | 	# rm dict_is_present.tmp ; ../zpaqd c brotli.cfg empty.zpaq /dev/null ; ls -l empty.zpaq
70 | 	# rm dict_is_present.tmp ; ../zpaqd c brotli.cfg testar.zpaq rafale.pnm peppers.pnm monarch.pnm kodim23.pnm ; ls -l testar.zpaq
71 | 
72 | otherbenchmarks:  # archive sizes for the bmp_j4, pnm, lz1 and lz1.orig configs (test images and /dev/null as input) and sizes of the webp/flif/png files
73 | 	cd test && ../zpaqd c bmp_j4.cfg testar.zpaq rafale.bmp peppers.bmp monarch.bmp kodim23.bmp && cd .. && ls -l test/testar.zpaq
74 | 	cd test && ../zpaqd c pnm.cfg empty.zpaq /dev/null && cd .. && ls -l test/empty.zpaq
75 | 	cd test && ../zpaqd c bmp_j4.cfg empty.zpaq /dev/null && cd .. && ls -l test/empty.zpaq
76 | 	cd test && time ../zpaqd c lz1.orig.cfg testar.zpaq rafale.pnm peppers.pnm monarch.pnm kodim23.pnm && cd .. && ls -l test/testar.zpaq
77 | 	cd test && time ../zpaqd c lz1.cfg testar.zpaq rafale.pnm peppers.pnm monarch.pnm kodim23.pnm && cd .. && ls -l test/testar.zpaq
78 | 	cd test && ../zpaqd c lz1.cfg empty.zpaq /dev/null && cd .. && ls -l test/empty.zpaq
79 | 	cd test && ../zpaqd c lz1.orig.cfg empty.zpaq /dev/null && cd .. && ls -l test/empty.zpaq
80 | 	du -cb test/*webp
81 | 	du -cb test/*flif
82 | 	du -cb test/*png
83 | 


--------------------------------------------------------------------------------
/brotlizpaq:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # usage: brotlizpaq a|c archive.zpaq INPUTFILES…
 3 | # a: append to existing archive, c: create (invocation of zpaqd)
 4 | # creates a valid zpaq archive and stores the files compressed with brotli
 5 | # needs path to brotli.cfg, brotli-dict, zpaqd
 6 | # and bro (https://github.com/google/brotli with usage as bro --quality 9 --input IN --output OUT.br)
 7 | 
 8 | # needs pcomp line as "pcomp ./brotli-helper ;" (copied from git or compiled from zpaqlpy test/brotli.py)
 9 | BROTLICFG="test/brotli.cfg"
10 | ZPAQD="./zpaqd"
11 | BRO="test/bro"
12 | BROTLIDICT="test/brotli-dict"
13 | 
14 | set -euo pipefail
15 | # -euxo pipefail to debug
16 | 
17 | # ${1:-} instead of $1: under "set -u" a missing argument would abort with
18 | # "unbound variable" before the usage text is printed
19 | if [ -z "${1:-}" ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]
20 |   then
21 |   echo "usage: $0 a|c archive.zpaq INPUTFILES…"
22 |   exit
23 | fi
24 | # the archive name is required as well; check it before any temp file is created
25 | if [ "$#" -lt 2 ]
26 |   then
27 |   echo "usage: $0 a|c archive.zpaq INPUTFILES…" >&2
28 |   exit 1
29 | fi
30 | 
31 | # temp files must not contain a . (so invoke tempfile instead of mktemp)
32 | # tempfile creates the file it names, so the base of the .cfg name is kept to delete it as well
33 | TMPCFGBASE=`tempfile`
34 | TMPCFG="$TMPCFGBASE.cfg"
35 | TMPHELPER=`tempfile`
36 | TMPLOCK=`tempfile`
37 | # clean up on every exit path, also when zpaqd fails and set -e aborts the script;
38 | # -f because the lock file only exists after the helper has run at least once
39 | trap 'rm -f "$TMPCFGBASE" "$TMPCFG" "$TMPHELPER" "$TMPLOCK"' EXIT
40 | 
41 | sed "s#pcomp ./brotli-helper ;#pcomp $TMPHELPER ;#g" "$BROTLICFG" > "$TMPCFG"
42 | # helper named in the pcomp line: compresses $1 with bro into $2, only the first call (no lock file yet) prepends the dictionary
43 | cat > "$TMPHELPER" <<EOF
44 | #!/bin/sh
45 | TMPBASE=\`tempfile\`
46 | TMPBR="\$TMPBASE.br"
47 | "$BRO" --quality 9 --input "\$1" --output "\$TMPBR"
48 | if [ -e "$TMPLOCK" ]
49 | then
50 |   printf "x" > "\$2"  # no dict
51 |   cat "\$TMPBR" >> "\$2"
52 | else
53 |   cat "$BROTLIDICT" "\$TMPBR" > "\$2"
54 |   touch "$TMPLOCK"
55 | fi
56 | rm "\$TMPBR" "\$TMPBASE"
57 | EOF
58 | 
59 | chmod +x "$TMPHELPER"
60 | 
61 | # the lock file must not exist yet: the helper takes its absence as "first file, prepend the dictionary"
62 | rm "$TMPLOCK"
63 | "$ZPAQD" "$1" "$TMPCFG" "$2" "${@:3}"
64 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
 1 | extern crate lalrpop;
 2 | use std::process::Command;
 3 | 
 4 | /// Build script entry point: runs LALRPOP on the grammar.
 5 | ///
 6 | /// An external `lalrpop` binary from PATH is tried first on `src/grammar.lalrpop`;
 7 | /// if it cannot be started or exits unsuccessfully, the `lalrpop` library is used instead.
 8 | fn main() {
 9 |     let external_ok = match Command::new("lalrpop").arg("src/grammar.lalrpop").status() {
10 |         Ok(status) => {
11 |             if !status.success() {
12 |                 // plain stdout of a build script is hidden by cargo, a cargo:warning line is shown to the user
13 |                 println!("cargo:warning=external lalrpop failed");
14 |             }
15 |             status.success()
16 |         },
17 |         Err(_) => false, // the binary could not be started at all
18 |     };
19 |     if !external_ok {
20 |         lalrpop::process_root().unwrap();
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/ast.rs:
--------------------------------------------------------------------------------
  1 | #[allow(dead_code)]
  2 | use std::fmt::{Display, Formatter, Error};
  3 | 
  4 | // https://docs.python.org/3.5/library/ast.html
  5 | 
  6 | pub type Identifier = String;
  7 | 
  8 | #[allow(dead_code)]
  9 | #[derive(Debug, Clone)]
 10 | pub enum Stmt {  // statement nodes; sub-expressions are boxed as we don't know the size of the trees at compile time
 11 |     // simplification from "args: Box<Arguments>"
 12 |     FunctionDef{name: Identifier, args: Vec<String>, body: Vec<Stmt>, decorator_list: Vec<Expr>,
 13 |                 returns: Option<Expr>, location: String},
 14 |     Return{value: Option<Expr>, location: String},
 15 |     // simplification from "targets: Vec<Expr>", so no unpacking assignments like a, b = 1, 2 are possible
 16 |     Assign{target: Box<Expr>, value: Box<Expr>, location: String},
 17 |     AugAssign{target: Box<Expr>, op: Operator, value: Box<Expr>, location: String},
 18 |     // for-loops are not supported as they need iterators For(target: Box<Expr>, iter: Box<Expr>, body: Vec<Stmt>, orelse: Vec<Stmt>, location: String),
 19 |     While{test: Box<Expr>, body: Vec<Stmt>, orelse: Vec<Stmt>, location: String},
 20 |     If{test: Box<Expr>, body: Vec<Stmt>, orelse: Vec<Stmt>, location: String},
 21 |     // with-blocks are not needed as e.g. opening files is impossible: With(withitem* items, stmt* body)
 22 |     // instead of raise there is a custom error()-function: Raise(expr? exc, expr? cause)
 23 |     // try-catch is impossible: Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
 24 |     // asserts would need to raise exceptions: Assert(expr test, expr? msg)
 25 |     Global{names: Vec<Identifier>, location: String},
 26 |     Nonlocal{names: Vec<Identifier>, location: String}, // also not supported but parsed for a better error message about scopes
 27 |     Expr{value: Box<Expr>, location: String},
 28 |     Pass{location: String}, Break{location: String}, Continue{location: String},
 29 | }
 30 | 
 31 | impl Stmt {
 32 |     /// Returns an owned copy of the location string stored in the statement.
 33 |     pub fn location(&self) -> String {
 34 |         let loc = match *self {
 35 |             Stmt::FunctionDef{ref location, ..} |
 36 |             Stmt::Return{ref location, ..} |
 37 |             Stmt::Assign{ref location, ..} |
 38 |             Stmt::AugAssign{ref location, ..} |
 39 |             Stmt::While{ref location, ..} |
 40 |             Stmt::If{ref location, ..} |
 41 |             Stmt::Global{ref location, ..} |
 42 |             Stmt::Nonlocal{ref location, ..} |
 43 |             Stmt::Expr{ref location, ..} |
 44 |             Stmt::Pass{ref location} | Stmt::Break{ref location} | Stmt::Continue{ref location} => location,
 45 |         };
 46 |         loc.clone()
 47 |     }
 48 | }
 49 | 
 50 | 
 51 | #[allow(dead_code)]
 52 | #[derive(Debug, Clone)]
 53 | pub enum Expr {  // expression nodes; every variant carries the source location as a string
 54 |     BoolOpE{op: BoolOp, values: Vec<Expr>, location: String},
 55 |     BinOp{left: Box<Expr>, op: Operator, right: Box<Expr>, location: String},
 56 |     UnaryOpE{op: UnaryOp, operand: Box<Expr>, location: String},
 57 |     // lambda is not possible because of non-local scopes: Lambda(arguments args, expr body)
 58 |     // @TODO: IfExp(test: Box<Expr>, body: Box<Expr>, orelse: Box<Expr>),
 59 |     Dict{keys: Vec<Expr>, values: Vec<Expr>, location: String},  // not supported but needed for parsing in comp section
 60 |     // simplification: Set(expr* elts)
 61 |     // simplification, lists are not supported except for H and M,
 62 |     //   but there are helper functions for arrays on M and H: ListComp(expr elt, comprehension* generators)
 63 |     Compare{left: Box<Expr>, ops: Vec<CmpOp>, comparators: Vec<Expr>, location: String},
 64 |     Call{func: Identifier, args: Vec<Expr>, keywords: Vec<Keyword>, location: String}, // simplification from func: Box<Expr>
 65 |     Str{s: String, location: String},
 66 |     Num{n: u32, location: String},  // or var?
 67 |     NameConstant{value: u32, location: String},  // presumably True/False/None encoded as a number - TODO confirm against the grammar
 68 |     Ellipsis{location: String},
 69 |     Attribute{value: Box<Expr>, attr: Identifier, ctx: ExprContext, location: String},
 70 |     Subscript{value: Box<Expr>, slice: Box<Slice>, ctx: ExprContext, location: String},
 71 |     Starred{value: Box<Expr>, ctx: ExprContext, location: String},
 72 |     Name{id: Identifier, ctx: ExprContext, location: String},
 73 |     List{elts: Vec<Expr>, ctx: ExprContext, location: String},
 74 |     Tuple{elts: Vec<Expr>, ctx: ExprContext, location: String},
 75 | }
 76 | 
 77 | 
 78 | 
 79 | impl Expr {
 80 |     /// Returns an owned copy of the location string stored in the expression.
 81 |     pub fn location(&self) -> String {
 82 |         let loc = match *self {
 83 |             Expr::BoolOpE{ref location, ..} |
 84 |             Expr::BinOp{ref location, ..} |
 85 |             Expr::UnaryOpE{ref location, ..} |
 86 |             Expr::Dict{ref location, ..} |
 87 |             Expr::Compare{ref location, ..} |
 88 |             Expr::Call{ref location, ..} |
 89 |             Expr::Str{ref location, ..} |
 90 |             Expr::Num{ref location, ..} |
 91 |             Expr::NameConstant{ref location, ..} |
 92 |             Expr::Ellipsis{ref location} |
 93 |             Expr::Attribute{ref location, ..} |
 94 |             Expr::Subscript{ref location, ..} |
 95 |             Expr::Starred{ref location, ..} |
 96 |             Expr::Name{ref location, ..} | Expr::List{ref location, ..} | Expr::Tuple{ref location, ..} => location,
 97 |         };
 98 |         loc.clone()
 99 |     }
100 | }
101 | 
102 | #[allow(dead_code)]
103 | #[derive(Copy, Clone, Debug)]
104 | pub enum ExprContext { Load, Store, Del, AugLoad, AugStore, Param } // ctx field of Attribute, Subscript, Starred, Name, List and Tuple; only Load and Store are used
105 | 
106 | #[allow(dead_code)]
107 | #[derive(Debug, Clone)]
108 | pub enum Slice {  // slice field of Expr::Subscript
109 |     Slice{lower: Option<Expr>, upper: Option<Expr>, step: Option<Expr>}, // not supported
110 |     ExtSlice{dims: Vec<Slice>}, // not supported
111 |     Index{value: Box<Expr>},  // a single index expression
112 | }
113 | #[derive(Copy, Clone, Debug)]
114 | pub enum BoolOp {  // operator of Expr::BoolOpE
115 |     And, Or,
116 | }
117 | #[derive(Copy, Clone, Debug)]
118 | pub enum Operator {  // binary operator of Expr::BinOp and Stmt::AugAssign
119 |     Add, Sub, Mult, MatMult, Div, Mod, Pow, LShift, RShift, BitOr, BitXor, BitAnd, FloorDiv,
120 | }
121 | #[derive(Copy, Clone, Debug)]
122 | pub enum UnaryOp {  // operator of Expr::UnaryOpE
123 |     Invert, Not, UAdd, USub,
124 | }
125 | #[derive(Copy, Clone, Debug)]
126 | pub enum CmpOp {  // operators of Expr::Compare
127 |     Eq, NotEq, Lt, LtE, Gt, GtE, Is, IsNot, In, NotIn, // In and NotIn are not supported
128 | }
129 | #[derive(Debug, Clone)]
130 | pub struct Comprehension{target: Box<Expr>, iter: Box<Expr>, ifs: Vec<Expr>}  // not referenced by Stmt or Expr in this file
131 | #[derive(Debug, Clone)]
132 | pub struct ExceptHandler{etype: Option<Expr>, name: Option<Identifier>, body: Vec<Stmt>, location: String}  // not referenced by Stmt or Expr in this file
133 | #[derive(Debug, Clone)]
134 | pub struct Arguments{args: Vec<Arg>, vararg: Option<Arg>, kwonlyargs: Vec<Arg>, kw_defaults: Vec<Expr>, kwarg: Option<Arg>, defaults: Vec<Expr>}  // FunctionDef uses the simplified Vec<String> instead
135 | #[derive(Debug, Clone)]
136 | pub struct Arg{arg: Identifier, annotation: Option<Expr>, location: String}  // element type of the Arguments fields
137 | #[derive(Debug, Clone)]
138 | pub struct Keyword{arg: Option<Identifier>, value: Box<Expr>}  // element of Expr::Call keywords; the fields are private, so it can only be built inside this module
139 | 
140 | 
141 | // Provides nicer printing of the AST than the derived debug-print: nested nodes are
142 | // placed on their own lines and indented by two more spaces per nesting level.
143 | impl Display for Stmt {
144 |     /// Writes the statement as `Name (fields..., location)`.
145 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
146 |         use self::Stmt::*;
147 |         match *self {
148 |             FunctionDef{ref name, ref args, ref body, ref decorator_list, ref returns, ref location} => {
149 |                 let body_block = body.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
150 |                 write!(fmt, "FunctionDef ({}, {:?}, [\n{}], {:?}, {:?} {})", name, args, body_block, decorator_list, returns, location)
151 |                 },
152 |             Return{ref value, ref location} => {
153 |                 let expr = match *value { None => "".to_string(), Some(ref r) => format!("  {}", r).replace("\n", "\n  ")};
154 |                 write!(fmt, "Return ({}, {})", expr, location)
155 |                 },
156 |             Assign{ref target, ref value, ref location} => write!(fmt, "Assign ({} =, {}, {})", format!("{}", target).replace("\n", "\n  "), format!("{}", value).replace("\n", "\n  "), location),
157 |             AugAssign{ref target, op, ref value, ref location} => {
158 |                 write!(fmt, "AugAssign ({} {:?}, {}, {})", format!("{}", target).replace("\n", "\n  "), op, format!("{}", value).replace("\n", "\n  "), location)
159 |                 },
160 |             While{ref test, ref body, ref orelse, ref location} => {
161 |                 let body_block = body.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
162 |                 let else_block = orelse.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
163 |                 write!(fmt, "While ({}:, [\n{}], [\n{}], {})", format!("{}", test).replace("\n", "\n  "), body_block, else_block, location)
164 |                 },
165 |             If{ref test, ref body, ref orelse, ref location} => {
166 |                 let body_block = body.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
167 |                 let else_block = orelse.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
168 |                 write!(fmt, "If ({}:, [\n{}], [\n{}], {})", format!("{}", test).replace("\n", "\n  "), body_block, else_block, location)
169 |                 },
170 |             Global{ref names, ref location} => write!(fmt, "Global ({:?}, {})", names, location),
171 |             Nonlocal{ref names, ref location} => write!(fmt, "Nonlocal ({:?}, {})", names, location),
172 |             Pass{ref location} => write!(fmt, "Pass ({})", location),  // opening parenthesis was missing, now balanced like the other variants
173 |             Break{ref location} => write!(fmt, "Break ({})", location),
174 |             Continue{ref location} => write!(fmt, "Continue ({})", location),
175 |             Expr{ref value, ref location} => write!(fmt, "Expr (\n {}, {})", format!("{}", value).replace("\n", "\n  "), location),
176 |         }
177 |     }
178 | }
179 | 
180 | impl Display for Expr {
181 |     /// Writes the expression as `Name (fields..., location)` with nested nodes indented by two spaces.
182 |     /// Attribute, Starred, List and Tuple have no dedicated format yet and print a placeholder text.
183 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
184 |         use self::Expr::*;
185 |         match *self {
186 |             BoolOpE{op, ref values, ref location} => {
187 |                 // All operands are printed, separated by the operator. Indexing values[0] and values[1] panicked
188 |                 // for fewer than two operands and dropped any further ones; output for exactly two is unchanged.
189 |                 let operands = values.iter().map(|v| format!("  {}", v).replace("\n", "\n  ")).collect::<Vec<String>>();
190 |                 let separator = format!(",\n{:?}, \n", op);
191 |                 write!(fmt, "BoolOpE (\n{}, {})", operands[..].join(&separator[..]), location)
192 |                 },
193 |             BinOp{ref left, op, ref right, ref location} => write!(fmt, "BinOp (\n{},\n{:?}, \n{}, {})", format!("  {}", left).replace("\n", "\n  "), op, format!("  {}", right).replace("\n", "\n  "), location),
194 |             UnaryOpE{op, ref operand, ref location} => write!(fmt, "UnaryOpE ({:?}, \n{}, {})", op, format!("  {}", operand).replace("\n", "\n  "), location),
195 |             Dict{ref keys, ref values, ref location} => write!(fmt, "Dict ({:?}, {:?}, {})", keys, values, location),
196 |             Compare{ref left, ref ops, ref comparators, ref location} => {
197 |                 let comparators_e = comparators.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
198 |                 write!(fmt, "Compare ({}, {:?}, [\n{}], {})", format!("{}", left).replace("\n", "\n  "), ops, comparators_e, location)
199 |                 },
200 |             Call{ref func, ref args, ref keywords, ref location} => {
201 |                 let args_e = args.iter().map(|st| format!("  {}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n");
202 |                 write!(fmt, "Call ({}, [\n{}], {:?}, {})", func, args_e, keywords, location)
203 |                 },
204 |             Str{ref s, ref location} => write!(fmt, "String ({}, {})", s, location),
205 |             Num{n, ref location} => write!(fmt, "Num ({}, {})", n, location),
206 |             NameConstant{value, ref location} => write!(fmt, "NameConstant ({}, {})", value, location),
207 |             Ellipsis{ref location} => write!(fmt, "Ellipsis ({})", location),
208 |             Name{ref id, ctx, ref location} => write!(fmt, "Name ({}, {:?}, {})", id, ctx, location),
209 |             Subscript{ref value, ref slice, ctx, ref location} => write!(fmt, "Subscript ({}, {:?}, {:?}, {})", format!("{}", value).replace("\n", "\n  "), slice, ctx, location),
210 |             _ => write!(fmt, "DISPLAY NOT IMPLEMENTED"),
211 |         }
212 |     }
213 | }
214 | 
215 | 
216 | 


--------------------------------------------------------------------------------
/src/ir.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::{Display, Formatter, Error};
  2 | 
  3 | #[derive(Debug, Clone)]
  4 | pub enum IR {
  5 |     Assign{target: IRVar, source: IRVar}, // e.g. M[t7] = 13
  6 |     Assign2Op{target: IRVar, val1: IRVar, op: IROp, val2: IRVar}, // e.g. t3 = 2 + H[t9]
  7 |     Assign1Op{target: IRVar, uop: IRUOp, source: IRVar}, // e.g. t2 = -t3
  8 |     GoTo{label: String},
  9 |     Label{label: String},
 10 |     Halt, // end ZPAQL execution for the current input byte
 11 |     Out{var: IRVar}, // produce pcomp output
 12 |     If{cond_var: IRVar, goto_label: String}, // cond_var is false for 0, otherwise true
 13 |     IfN{cond_var: IRVar, goto_label: String},
 14 |     IfEq{val1: IRVar, val2: IRVar, goto_label: String},
 15 |     IfNeq{val1: IRVar, val2: IRVar, goto_label: String},
 16 |     Error, // end ZPAQL execution totally through the "wrong opcode" message
 17 |     Comment{comment: String},
 18 |     // helper meta commands which will be converted to the commands above before they get to the ZPAQL backend
 19 |     Block{stmts: Vec<IR>},
 20 |     MarkTempVarStart,
 21 |     MarkTempVarEnd,
 22 |     StoreTempVars{ti: Vec<u8>, stack_pos: u32}, // save variables ti to the stack    (remember to also set st.stack_pos afterwards if needed! - not needed when only Call is coming)
 23 |     LoadTempVars{ti: Vec<u8>, stack_pos: u32}, // load ti from the stack (stack_pos+1 is position of saved t1)
 24 |     Call{label: String, args: Vec<IRVar>, stack_pos: u32, ret_id: u32},  // will overwrite t1 with return value
 25 |     Return{var: Option<IRVar>},
 26 |     JumpCode{ret_ids: Vec<u32>, stackend: u32},  // set via st.make_new_return_id(), create jumpers for return ids
 27 |     InitialCode{bsp: u32},
 28 | }
 29 | 
 30 | impl IR {
 31 |     pub fn convert(&self) -> IR { // expand the meta commands InitialCode, JumpCode, Return, Call, StoreTempVars and LoadTempVars into a Block; every other command is returned as a clone
 32 |         match *self {
 33 |             IR::InitialCode{bsp} => {
 34 |                 IR::Block{stmts: vec![
 35 |                     IR::Comment{comment: "t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255".to_string()},
 36 |                     IR::IfN{cond_var: IRVar::Var{varid: 0}, goto_label: "init_code".to_string()}, // basepointer is not set yet, run init code
 37 |                     IR::If{cond_var: IRVar::Var{varid: 254}, goto_label: "cont_reading".to_string()}, // proceed with read_b function which was stopped through halt in order to have a new input byte in t255
 38 |                     IR::GoTo{label: "call_next".to_string()},
 39 |                     IR::Label{label: "init_code".to_string()},
 40 |                     IR::Assign{target: IRVar::Var{varid: 0}, source: IRVar::Number{value: bsp}}, // initialize bsp
 41 |                     IR::Assign{target: IRVar::Var{varid: 252}, source: IRVar::Var{varid: 0}},  // save globalbsp
 42 |                     IR::GoTo{label: "read_b_end~".to_string()}, // define read_b(), which does not use parameters
 43 |                     IR::Label{label: "read_b".to_string()},
 44 |                     // test if input_c (at t253) is consumed already
 45 |                     IR::Assign2Op{target: IRVar::Var{varid: 1}, val1: IRVar::Var{varid: 253}, op: IROp::Eq, val2: IRVar::Number{value: 4294967294}},
 46 |                     IR::If{cond_var: IRVar::Var{varid: 1}, goto_label: "do_read_in".to_string()},
 47 |                     // was not consumed:
 48 |                     IR::Assign{target: IRVar::Var{varid: 255}, source: IRVar::Var{varid: 253}},  // t255 = input_c
 49 |                     IR::Assign{target: IRVar::Var{varid: 253}, source: IRVar::Number{value: 4294967294}}, // mark input_c as consumed (the same value the test above compares against)
 50 |                     IR::GoTo{label: "cont_reading".to_string()}, // no halt needed now
 51 |                     IR::Label{label: "do_read_in".to_string()},
 52 |                     // t254 holds reading state
 53 |                     IR::Assign{target: IRVar::Var{varid: 254}, source: IRVar::Number{value: 1}},  // in reading state
 54 |                     IR::Halt, // halt to get t255 filled with new input byte, starts execution from the beginning, therefore we have to jump back to cont_reading
 55 |                     IR::Label{label: "cont_reading".to_string()},
 56 |                     IR::Assign{target: IRVar::Var{varid: 254}, source: IRVar::Number{value: 0}},  // not in reading state
 57 |                     IR::Return{var: Some(IRVar::Var{varid: 255})}.convert(),
 58 |                     IR::Label{label: "read_b_end~".to_string()}, // end of read_b() function
 59 |                 ]}
 60 |             },
 61 |             IR::JumpCode{ref ret_ids, stackend} => { // dispatcher: jumps to the label return_id_N whose N equals t2, otherwise raises the error
 62 |                 let mut stmts = vec![
 63 |                     IR::Halt,
 64 |                     IR::Label{label: "find_label_ret_id".to_string()},  // expects ret_id to be in t2
 65 |                 ];
 66 |                 stmts.push(IR::Assign2Op{target: IRVar::Var{varid: 4}, val1: IRVar::Var{varid: 0}, op: IROp::Gt, val2: IRVar::Number{value: stackend-200} }); // NOTE(review): stackend-200 underflows for stackend < 200 - confirm that callers guarantee a larger value
 67 |                 stmts.push(IR::If{cond_var: IRVar::Var{varid: 4}, goto_label: "throw_error".to_string()});
 68 |                 for ret_id in ret_ids.iter() {
 69 |                     stmts.push(IR::IfEq{val1: IRVar::Var{varid: 2}, val2: IRVar::Number{value: *ret_id}, goto_label: format!("return_id_{}", ret_id)});
 70 |                 }
 71 |                 stmts.push(IR::Label{label: "throw_error".to_string()});
 72 |                 stmts.push(IR::Error);
 73 |                 stmts.push(IR::Halt);
 74 |                 IR::Block{stmts: stmts}
 75 |             },
 76 |             IR::Return{ref var} => {
 77 |                 let mut stmts = vec![];
 78 |                 match var {
 79 |                     &None => {},
 80 |                     // t1 = var
 81 |                     &Some(ref v) => { stmts.push(IR::Assign{target: IRVar::Var{varid: 1}, source: (*v).clone()}); },
 82 |                 }
 83 |                 // load ret_id to t2
 84 |                 stmts.push(IR::Assign{target: IRVar::Var{varid: 2},
 85 |                     source: IRVar::H{index_varid: 0, orig_name: "".to_string()}});
 86 |                 // t0--
 87 |                 stmts.push(IR::Assign2Op{target: IRVar::Var{varid: 0}, val1: IRVar::Var{varid: 0}, op: IROp::Sub, val2: IRVar::Number{value: 1}});
 88 |                 // load old_bsp from t0 to t0
 89 |                 stmts.push(IR::Assign{target: IRVar::Var{varid: 0},
 90 |                     source: IRVar::H{index_varid: 0, orig_name: "".to_string()}});
 91 |                 stmts.push(IR::GoTo{label: "find_label_ret_id".to_string()});
 92 |                 IR::Block{stmts: stmts}
 93 |             },
 94 |             IR::Call{ref label, ref args, stack_pos, ret_id} => {
 95 |                 // Calling convention:
 96 |                 // Expects t0 to be the new bsp (i.e. pointer to stack_pos+2), then with new t0: old_bsp in H[t0-1],
 97 |                 //   ret_id in H[t0], and arg1 in H[t0+1] etc.
 98 |                 // After return, the result value will be in t1 and t0 will be the restored old_bsp
 99 |                 let mut stmts = vec![
100 |                     IR::Assign{target: IRVar::Ht{stack_offset: stack_pos+1, local: true, orig_name: "".to_string()},
101 |                         source: IRVar::Var{varid: 0}}, // save old_bsp
102 |                     IR::Comment{comment: "saved bsp, return id:".to_string()},
103 |                     IR::Assign{target: IRVar::Ht{stack_offset: stack_pos+2, local: true, orig_name: "".to_string()},
104 |                         source: IRVar::Number{value: ret_id}}, // set ret_id
105 |                     IR::Comment{comment: "push arguments:".to_string()},
106 |                 ];
107 |                 for (pos, arg) in args.iter().enumerate() {
108 |                     stmts.push(IR::Assign{target: IRVar::Ht{stack_offset: stack_pos+3+ pos as u32, local: true, orig_name: "".to_string()},
109 |                         source: (*arg).clone()});
110 |                 }
111 |                 stmts.push(IR::Assign2Op{target: IRVar::Var{varid: 0}, val1: IRVar::Var{varid: 0}, op: IROp::Add, val2: IRVar::Number{value: stack_pos+2}});
112 |                 stmts.push(IR::GoTo{label: label.clone()});
113 |                 stmts.push(IR::Label{label: format!("return_id_{}", ret_id)});
114 |                 IR::Block{stmts: stmts}
115 |             },
116 |             IR::StoreTempVars{ref ti, stack_pos} => { // the k-th entry of ti (counting from 0) is written to stack offset stack_pos+1+k
117 |                 let mut stmts = vec![];
118 |                 let mut i = 1;
119 |                 for vi in ti {
120 |                     stmts.push(IR::Assign{target: IRVar::Ht{stack_offset: stack_pos+i, local: true, orig_name: "".to_string()},
121 |                         source: IRVar::Var{varid: *vi}});
122 |                     i += 1;
123 |                 }
124 |                 IR::Block{stmts: stmts}
125 |             },
126 |             IR::LoadTempVars{ref ti, stack_pos} => { // inverse of StoreTempVars: the k-th entry of ti is read back from stack offset stack_pos+1+k
127 |                 let mut stmts = vec![];
128 |                 let mut i = 1;
129 |                 for vi in ti {
130 |                     stmts.push(IR::Assign{target: IRVar::Var{varid: *vi},
131 |                         source: IRVar::Ht{stack_offset: stack_pos+i, local: true, orig_name: "".to_string()}});
132 |                     i += 1;
133 |                 }
134 |                 IR::Block{stmts: stmts}
135 |             },
136 |             _ => self.clone(),
137 |         }
138 |     }
139 | }
140 | 
141 | #[allow(dead_code)]
142 | #[derive(Debug, Clone, PartialEq)]
143 | pub enum IRVar { // operand (source or target) of an IR instruction
144 |   Number{value: u32}, // immediate constant; all computations are in Z_4294967296
145 |   Var{varid: u8},  // temporary tN; varid 255, 254, 253, 252 are reserved for input byte, reading state, input_c, globalbsp
146 |   H{index_varid: u8, orig_name: String}, // H[tN] with N = index_varid; orig_name is only shown in IR listings
147 |   Ht{stack_offset: u32, local: bool, orig_name: String},  // H[tB + stack_offset]: stack_offset is relative to t0 (local=true) or t252 (local=false)
148 |   M{index_varid: u8}, // M[tN] with N = index_varid
149 |   Hx{addr: u32}, // H[addr] at a fixed address
150 |   Mx{addr: u32}, // M[addr] at a fixed address
151 |   VH(Box<IRVar>), // virtual wrappers for arrays on H and M (exist for type information to reduce dynamic checks)
152 |   VM(Box<IRVar>), // see VH; tovar() strips either wrapper
153 | }
154 | 
155 | impl IRVar {
156 |     /// Expand meta commands: strip one VH/VM wrapper, any other operand comes back as a plain copy.
157 |     pub fn tovar(&self) -> IRVar {
158 |         match *self {
159 |             IRVar::VH(ref wrapped) | IRVar::VM(ref wrapped) => (**wrapped).clone(),
160 |             _ => self.clone(),
161 |         }
162 |     }
163 | }
164 | 
165 | #[derive(Debug, Copy, Clone, PartialEq)]
166 | pub enum IROp { // binary operators (the op of Assign2Op); each comment gives the spelling Display prints
167 |   Add, // +
168 |   Sub, // -
169 |   Mult, // *
170 |   Div, // /
171 |   Pow, // **
172 |   LShift, // <<
173 |   RShift, // >>
174 |   Mod, // %
175 | 
176 |   BitOr, // |
177 |   BitXor, // ^
178 |   BitAnd, // &
179 | 
180 |   Or, // or
181 |   And, // and
182 |   Eq, // ==
183 |   NotEq, // !=
184 |   Lt, // <
185 |   LtE, // <=
186 |   Gt, // >
187 |   GtE, // >=
188 | }
189 | 
190 | #[derive(Debug, Clone, Copy)]
191 | pub enum IRUOp { // unary operators (presumably the uop of Assign1Op - confirm in the IR definition)
192 |   Not,     // (== 0), printed as !
193 |   Invert,  // bitwise, printed as ~
194 |   USub, // unary minus, printed as -
195 | }
196 | 
197 | impl Display for IROp {
198 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
199 |         let symbol = match *self { // operator spelling used in IR listings
200 |             IROp::Add => "+",
201 |             IROp::Sub => "-",
202 |             IROp::Mult => "*",
203 |             IROp::Div => "/",
204 |             IROp::Pow => "**",
205 |             IROp::Mod => "%",
206 |             IROp::LShift => "<<",
207 |             IROp::RShift => ">>",
208 |             IROp::BitOr => "|",
209 |             IROp::BitXor => "^",
210 |             IROp::BitAnd => "&",
211 | 
212 |             IROp::Or => "or",
213 |             IROp::And => "and",
214 |             IROp::Eq => "==",
215 |             IROp::NotEq => "!=",
216 |             IROp::Lt => "<",
217 |             IROp::LtE => "<=",
218 |             IROp::Gt => ">",
219 |             IROp::GtE => ">=",
220 |         };
221 |         fmt.write_str(symbol)
222 |     }
223 | }
224 | 
225 | impl Display for IRUOp {
226 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
227 |         let symbol = match *self { // prefix operator spelling used in IR listings
228 |             IRUOp::Not => "!",
229 |             IRUOp::Invert => "~",
230 |             IRUOp::USub => "-",
231 |         };
232 |         fmt.write_str(symbol)
233 |     }
234 | }
235 | 
236 | impl Display for IR {
237 |     /// Render one instruction as IR text; a Block renders its statements one per line.
238 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
239 |         match *self {
240 |             IR::Assign{ref target, ref source} => write!(fmt, " {} = {}", target, source),
241 |             IR::Assign2Op{ref target, ref val1, op, ref val2} => {
242 |                 write!(fmt, " {} = {} {} {}", target, val1, op, val2)
243 |             },
244 |             IR::Assign1Op{ref target, uop, ref source} => {
245 |                 write!(fmt, " {} = {} {}", target, uop, source)
246 |             },
247 |             IR::GoTo{ref label} => write!(fmt, " goto {}", label),
248 |             IR::Label{ref label} => write!(fmt, ":{}:", label),
249 |             IR::Halt => fmt.write_str(" halt"),
250 |             IR::Out{ref var} => write!(fmt, " out {}", var),
251 |             IR::If{ref cond_var, ref goto_label} => {
252 |                 write!(fmt, " if {} goto {}", cond_var, goto_label)
253 |             },
254 |             IR::IfN{ref cond_var, ref goto_label} => {
255 |                 write!(fmt, " ifN {} goto {}", cond_var, goto_label)
256 |             },
257 |             IR::IfEq{ref val1, ref val2, ref goto_label} => {
258 |                 write!(fmt, " ifEq {} {} goto {}", val1, val2, goto_label)
259 |             },
260 |             IR::IfNeq{ref val1, ref val2, ref goto_label} => {
261 |                 write!(fmt, " ifNeq {} {} goto {}", val1, val2, goto_label)
262 |             },
263 |             IR::Error => fmt.write_str(" error"),
264 |             IR::Block{ref stmts} => {
265 |                 let lines: Vec<String> = stmts.iter() // two extra spaces of indent, also on continuation lines
266 |                     .map(|stmt| format!("  {}", stmt).replace("\n", "\n  "))
267 |                     .collect();
268 |                 write!(fmt, " {}", lines.join("\n"))
269 |             },
270 |             IR::Comment{ref comment} => write!(fmt, " # {}", comment),
271 |             IR::InitialCode{bsp} => write!(fmt, " InitialCode (bsp: {})", bsp),
272 |             IR::MarkTempVarStart => fmt.write_str(" MarkTempVarStart"),
273 |             IR::MarkTempVarEnd => fmt.write_str(" MarkTempVarEnd"),
274 |             IR::StoreTempVars{ref ti, stack_pos} => {
275 |                 write!(fmt, " StoreTempVars(t{:?}, stack_pos: {})", ti, stack_pos)
276 |             },
277 |             IR::LoadTempVars{ref ti, stack_pos} => {
278 |                 write!(fmt, " LoadTempVars(t{:?}, stack_pos: {})", ti, stack_pos)
279 |             },
280 |             IR::JumpCode{ref ret_ids, stackend} => {
281 |                 write!(fmt, " JumpCode(ret_ids: {:?}, stackend: {})", ret_ids, stackend)
282 |             },
283 |             IR::Call{ref label, ref args, stack_pos, ret_id} => {
284 |                 let arglist = args.iter().map(|arg| arg.to_string()).collect::<Vec<String>>().join(", ");
285 |                 write!(fmt, " t1 = {}({}) # stack_pos: {} return id: {}", label, arglist, stack_pos, ret_id)
286 |             },
287 |             IR::Return{ref var} => match *var { // a bare return still prints the trailing space
288 |                 Some(ref value) => write!(fmt, " return {}", value),
289 |                 None => fmt.write_str(" return "),
290 |             },
291 |         }
292 |     }
293 | }
294 | 
295 | 
296 | impl Display for IRVar { // textual form of an operand as used in IR listings
297 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
298 |         use self::IRVar::*;
299 |         match *self { // match by reference: no clone of the operand (and its orig_name String) per call
300 |             Number{value} => { write!(fmt, "{}", value) },
301 |             Var{varid} => { write!(fmt, "t{}", varid) },
302 |             H{index_varid, ref orig_name} => {
303 |                 write!(fmt, "H[t{}]({})", index_varid, orig_name)
304 |             },
305 |             Ht{stack_offset, local, ref orig_name} => { // base register is t0 for locals and t252 for globals
306 |                 write!(fmt, "H[t{}+{}]({})", if local {0}else{252}, stack_offset, orig_name)
307 |             },
308 |             M{index_varid} => { write!(fmt, "M[t{}]", index_varid) },
309 |             Mx{addr} => { write!(fmt, "M[{}]", addr) },
310 |             Hx{addr} => { write!(fmt, "H[{}]", addr) },
311 |             VH(ref inner) | VM(ref inner) => { write!(fmt, "{}", inner) }, // wrappers print as their content, at any nesting depth
312 |         }
313 |     }
314 | }
315 | 
316 | 
317 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use]
  2 | extern crate log;
  3 | extern crate flexi_logger;
  4 | #[macro_use]
  5 | extern crate clap;
  6 | extern crate regex;
  7 | 
  8 | pub mod grammar; // synthesized by LALRPOP
  9 | mod tok;
 10 | mod ast;
 11 | mod gen_ir;
 12 | mod zpaqcfg;
 13 | mod zpaql;
 14 | mod gen_zpaql;
 15 | mod post_zpaql;
 16 | mod ir;
 17 | mod template;
 18 | mod documentation;
 19 | mod options;
 20 | mod rtok;
 21 | mod post_ir;
 22 | mod zpaqlvm;
 23 | 
 24 | use flexi_logger::{init,LogConfig};
 25 | use std::io::{Read,Write};
 26 | use std::fs::File;
 27 | use std::process::exit;
 28 | use std::panic;
 29 | use std::str::FromStr;
 30 | use std::collections::HashMap;
 31 | 
 32 | fn main() { // entry point: parse the command line, compile INPUT and optionally run hcomp; exit codes: 1 no input, 2 unreadable file, 3 output not creatable
 33 |     let mut optioncfg = options::Options::new();
 34 |     let matches = clap::App::new("zpaqlpy compiler")
 35 |                     .version(crate_version!())
 36 |                     .about("Compile a zpaqlpy source file to a ZPAQ configuration file for usage with zpaqd
 37 | Copyright (C) 2016 Kai Lüke kailueke@riseup.net
 38 | This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
 39 | under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html")
 40 |                     .args_from_usage(
 41 |                               "-o, --output=[FILE] 'Set the output file (default: INPUT with suffix .cfg, - for stdout is not supported)'
 42 |                               [INPUT]              'Set the input file (- for stdin), must be a valid Python file in form of the template (UTF-8, no BOM)'
 43 |                               -v...                'Set the level of verbosity (-v: warn, -vv: info, -vvv: debug, -vvvv: trace)'
 44 |                               --info-zpaq                        'Show information on the ZPAQ standard'
 45 |                               --info-zpaql                       'Show information on the ZPAQL target language'
 46 |                               --info-zpaqlpy                     'Show information on the supported Python-subset (zpaqlpy)'
 47 |                               --info-zpaqlir                     'Show information on the used intermediate representation language (zpaqlir)'
 48 |                               --info-tutorial                    'Show a small tutorial'
 49 |                               -S                                 'Write only intermediate representation code to output (suffix for default gets INPUT.ir)'
 50 |                               --suppress-pcomp                   'Behave as if \"def pcomp(): pass\" is present, emit an empty pcomp section'
 51 |                               --suppress-hcomp                   'Behave as if \"def hcomp(): pass\" is present, emit an empty hcomp section'
 52 |                               --disable-comp                     'No context-mixing components and arithmetic coding, also suppress hcomp'
 53 |                               --disable-optim                    'Disable lifetime optimisation passes'
 54 |                               --fixed-global-access              'Calculate full address for globals on each access'
 55 |                               --ignore-errors                    'Continues for some errors which lead to an invalid ZPAQ config file'
 56 |                               --emit-template                    'Print an empty template (supports -o)'
 57 |                               --no-post-zpaql                    'Disable ZPAQL optimisation pass for successive byte assignments on an array in M'
 58 |                               --no-comments                      'Do not write original code lines as comments beside output'
 59 |                               --no-pc-comments                   'Do not annotate programme counter for opcodes'
 60 |                               --stacksize=[NUMBER]            'Set size of stack to NUMBER (default: 1048576 = 1MiB, <= 2^32 - 2^?h)'
 61 |                               --extern-tokenizer                 'Use python3 -m tokenize -e instead of internal tokenizer'
 62 |                               --run-hcomp=[FILE]                 'Execute the resulting cfg file like \"zpaqd r CFG h FILE\" and print H[0]…H[n-1]'
 63 |                               --notemp_debug_cfg                 'Disable temporary new feature'"
 64 |                     ).get_matches();
 65 |     if matches.is_present("info-zpaq") { // the --info-* switches only print documentation and quit
 66 |         println!("{}", documentation::INFO_ZPAQ);
 67 |         return;
 68 |     }
 69 |     if matches.is_present("info-zpaql") {
 70 |         println!("{}", documentation::INFO_ZPAQL);
 71 |         return;
 72 |     }
 73 |     if matches.is_present("info-zpaqlpy") {
 74 |         println!("{}", documentation::INFO_ZPAQLPY);
 75 |         return;
 76 |     }
 77 |     if matches.is_present("info-zpaqlir") {
 78 |         println!("{}", documentation::INFO_ZPAQLIR);
 79 |         return;
 80 |     }
 81 |     if matches.is_present("info-tutorial") {
 82 |         println!("{}", documentation::INFO_TUTORIAL);
 83 |         return;
 84 |     }
 85 |     // please keep in sync with defaults in options.rs
 86 |     optioncfg.suppress_pcomp = matches.is_present("suppress-pcomp");
 87 |     optioncfg.suppress_hcomp = matches.is_present("suppress-hcomp");
 88 |     optioncfg.disable_comp = matches.is_present("disable-comp");
 89 |     optioncfg.disable_optim = matches.is_present("disable-optim");
 90 |     optioncfg.ignore_errors = matches.is_present("ignore-errors");
 91 |     optioncfg.fixed_global_access = matches.is_present("fixed-global-access");
 92 |     optioncfg.temp_debug_cfg = !matches.is_present("notemp_debug_cfg");
 93 |     optioncfg.emit_ir = matches.is_present("S");
 94 |     optioncfg.extern_tokenizer = matches.is_present("extern-tokenizer");
 95 |     optioncfg.comments = !matches.is_present("no-comments");
 96 |     optioncfg.no_post_zpaql = matches.is_present("no-post-zpaql");
 97 |     optioncfg.pc_as_comment = !matches.is_present("no-pc-comments");
 98 |     let log_level = match matches.occurrences_of("v") { // number of -v flags selects the log level
 99 |         0 => "error",
100 |         1 => "warn",
101 |         2 => "info",
102 |         3 => "debug",
103 |         4 | _ => "trace",
104 |     };
105 |     init(LogConfig::new(), Some(log_level.to_string())).unwrap();
106 |     if matches.is_present("stacksize") {
107 |         optioncfg.stacksize = u32::from_str(matches.value_of("stacksize").unwrap()).unwrap_or_else(|e| {
108 |             error!("stacksize must be a number: {}", e);
109 |             panic!("error") });
110 |     }
111 | 
112 |     // write out an empty python template source file and quit
113 |     if matches.is_present("emit-template") {
114 |         if matches.is_present("output") {
115 |             let template_name = matches.value_of("output").unwrap().to_string();
116 |             let mut template_file = std::fs::OpenOptions::new().write(true).truncate(true).create(true).open(
117 |                     &std::path::Path::new(&template_name[..])
118 |             ).unwrap_or_else(
119 |                         |e| { error!("Could not create {}: {}", template_name, e); exit(3) }
120 |             );
121 |             write!(template_file, "{}", template::EMPTY_SOURCE).unwrap();
122 |         } else {
123 |             println!("{}", template::EMPTY_SOURCE);
124 |         }
125 |         return;
126 |     }
127 |     let mut input = String::new(); // content of source file
128 |     match matches.value_of("INPUT").unwrap_or_else(|| { error!("No input file specified. Invoke with --help or -h to see usage."); exit(1) } ) {
129 |         "-" => {
130 |             {
131 |             let stdin = std::io::stdin();
132 |             stdin.lock().read_to_string(&mut input).unwrap();
133 |             }
134 |         },
135 |         filename => {
136 |             std::fs::File::open(&std::path::Path::new(filename)).unwrap_or_else(
137 |                     |e| { error!("Could not open {}: {}", filename, e); exit(2) }
138 |                 ).read_to_string(&mut input).unwrap();
139 |         },
140 |     };
141 |     let outname = if matches.is_present("output") {
142 |         matches.value_of("output").unwrap().to_string()
143 |     } else { // derive the default name from INPUT: replace a .py suffix (or append) .cfg, or .ir with -S
144 |         let mut inp = matches.value_of("INPUT").unwrap_or("out.py").to_string();
145 |         if inp == "-" { inp = "out.py".to_string(); }
146 |         if inp.ends_with(".py") {
147 |             inp.pop(); inp.pop(); // "x.py" -> "x."
148 |         } else if !inp.ends_with('.') { inp.push('.'); } // "x" -> "x." so that the new suffix is always separated by a dot
149 |         if optioncfg.emit_ir {
150 |             inp.push_str("ir");
151 |         } else {
152 |             inp.push_str("cfg");
153 |         }
154 |         inp
155 |     };
156 |     // create output file
157 |     let output = std::fs::OpenOptions::new().write(true).truncate(true).create(true).open(&std::path::Path::new(&outname[..])).unwrap_or_else(
158 |             |e| { error!("Could not create {}: {}", outname, e); exit(3) }
159 |         );
160 |     // start compiler
161 |     let zcfgfile = compile(&optioncfg, input, output);
162 |     if matches.is_present("run-hcomp") && zcfgfile.is_some() {  // support debugging of computation in hcomp like the python script
163 |         let hinput = matches.value_of("run-hcomp").unwrap();
164 |         let zcfg = zcfgfile.unwrap();
165 |         let (mut hvm, _) = zpaqlvm::ZPAQLVM::new(&zcfg);
166 |         for byte in std::fs::File::open(&std::path::Path::new(hinput)).unwrap_or_else(
167 |                         |e| { error!("Could not open {}: {}", hinput, e); exit(2) }
168 |                     ).bytes() {
169 |             let b = byte.unwrap();
170 |             hvm.run(b as u32); // one hcomp run per input byte
171 |             println!("{}: {:?}", b, &hvm.h[0..zcfg.n as usize]);
172 |         }
173 |     }
174 | }
175 | 
176 | /// parses the input source string (which must be based on a template and its conditions)
177 | /// according to the options, by first tokenizing the input and then splitting up the sections
178 | /// before parsing, prefixed with the common code in comp-section
179 | /// and gives back the ASTs for these sections (hcomp, pcomp), both starting with the same comp-code
180 | /// (see tok::seperate_sections); panics with "parser error" if one of the sections can not be parsed
181 | fn parse(optioncfg: &options::Options, input: &String) -> (Vec<ast::Stmt>, Vec<ast::Stmt>) {
182 |     let tokens = if optioncfg.extern_tokenizer { // external tokenizer is requested
183 |         let tokens_extern = tok::tokenize(input); // calls python -m tokenize -e
184 |         let result = panic::catch_unwind(|| {
185 |             let tokens  = rtok::tokenize(input); // compare output with internal tokenizer
186 |             // zip() below stops at the shorter list, so a differing token count has to be reported separately
187 |             if tokens.len() != tokens_extern.len() { error!("token count differs (intern, extern): {} ←→ {}", tokens.len(), tokens_extern.len()); }
188 |             for (t, te) in tokens.iter().zip(tokens_extern.iter()) {
189 |                 if t != te { error!("tokens differ (intern, extern): {:?} ←→ {:?}", t, te); }
190 |             }
191 |         });
192 |         if let Err(_) = result { // Err(err)
193 |             error!("internal tokenizer failed on input, run without external tokenizer to see it's error message");
194 |             // panic::resume_unwind(err);
195 |         }
196 |         tokens_extern
197 |     } else {
198 |         rtok::tokenize(input) // use internal tokenizer only
199 |     };
200 |     let (hcomp, pcomp) = tok::seperate_sections(tokens);
201 |     info!("extracted section hcomp:");
202 |     for tokn in &hcomp {
203 |         debug!("  {:?},", tokn);
204 |     }
205 |     info!("extracted section pcomp:");
206 |     for tokn in &pcomp {
207 |         debug!("  {:?},", tokn);
208 |     }
209 |     info!("end of extracted sections");
210 |     let parsed_hcomp = grammar::ProgParser::new().parse(hcomp).unwrap_or_else(|e| panic!("parser error: {:?}", e) );
211 |     info!("parsed grammar hcomp");
212 |     debug!("[\n  {}]", parsed_hcomp.iter().map(|st| format!("{}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n  "));
213 |     let parsed_pcomp = grammar::ProgParser::new().parse(pcomp).unwrap_or_else(|e| panic!("parser error: {:?}", e) ); // same report as for hcomp
214 |     info!("parsed grammar pcomp");
215 |     debug!("[\n  {}]", parsed_pcomp.iter().map(|st| format!("{}", st).replace("\n", "\n  ")).collect::<Vec<String>>()[..].join(",\n  "));
216 |     (parsed_hcomp, parsed_pcomp)
217 | }
218 | 
219 | fn xcomp_ir_string(convert: bool, xcomp_ir: &[ir::IR]) -> String { // one line per statement; convert=true expands and breaks down meta-instructions first
220 |     xcomp_ir.iter().map(|stmt| if convert { stmt.convert().to_string() } else { stmt.to_string() }).collect::<Vec<String>>().join("\n")
221 | }
222 | 
223 | /// compile ASTs to IR code for hcomp and pcomp and read in the comp-section to zpaqcfgfile
224 | fn build_ir(optioncfg: &options::Options, parsed_hcomp: Vec<ast::Stmt>, parsed_pcomp: Vec<ast::Stmt>, input: String) -> (zpaqcfg::ZPAQCfgFile, Vec<ir::IR>, Vec<ir::IR>) {
225 |     // the first 6 assignments contain the values for ph, pm, hh, hm, n and pcomp_invocation
226 |     // report a source that is too short explicitly, the slicing below would only panic with a bare index error
227 |     if parsed_hcomp.len() < 6 || parsed_pcomp.len() < 6 { error!("source does not follow the template: the 6 leading assignments are missing"); panic!("error"); }
228 |     let zpaqcfgfile = gen_ir::read_context_model(&parsed_pcomp[..6], optioncfg);
229 |     info!("generate IR for hcomp");
230 |     let hcomp_ir = post_ir::optimise(gen_ir::gen_code(true, &parsed_hcomp[6..], &zpaqcfgfile, input.clone(), optioncfg), optioncfg);
231 |     debug!("\n{}", xcomp_ir_string(false, &hcomp_ir[..]));
232 |     info!("generate IR for pcomp");
233 |     let pcomp_ir = post_ir::optimise(gen_ir::gen_code(false, &parsed_pcomp[6..], &zpaqcfgfile, input, optioncfg), optioncfg);
234 |     debug!("\n{}", xcomp_ir_string(false, &pcomp_ir[..]));
235 |     (zpaqcfgfile, hcomp_ir, pcomp_ir)
236 | }
237 | 
238 | /// compile input source file and write a ZPAQ configuration to output, following options as specified
239 | fn compile(optioncfg: &options::Options, input: String, mut output: File) -> Option<zpaqcfg::ZPAQCfgFile> {
240 |     let (parsed_hcomp, parsed_pcomp) = parse(optioncfg, &input);
241 |     let (mut zpaqcfgfile, hcomp_ir, pcomp_ir) = build_ir(optioncfg, parsed_hcomp, parsed_pcomp, input);
242 |     if optioncfg.disable_comp { zpaqcfgfile.n = 0; } // suppress usage of context-mixing model
243 |     // hcomp is only wanted if a CM model is present and hcomp is not suppressed, pcomp only if it is not suppressed
244 |     let want_hcomp = zpaqcfgfile.n > 0 && !optioncfg.suppress_hcomp && !hcomp_ir.is_empty();
245 |     let want_pcomp = !optioncfg.suppress_pcomp && !pcomp_ir.is_empty();
246 |     if optioncfg.emit_ir { // do not write out compiled ZPAQL code to file but IR code
247 |         info!("write out IR cfg file");
248 |         if want_hcomp { zpaqcfgfile.hcomp = vec![zpaql::ZPAQLOp::Halt]; } // fill with dummy content
249 |         if want_pcomp { zpaqcfgfile.pcomp = vec![zpaql::ZPAQLOp::Halt]; } // fill with dummy content
250 |         zpaqcfgfile.finalise(optioncfg).unwrap();
251 |         zpaqcfgfile.write_header(&output);
252 |         write!(output, "hcomp\n").unwrap();  // similar implementation as .write_hcomp and .write_pcomp but for IR
253 |         if !zpaqcfgfile.hcomp.is_empty() {
254 |             info!("emit IR for hcomp");
255 |             write!(output, "{}\n", xcomp_ir_string(true, &hcomp_ir[..])).unwrap();
256 |         }
257 |         if !zpaqcfgfile.pcomp.is_empty() {
258 |             info!("emit IR for pcomp");
259 |             write!(output, "pcomp\n{}\n", xcomp_ir_string(true, &pcomp_ir[..])).unwrap();
260 |         }
261 |         write!(output, "end\n").unwrap();
262 |         return None; // no ZPAQL was generated, so there is nothing to hand back
263 |     }
264 |     if want_hcomp {
265 |         info!("generate ZPAQL for hcomp");
266 |         zpaqcfgfile.hcomp = vec![zpaql::ZPAQLOp::RsetA{n: 255}];
267 |         let mut cache = gen_zpaql::Cache{last_hold: HashMap::<zpaql::Loc, ir::IRVar>::new()};
268 |         let ops = post_zpaql::replace_array_assignment(gen_zpaql::emit_zpaql(&hcomp_ir, &mut cache, optioncfg), optioncfg);
269 |         zpaqcfgfile.hcomp.extend_from_slice(&ops);
270 |     }
271 |     if want_pcomp {
272 |         info!("generate ZPAQL for pcomp");
273 |         zpaqcfgfile.pcomp = vec![zpaql::ZPAQLOp::RsetA{n: 255}];
274 |         let mut cache = gen_zpaql::Cache{last_hold: HashMap::<zpaql::Loc, ir::IRVar>::new()};
275 |         let ops = post_zpaql::replace_array_assignment(gen_zpaql::emit_zpaql(&pcomp_ir, &mut cache, optioncfg), optioncfg);
276 |         zpaqcfgfile.pcomp.extend_from_slice(&ops);
277 |     }
278 |     zpaqcfgfile.finalise(optioncfg).unwrap();
279 |     debug!("hcomp:\n{}", zpaqcfgfile.hcomp.iter().map(|op| format!("  {}", op)).collect::<Vec<String>>().join("\n"));
280 |     debug!("pcomp:\n{}", zpaqcfgfile.pcomp.iter().map(|op| format!("  {}", op)).collect::<Vec<String>>().join("\n"));
281 |     info!("write out ZPAQL cfg file");
282 |     zpaqcfgfile.write_header(&output);
283 |     zpaqcfgfile.write_hcomp(&output, optioncfg);
284 |     zpaqcfgfile.write_pcomp(&output, optioncfg);
285 |     zpaqcfgfile.write_end(&output);
286 |     Some(zpaqcfgfile)
287 | }
288 | 
289 | /*
290 | #[test]
291 | fn tokenizer() {
292 |     //assert!(grammar::ProgParser::new().parse(tok::tokenize("\n22\n")).is_ok());
293 |     assert_eq!(tok::tokenize("pass"), rtok::tokenize("pass"));  // relies on external programme call
294 | }
295 | */
296 | 
297 | 


--------------------------------------------------------------------------------
/src/options.rs:
--------------------------------------------------------------------------------
 1 | 
 2 | /// command line options and their default values (please keep in sync with the option parsing in main.rs)
 3 | 
 4 | pub struct Options { // one field per command line switch, assigned in main() of main.rs
 5 |     pub emit_ir: bool, // -S: write only intermediate representation code to output
 6 |     pub suppress_hcomp: bool, // --suppress-hcomp: emit an empty hcomp section
 7 |     pub suppress_pcomp: bool, // --suppress-pcomp: emit an empty pcomp section
 8 |     pub disable_comp: bool, // --disable-comp: no context-mixing components and arithmetic coding
 9 |     pub extern_tokenizer: bool, // --extern-tokenizer: use python3 -m tokenize -e instead of the internal tokenizer
10 |     pub comments: bool, // false with --no-comments: original code lines as comments beside output
11 |     pub stacksize: u32, // --stacksize=NUMBER: size of the stack
12 |     pub disable_optim: bool, // --disable-optim: disable lifetime optimisation passes
13 |     pub fixed_global_access: bool, // --fixed-global-access: calculate full address for globals on each access
14 |     pub ignore_errors: bool, // --ignore-errors: continue for some errors
15 |     pub pc_as_comment: bool, // false with --no-pc-comments: annotate programme counter for opcodes
16 |     pub no_post_zpaql: bool, // --no-post-zpaql: disable the ZPAQL optimisation pass
17 | 
18 |     pub temp_debug_cfg: bool, // false with --notemp_debug_cfg ("temporary new feature")
19 | }
20 | 
21 | impl Options {
22 |     pub fn new() -> Options { // every switch at its default value, fields in declaration order
23 |         Options{ // anyway overwritten in main.rs, but try to keep in sync
24 |             emit_ir: false,
25 |             suppress_hcomp: false,
26 |             suppress_pcomp: false,
27 |             disable_comp: false,
28 |             extern_tokenizer: false,
29 |             comments: true,
30 |             stacksize: 1048576,  // 1 MiB
31 |             disable_optim: false,
32 |             fixed_global_access: false,
33 |             ignore_errors: false,
34 |             pc_as_comment: true,
35 |             no_post_zpaql: false,
36 |             temp_debug_cfg: true,
37 |         }
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/post_ir.rs:
--------------------------------------------------------------------------------
  1 | use ir::{IR, IRVar};
  2 | use options;
  3 | 
  4 | pub fn optimise(ir_code: Vec<IR>, optioncfg: &options::Options) -> Vec<IR> {
  5 |     let flat = remove_unused_functions(deblock(ir_code)); // these two passes always run
  6 |     if optioncfg.disable_optim {
  7 |         return flat;
  8 |     }
  9 |     // optional passes: drop redundant assignments, then slim down the temp var save/load around calls
 10 |     lighten_save_load(remove_unused_assignments(flat), optioncfg)
 11 | }
 12 | 
 13 | pub fn deblock(ir_code: Vec<IR>) -> Vec<IR> {
 14 |     let mut flat = Vec::new();
 15 |     for stmt in ir_code {
 16 |         match stmt {
 17 |             IR::Block{stmts} => flat.extend(deblock(stmts)), // splice nested blocks in recursively
 18 |             other => flat.push(other),
 19 |         }
 20 |     }
 21 |     flat
 22 | }
 23 | 
 24 | // maybe in the future: move code out of loop for speed?
 25 | 
 26 | pub fn remove_unused_assignments(ir_code: Vec<IR>) -> Vec<IR> {
 27 |     // @TODO:
 28 |     // keep original variable if it's read-only and not changed
 29 |     // replace if with ifNeq? in gen_ir?! +++ IfNeq/Eq in gen_zpaql.rs, parser, cases and match on eval
 30 |     // ir:if with ==? how to optimise? IR-pass: t3=a!=b,
 31 |     //       replace if t3 goto with ifNeq a b (before unusedassignments pass)
 32 |     // t8 = H[17] * 3
 33 |     // H[18] = t8
 34 | 
 35 |     // so far the only thing removed are self-assignments of a temporary (tX = tX)
 36 |     ir_code.into_iter().filter(|cmd| {
 37 |         match *cmd {
 38 |             IR::Assign{target: IRVar::Var{varid: dst}, source: IRVar::Var{varid: src}} => {
 39 |                 dst != src // keep real copies between different temporaries
 40 |             },
 41 |             _ => {
 42 |                 true // every other instruction passes through unchanged
 43 |             },
 44 |         }
 45 |     })
 46 |     .collect()
 47 | }
 48 | 
 49 | pub fn remove_unused_functions(mut ir_code: Vec<IR>) -> Vec<IR> { // drops the code of every function that is not the target of any Call
 50 |     loop { // repeat until a pass deletes nothing: the calls inside a removed function vanish and may leave its callees unused
 51 |         let mut used = vec![]; // labels that are the target of at least one Call
 52 |         let mut irc = vec![]; // code kept by this pass
 53 |         for cmd in ir_code.iter() {
 54 |             match cmd {
 55 |                 &IR::Call{ref label, args: _, stack_pos: _, ret_id: _} => { used.push(label.clone()); },
 56 |                 _ => {},
 57 |             }
 58 |         }
 59 |         let mut deleted =  0; // number of functions removed in this pass
 60 |         let mut in_func = "".to_string();  // end label of function to be removed, empty while not inside one
 61 |         let mut remove_ret_ids = vec![]; // return ids of the calls that were dropped together with a removed function
 62 |         for cmd in ir_code {
 63 |             match cmd {
 64 |                 IR::GoTo{label} => {
 65 |                     if in_func.is_empty() && label.ends_with("_end~") { // "goto NAME_end~" is taken as the start of function NAME (presumably the jump over its body)
 66 |                         if !used.contains(&(label[..label.len()-5].to_string())) { // strip the 5 bytes of "_end~" to get NAME
 67 |                             debug!("removing function {}", &label[..label.len()-5]);
 68 |                             in_func = label; // from here on everything is dropped up to and including this label
 69 |                         } else {
 70 |                             irc.push(IR::GoTo{label: label});
 71 |                         }
 72 |                     } else if in_func.is_empty() {
 73 |                         irc.push(IR::GoTo{label: label});
 74 |                     } // a goto inside a function being removed is dropped
 75 |                 },
 76 |                 IR::Label{label} => {
 77 |                     if in_func.is_empty() {
 78 |                         irc.push(IR::Label{label: label});
 79 |                     } else if label == in_func { // end label reached: removal is complete, the label itself is dropped too
 80 |                         in_func = "".to_string();
 81 |                         deleted += 1;
 82 |                     }
 83 |                 },
 84 |                 IR::Call{label, args, stack_pos, ret_id} => {
 85 |                     if in_func.is_empty() {
 86 |                         irc.push(IR::Call{label: label, args: args, stack_pos: stack_pos, ret_id: ret_id});
 87 |                     } else {
 88 |                         remove_ret_ids.push(ret_id); // remember the id so that JumpCode can forget it
 89 |                     }
 90 |                 },
 91 |                 IR::JumpCode{mut ret_ids, stackend} => {
 92 |                     if in_func.is_empty() {
 93 |                         ret_ids.retain(|i| !remove_ret_ids.contains(i) ); // NOTE(review): only filters ids collected before this point of the pass - confirm JumpCode always follows the functions
 94 |                         irc.push(IR::JumpCode{ret_ids: ret_ids, stackend: stackend});
 95 |                     }
 96 |                 },
 97 |                 c => {
 98 |                     if in_func.is_empty() { // anything else is kept unless it belongs to a function being removed
 99 |                         irc.push(c);
100 |                     }
101 |                 },
102 |             }
103 |         }
104 |         ir_code = irc;
105 |         if deleted == 0 {
106 |             break;
107 |         }
108 |     }
109 |     ir_code
110 | }
111 | 
112 | 
 113 | // @TODO: if tx holds a non-temporary variable (e.g. a local variable), it needs to stay live
 114 | // until the end of a while loop! Currently not a problem, as all tx are finished after a single Python statement.
115 | 
 116 | /// Lifetime optimisation to exclude non-live temporary variables from being stored on stack before a call: the IR is walked backwards, the ids of variables that are still read later are collected, and the id lists of StoreTempVars/LoadTempVars are trimmed to those ids.
 117 | /// Expects to get input from deblock(), so a flat vec without blocks. Returns the rewritten IR in the original order; logs an error (and panics unless `optioncfg.ignore_errors` is set) if a MarkTempVarEnd is met while a scope is already open.
 118 | pub fn lighten_save_load(mut ir_code: Vec<IR>, optioncfg: &options::Options) -> Vec<IR> {
 119 |     let mut live_ids = vec![];  // ids that are read by instructions later in program order (collected while walking backwards)
 120 |     let mut non_store_live_ids = vec![];  // argument ids of calls that are met while not in scope, merged into live_ids at the next StoreTempVars
 121 |     let mut in_scope = false;  // true while liveness is tracked
 122 |     let mut left_in_scope = false;  // true after a LoadTempVars was handled until the matching StoreTempVars is reached
 123 |     let mut irc = vec![];  // output, built in reverse order
 124 |     ir_code.reverse();  // iterate from the last instruction to the first
 125 |     for cmd in ir_code {
 126 |         let c = cmd.clone();  // unmodified copy for the output, the match below consumes cmd
 127 |         match (cmd, in_scope) {
 128 |             (IR::MarkTempVarEnd, true) => {  // @TODO: support with push on vectors like in symboltable
 129 |                 error!("inner function detected, not yet supported for optimisation, use --disable-optim");
 130 |                 if !optioncfg.ignore_errors {
 131 |                     panic!("error");
 132 |                 }
 133 |             },  // NOTE(review): this arm never pushes the marker to irc, so it is dropped when errors are ignored — confirm this is intended
 134 |             (IR::MarkTempVarEnd, false) => {  // walking backwards the end marker is seen first: open a scope with empty liveness state
 135 |                 in_scope = true;
 136 |                 live_ids.clear();
 137 |                 non_store_live_ids.clear();
 138 |                 irc.push(c);
 139 |             },
 140 |             (IR::MarkTempVarStart, true) => {  // start marker closes the scope again
 141 |                 in_scope = false;
 142 |                 live_ids.clear();
 143 |                 non_store_live_ids.clear();
 144 |                 irc.push(c);
 145 |             },
 146 |             (IR::Assign{target, source}, true) => {
 147 |                 match target {
 148 |                     IRVar::Var{varid} => { live_ids.retain(|&x| x != varid); },  // the variable is overwritten here, so its previous value is not live before this point
 149 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },  // writing to M[..] reads the index variable
 150 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },  // writing to H[..] reads the index variable
 151 |                     _ => {},
 152 |                 }
 153 |                 match source {  // every variable read by the source is live
 154 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 155 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 156 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 157 |                     _ => {},
 158 |                 }
 159 |                 irc.push(c);
 160 |             },
 161 |             (IR::Assign2Op{target, val1, op: _, val2}, true) => {  // same rules as Assign, with two operands being read
 162 |                 match target {
 163 |                     IRVar::Var{varid} => { live_ids.retain(|&x| x != varid); },
 164 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 165 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 166 |                     _ => {},
 167 |                 }
 168 |                 match val1 {
 169 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 170 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 171 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 172 |                     _ => {},
 173 |                 }
 174 |                 match val2 {
 175 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 176 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 177 |                     IRVar::H{index_varid, orig_name: _, } => { live_ids.push(index_varid); },
 178 |                     _ => {},
 179 |                 }
 180 |                 irc.push(c);
 181 |             },
 182 |             (IR::Assign1Op{target, uop: _, source}, true) => {  // same rules as Assign
 183 |                 match target {
 184 |                     IRVar::Var{varid} => { live_ids.retain(|&x| x != varid); },
 185 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 186 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 187 |                     _ => {},
 188 |                 }
 189 |                 match source {
 190 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 191 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 192 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 193 |                     _ => {},
 194 |                 }
 195 |                 irc.push(c);
 196 |             },
 197 |             (IR::Out{var}, true) => {  // the remaining in-scope arms only read variables, so they only add to live_ids
 198 |                 match var {
 199 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 200 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 201 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 202 |                     _ => {},
 203 |                 }
 204 |                 irc.push(c);
 205 |             },
 206 |             (IR::If{cond_var, goto_label: _}, true) => {
 207 |                 match cond_var {
 208 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 209 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 210 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 211 |                     _ => {},
 212 |                 }
 213 |                 irc.push(c);
 214 |             },
 215 |             (IR::IfN{cond_var, goto_label: _}, true) => {
 216 |                 match cond_var {
 217 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 218 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 219 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 220 |                     _ => {},
 221 |                 }
 222 |                 irc.push(c);
 223 |             },
 224 |             (IR::IfEq{val1, val2, goto_label: _}, true) => {
 225 |                 match val1 {
 226 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 227 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 228 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 229 |                     _ => {},
 230 |                 }
 231 |                 match val2 {
 232 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 233 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 234 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 235 |                     _ => {},
 236 |                 }
 237 |                 irc.push(c);
 238 |             },
 239 |             (IR::IfNeq{val1, val2, goto_label: _}, true) => {
 240 |                 match val1 {
 241 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 242 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 243 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 244 |                     _ => {},
 245 |                 }
 246 |                 match val2 {
 247 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 248 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 249 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 250 |                     _ => {},
 251 |                 }
 252 |                 irc.push(c);
 253 |             },
 254 |             (IR::Return{var: Some(varx)}, true) => {
 255 |                 match varx {
 256 |                     IRVar::Var{varid} => { live_ids.push(varid); },
 257 |                     IRVar::M{index_varid} => { live_ids.push(index_varid); },
 258 |                     IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 259 |                     _ => {},
 260 |                 }
 261 |                 irc.push(c);
 262 |             },
 263 |             (IR::Call{label: _, args, stack_pos: _, ret_id: _}, true) => {  // call met while in scope: its arguments are ordinary reads
 264 |                 for var in args {
 265 |                     match var {
 266 |                         IRVar::Var{varid} => { live_ids.push(varid); },
 267 |                         IRVar::M{index_varid} => { live_ids.push(index_varid); },
 268 |                         IRVar::H{index_varid, orig_name: _} => { live_ids.push(index_varid); },
 269 |                         _ => {},
 270 |                     }
 271 |                 }
 272 |                 irc.push(c);
 273 |             },
 274 |             (IR::Call{label: _, args, stack_pos: _, ret_id: _}, false) => {  // sometimes the lifetime of arguments spans multiple store/load cycles if the call arguments are calls themselves
 275 |                 for var in args {
 276 |                     match var {
 277 |                         IRVar::Var{varid} => { non_store_live_ids.push(varid); },
 278 |                         IRVar::M{index_varid} => { non_store_live_ids.push(index_varid); },
 279 |                         IRVar::H{index_varid, orig_name: _} => { non_store_live_ids.push(index_varid); },
 280 |                         _ => {},
 281 |                     }
 282 |                 }
 283 |                 irc.push(c);
 284 |             },
 285 |             (IR::StoreTempVars{ti, stack_pos}, false) => {
 286 |                 // in_scope is false here so that return assignment and call arguments are not counted as live,
 287 |                 // but exactly the same list as in LoadTempVars is used
 288 |                 if left_in_scope {  // only a store that belongs to an already handled LoadTempVars is trimmed
 289 |                     let mut tn = vec![];
 290 |                     debug!("Store as before {:?}", ti);
 291 |                     for i in ti {
 292 |                         if live_ids.contains(&i) {
 293 |                             tn.push(i);
 294 |                         }
 295 |                     }
 296 |                     debug!("Store after optimisation {:?}", tn);
 297 |                     irc.push(IR::StoreTempVars{ti: tn, stack_pos: stack_pos});
 298 |                     in_scope = true;  // resume liveness tracking for the code before the store
 299 |                     left_in_scope = false;
 300 |                     for i in non_store_live_ids.iter() {  // they are needed as call arguments after store but don't need to be stored
 301 |                         if !live_ids.contains(i) {
 302 |                             live_ids.push(*i);
 303 |                         }
 304 |                     }
 305 |                     non_store_live_ids.clear();
 306 |                 } else {
 307 |                     irc.push(c);  // no load was handled for this store: keep it unchanged
 308 |                 }
 309 |             },
 310 |             (IR::LoadTempVars{ti, stack_pos}, true) => {  // seen before its StoreTempVars because of the reverse walk
 311 |                 let mut tn = vec![];
 312 |                 debug!("Load as before {:?}", ti);
 313 |                 for i in ti {
 314 |                     if live_ids.contains(&i) {  // keep only ids that are still read after the call
 315 |                         tn.push(i);
 316 |                     }
 317 |                 }
 318 |                 debug!("Load after optimisation {:?}", tn);
 319 |                 irc.push(IR::LoadTempVars{ti: tn, stack_pos: stack_pos});
 320 |                 in_scope = false;  // pause tracking until the matching StoreTempVars
 321 |                 left_in_scope = true;
 322 |                 non_store_live_ids.clear();
 323 |             },
 324 |             (_, _) => { irc.push(c); },  // everything else is copied unchanged
 325 |         }
 326 |     }
 327 |     irc.reverse();  // restore program order
 328 |     irc
 329 | }
330 | 
331 | 


--------------------------------------------------------------------------------
/src/post_zpaql.rs:
--------------------------------------------------------------------------------
  1 | use zpaql::{ZPAQLOp, Loc, Reg, OtherReg};
  2 | use options;
  3 | 
  4 | /// optimise for assignments on VM-arrays:  t1 = t1 + 1; M[t1] = byte; t1 = t1 + 1; M[t1] = byte … A run of "a++, r=a 1, c=a, *c=value" groups is rewritten to "c++, *c=value" per group, and only the last group of a run writes the counter back with "a=c, r=a 1". Returns the input unchanged if `optioncfg.no_post_zpaql` is set.
  5 | pub fn replace_array_assignment(zcode: Vec<ZPAQLOp>, optioncfg: &options::Options) -> Vec<ZPAQLOp> {
  6 |     if optioncfg.no_post_zpaql {
  7 |         return zcode;
  8 |     }
  9 |     let mut code = vec![];  // output, built in reverse order
 10 |     let mut j: i64 = (zcode.len() as i64) - 1i64;  // index of the op currently looked at, walking from the end; signed so that the loop can stop below 0
 11 |     let mut ismatch = false;  // true if the ops directly following (in program order) were rewritten by one of the patterns
 12 |     while j >= 0 {
 13 |         let i = j as usize;
 14 |         if i >= 6 {  // the window below needs the seven ops i-6 ..= i
 15 |             match (ismatch, &zcode[i-6], &zcode[i-5], &zcode[i-4], &zcode[i-3], &zcode[i-2], &zcode[i-1], &zcode[i]) {
 16 |                 //     (cmt)    (cmt)    a++    r=a 1    (cmt)    c=a    *c= 3
 17 |                 //     (cmt)    (cmt)    a++    r=a 1    (cmt)    c=a    *c= 3
 18 |                 // becomes:
 19 |                 //     (cmt) c++      *c= 3
 20 |                 //     (cmt) c++     *c= 3  a=c   r=a 1
 21 |                 (false, &ZPAQLOp::Comment{ref comment}, &ZPAQLOp::Comment{comment: _}, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 22 |                         &ZPAQLOp::RsetA{n: 1}, &ZPAQLOp::Comment{comment: _},
 23 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
 24 |                         &ZPAQLOp::SetN{target: Loc::MC, n: value}  ) => {
 25 |                         // last occurrence, save to R from C
 26 |                         // reverse order for code!
 27 |                         code.push(ZPAQLOp::RsetA{n: 1});
 28 |                         code.push(ZPAQLOp::Set{target: Loc::Reg(Reg::A), source: Loc::Reg(Reg::OtherReg(OtherReg::C))});
 29 |                         code.push(ZPAQLOp::SetN{target: Loc::MC, n: value});
 30 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
 31 |                         code.push(ZPAQLOp::Comment{comment: comment.clone()});  // only the first of the three comments is kept
 32 |                         j -= 6;  // jump over all seven (together with the common `j -= 1` at the end of the loop body)
 33 |                         ismatch = true;
 34 |                 },
 35 |                 (true, &ZPAQLOp::Comment{ref comment}, &ZPAQLOp::Comment{comment: _}, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 36 |                         &ZPAQLOp::RsetA{n: 1}, &ZPAQLOp::Comment{comment: _},
 37 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
 38 |                         &ZPAQLOp::SetN{target: Loc::MC, n: value}  ) => {
 39 |                         // after first occurrence, don't save R again
 40 |                         // reverse order for code!
 41 |                         code.push(ZPAQLOp::SetN{target: Loc::MC, n: value});
 42 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
 43 |                         code.push(ZPAQLOp::Comment{comment: comment.clone()});
 44 |                         j -= 6;  // jump over all seven
 45 |                         ismatch = true;
 46 |                 },
 47 |                 //     (cmt)    (cmt)    a++    r=a 1    (cmt)    c=a    *c=0
 48 |                 //     (cmt)    (cmt)    a++    r=a 1    (cmt)    c=a    *c=0
 49 |                 // becomes:
 50 |                 //     (cmt) c++      *c=0
 51 |                 //     (cmt) c++     *c=0  a=c   r=a 1
 52 |                 (false, &ZPAQLOp::Comment{ref comment}, &ZPAQLOp::Comment{comment: _}, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 53 |                         &ZPAQLOp::RsetA{n: 1}, &ZPAQLOp::Comment{comment: _},
 54 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
 55 |                         &ZPAQLOp::Zero(Loc::MC)  ) => {
 56 |                         // last occurrence, save to R from C
 57 |                         // reverse order for code!
 58 |                         code.push(ZPAQLOp::RsetA{n: 1});
 59 |                         code.push(ZPAQLOp::Set{target: Loc::Reg(Reg::A), source: Loc::Reg(Reg::OtherReg(OtherReg::C))});
 60 |                         code.push(ZPAQLOp::Zero(Loc::MC));
 61 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
 62 |                         code.push(ZPAQLOp::Comment{comment: comment.clone()});
 63 |                         j -= 6;  // jump over all seven
 64 |                         ismatch = true;
 65 |                 },
 66 |                 (true, &ZPAQLOp::Comment{ref comment}, &ZPAQLOp::Comment{comment: _}, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 67 |                         &ZPAQLOp::RsetA{n: 1}, &ZPAQLOp::Comment{comment: _},
 68 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
 69 |                         &ZPAQLOp::Zero(Loc::MC)  ) => {
 70 |                         // after first occurrence, don't save R again
 71 |                         // reverse order for code!
 72 |                         code.push(ZPAQLOp::Zero(Loc::MC));
 73 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
 74 |                         code.push(ZPAQLOp::Comment{comment: comment.clone()});
 75 |                         j -= 6;  // jump over all seven
 76 |                         ismatch = true;
 77 |                 },
 78 |                 // ### same without comments ### (the first three ops of the window are ignored, only the last four are matched)
 79 |                 //     a++    r=a 1       c=a    *c= 3
 80 |                 //     a++    r=a 1      c=a    *c= 3
 81 |                 // becomes:
 82 |                 //     c++      *c= 3
 83 |                 //     c++     *c= 3  a=c   r=a 1
 84 |                 (false, _, _, _, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 85 |                         &ZPAQLOp::RsetA{n: 1},
 86 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
 87 |                         &ZPAQLOp::SetN{target: Loc::MC, n: value}  ) => {
 88 |                         // last occurrence, save to R from C
 89 |                         // reverse order for code!
 90 |                         code.push(ZPAQLOp::RsetA{n: 1});
 91 |                         code.push(ZPAQLOp::Set{target: Loc::Reg(Reg::A), source: Loc::Reg(Reg::OtherReg(OtherReg::C))});
 92 |                         code.push(ZPAQLOp::SetN{target: Loc::MC, n: value});
 93 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
 94 |                         j -= 3;  // jump over all four (together with the common `j -= 1` at the end of the loop body)
 95 |                         ismatch = true;
 96 |                 },
 97 |                 (true, _, _, _, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
 98 |                         &ZPAQLOp::RsetA{n: 1},
 99 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
100 |                         &ZPAQLOp::SetN{target: Loc::MC, n: value}  ) => {
101 |                         // after first occurrence, don't save R again
102 |                         // reverse order for code!
103 |                         code.push(ZPAQLOp::SetN{target: Loc::MC, n: value});
104 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
105 |                         j -= 3;  // jump over all four
106 |                         ismatch = true;
107 |                 },
108 |                 //     a++    r=a 1      c=a    *c=0
109 |                 //     a++    r=a 1      c=a    *c=0
110 |                 // becomes:
111 |                 //     c++      *c=0
112 |                 //     c++     *c=0  a=c   r=a 1
113 |                 (false, _, _, _, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
114 |                         &ZPAQLOp::RsetA{n: 1},
115 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
116 |                         &ZPAQLOp::Zero(Loc::MC)  ) => {
117 |                         // last occurrence, save to R from C
118 |                         // reverse order for code!
119 |                         code.push(ZPAQLOp::RsetA{n: 1});
120 |                         code.push(ZPAQLOp::Set{target: Loc::Reg(Reg::A), source: Loc::Reg(Reg::OtherReg(OtherReg::C))});
121 |                         code.push(ZPAQLOp::Zero(Loc::MC));
122 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
123 |                         j -= 3;  // jump over all four
124 |                         ismatch = true;
125 |                 },
126 |                 (true, _, _, _, &ZPAQLOp::Inc(Loc::Reg(Reg::A)),
127 |                         &ZPAQLOp::RsetA{n: 1},
128 |                         &ZPAQLOp::Set{target: Loc::Reg(Reg::OtherReg(OtherReg::C)), source: Loc::Reg(Reg::A)},
129 |                         &ZPAQLOp::Zero(Loc::MC)  ) => {
130 |                         // after first occurrence, don't save R again
131 |                         // reverse order for code!
132 |                         code.push(ZPAQLOp::Zero(Loc::MC));
133 |                         code.push(ZPAQLOp::Inc(Loc::Reg(Reg::OtherReg(OtherReg::C))));
134 |                         j -= 3;  // jump over all four
135 |                         ismatch = true;
136 |                 },
137 |                 _ => { code.push(zcode[i].clone()); ismatch = false; }  // no pattern ends at op i: copy it and end the current run
138 |             }
139 |         } else {  // fewer than seven ops left: nothing can match, copy the op
140 |             code.push(zcode[i].clone());
141 |             ismatch = false;
142 |         }
143 |         j -= 1;
144 |     }
145 |     code.reverse();  // restore program order
146 |     code
147 | }
148 | 
149 | 


--------------------------------------------------------------------------------
/src/zpaqcfg.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Write;
 2 | use std::fs::File;
 3 | use zpaql::{ZPAQLOp, set_positions};
 4 | use options;
 5 | 
 6 | pub struct ZPAQCfgFile {  // contents of one ZPAQ configuration file: header values, model lines and the hcomp/pcomp code
 7 |     pub hh: u8,  // written as the `hh` header field; finalise() replaces it with calc_xh_size(hh, stacksize) when hcomp is emitted
 8 |     pub hm: u8,  // written as the `hm` header field
 9 |     pub ph: u8,  // written as the `ph` header field; finalise() replaces it with calc_xh_size(ph, stacksize) when pcomp is not empty
10 |     pub pm: u8,  // written as the `pm` header field
11 |     pub n: u8, // == model.len()
12 |     pub model: Vec<(u8, String)>,  // ? maybe data type instead of string; each entry is written as one "  {number} {text}" line of the header
13 |     pub pcomp_invocation: String,  // text written between "pcomp " and " ;" in the output
14 |     pub stacksize: u32,  // <= 2^32 - 2^?h
15 |     pub hcomp: Vec<ZPAQLOp>,  // code of the hcomp section; if empty (or n == 0) only "halt" is written
16 |     pub pcomp: Vec<ZPAQLOp>,  // code of the pcomp section; if empty no pcomp section is written
17 |     pub finalised: bool,  // set by finalise(); all write_* methods assert it
18 | }
19 | 
20 | fn calc_xh_size(hlog: u8, stacksize: u32, optioncfg: &options::Options) -> u8 {
21 |     // using the formular log(x+y) = log(x) + log(1 + y/x)
22 |     let nhlog: f32 = (hlog as f32 + (1f32 + (stacksize as f32)/2f32.powi(hlog as i32)).log2() ).ceil();
23 |     if nhlog > 32f32 {
24 |         error!("size of H is too big: **2^{}** = {} + 2^{} = stacksize + 2^?h <= 2^32", stacksize, hlog, nhlog as u64);
25 |         if !optioncfg.ignore_errors{
26 |             panic!("error");
27 |         }
28 |     }
29 |     nhlog as u8
30 | }
31 | 
32 | impl ZPAQCfgFile {
33 |     /// Enlarges hh/ph by the stack size and resolves the jump targets of both code sections; returns Err(()) if called a second time.
34 |     pub fn finalise(&mut self, optioncfg: &options::Options) -> Result<(), ()> {
35 |         if self.finalised {
36 |             return Err(());
37 |         }
38 |         let hcomp_stack = if self.hcomp.is_empty() || self.n == 0 { 0 } else { self.stacksize };
39 |         let pcomp_stack = if self.pcomp.is_empty() { 0 } else { self.stacksize };
40 |         let (new_hh, new_ph) = (calc_xh_size(self.hh, hcomp_stack, optioncfg), calc_xh_size(self.ph, pcomp_stack, optioncfg));
41 |         self.hh = new_hh;
42 |         self.ph = new_ph;
43 |         self.hcomp = set_positions(&self.hcomp, optioncfg);
44 |         self.pcomp = set_positions(&self.pcomp, optioncfg);
45 |         self.finalised = true;
46 |         Ok(())
47 |     }
48 |     /// Writes the "comp" line followed by one line per model entry (only if n > 0). Requires finalise().
49 |     pub fn write_header(&self, mut output: &File) {
50 |         assert!(self.finalised);
51 |         writeln!(output, "comp {} {} {} {} {} (hh hm ph pm n)", self.hh, self.hm, self.ph, self.pm, self.n).unwrap();
52 |         if self.n > 0 {
53 |             for entry in self.model.iter() {
54 |                 writeln!(output, "  {} {}", entry.0, entry.1).unwrap();
55 |             }
56 |         }
57 |     }
58 |     /// Writes the hcomp section; a lone "halt" if there is no hcomp code or no model component. Requires finalise().
59 |     pub fn write_hcomp(&self, mut output: &File, optioncfg: &options::Options) {
60 |         assert!(self.finalised);
61 |         writeln!(output, "hcomp").unwrap();
62 |         if self.hcomp.is_empty() || self.n == 0 {
63 |             writeln!(output, "  halt").unwrap();
64 |             return;
65 |         }
66 |         let mut pc = 0;
67 |         for op in self.hcomp.iter() {
68 |             match optioncfg.pc_as_comment {
69 |                 true => writeln!(output, "  {}        ({})", op, pc).unwrap(),
70 |                 false => writeln!(output, "  {}", op).unwrap(),
71 |             }
72 |             pc += op.size();
73 |         }
74 |     }
75 |     /// Writes the pcomp section, or nothing at all if there is no pcomp code. Requires finalise().
76 |     pub fn write_pcomp(&self, mut output: &File, optioncfg: &options::Options) {
77 |         assert!(self.finalised);
78 |         if self.pcomp.is_empty() {
79 |             return;
80 |         }
81 |         writeln!(output, "pcomp {} ;", self.pcomp_invocation).unwrap();
82 |         let mut pc = 0;
83 |         for op in self.pcomp.iter() {
84 |             match optioncfg.pc_as_comment {
85 |                 true => writeln!(output, "  {}        ({})", op, pc).unwrap(),
86 |                 false => writeln!(output, "  {}", op).unwrap(),
87 |             }
88 |             pc += op.size();
89 |         }
90 |     }
91 |     /// Writes the closing "end" line. Requires finalise().
92 |     pub fn write_end(&self, mut output: &File) {
93 |         assert!(self.finalised);
94 |         writeln!(output, "end").unwrap();
95 |     }
96 | }
97 | 
98 | 


--------------------------------------------------------------------------------
/src/zpaql.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::{HashMap};
  2 | use std::fmt::{Display, Formatter, Error};
  3 | use options;
  4 | 
  5 | // @TODO: try traits instead of the nested enums
  6 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
  7 | pub enum Reg {  // a register operand: A, or one of B, C, D wrapped in OtherReg
  8 |     A,  // printed as "a"
  9 |     OtherReg(OtherReg),  // printed like the wrapped OtherReg
 10 | }
 11 | 
 12 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 13 | pub enum OtherReg {  // the registers other than A; kept separate so that types like SwapLoc can exclude A
 14 |     B,  // printed as "b"
 15 |     C,  // printed as "c"
 16 |     D,  // printed as "d"
 17 | }
 18 | 
 19 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 20 | pub enum Loc {  // operand location of an instruction: a register or one of the three indirect forms
 21 |     Reg(Reg),  // printed like the wrapped register
 22 |     MB,  // printed as "*b" (presumably the byte M[B] — confirm)
 23 |     MC,  // printed as "*c" (presumably the byte M[C] — confirm)
 24 |     HD,  // printed as "*d" (presumably the word H[D] — confirm)
 25 | }
 26 | 
 27 | #[allow(dead_code)]
 28 | #[derive(Debug, Clone, PartialEq)]
 29 | pub enum SwapLoc { // with A; the locations that can be the operand of ZPAQLOp::SwapA
 30 |     OtherReg(OtherReg),  // printed like the wrapped register
 31 |     MB, // only low 8 bits of A are touched; printed as "*b"
 32 |     MC, // only low 8 bits of A are touched; printed as "*c"
 33 |     HD,  // printed as "*d"
 34 | }
 35 | 
 36 | 
 37 | impl Display for Reg {  // prints the register as its lowercase letter
 38 |     fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
 39 |         match *self {
 40 |             Reg::OtherReg(ref other) => write!(f, "{}", other),
 41 |             Reg::A => f.write_str("a"),
 42 |         }
 43 |     }
 44 | }
 45 | 
 46 | impl Display for OtherReg {  // prints the register as its lowercase letter
 47 |     fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
 48 |         f.write_str(match *self {
 49 |             OtherReg::B => "b",
 50 |             OtherReg::C => "c",
 51 |             OtherReg::D => "d",
 52 |         })
 53 |     }
 54 | }
 55 | 
 56 | impl Display for Loc {  // prints a register by its letter and the indirect forms as "*b", "*c", "*d"
 57 |     fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
 58 |         match *self {
 59 |             Loc::Reg(ref reg) => write!(f, "{}", reg),
 60 |             Loc::MB => f.write_str("*b"),
 61 |             Loc::MC => f.write_str("*c"),
 62 |             Loc::HD => f.write_str("*d"),
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | impl Display for SwapLoc {  // prints a register by its letter and the indirect forms as "*b", "*c", "*d"
 68 |     fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
 69 |         match *self {
 70 |             SwapLoc::OtherReg(ref reg) => write!(f, "{}", reg),
 71 |             SwapLoc::MB => f.write_str("*b"),
 72 |             SwapLoc::MC => f.write_str("*c"),
 73 |             SwapLoc::HD => f.write_str("*d"),
 74 |         }
 75 |     }
 76 | }
 77 | 
 78 | 
 79 | #[allow(dead_code)]
 80 | #[derive(Debug, Clone, PartialEq)]
 81 | pub enum ZPAQLOp {  // please extend match in .size() after changes here
 82 |     Error,  // printed as "error"; 1 byte opcode
 83 |     Halt,  // printed as "halt"; 1 byte opcode
 84 |     Out,  // printed as "out"; 1 byte opcode
 85 |     Hash,  // A := (A + M[B] + 512) * 773
 86 |     HashD,  // H[D] := (H[D] + A + 512) * 773
 87 | 
 88 |     Inc(Loc),  // printed as "{loc}++"; this and the following Loc-only variants are 1 byte opcodes
 89 |     Dec(Loc),  // printed as "{loc}--"
 90 |     Not(Loc),  // printed as "{loc}!"
 91 |     Zero(Loc),  // printed as "{loc}=0"
 92 |     Set{target: Loc, source: Loc},  // printed as "{target}={source}"; 1 byte opcode
 93 |     SwapA(SwapLoc),  // 1 byte opcode
 94 |     SetN{target: Loc, n: u8},  // 2 byte opcode
 95 |     SetR{target: Reg, r: u8}, // 2 byte opcode
 96 |     Aadd(Loc),  // the A* variants with a Loc operand are 1 byte opcodes
 97 |     Asub(Loc),
 98 |     Amult(Loc),
 99 |     Adiv(Loc),
100 |     Amod(Loc),
101 |     Aand(Loc),
102 |     Aandnot(Loc),
103 |     Aor(Loc),
104 |     Axor(Loc),
105 |     Alshift(Loc),
106 |     Arshift(Loc),
107 |     Aeq(Loc), // sets F
108 |     Alt(Loc), // sets F
109 |     Agt(Loc), // sets F
110 | 
111 |     JT{n: i8},  // PCnextInstr += n (signed)    in bytecode N is positive: ((N+128) mod 256) - 128    // 2 byte opcode
112 |     JF{n: i8}, // 2 byte opcode
113 |     JMP{n: i8}, // 2 byte opcode
114 |     RsetA{n: u8}, // 2 byte opcode
115 |     AaddN{n: u8}, // 2 byte opcode
116 |     AsubN{n: u8}, // 2 byte opcode
117 |     AmultN{n: u8}, // 2 byte opcode
118 |     AdivN{n: u8}, // 2 byte opcode
119 |     AmodN{n: u8}, // 2 byte opcode
120 |     AandN{n: u8}, // 2 byte opcode
121 |     AandnotN{n: u8}, // 2 byte opcode
122 |     AorN{n: u8}, // 2 byte opcode
123 |     AxorN{n: u8}, // 2 byte opcode
124 |     AlshiftN{n: u8}, // 2 byte opcode
125 |     ArshiftN{n: u8}, // 2 byte opcode
126 |     AeqN{n: u8},  // sets F  // 2 byte opcode
127 |     AltN{n: u8}, // 2 byte opcode
128 |     AgtN{n: u8}, // 2 byte opcode
129 | 
130 |     LJ{n: u16},  // jump to n, operands as bytecode PC := 256 * M + N    // 3 byte opcode
131 | 
132 |     Comment{comment: String}, // 0 byte opcode
133 |     Label{label: String, position: u32},  // 0 byte opcode; jump target for GoTo, removed by set_positions()
134 |     GoTo{label: String},  // virtual 3 byte opcode, becomes LJ
135 | }  // please extend match in .size() after changes here
136 | 
137 | /// Replaces every goto helper instruction by a long jump to the byte offset of its label and removes the labels. Panics if a label is unknown; a program above 65535 bytes is an error (panics unless `optioncfg.ignore_errors`).
138 | pub fn set_positions(code: &[ZPAQLOp], optioncfg: &options::Options) -> Vec<ZPAQLOp> {
139 |     // pass 1: record the byte offset at which every label sits
140 |     let mut label_pos: HashMap<String, u16> = HashMap::new();
141 |     let mut offset: u16 = 0;
142 |     for op in code.iter() {
143 |         if let ZPAQLOp::Label{ref label, ..} = *op {
144 |             label_pos.insert(label.clone(), offset);
145 |             continue;
146 |         }
147 |         let next: u32 = offset as u32 + op.size() as u32;
148 |         if next > 65535 {
149 |             error!("zpaql file gets too big with instruction at {} (only 64k are allowed)", next);
150 |             if !optioncfg.ignore_errors {
151 |                 panic!("error");
152 |             }
153 |         }
154 |         // with ignored errors the offset is truncated to 16 bit here
155 |         offset = next as u16;
156 |     }
157 |     // pass 2: drop the labels and turn each goto into a long jump
158 |     let mut resolved = vec![];
159 |     for op in code.iter() {
160 |         match *op {
161 |             ZPAQLOp::Label{..} => {},
162 |             ZPAQLOp::GoTo{ref label} => {
163 |                 let target = match label_pos.get(label) {
164 |                     Some(found) => *found,
165 |                     None => { error!("label {} not found", label); panic!("error") },
166 |                 };
167 |                 resolved.push(ZPAQLOp::LJ{n: target});
168 |             },
169 |             ref other => { resolved.push(other.clone()); },
170 |         }
171 |     }
172 |     resolved
173 | }
174 | 
175 | impl ZPAQLOp {
176 |     /// Size of the instruction in bytes: 0 for comments and labels, 3 for the virtual goto
177 |     /// because it becomes LJ. Please extend the match if you add helper meta opcodes.
178 |     pub fn size(&self) -> u16 {
179 |         use self::ZPAQLOp::*;
180 |         match *self {
181 |             // helper instructions that produce no bytecode
182 |             Comment{..} | Label{..} => 0,
183 | 
184 |             // single byte opcodes
185 |             Error | Halt | Out | Hash | HashD
186 |             | Inc(_) | Dec(_) | Not(_) | Zero(_) | Set{..} | SwapA(_)
187 |             | Aadd(_) | Asub(_) | Amult(_) | Adiv(_) | Amod(_) | Aand(_) | Aandnot(_)
188 |             | Aor(_) | Axor(_) | Alshift(_) | Arshift(_) | Aeq(_) | Alt(_) | Agt(_) => 1,
189 | 
190 |             // two byte opcodes
191 |             SetN{..} | SetR{..} | JT{..} | JF{..} | JMP{..} | RsetA{..}
192 |             | AaddN{..} | AsubN{..} | AmultN{..} | AdivN{..} | AmodN{..} | AandN{..}
193 |             | AandnotN{..} | AorN{..} | AxorN{..} | AlshiftN{..} | ArshiftN{..}
194 |             | AeqN{..} | AltN{..} | AgtN{..} => 2,
195 | 
196 |             // three byte opcodes: the long jump and the virtual goto that becomes one
197 |             LJ{..} | GoTo{..} => 3,
198 | 
199 |         }
200 |     }
201 | }
202 | 
203 | 
204 | impl Display for ZPAQLOp {
205 |     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
206 |         use self::ZPAQLOp::*;
207 |         match *self {
208 |             Label{ref label, position} => { write!(fmt, "{}: {}", label, position) },
209 |             GoTo{ref label} => { write!(fmt, "goto {}", label) },
210 |             Error => { write!(fmt, "error") },
211 |             Halt => { write!(fmt, "halt") },
212 |             Out => { write!(fmt, "out") },
213 |             Hash => { write!(fmt, "hash") },
214 |             HashD => { write!(fmt, "hashd") },
215 | 
216 |             Inc(ref l) => { write!(fmt, "{}++", l) },
217 |             Dec(ref l) => { write!(fmt, "{}--", l) },
218 |             Not(ref l) => { write!(fmt, "{}!", l) },
219 |             Zero(ref l) => { write!(fmt, "{}=0", l) },
220 |             Set{ref target, ref source} => { write!(fmt, "{}={}", target, source) },
221 |             SwapA(ref l) => { write!(fmt, "{}<>a", l) },
222 |             SetN{ref target, n} => { write!(fmt, "{}= {}", target, n) },
223 |             SetR{ref target, r} => { write!(fmt, "{}=r {}", target, r) },
224 |             Aadd(ref l) => { write!(fmt, "a+={}", l) },
225 |             Asub(ref l) => { write!(fmt, "a-={}", l) },
226 |             Amult(ref l) => { write!(fmt, "a*={}", l) },
227 |             Adiv(ref l) => { write!(fmt, "a/={}", l) },
228 |             Amod(ref l) => { write!(fmt, "a%={}", l) },
229 |             Aand(ref l) => { write!(fmt, "a&={}", l) },
230 |             Aandnot(ref l) => { write!(fmt, "a&~{}", l) },
231 |             Aor(ref l) => { write!(fmt, "a|={}", l) },
232 |             Axor(ref l) => { write!(fmt, "a^={}", l) },
233 |             Alshift(ref l) => { write!(fmt, "a<<={}", l) },
234 |             Arshift(ref l) => { write!(fmt, "a>>={}", l) },
235 |             Aeq(ref l) => { write!(fmt, "a=={}", l) },
236 |             Alt(ref l) => { write!(fmt, "a<{}", l) },
237 |             Agt(ref l) => { write!(fmt, "a>{}", l) },
238 | 
239 | 
240 |             JT{n} => { write!(fmt, "jt {}", n) },
241 |             JF{n} => { write!(fmt, "jf {}", n) },
242 |             JMP{n} => { write!(fmt, "jmp {}", n) },
243 |             RsetA{n} => { write!(fmt, "r=a {}", n) },
244 | 
245 |             AaddN{n} => { write!(fmt, "a+= {}", n) },
246 |             AsubN{n} => { write!(fmt, "a-= {}", n) },
247 |             AmultN{n} => { write!(fmt, "a*= {}", n) },
248 |             AdivN{n} => { write!(fmt, "a/= {}", n) },
249 |             AmodN{n} => { write!(fmt, "a%= {}", n) },
250 |             AandN{n} => { write!(fmt, "a&= {}", n) },
251 |             AandnotN{n} => { write!(fmt, "a&~ {}", n) },
252 |             AorN{n} => { write!(fmt, "a|= {}", n) },
253 |             AxorN{n} => { write!(fmt, "a^= {}", n) },
254 |             AlshiftN{n} => { write!(fmt, "a<<= {}", n) },
255 |             ArshiftN{n} => { write!(fmt, "a>>= {}", n) },
256 |             AeqN{n} => { write!(fmt, "a== {}", n) },
257 |             AltN{n} => { write!(fmt, "a< {}", n) },
258 |             AgtN{n} => { write!(fmt, "a> {}", n) },
259 | 
260 |             LJ{n} => { write!(fmt, "lj {}", n) },
261 | 
262 |             Comment{ref comment} => { write!(fmt, "({})", comment.replace("(", "〈").replace(")", "〉") ) },
263 | 
264 |         }
265 |     }
266 | }
267 | 
268 | 
269 | 


--------------------------------------------------------------------------------
/src/zpaqlvm.rs:
--------------------------------------------------------------------------------
  1 | use zpaqcfg::ZPAQCfgFile;
  2 | use zpaql::ZPAQLOp;
  3 | use zpaql::ZPAQLOp::*;
  4 | use zpaql::Loc;
  5 | use zpaql::Loc::{MC, MB, HD};
  6 | use zpaql::Reg::{A, OtherReg};
  7 | use zpaql::OtherReg::{B, C, D};
  8 | use zpaql::SwapLoc;
  9 | 
 10 | use std::u32;
 11 | 
 12 | // ZPAQL VM for internal testing and exposed (via --run-hcomp) as debugging tool for H[i] values for a file (which is not provided by zpaqd)
 13 | 
 14 | pub struct ZPAQLVM {
 15 |     pub code: Vec<Option<ZPAQLOp>>,
 16 |     pub pc: u16,
 17 |     pub h: Vec<u32>,
 18 |     pub m: Vec<u8>,
 19 |     pub a: u32, pub b: u32, pub c:u32, pub d:u32, pub f: bool,
 20 |     pub r: Vec<u32>,
 21 |     pub outbuf: Vec<u8>,  // only available in pcomp, ignored in hcomp
 22 | }
 23 | 
 24 | pub fn zops_to_vmops(ops: &[ZPAQLOp]) -> Vec<Option<ZPAQLOp>> {
 25 |     let mut code = vec![];
 26 |     for cmd in ops {
 27 |         if cmd.size() != 0 {
 28 |             code.push(Some(cmd.clone()));
 29 |             for _ in 1..cmd.size() {
 30 |                 code.push(None);  // opcode had more than one byte, therefore this address is invalid
 31 |             }
 32 |         }
 33 |     }
 34 |     code
 35 | }
 36 | 
 37 | impl ZPAQLVM {
 38 |     pub fn new(cfgfile: &ZPAQCfgFile) -> (ZPAQLVM, ZPAQLVM) {
 39 |         let hh_size: usize = 2u32.pow(cfgfile.hh as u32) as usize;
 40 |         let hm_size: usize = 2u32.pow(cfgfile.hm as u32) as usize;
 41 |         let ph_size: usize = 2u32.pow(cfgfile.ph as u32) as usize;
 42 |         let pm_size: usize = 2u32.pow(cfgfile.pm as u32) as usize;
 43 |         let mut hh: Vec<u32> = Vec::with_capacity(hh_size);
 44 |         hh.resize(hh_size, 0);
 45 |         let mut hm: Vec<u8> = Vec::with_capacity(hm_size);
 46 |         hm.resize(hm_size, 0);
 47 |         let mut ph: Vec<u32> = Vec::with_capacity(ph_size);
 48 |         ph.resize(ph_size, 0);
 49 |         let mut pm: Vec<u8> = Vec::with_capacity(pm_size);
 50 |         pm.resize(pm_size, 0);
 51 |         let mut hr: Vec<u32> = Vec::with_capacity(256);
 52 |         hr.resize(256, 0);
 53 |         let mut pr: Vec<u32> = Vec::with_capacity(256);
 54 |         pr.resize(256, 0);
 55 |         let hcomp = zops_to_vmops(&cfgfile.hcomp);
 56 |         let pcomp = zops_to_vmops(&cfgfile.pcomp);
 57 |         let hcomp_vm = ZPAQLVM{code: hcomp, pc: 0, h: hh, m: hm, r: hr, a: 0, b: 0, c: 0, d: 0, f: false, outbuf: vec![]};
 58 |         let pcomp_vm = ZPAQLVM{code: pcomp, pc: 0, h: ph, m: pm, r: pr, a: 0, b: 0, c: 0, d: 0, f: false, outbuf: vec![]};
 59 |         (hcomp_vm, pcomp_vm)
 60 |     }
 61 |     pub fn run(&mut self, c: u32) {
 62 |         self.a = c;
 63 |         self.pc = 0;
 64 |         while self.code[self.pc as usize] != Some(Halt) {
 65 |             self.step();
 66 |         }
 67 |     }
 68 |     fn step(&mut self) {
 69 |         let pc_opcode = self.pc;
 70 |         let opcodebox = self.code[pc_opcode as usize].clone();
 71 |         let opcode = opcodebox.unwrap_or_else(
 72 |                         || { error!("error: can't execute part of an opcode at {} (invalid jump destination)", self.pc); panic!("error") }
 73 |                     );
 74 |         self.pc += opcode.size();
 75 |         match opcode {
 76 |                 Error => { error!("Error while running ZPAQL at {}: error instruction", pc_opcode); panic!("error"); },
 77 |                 Halt => { error!("Error while running ZPAQL at {}: can't execute halt", pc_opcode); panic!("error") },
 78 |                 Out => { self.outbuf.push(self.a as u8); },
 79 |                 Hash => { self.a = zmul(zadd(zadd(self.a, self.get_value(&MB)), 512), 773); },
 80 |                 HashD => {
 81 |                     let v = zadd(self.get_value(&HD), self.a);
 82 |                     self.set_value(&HD, zmul(zadd(v, 512), 773) ); },
 83 |                 Inc(ref loc) => {
 84 |                     let v = zadd(self.get_value(loc), 1);
 85 |                     self.set_value(loc, v); },
 86 |                 Dec(ref loc) => {
 87 |                     let v = zsub(self.get_value(loc), 1);
 88 |                     self.set_value(loc, v); },
 89 |                 Not(ref loc) => {
 90 |                     let v = !self.get_value(loc);
 91 |                     self.set_value(loc, v); },
 92 |                 Zero(ref loc) => { self.set_value(loc, 0); },
 93 |                 Set{ref target, ref source} => {
 94 |                     let v = self.get_value(source);
 95 |                     self.set_value(target, v); },
 96 |                 SwapA(ref swaploc) => {
 97 |                     match swaploc {
 98 |                         &SwapLoc::MB => {
 99 |                             let mb = self.get_value(&MB);
100 |                             let v = self.a;
101 |                             self.set_value(&MB, v.clone());
102 |                             self.a = (v & (u32::MAX - 255u32)) + mb; },  // swap only lower 8 bit
103 |                         &SwapLoc::MC => {
104 |                             let mc = self.get_value(&MC);
105 |                             let v = self.a;
106 |                             self.set_value(&MC, v.clone());
107 |                             self.a = (v & (u32::MAX - 255u32)) + mc; },
108 |                         &SwapLoc::HD => {
109 |                             let hd = self.get_value(&HD);
110 |                             let v = self.a;
111 |                             self.set_value(&HD, v);
112 |                             self.a = hd;},
113 |                         &SwapLoc::OtherReg(ref oreg) => {
114 |                             let t = self.get_value(&Loc::Reg(OtherReg(oreg.clone())));
115 |                             let v = self.a;
116 |                             self.set_value(&Loc::Reg(OtherReg(oreg.clone())), v);
117 |                             self.a = t; },
118 |                     }
119 |                 },
120 |                 SetN{ref target, n} => { self.set_value(&target, n as u32); },
121 |                 SetR{ref target, r} => {
122 |                     let rval = self.r[r as usize];
123 |                     self.set_value(&Loc::Reg(target.clone()), rval); },
124 |                 Aadd(ref loc) => { self.a = zadd(self.a, self.get_value(loc)); },
125 |                 Asub(ref loc) => { self.a = zsub(self.a, self.get_value(loc)); },
126 |                 Amult(ref loc) => { self.a = zmul(self.a, self.get_value(loc)); },
127 |                 Adiv(ref loc) => { self.a = zdiv(self.a, self.get_value(loc)); },  // &Loc::Reg(A)
128 |                 Amod(ref loc) => { self.a = zmod(self.a, self.get_value(loc)); },
129 |                 Aand(ref loc) => { self.a = self.a & self.get_value(loc); },
130 |                 Aandnot(ref loc) => { self.a = self.a & (!self.get_value(loc)); },
131 |                 Aor(ref loc) => { self.a = self.a | self.get_value(loc); },
132 |                 Axor(ref loc) => { self.a = self.a ^ self.get_value(loc); },
133 |                 Alshift(ref loc) => { self.a = zlshift(self.a, self.get_value(loc)); },
134 |                 Arshift(ref loc) => { self.a = zrshift(self.a, self.get_value(loc)); },
135 |                 Aeq(ref loc) => { self.f = self.a == self.get_value(loc);  },
136 |                 Alt(ref loc) => { self.f = self.a < self.get_value(loc);  },
137 |                 Agt(ref loc) => { self.f = self.a > self.get_value(loc);  },
138 | 
139 |                 JT{n} => if self.f { self.pc = (self.pc as i32 + n as i32) as u16; } ,  // PCnextInstr += n (signed)    in bytecode N is positive: ((N+128) mod 256) - 128
140 |                 JF{n} => if !self.f { self.pc = (self.pc as i32 + n as i32) as u16; } ,
141 |                 JMP{n} => { self.pc = (self.pc as i32 + n as i32) as u16;  },
142 |                 RsetA{n} => { self.r[n as usize] = self.a;  },
143 |                 AaddN{n} => { self.a = zadd(self.a, n as u32); },
144 |                 AsubN{n} => { self.a = zsub(self.a, n as u32); },
145 |                 AmultN{n} => { self.a = zmul(self.a, n as u32); },
146 |                 AdivN{n} => { self.a = zdiv(self.a, n as u32); },
147 |                 AmodN{n} => { self.a = zmod(self.a, n as u32); },
148 |                 AandN{n} => { self.a = self.a & (n as u32); },
149 |                 AandnotN{n} => { self.a = self.a & (!(n as u32)); },
150 |                 AorN{n} => { self.a = self.a | (n as u32); },
151 |                 AxorN{n} => { self.a = self.a ^ (n as u32); },
152 |                 AlshiftN{n} => { self.a = zlshift(self.a, n as u32); },
153 |                 ArshiftN{n} => { self.a = zrshift(self.a, n as u32); },
154 |                 AeqN{n} => { self.f = self.a == (n as u32);  },
155 |                 AltN{n} => { self.f = self.a < (n as u32);  },
156 |                 AgtN{n} => { self.f = self.a > (n as u32); },
157 | 
158 |                 LJ{n} => { self.pc = n; },  // jump to n, operands as bytecode PC := 256 * M + N
159 | 
160 |                 ref cmd => { panic!("can't execute {}: {}", pc_opcode, cmd); },
161 |         }
162 |     }
163 |     fn get_value(&self, loc: &Loc) -> u32 {
164 |         match loc {
165 |             &Loc::Reg(A) => self.a,
166 |             &Loc::Reg(OtherReg(B)) => self.b,
167 |             &Loc::Reg(OtherReg(C)) => self.c,
168 |             &Loc::Reg(OtherReg(D)) => self.d,
169 |             &MB => self.m[(self.b as usize % self.m.len()) as usize] as u32,
170 |             &MC => self.m[(self.c as usize % self.m.len()) as usize] as u32,
171 |             &HD => self.h[(self.d as usize % self.h.len()) as usize],
172 |         }
173 |     }
174 |     fn set_value(&mut self, loc: &Loc, v: u32) {
175 |         match loc {
176 |             &Loc::Reg(A) => { self.a = v; },
177 |             &Loc::Reg(OtherReg(B)) => { self.b = v; },
178 |             &Loc::Reg(OtherReg(C)) => { self.c = v; },
179 |             &Loc::Reg(OtherReg(D)) => { self.d = v; },
180 |             &MB => {
181 |                 let leng = self.m.len();
182 |                 self.m[(self.b as usize % leng) as usize] = v as u8; },  // use only least 8 bit
183 |             &MC => {
184 |                 let leng = self.m.len();
185 |                 self.m[(self.c as usize % leng) as usize] = v as u8; },
186 |             &HD => {
187 |                 let leng = self.h.len();
188 |                 self.h[(self.d as usize % leng) as usize] = v; },
189 |         }
190 |     }
191 | }
192 | 
/// Addition of the VM: the sum is reduced modulo 2^32 instead of overflowing.
fn zadd(x: u32, y: u32) -> u32 {
    x.wrapping_add(y)
}
197 | 
/// Subtraction of the VM: the difference is reduced modulo 2^32 instead of underflowing.
fn zsub(x: u32, y: u32) -> u32 {
    x.wrapping_sub(y)
}
202 | 
/// Multiplication of the VM: the product is reduced modulo 2^32 instead of overflowing.
fn zmul(x: u32, y: u32) -> u32 {
    x.wrapping_mul(y)
}
207 | 
/// Division of the VM: a division by zero yields 0.
fn zdiv(x: u32, y: u32) -> u32 {
    // checked_div is None exactly for y == 0, an unsigned division can't overflow
    x.checked_div(y).unwrap_or(0)
}
216 | 
/// Remainder of the VM: a modulus of zero yields 0.
fn zmod(x: u32, y: u32) -> u32 {
    // checked_rem is None exactly for y == 0, an unsigned remainder can't overflow
    x.checked_rem(y).unwrap_or(0)
}
225 | 
/// Left shift of the VM: only the lowest 5 bit of the shift amount `y` are used.
fn zlshift(x: u32, y: u32) -> u32 {
    x.wrapping_shl(y)
}
230 | 
/// Right shift of the VM: only the lowest 5 bit of the shift amount `y` are used.
fn zrshift(x: u32, y: u32) -> u32 {
    x.wrapping_shr(y)
}
235 | 
236 | 


--------------------------------------------------------------------------------
/test/bmp_j4.cfg:
--------------------------------------------------------------------------------
  1 | (zpaq 6.00+ config file for 24-bit bmp files)
  2 | 
  3 | comp 17 17 0 3 33 (hh hm ph pm n)
  4 |   0 const 160
  5 |   1 cm $1+20 255
  6 |   2 cm $1+20 255
  7 |   3 cm $1+20 255
  8 |   4 cm $1+20 255
  9 |   5 cm $1+20 255
 10 |   6 cm $1+20 255
 11 |   7 cm $1+20 255
 12 |   8 cm $1+20 255
 13 |   9 icm $1+16
 14 |   10 cm $1+20 255
 15 |   11 icm $1+16
 16 |   12 cm $1+20 255
 17 |   13 icm $1+16
 18 |   14 cm $1+20 255
 19 |   15 icm $1+16
 20 |   16 cm $1+20 255
 21 |   17 cm $1+20 255
 22 |   18 cm $1+20 255
 23 |   19 cm $1+20 255
 24 |   20 cm $1+20 255
 25 |   21 icm $1+16
 26 |   22 icm $1+16
 27 |   23 cm 11 255
 28 | 
 29 |   24 mix 16 0 24 16 255
 30 |   25 mix 11 0 25 20 255
 31 |   26 mix2 0 24 25 40 0
 32 |   27 mix 0 0 26 32 0
 33 |   28 mix2 0 26 27 40 0
 34 |   29 sse 16 28 8 255
 35 |   30 mix2 8 28 29 40 255
 36 |   31 sse 8 30 8 255
 37 |   32 mix2 0 30 31 40 0
 38 | 
 39 | hcomp
 40 |   *c=a (save in rotating buffer)
 41 |   a=c a== 0 if (compute lookup ilog table)
 42 |     d= 255 *d=0 d++ *d=0
 43 |     a= 1 a<<= 16 r=a 0
 44 |     a= 7 a*= 100 a+= 74 a*= 100 a+= 54 a*= 100 a+= 10 a*= 100 a+= 2 r=a 1
 45 |     a= 14 a*= 100 a+= 15 a*= 100 a+= 57 a*= 100 a+= 76 c=a
 46 |     d= 2
 47 |     do
 48 |       a=d a*= 2 a-- b=a
 49 |       a=r 1 a/=b b=a
 50 |       a=c a+=b c=a
 51 |       a=d a+= 255 d=a a=c a>>= 24 *d=a a=d a-= 255 d=a
 52 |       d++
 53 |     a=r 0 b=a a=d a<b while
 54 |    c=0 a= 255 r=a 0
 55 |   endif
 56 |   a=c a== 3 if
 57 |     b=0 a=*b
 58 |     a== 0 ifnot b++ b++ a=*b a>>= 8 b-- a+=*b a++ a++ endif
 59 |     a+= 20
 60 |     r=a 19
 61 |   endif
 62 | 
 63 |   a=r 19 a==c if b=c a=*b a<<= 8 b-- a+=*b a*= 3 a+= 3 a|= 3 a^= 3 r=a 0 endif (r0=w)
 64 | 
 65 |   a=c a%= 3 r=a 1 (r1=color)
 66 |   b=c b-- b-- a=*b r=a 2 (r2=buf(3))
 67 |   a=c a++ b=r 0 a-=b b=a a=*b r=a 3 (r3=buf(w))
 68 |   a=c a-- a-- b=r 0 a-=b b=a a=*b r=a 4 (r4=buf(w+3))
 69 |   a=c a+= 4 b=r 0 a-=b b=a a=*b r=a 5 (r5=buf(w-3))
 70 |   a=r 2 b=r 3 a+=b b=r 4 a+=b b=r 5 a+=b r=a 6 (r6=mean)
 71 |   a=r 2 a*=a b=a a=r 3 a*=a a+=b b=a a=r 4 a*=a a+=b b=a a=r 5 a*=a a+=b b=a a=r 6 a*=a a/= 4 b<>a a-=b a>>= 2 r=a 7 (r7=var)
 72 |   a=r 6 a>>= 2 r=a 6 (r6>>=2)
 73 |   a=r 7 a+= 255 d=a a=*d r=a 8 (r8=logvar)
 74 | 
 75 |   b=c a=*b r=a 9 (r9=buf(1))
 76 |   b=c b-- a=*b r=a 10 (r10=buf(2))
 77 |   a=c a-= 3 b=a a=*b r=a 11 (r11=buf(4))
 78 |   a=c a-= 5 b=a a=*b r=a 12 (r12=buf(6))
 79 |   a=c a++ b=r 0 a-=b a-=b b=a a=*b r=a 13 (r13=buf(w*2))
 80 |   a=c a+= 7 b=r 0 a-=b a-=b b=a a=*b r=a 14 (r14=buf(w*2-6))
 81 |   a=c a+= 4 b=r 0 a-=b a-=b b=a a=*b r=a 15 (r15=buf(w*2-3))
 82 |   a=c a-= 5 b=r 0 a-=b a-=b b=a a=*b r=a 16 (r16=buf(w*2+6))
 83 |   a=c b=r 0 a-=b b=a a=*b r=a 17 (r17=buf(w+1))
 84 |   a=c a+= 3 b=r 0 a-=b b=a a=*b r=a 18 (r18=buf(w-2))
 85 | 
 86 |   d=0 do *d=0 d++ a=d a< 24 while
 87 | 
 88 |   d=0
 89 |   d++
 90 |   a=r 3 b=r 4 a-=b b=r 2 a+=b hashd d++
 91 |   a=r 2 b=r 3 a-=b b=r 5 a+=b hashd d++
 92 |   a=r 2 a*= 2 b=r 12 a-=b hashd d++
 93 |   a=r 3 a*= 2 b=r 13 a-=b hashd d++
 94 |   a=r 4 a*= 2 b=r 16 a-=b hashd d++
 95 |   a=r 5 a*= 2 b=r 14 a-=b hashd d++
 96 |   a=r 9 b=r 18 a-=b b=r 5 a+=b hashd d++
 97 |   a=r 3 b=r 15 a-=b b=r 5 a+=b hashd d++
 98 |   a= 24 a*= 16 b=a a=r 8 a<<= 1 a&=b b=a a=r 6 a>>= 1 a|=b hashd d++
 99 | 
100 |   b=r 11 a=r 9 a-=b b=r 2 a+=b hashd d++
101 |   b=r 17 a=r 9 a-=b b=r 3 a+=b hashd d++
102 |   a=r 2 hashd b=r 11 a=r 9 a-=b hashd d++
103 |   a=r 9 hashd a=r 10 hashd d++
104 |   a=r 3 hashd b=r 17 a=r 9 a-=b hashd d++
105 |   b=r 2 a=r 3 a+=b a>>= 3 hashd a=r 9 a>>= 4 hashd a=r 10 a>>= 4 hashd d++
106 |   a=r 6 hashd a=r 8 a>>= 4 hashd d++
107 |   a=r 2 hashd a=r 9 hashd a=r 10 hashd d++
108 |   a=r 3 hashd d++
109 |   a=r 3 hashd a=r 9 hashd a=r 10 hashd d++
110 |   a=r 2 hashd d++
111 |   a=r 2 hashd a=r 9 hashd d++
112 |   a=r 3 hashd a=r 9 hashd d++
113 |   d++
114 | 
115 |   d=0
116 |   do
117 |     a=r 1 hashd d++
118 |   a=d a< 24 while
119 | 
120 |   a=r 9 a>>= 4 a*= 3 b=r 1 a+=b a<<= 9 *d=a (mix)
121 |   d++
122 |   a=r 1 a<<= 9 *d=a (mix)
123 | 
124 |   d++ d++ d++ d++
125 | 
126 |   a=r 3 (b=r 4 a-=b) b=r 2 a+=b a>>= 3 a*= 3 b=r 1 a+=b a<<= 9 *d=a (sse)
127 |   c++
128 |   halt
129 | pcomp ../zpaqd r colorpre p ;
130 |   a> 255 ifnot
131 |     *c=a (save input)
132 |     a=c a== 19 if
133 |       b=c a=*b a<<= 8
134 |       b-- a+=*b a*= 3 r=a 1
135 |       a+= 3 a|= 3 a^= 3 r=a 0      
136 |     endif
137 |     a=c a> 53 if
138 |       b=r 0 a=c a-= 54 a%=b b=a
139 |       a=r 1 a>b ifnot
140 |         a=*c out
141 |       else
142 |         a=b a%= 3 a== 2 if
143 |           b=c a=*b d=a b-- b-- a=*b a-=d out
144 |           a+=d out
145 |           b++ a-=*b out
146 |         endif
147 |       endif
148 |     else
149 |       a=*c out
150 |     endif
151 |     c++
152 |   else
153 |     c=0
154 |   endif
155 |   halt
156 | end
157 | 


--------------------------------------------------------------------------------
/test/bro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/bro


--------------------------------------------------------------------------------
/test/bro.sourcelink:
--------------------------------------------------------------------------------
1 | https://github.com/google/brotli
2 | 


--------------------------------------------------------------------------------
/test/brotli-dict:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/brotli-dict


--------------------------------------------------------------------------------
/test/brotli-helper:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # dict_is_present.tmp has to be removed before a block is appended, will be created after first segment is added and remain while other segments are appended to block (so they don't need the dict prepended to them again)
 3 | set -eu
 4 | TMPBR=`tempfile`.br
 5 | ./bro --quality 9 --input "$1" --output "$TMPBR"
 6 | if [ -e dict_is_present.tmp ]
 7 | then
 8 |   printf "x" > "$2"  # no dict
 9 |   cat "$TMPBR" >> "$2"
10 | else
11 |   cat ./brotli-dict "$TMPBR" > "$2"
12 |   touch dict_is_present.tmp
13 | fi
14 | rm "$TMPBR"
15 | 


--------------------------------------------------------------------------------
/test/colorpre.cfg:
--------------------------------------------------------------------------------
 1 | (preprocessor for bmp_j2.cfg. To run: zpaq rcolorpre.cfg)
 2 | (This is not a config file for compression)
 3 | comp 0 0 0 3 0
 4 | hcomp
 5 |   halt
 6 | pcomp ;
 7 |   a> 255 ifnot
 8 |     *c=a (save input)
 9 |     a=c a== 19 if
10 |       b=c a=*b a<<= 8
11 |       b-- a+=*b a*= 3 r=a 1
12 |       a+= 3 a|= 3 a^= 3 r=a 0
13 |     endif
14 |     a=c a> 53 if
15 |       b=r 0 a=c a-= 54 a%=b b=a
16 |       a=r 1 a>b ifnot
17 |         a=*c out
18 |       else      
19 |         a=b a%= 3 a== 2 if
20 |           b=c a=*b d=a b-- a=*b out
21 |           a-=d out
22 |           a+=d b-- a-=*b out
23 |         endif
24 |       endif
25 |     else
26 |       a=*c out
27 |     endif
28 |     c++
29 |   else
30 |     c=0
31 |   endif
32 |   halt
33 | end
34 | 


--------------------------------------------------------------------------------
/test/kodim23.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.bmp


--------------------------------------------------------------------------------
/test/kodim23.flif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.flif


--------------------------------------------------------------------------------
/test/kodim23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.png


--------------------------------------------------------------------------------
/test/kodim23.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.pnm


--------------------------------------------------------------------------------
/test/kodim23.sub.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.sub.bmp


--------------------------------------------------------------------------------
/test/kodim23.sub.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.sub.pnm


--------------------------------------------------------------------------------
/test/kodim23.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/kodim23.webp


--------------------------------------------------------------------------------
/test/lz1.ir:
--------------------------------------------------------------------------------
  1 | comp 21 0 21 24 1 (hh hm ph pm n)
  2 |   0 icm 12
  3 | hcomp
  4 |     # t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255
  5 |    ifN t0 goto init_code
  6 |    if t254 goto cont_reading
  7 |    goto call_next
  8 |   :init_code:
  9 |    t0 = 1
 10 |    t252 = t0
 11 |    goto read_b_end~
 12 |   :read_b:
 13 |    t1 = t253 == 4294967294
 14 |    if t1 goto do_read_in
 15 |    t255 = t253
 16 |    t253 = 4294967294
 17 |    goto cont_reading
 18 |   :do_read_in:
 19 |    t254 = 1
 20 |    halt
 21 |   :cont_reading:
 22 |    t254 = 0
 23 |       t1 = t255
 24 |      t2 = H[t0]()
 25 |      t0 = t0 - 1
 26 |      t0 = H[t0]()
 27 |      goto find_label_ret_id
 28 |   :read_b_end~:
 29 |  #                     20: pass
 30 |  #                     154: h_state = 5
 31 |  H[t0+1](h_state) = 5
 32 |  #                     155: first_run = True
 33 |  H[t0+2](first_run) = 1
 34 |  #                     157: def hcomp(c):  # pcomp bytecode is passed first (or 0 if there is none)
 35 |  goto hcomp_end~
 36 | :hcomp:
 37 |  MarkTempVarStart
 38 |  # Arg c at t0 + 1
 39 |  #                     160:   global h_state, first_run
 40 |  # Global h_state via H[t252+1]
 41 |  # Global first_run via H[t252+2]
 42 |  #                     161:   if first_run:
 43 |  ifN H[t252+2](first_run) goto else_1
 44 |  #                     162:     first_run = False  # skip pcomp bytecode
 45 |  H[t252+2](first_run) = 0
 46 |  #                     163:     if c == 0:
 47 |  t1 = H[t0+1](c) == 0
 48 |  ifN t1 goto else_4
 49 |  #                     164:       return
 50 |     t2 = H[t0]()
 51 |    t0 = t0 - 1
 52 |    t0 = H[t0]()
 53 |    goto find_label_ret_id
 54 |  goto endif_5
 55 | :else_4:
 56 | :endif_5:
 57 |  #                     165:     if c == 1:
 58 |  t1 = H[t0+1](c) == 1
 59 |  ifN t1 goto else_7
 60 |  #                     166:       c = read_b()
 61 |  
 62 |     H[t0+2]() = t0
 63 |    # saved bsp, return id:
 64 |    H[t0+3]() = 0
 65 |    # push arguments:
 66 |    t0 = t0 + 3
 67 |    goto read_b
 68 |   :return_id_0:
 69 |  
 70 |  H[t0+1](c) = t1
 71 |  #                     167:       c += read_b()*256 # read length
 72 |  
 73 |     H[t0+3]() = t0
 74 |    # saved bsp, return id:
 75 |    H[t0+4]() = 1
 76 |    # push arguments:
 77 |    t0 = t0 + 4
 78 |    goto read_b
 79 |   :return_id_1:
 80 |  t2 = t1
 81 |  
 82 |  t1 = t2 * 256
 83 |  H[t0+1](c) = H[t0+1](c) + t1
 84 |  #                     168:       while c > 0:
 85 | :while_9:
 86 |  t1 = H[t0+1](c) > 0
 87 |  ifN t1 goto whileend_11
 88 |  #                     169:         hH[0] = read_b()
 89 |  
 90 |     H[t0+2]() = t0
 91 |    # saved bsp, return id:
 92 |    H[t0+3]() = 2
 93 |    # push arguments:
 94 |    t0 = t0 + 3
 95 |    goto read_b
 96 |   :return_id_2:
 97 |  
 98 |  t2 = 0
 99 |  H[t2]() = t1
100 |  #                     170:         c -= 1
101 |  H[t0+1](c) = H[t0+1](c) - 1
102 |  goto while_9
103 | :whileend_11:
104 |  #                     171:       return
105 |     t2 = H[t0]()
106 |    t0 = t0 - 1
107 |    t0 = H[t0]()
108 |    goto find_label_ret_id
109 |  goto endif_8
110 | :else_7:
111 | :endif_8:
112 |  goto endif_2
113 | :else_1:
114 | :endif_2:
115 |  #                     172:   if h_state == 1:  # (expect code ccxxxxxx as input) (cc is number of offset bytes following) (00xxxxxx means x+1 literal bytes follow)
116 |  t1 = H[t252+1](h_state) == 1
117 |  ifN t1 goto else_14
118 |  #                     173:     a=c
119 |  H[t0+2](a) = H[t0+1](c)
120 |  #                     174:     a>>= 6
121 |  H[t0+2](a) = H[t0+2](a) >> 6
122 |  #                     175:     a&= 3
123 |  H[t0+2](a) = H[t0+2](a) & 3
124 |  #                     176:     if a > 0:
125 |  t1 = H[t0+2](a) > 0
126 |  ifN t1 goto else_17
127 |  #                     177:       a += 1
128 |  H[t0+2](a) = H[t0+2](a) + 1
129 |  #                     178:       h_state = a
130 |  H[t252+1](h_state) = H[t0+2](a)
131 |  #                     179:       a = c
132 |  H[t0+2](a) = H[t0+1](c)
133 |  #                     180:       a>>= 3
134 |  H[t0+2](a) = H[t0+2](a) >> 3
135 |  #                     181:       hH[0] = ( a + 512) * 773
136 |  t2 = H[t0+2](a) + 512
137 |  t1 = t2 * 773
138 |  t2 = 0
139 |  H[t2]() = t1
140 |  goto endif_18
141 | :else_17:
142 |  #                     183:       a = c
143 |  H[t0+2](a) = H[t0+1](c)
144 |  #                     184:       a&= 63
145 |  H[t0+2](a) = H[t0+2](a) & 63
146 |  #                     185:       a+= 5
147 |  H[t0+2](a) = H[t0+2](a) + 5
148 |  #                     186:       h_state = a
149 |  H[t252+1](h_state) = H[t0+2](a)
150 |  #                     187:       a = c
151 |  H[t0+2](a) = H[t0+1](c)
152 |  #                     188:       hH[0] = ( a + 512) * 773
153 |  t2 = H[t0+2](a) + 512
154 |  t1 = t2 * 773
155 |  t2 = 0
156 |  H[t2]() = t1
157 | :endif_18:
158 |  goto endif_15
159 | :else_14:
160 |  #                     189:   elif h_state == 5: # (end of literal) # + #  (init)
161 |  t1 = H[t252+1](h_state) == 5
162 |  ifN t1 goto else_20
163 |  #                     190:     h_state = 1
164 |  H[t252+1](h_state) = 1
165 |  #                     191:     hH[0] = 0
166 |  t1 = 0
167 |  H[t1]() = 0
168 |  goto endif_21
169 | :else_20:
170 |  #                     193:     if h_state > 5:
171 |  t1 = H[t252+1](h_state) > 5
172 |  ifN t1 goto else_23
173 |  #                     194:       hH[0] = (c + 512) * 773
174 |  t2 = H[t0+1](c) + 512
175 |  t1 = t2 * 773
176 |  t2 = 0
177 |  H[t2]() = t1
178 |  goto endif_24
179 | :else_23:
180 | :endif_24:
181 |  #                     195:     h_state -= 1
182 |  H[t252+1](h_state) = H[t252+1](h_state) - 1
183 | :endif_21:
184 | :endif_15:
185 |  #                     196:   c = h_state
186 |  H[t0+1](c) = H[t252+1](h_state)
187 |  #                     197:   if h_state > 5:
188 |  t1 = H[t252+1](h_state) > 5
189 |  ifN t1 goto else_26
190 |  #                     198:     c = 5
191 |  H[t0+1](c) = 5
192 |  goto endif_27
193 | :else_26:
194 | :endif_27:
195 |  #                     199:   hH[0] = (hH[0] + c + 512) * 773
196 |  t4 = 0
197 |  t3 = H[t4]() + H[t0+1](c)
198 |  t2 = t3 + 512
199 |  t1 = t2 * 773
200 |  t2 = 0
201 |  H[t2]() = t1
202 |  # insert return as it might not be done by the function:
203 |     t2 = H[t0]()
204 |    t0 = t0 - 1
205 |    t0 = H[t0]()
206 |    goto find_label_ret_id
207 |  MarkTempVarEnd
208 | :hcomp_end~:
209 |  #                     201: pass
210 | :call_next:
211 |  t253 = 4294967294
212 |     H[t0+3]() = t0
213 |    # saved bsp, return id:
214 |    H[t0+4]() = 3
215 |    # push arguments:
216 |    H[t0+5]() = t255
217 |    t0 = t0 + 4
218 |    goto hcomp
219 |   :return_id_3:
220 |     halt
221 |   :find_label_ret_id:
222 |    t4 = t0 > 1048377
223 |    if t4 goto throw_error
224 |    ifEq t2 0 goto return_id_0
225 |    ifEq t2 1 goto return_id_1
226 |    ifEq t2 2 goto return_id_2
227 |    ifEq t2 3 goto return_id_3
228 |   :throw_error:
229 |    error
230 |    halt
231 | pcomp
232 |     # t255 holds the initial value passed into the A register, first ZPAQL instruction must thus be r=a 255
233 |    ifN t0 goto init_code
234 |    if t254 goto cont_reading
235 |    goto call_next
236 |   :init_code:
237 |    t0 = 1
238 |    t252 = t0
239 |    goto read_b_end~
240 |   :read_b:
241 |    t1 = t253 == 4294967294
242 |    if t1 goto do_read_in
243 |    t255 = t253
244 |    t253 = 4294967294
245 |    goto cont_reading
246 |   :do_read_in:
247 |    t254 = 1
248 |    halt
249 |   :cont_reading:
250 |    t254 = 0
251 |       t1 = t255
252 |      t2 = H[t0]()
253 |      t0 = t0 - 1
254 |      t0 = H[t0]()
255 |      goto find_label_ret_id
256 |   :read_b_end~:
257 |  #                     20: pass
258 |  #                     218: i = 0  # position in 16M output buffer
259 |  H[t0+1](i) = 0
260 |  #                     219: state = 0
261 |  H[t0+2](state) = 0
262 |  #                     220: leng = 0  # length of match or literal
263 |  H[t0+3](leng) = 0
264 |  #                     221: off = 0  # offset of match back from i
265 |  H[t0+4](off) = 0
266 |  #                     222: BUFSIZE_max = (1<<24) - 1
267 |  t2 = 1 << 24
268 |  t1 = t2 - 1
269 |  H[t0+5](BUFSIZE_max) = t1
270 |  #                     224: def pcomp(c):  # passing c is like having c = read_b() as first line
271 |  goto pcomp_end~
272 | :pcomp:
273 |  MarkTempVarStart
274 |  # Arg c at t0 + 1
275 |  #                     227:   global i, state, leng, off
276 |  # Global i via H[t252+1]
277 |  # Global state via H[t252+2]
278 |  # Global leng via H[t252+3]
279 |  # Global off via H[t252+4]
280 |  #                     228:   if c == NONE:  # restart
281 |  t1 = H[t0+1](c) == 4294967295
282 |  ifN t1 goto else_2
283 |  #                     229:     i = 0
284 |  H[t252+1](i) = 0
285 |  #                     230:     state = 0
286 |  H[t252+2](state) = 0
287 |  #                     231:     leng = 0
288 |  H[t252+3](leng) = 0
289 |  #                     232:     off = 0
290 |  H[t252+4](off) = 0
291 |  #                     233:     return
292 |     t2 = H[t0]()
293 |    t0 = t0 - 1
294 |    t0 = H[t0]()
295 |    goto find_label_ret_id
296 |  goto endif_3
297 | :else_2:
298 | :endif_3:
299 |  #                     234:   if state == 0: # expecting a literal or match code
300 |  t1 = H[t252+2](state) == 0
301 |  ifN t1 goto else_5
302 |  #                     235:     state = 1+(c>>6)
303 |  t2 = H[t0+1](c) >> 6
304 |  t1 = 1 + t2
305 |  H[t252+2](state) = t1
306 |  #                     236:     if state == 1: # literal
307 |  t1 = H[t252+2](state) == 1
308 |  ifN t1 goto else_8
309 |  #                     237:       off = 0
310 |  H[t252+4](off) = 0
311 |  #                     238:       leng = c+1
312 |  t1 = H[t0+1](c) + 1
313 |  H[t252+3](leng) = t1
314 |  goto endif_9
315 | :else_8:
316 |  #                     239:     elif state==2: # short match
317 |  t1 = H[t252+2](state) == 2
318 |  ifN t1 goto else_11
319 |  #                     240:       off = c&7
320 |  t1 = H[t0+1](c) & 7
321 |  H[t252+4](off) = t1
322 |  #                     241:       leng = (c>>3)-3
323 |  t2 = H[t0+1](c) >> 3
324 |  t1 = t2 - 3
325 |  H[t252+3](leng) = t1
326 |  goto endif_12
327 | :else_11:
328 |  #                     243:       off = 0
329 |  H[t252+4](off) = 0
330 |  #                     244:       leng = (c&63)+1  # match
331 |  t2 = H[t0+1](c) & 63
332 |  t1 = t2 + 1
333 |  H[t252+3](leng) = t1
334 | :endif_12:
335 | :endif_9:
336 |  goto endif_6
337 | :else_5:
338 |  #                     245:   elif state == 1: # decoding a literal with leng bytes remaining
339 |  t1 = H[t252+2](state) == 1
340 |  ifN t1 goto else_14
341 |  #                     246:     out(c)
342 |  out H[t0+1](c)
343 |  #                     247:     pM[i&BUFSIZE_max] = c
344 |  t1 = H[t252+1](i) & H[t252+5](BUFSIZE_max)
345 |  M[t1] = H[t0+1](c)
346 |  #                     248:     i += 1
347 |  H[t252+1](i) = H[t252+1](i) + 1
348 |  #                     249:     leng -= 1
349 |  H[t252+3](leng) = H[t252+3](leng) - 1
350 |  #                     250:     if leng == 0:
351 |  t1 = H[t252+3](leng) == 0
352 |  ifN t1 goto else_17
353 |  #                     251:       state = 0
354 |  H[t252+2](state) = 0
355 |  goto endif_18
356 | :else_17:
357 | :endif_18:
358 |  goto endif_15
359 | :else_14:
360 |  #                     252:   elif state > 2: # state==3, state==4: expecting 2,3 match offset bytes
361 |  t1 = H[t252+2](state) > 2
362 |  ifN t1 goto else_20
363 |  #                     253:     off = off<<8|c
364 |  t2 = H[t252+4](off) << 8
365 |  t1 = t2 | H[t0+1](c)
366 |  H[t252+4](off) = t1
367 |  #                     254:     state -= 1
368 |  H[t252+2](state) = H[t252+2](state) - 1
369 |  goto endif_21
370 | :else_20:
371 |  #                     256:     off = off<<8|c
372 |  t2 = H[t252+4](off) << 8
373 |  t1 = t2 | H[t0+1](c)
374 |  H[t252+4](off) = t1
375 |  #                     257:     off = i-off-1
376 |  t2 = H[t252+1](i) - H[t252+4](off)
377 |  t1 = t2 - 1
378 |  H[t252+4](off) = t1
379 |  #                     258:     while leng:
380 | :while_22:
381 |  ifN H[t252+3](leng) goto whileend_24
382 |  #                     259:       c=pM[off&BUFSIZE_max]
383 |  t1 = H[t252+4](off) & H[t252+5](BUFSIZE_max)
384 |  H[t0+1](c) = M[t1]
385 |  #                     260:       pM[i&BUFSIZE_max]=c
386 |  t1 = H[t252+1](i) & H[t252+5](BUFSIZE_max)
387 |  M[t1] = H[t0+1](c)
388 |  #                     261:       i += 1
389 |  H[t252+1](i) = H[t252+1](i) + 1
390 |  #                     262:       off += 1
391 |  H[t252+4](off) = H[t252+4](off) + 1
392 |  #                     263:       out(c)
393 |  out H[t0+1](c)
394 |  #                     264:       leng -= 1
395 |  H[t252+3](leng) = H[t252+3](leng) - 1
396 |  goto while_22
397 | :whileend_24:
398 |  #                     265:     state = 0
399 |  H[t252+2](state) = 0
400 | :endif_21:
401 | :endif_15:
402 | :endif_6:
403 |  # insert return as it might not be done by the function:
404 |     t2 = H[t0]()
405 |    t0 = t0 - 1
406 |    t0 = H[t0]()
407 |    goto find_label_ret_id
408 |  MarkTempVarEnd
409 | :pcomp_end~:
410 |  #                     267: pass
411 | :call_next:
412 |  t253 = 4294967294
413 |     H[t0+6]() = t0
414 |    # saved bsp, return id:
415 |    H[t0+7]() = 0
416 |    # push arguments:
417 |    H[t0+8]() = t255
418 |    t0 = t0 + 7
419 |    goto pcomp
420 |   :return_id_0:
421 |     halt
422 |   :find_label_ret_id:
423 |    t4 = t0 > 1048377
424 |    if t4 goto throw_error
425 |    ifEq t2 0 goto return_id_0
426 |   :throw_error:
427 |    error
428 |    halt
429 | end
430 | 


--------------------------------------------------------------------------------
/test/lz1.orig.cfg:
--------------------------------------------------------------------------------
 1 | (lz1.cfg
 2 | 
 3 | (C) 2011 Dell Inc. Written by Matt Mahoney
 4 | Licensed under GPL v3, http://www.gnu.org/copyleft/gpl.html)
 5 | 
 6 | comp 0 0 0 24 1
 7 |   0 icm 12 (sometimes "0 cm 20 48" will compress better)
 8 | hcomp
 9 |   (c=state: 0=init, 1=expect LZ77 literal or match code,
10 |    2..4=expect n-1 offset bytes,
11 |    5..68=expect n-4 literals)
12 |   b=a (save input)
13 |   a=c a== 1 if (expect code ccxxxxxx as input)
14 |     (cc is number of offset bytes following)
15 |     (00xxxxxx means x+1 literal bytes follow)
16 |     a=b a>>= 6 a&= 3 a> 0 if (match code: cc is 1..3)
17 |       a++ c=a (high 2 bits is code length)
18 |       *d=0 a=b a>>= 3 hashd (context = hash of code byte >> 3)
19 |     else
20 |       a=b a&= 63 a+= 5 c=a (literal length x+1 is stored as state x+5)
21 |       *d=0 a=b hashd (context = hash of the whole code byte)
22 |     endif
23 |   else
24 |     a== 5 if (end of literal)
25 |       c= 1 *d=0
26 |     else
27 |       a== 0 if (init)
28 |         c= 124 *d=0 (5+length of postprocessor)
29 |       else (literal or offset)
30 |         c--
31 |         (model literals in order 1 context, offset order 0)
32 |         a> 5 if *d=0 a=b hashd endif (a still holds the state from before c--)
33 |       endif
34 |     endif
35 |   endif
36 | 
37 |   (model parse state as context)
38 |   a=c a> 5 if a= 5 endif hashd (all states above 5 are hashed as 5)
39 |   halt
40 | pcomp ./lzpre c ; (code below is equivalent to "lzpre d")
41 |   a> 255 if (end of segment)
42 |    b=0 d=0  (reset, is last command before halt)
43 |   else
44 |   (LZ77 decoder: b=i, c=c d=state r1=len r2=off
45 |     state = d = 0 = expect literal or match code
46 |                 1 = decoding a literal with len bytes left
47 |                 2 = expecting last offset byte of a match
48 |                 3,4 = expecting 2,3 match offset bytes
49 |     i = b = position in 16M output buffer
50 |     c = c = input byte
51 |     len = r1 = length of match or literal
52 |     off = r2 = offset of match back from i
53 |   Input format:
54 |     00llllll: literal of length llllll=1..64 to follow
55 |     01lllooo oooooooo: length lll=5..12, offset o=1..2048
56 |     10llllll oooooooo oooooooo: l=1..64 offset=1..65536
57 |     11llllll oooooooo oooooooo oooooooo: 1..64, 1..2^24)
58 |   c=a a=d a== 0 if (c=input byte, then dispatch on state d)
59 |     a=c a>>= 6 a++ d=a
60 |     a== 1 if (state?)
61 |       a+=c r=a 1 a=0 r=a 2 (literal len=c+1 off=0)
62 |     else
63 |       a== 2 if a=c a&= 7 r=a 2 (short match: off=c&7)
64 |         a=c a>>= 3 a-= 3 r=a 1 (len=(c>>3)-3)
65 |       else (3 or 4 byte match)
66 |         a=c a&= 63 a++ r=a 1 a=0 r=a 2 (off=0, len=(c&63)+1)
67 |       endif
68 |     endif
69 |   else
70 |     a== 1 if (writing literal)
71 |       a=c *b=a b++ out
72 |       a=r 1 a-- a== 0 if d=0 endif r=a 1 (if (--len==0) state=0)
73 |     else
74 |       a> 2 if (reading offset)
75 |         a=r 2 a<<= 8 a|=c r=a 2 d-- (off=off<<8|c, --state)
76 |       else (state==2, write match)
77 |         a=r 2 a<<= 8 a|=c c=a a=b a-=c a-- c=a (c=i-off-1)
78 |         d=r 1 (d=len)
79 |         do (copy and output d=len bytes)
80 |           a=*c *b=a out c++ b++
81 |         d-- a=d a> 0 while
82 |         (d=state=0. off, len don't matter)
83 |       endif
84 |     endif
85 |   endif
86 |   endif
87 |   halt
88 | end
89 | 


--------------------------------------------------------------------------------
/test/lz1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (C) 2016 Kai Lüke kailueke@riseup.net
  3 | # This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
  4 | # under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
  5 | ### BEGIN OF EDITABLE SECTION - do not remove this marker or place something before and after it
  6 | 
  7 | # definition of the array sizes and the context mixing linear tree
  8 | hh = 0  # size of hH[] is 2**hh
  9 | hm = 0  # size of hM[] is 2**hm
 10 | ph = 0  # size of pH[] is 2**ph
 11 | pm = 24  # size of pM[] is 2**pm (pcomp below uses pM as its 16M output buffer)
 12 | n = len({  # can also be an empty {}, then hcomp won't be included and (preprocessed) data is just stored and not arithmetically coded
 13 | 0: "icm 12",  # means indirect context model SIZE=2**12 (see --info-zpaq for link to ZPAQ spec)
 14 | })
 15 | pcomp_invocation = "./lzpre c"  # information for zpaqd about preprocessor invocation,
 16 | #                        like you would execute it in the shell, passed additional parameters
 17 | #                        at the end will be <inputfile> <outputfile>
 18 | 
 19 | # put shared functions and variables of pcomp and hcomp here,
 20 | # then they are copied into the hcomp and pcomp section before compilation
 21 | 
 22 | pass
 23 | ### END OF EDITABLE SECTION - do not remove this marker or place something before and after it
 24 | # ***Exposed API***
 25 | # c = read_b()
 26 | # push_b(c)
 27 | # c = peek_b()
 28 | # out(c)
 29 | # error()
 30 | # hH, pH, hM, pM as 32- and 8-bit arrays with the defined size 2**hh, … and support for len(hH), …
 31 | # hh, hm, ph, pm and n are also available as constants
 32 | # arrayvar = alloc_pH(arraysize)  # if backend implementations addr_alloc_pH(size), addr_free_pH(addr) … are defined
 33 | # arrayvar = array_pH(numbervar)  # cast needed when passed between functions
 34 | # len_pH(arrayvar)
 35 | # free_pH(arrayvar)
 36 | # … analog for pM, hH, hM
 37 | 
 38 | import sys, array, argparse
 39 | from collections import deque
 40 | input_buf = []  # replaced by a deque holding the input once the arguments are parsed
 41 | output = deque([])  # values passed to out() that finish_output() has not written yet
 42 | NONE = 4294967295  # 2**32 - 1; appended to the pcomp input as end-of-segment marker
 43 | input_c = NONE-1  # pushed-back byte for read_b()/peek_b(); NONE-1 means nothing is pending
 44 | 
 45 | def out(a):  # emit one output value a; with --compare also check it against the expected data
 46 |   if cmpbuf is not None:  # cmpbuf is the deque of expected bytes, None without --compare
 47 |     expected = cmpbuf.popleft()  # NOTE(review): raises IndexError when more output is produced than expected data exists
 48 |     if a != expected:
 49 |       import ipdb; ipdb.set_trace()  # stop in the debugger at the first mismatch (needs the ipdb package)
 50 |   output.append(a)  # buffered until finish_output() writes it
 51 | 
 52 | def read_b():  # return the next input byte, preferring a byte pushed back via push_b()
 53 |   global input_c, input_buf, input_last_a
 54 |   if input_c == NONE-1:  # was already consumed
 55 |     if len(input_buf) == 0:
 56 |       raise WouldNotBeReached  # input exhausted; the driver loops catch this
 57 |     a = input_buf.popleft()
 58 |     print_hcomp_status()  # report the previously read byte before it is replaced
 59 |     input_last_a = a
 60 |     return a
 61 |   else:  # a pushed-back byte is pending: hand it out and clear the slot
 62 |     tmp = input_c
 63 |     input_c = NONE-1
 64 |     return tmp
 65 | 
 66 | 
 67 | def peek_b():  # return the next input byte without consuming it
 68 |   global input_c
 69 |   if input_c == NONE-1:  # nothing pending: read one byte and push it straight back
 70 |     push_b(read_b())
 71 |   return input_c
 72 | 
 73 | def push_b(c):  # push c back so that the next read_b()/peek_b() returns it
 74 |   """only one byte can be pending; pushing again before it is read overwrites it"""
 75 |   global input_c
 76 |   input_c = c
 77 | 
 78 | def error():  # abort the run with a plain Exception
 79 |   raise Exception("error() invoked (zpaq execution will fail with: Bad ZPAQL opcode)")
 80 | 
 81 | hH = array.array('L', [0 for x in range(0, 2**hh)])  # hcomp word array: 2**hh zeroed entries of type 'L'
 82 | hM = array.array('B', [0 for x in range(0, 2**hm)])  # hcomp byte array: 2**hm zeroed entries of type 'B'
 83 | 
 84 | pH = array.array('L', [0 for x in range(0, 2**ph)])  # pcomp word array: 2**ph zeroed entries of type 'L'
 85 | pM = array.array('B', [0 for x in range(0, 2**pm)])  # pcomp byte array: 2**pm zeroed entries of type 'B'
 86 | 
 87 | 
 88 | def alloc_pM(size):  # alloc_*: wrap a region obtained from addr_alloc_* (not defined in this file) in a VirtArray
 89 |   return VirtArray(pM, addr_alloc_pM(size), size)
 90 | def alloc_pH(size):  # same as alloc_pM, for pH
 91 |   return VirtArray(pH, addr_alloc_pH(size), size)
 92 | def alloc_hH(size):  # same as alloc_pM, for hH
 93 |   return VirtArray(hH, addr_alloc_hH(size), size)
 94 | def alloc_hM(size):  # same as alloc_pM, for hM
 95 |   return VirtArray(hM, addr_alloc_hM(size), size)
 96 | def free_pM(va):  # free_*: hand va's region to addr_free_* (not defined in this file) and mark va as freed
 97 |   if va.addr == NONE:  # addr is set to NONE below after a successful free
 98 |     raise Exception("double free (not visible in zpaq execution)")
 99 |   addr_free_pM(va.addr)
100 |   va.addr = NONE
101 | def free_pH(va):  # same as free_pM, for pH
102 |   if va.addr == NONE:
103 |     raise Exception("double free (not visible in zpaq execution)")
104 |   addr_free_pH(va.addr)
105 |   va.addr = NONE
106 | def free_hH(va):  # same as free_pM, for hH
107 |   if va.addr == NONE:
108 |     raise Exception("double free (not visible in zpaq execution)")
109 |   addr_free_hH(va.addr)
110 |   va.addr = NONE
111 | def free_hM(va):  # same as free_pM, for hM
112 |   if va.addr == NONE:
113 |     raise Exception("double free (not visible in zpaq execution)")
114 |   addr_free_hM(va.addr)
115 |   va.addr = NONE
116 | 
117 | # cast addresses that were themselves stored in a pH/hH entry back to array objects (VirtArray values pass through unchanged)
118 | array_pH = lambda addr: addr if type(addr) is VirtArray else VirtArray(pH, addr, pH[addr-2])  # size is read from pH[addr-2]
119 | array_pM = lambda addr: addr if type(addr) is VirtArray else VirtArray(pM, addr, get32_pM(addr-5))  # size via get32_pM(addr-5); get32_pM is not defined in this file
120 | array_hH = lambda addr: addr if type(addr) is VirtArray else VirtArray(hH, addr, hH[addr-2])  # size is read from hH[addr-2]
121 | array_hM = lambda addr: addr if type(addr) is VirtArray else VirtArray(hM, addr, get32_hM(addr-5))  # size via get32_hM(addr-5); get32_hM is not defined in this file
122 | len_hM = len_pM = len_pH = len_hH = lambda va: va.size  # all four names share one function returning the stored size
123 | 
124 | class VirtArray:  # view of size entries inside one backing array, starting at addr
125 |   addr = None  # addr in array for index 0
126 |   array = None  # one of hH, hM, pH, pM
127 |   size = None  # number of entries, as returned by the len_* helpers
128 |   def __init__(self, array, addr, size):
129 |     self.array = array
130 |     self.addr = addr
131 |     self.size = size
132 |     assert self.size < 2147483648, "address too big, 32. bit is used to distinguish between H and M"  # NOTE(review): checks size while the message speaks of the address - confirm which is meant
133 |   def __getitem__(self, key):  # key is relative to addr; no bounds check against size
134 |     return self.array[self.addr+key]
135 |   def __setitem__(self, key, item):  # a VirtArray item is stored as its address
136 |     self.array[self.addr+key] = item.addr if type(item) is VirtArray else item
137 |   def __len__(self):  # len() is rejected on purpose, pointing to the len_* helpers
138 |     raise Exception("instead of len() use one of len_hM, len_pM, len_pH or len_hH")
139 |   def __str__(self):  # string form of the viewed slice of the backing array
140 |     return str(self.array[self.addr:self.addr+self.size])
141 | 
142 | 
143 | pass
144 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions  beside API and those of the first section
145 | 
146 | # place global variables of hcomp and custom functions into this section
147 | 
148 | # Ported to Python by Kai Lüke, 2016, from lz1.cfg:
149 | #  (C) 2011 Dell Inc. Written by Matt Mahoney
150 | #  Licensed under GPL v3, http://www.gnu.org/copyleft/gpl.html)
151 | 
152 | # (state: 0=init, 1=expect LZ77 literal or match code,
153 | # 2..4=expect n-1 offset bytes,
154 | # 5..68=expect n-4 literals)
155 | 
156 | h_state = 5  # parse state (see table above); starting at 5 makes the first data byte switch to state 1
157 | first_run = True  # True until hcomp() has handled its very first byte
158 | 
159 | def hcomp(c):  # pcomp bytecode is passed first (or 0 if there is none)
160 |   # having only pass in hcomp means that this whole section won't be included
161 |   # add code here for computation of hH[0] … hH[n-1]
162 |   global h_state, first_run
163 |   if first_run:
164 |     first_run = False  # skip pcomp bytecode
165 |     if c == 0:  # no pcomp bytecode follows: nothing to skip
166 |       return
167 |     if c == 1:  # a 2-byte length (low byte first) and that many bytes follow
168 |       c = read_b()
169 |       c += read_b()*256 # read length
170 |       while c > 0:  # consume exactly c bytes
171 |         hH[0] = read_b()  # each byte lands in hH[0] and is overwritten by the next
172 |         c -= 1
173 |       return
174 |   if h_state == 1:  # (expect code ccxxxxxx as input) (cc is number of offset bytes following) (00xxxxxx means x+1 literal bytes follow)
175 |     a=c
176 |     a>>= 6
177 |     a&= 3
178 |     if a > 0:  # match code: a is cc, the number of offset bytes
179 |       a += 1
180 |       h_state = a  # states 2..4
181 |       a = c
182 |       a>>= 3
183 |       hH[0] = ( a + 512) * 773  # context from code byte >> 3
184 |     else:  # literal code
185 |       a = c
186 |       a&= 63
187 |       a+= 5
188 |       h_state = a  # states 5..68: x+1 literals are stored as x+5
189 |       a = c
190 |       hH[0] = ( a + 512) * 773  # context from the whole code byte
191 |   elif h_state == 5: # (end of literal) # + #  (init)
192 |     h_state = 1
193 |     hH[0] = 0
194 |   else: # (literal or offset)
195 |     if h_state > 5:  # literal byte: restart the context from this byte; offset bytes keep the previous hH[0]
196 |       hH[0] = (c + 512) * 773
197 |     h_state -= 1
198 |   c = h_state  # from here on c is the parse state, no longer the input byte
199 |   if h_state > 5:  # all states above 5 are hashed as 5
200 |     c = 5
201 |   hH[0] = (hH[0] + c + 512) * 773  # mix the (clamped) parse state into the context
202 | 
203 | pass
204 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
205 | 
206 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions beside API and those of the first section
207 | 
208 | # place global variables of pcomp and custom functions into this section
209 | 
210 | # Ported to Python by Kai Lüke, 2016, from C++ decode section of lzpre.cpp:
211 | #  (C) 2011 Dell Inc. Written by Matt Mahoney
212 | #  Licensed under GPL v3, http://www.gnu.org/copyleft/gpl.html
213 | 
214 | # Input format:
215 | #    00llllll: literal of length llllll=1..64 to follow
216 | #    01lllooo oooooooo: length lll=5..12, offset o=1..2048
217 | #    10llllll oooooooo oooooooo: l=1..64 offset=1..65536
218 | #    11llllll oooooooo oooooooo oooooooo: 1..64, 1..2^24)
219 | 
220 | i = 0  # position in 16M output buffer
221 | state = 0  # decoder state 0..4, see the branches of pcomp()
222 | leng = 0  # length of match or literal
223 | off = 0  # offset of match back from i
224 | BUFSIZE_max = (1<<24) - 1  # mask applied to every pM index (2**24 - 1)
225 | 
226 | def pcomp(c):  # passing c is like having c = read_b() as first line
227 |   # having only pass in pcomp means that this whole section won't be included
228 |   # add code here which writes output via out(x)
229 |   global i, state, leng, off
230 |   if c == NONE:  # restart
231 |     i = 0
232 |     state = 0
233 |     leng = 0
234 |     off = 0
235 |     return
236 |   if state == 0: # expecting a literal or match code
237 |     state = 1+(c>>6)  # the bits above bit 5 select state 1..4 for byte input
238 |     if state == 1: # literal
239 |       off = 0
240 |       leng = c+1
241 |     elif state==2: # short match
242 |       off = c&7  # upper 3 bits of the offset; the next byte supplies the low 8
243 |       leng = (c>>3)-3  # c>>3 is 8..15 in this state, so leng is 5..12
244 |     else:
245 |       off = 0
246 |       leng = (c&63)+1  # match
247 |   elif state == 1: # decoding a literal with leng bytes remaining
248 |     out(c)
249 |     pM[i&BUFSIZE_max] = c  # also remember the byte so later matches can copy it
250 |     i += 1
251 |     leng -= 1
252 |     if leng == 0:
253 |       state = 0
254 |   elif state > 2: # state==3, state==4: expecting 2,3 match offset bytes
255 |     off = off<<8|c
256 |     state -= 1
257 |   else:  # state == 2, expecting last offset byte of a match code
258 |     off = off<<8|c
259 |     off = i-off-1  # off now is the position to copy from, no longer a distance
260 |     while leng:  # copies one byte at a time, so the source may run into bytes written by this loop
261 |       c=pM[off&BUFSIZE_max]
262 |       pM[i&BUFSIZE_max]=c
263 |       i += 1
264 |       off += 1
265 |       out(c)
266 |       leng -= 1
267 |     state = 0
268 | 
269 | pass
270 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
271 | 
272 | class WouldNotBeReached(Exception):  # raised by read_b(); the hcomp/pcomp driver loops catch and ignore it
273 |   """used for handling EOF in read_b() as execution does not continue after last byte (or end-of-segment in pcomp) is consumed"""
274 |   pass
275 | 
276 | def finish_output():  # write everything buffered by out() and move on to the next output file, if any
277 |   global output
278 |   try:
279 |     args.output[0].write(bytes(output))
280 |   except:  # stdout; NOTE(review): the bare except also catches unrelated failures before retrying via .buffer
281 |     args.output[0].buffer.write(bytes(output))
282 |   output = deque([])  # start the next segment with an empty buffer
283 |   if len(args.output) > 1:  # the last output file is kept for all remaining segments
284 |     args.output.pop(0)
285 | 
286 | import argparse  # argparse is already imported near the top of the file
287 | parser = argparse.ArgumentParser()
288 | parser.add_argument('method', help='run either hcomp or pcomp on each byte of the input\nfor hcomp output will be pairs of input and contexts', choices=['hcomp', 'pcomp'])
289 | parser.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin, help='input file')  # NOTE(review): the default sys.stdin is a text stream while the code below treats input items as byte values - confirm
290 | parser.add_argument('--append', type=argparse.FileType('rb'), dest='addseg', default=[], metavar='FILE', action='append', help='additional input files')
291 | parser.add_argument('--compare', type=argparse.FileType('rb'), dest='compare', default=None, metavar='EXPECTEDFILE', help='compare pcomp output and run ipdb for mismatch')
292 | parser.add_argument('output', nargs='*', type=argparse.FileType('wb'), default=[sys.stdout], help='output file')
293 | args = parser.parse_args()
294 | cmpbuf = None  # expected output for --compare, consumed by out()
295 | if args.compare:
296 |   cmpbuf = deque(args.compare.read())
297 | input_buf = deque(args.input.read())  # the whole first segment is read up front
298 | if args.method == 'pcomp':
299 |   input_buf.append(NONE)  # end of segment
300 | for additional_segment in args.addseg:  # each --append file becomes a further segment
301 |   input_buf.extend(additional_segment.read())
302 |   if args.method == 'pcomp':
303 |     input_buf.append(NONE)
304 | input_last_a = None  # last value read whose status has not been reported yet
305 | 
306 | def print_hcomp_status():  # report the last read value once: hcomp prints a status line, pcomp flushes at end of segment
307 |   global input_last_a
308 |   if input_last_a is None:  # nothing new since the last report
309 |     return
310 |   line = '{}: {}\n'.format(input_last_a, list(hH[:n]))  # "<input value>: [hH[0], ..., hH[n-1]]"
311 |   if args.method == 'pcomp' and input_last_a == NONE:  # end-of-segment marker was consumed
312 |     finish_output()
313 |   input_last_a = None
314 |   if args.method == 'hcomp':
315 |     try:  # stdout
316 |       args.output[0].write(line)
317 |     except:  # retry with UTF-8 bytes when writing the str failed (e.g. a file opened in binary mode)
318 |       args.output[0].write(bytes(line, 'utf-8'))
319 | 
320 | if args.method == 'hcomp':  # feed every input value to hcomp(), printing the contexts after each call
321 |   while len(input_buf) > 0:
322 |     input_c = NONE-1  # drop any pushed-back byte before each call
323 |     input_last_a = input_buf.popleft()
324 |     try:
325 |       hcomp(input_last_a)
326 |     except WouldNotBeReached:  # read_b() ran out of input inside the call
327 |       pass
328 |     print_hcomp_status()
329 | elif args.method == 'pcomp':  # same loop for pcomp(); output is written when an end-of-segment marker was read
330 |   while len(input_buf) > 0:
331 |     input_c = NONE-1  # drop any pushed-back byte before each call
332 |     input_last_a = input_buf.popleft()
333 |     try:
334 |       pcomp(input_last_a)
335 |     except WouldNotBeReached:  # read_b() ran out of input inside the call
336 |       pass
337 |     print_hcomp_status()
338 | 
339 | 
340 | 


--------------------------------------------------------------------------------
/test/lzpre:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/lzpre


--------------------------------------------------------------------------------
/test/lzpre.cpp:
--------------------------------------------------------------------------------
  1 | /* lzpre.cpp v2.1 - LZ77 preprocessor for ZPAQ
  2 | 
  3 | (C) 2011 Dell Inc. Written by Matt Mahoney
  4 | Licensed under GPL v3, http://www.gnu.org/copyleft/gpl.html
  5 | 
  6 | Usage: lzpre c|d input output
  7 | c = compress, d = decompress
  8 | 
  9 | Compressed format is byte oriented LZ77. Lengths and offsets are MSB first:
 10 | 
 11 |   00xxxxxx                            x+1 (1..64) literals follow
 12 |   01xxxyyy yyyyyyyy                   copy x+5 (5..12), offset y+1 (1..2048)
 13 |   10xxxxxx yyyyyyyy yyyyyyyy          copy x+1 (1..64), offset y+1 (1..65536)
 14 |   11xxxxxx yyyyyyyy yyyyyyyy yyyyyyyy copy x+1 (1..64), offset y+1 (1..2^24)
 15 | 
 16 | Decompression needs 16 MB memory. The compressor uses 64 MB consisting
 17 | of 2 16 MB buffers and a 8M (32 MB) hash table as an index to find matches.
 18 | For each byte position in the input buffer, the table stores 2 hashes
 19 | of order 10 and 5 in buckets of size 16 and 8 respectively. The buckets
 20 | are searched in that order, taking the longest match found (greedy
 21 | matching), breaking ties by taking the smaller offset. If a match
 22 | is found, then the remaining buckets are skipped. To update, the bucket
 23 | entry indexed by the low bits of position is replaced.
 24 | 
 25 | The minimum match length is 5, 6, or 7 for a match code of length
 26 | 2, 3, 4 respectively. Matches and literal strings longer than 64 are
 27 | coded as a series of length 64 codes.
 28 | 
 29 | As a speed optimization, each hash table entry contains a 24 bit pointer
 30 | and the byte pointed to, packed into 32 bits. If the byte in the hash
 31 | table mismatches, then this avoids a cache miss to compare the first
 32 | byte of the buffer. The hash table is aligned so that buckets do not
 33 | cross 64 byte cache lines.
 34 | 
 35 | To compile: g++ -O3 lzpre.cpp -o lzpre
 36 | 
 37 | */
 38 | 
 39 | #include <stdio.h>
 40 | #include <stdlib.h>
 41 | #include <string.h>
 42 | #include <time.h>
 43 | 
 44 | // Write literal sequence buf[i-lit..i-1], set lit=0
 45 | void write_literal(unsigned char* buf, int i, int& lit, FILE* out) {
 46 |   while (lit>0) {
 47 |     int lit1=lit;
 48 |     if (lit1>64) lit1=64;  // one literal code covers at most 64 bytes
 49 |     putc(lit1-1, out);  // code byte 00xxxxxx with x = run length - 1
 50 |     for (int j=i-lit; j<i-lit+lit1; ++j) putc(buf[j], out);  // the oldest lit1 pending bytes
 51 |     lit-=lit1;
 52 |   }
 53 | }
 54 | 
 55 | // Write match sequence of given length and offset (off in 1..2^24)
 56 | void write_match(int len, int off, FILE* out) {
 57 |   --off;  // offsets are stored as off-1
 58 |   while (len>0) {
 59 |     int len1=len;
 60 |     if (len1>64) len1=64;  // longer matches are split into chunks of at most 64
 61 |     if (off<2048 && len1>=5 && len1<=12) {  // 2 byte code 01xxxyyy yyyyyyyy
 62 |       putc(64+(len1-5)*8+(off>>8), out);
 63 |       putc(off, out);
 64 |     }
 65 |     else if (off<65536) {  // 3 byte code 10xxxxxx + 16 bit offset
 66 |       putc(128+len1-1, out);
 67 |       putc(off>>8, out);
 68 |       putc(off, out);
 69 |     }
 70 |     else {  // 4 byte code 11xxxxxx + 24 bit offset
 71 |       putc(192+len1-1, out);
 72 |       putc(off>>16, out);
 73 |       putc(off>>8, out);
 74 |       putc(off, out);
 75 |     }
 76 |     len-=len1;
 77 |   }
 78 | }
 79 | 
 80 | // Args are c|d input output
 81 | int main(int argc, char** argv) {
 82 | 
 83 |   // Start timer
 84 |   clock_t start=clock();
 85 | 
 86 |   // Check args
 87 |   int cmd;
 88 |   if (argc!=4 || (cmd=argv[1][0])!='c' && cmd!='d') {  // argv[1] is only read when argc==4 (|| short-circuits)
 89 |     fprintf(stderr, "To compress/decompress: lzpre c/d input output\n");
 90 |     return 1;
 91 |   }
 92 | 
 93 |   // Open files
 94 |   FILE* in=fopen(argv[2], "rb");
 95 |   if (!in) return perror(argv[2]), 1;
 96 |   FILE* out=fopen(argv[3], "wb");
 97 |   if (!out) return perror(argv[3]), 1;
 98 | 
 99 |   // Tunable LZ77 parameters
100 |   const int HTSIZE=1<<23;   // hashtable size, must be a power of 2
101 |   const int BUFSIZE=1<<24;  // buffer size (each of 2), max 2^24
102 |   const int HASHES=2;       // number of hashes computed per byte
103 |   const int HASHORDER[HASHES]={10,5};  // context order per hash
104 |   const int HASHMUL[HASHES]={44,48};   // hash multipliers
105 |   const unsigned int HASHBUCKET[HASHES]={16,8}; // searches per hash
106 | 
107 |   // Allocate buf (uncompressed data) and hashtable ht (compression only)
108 |   // ht[h] low 24 bits points to buf[i..i+HASHORDER-1], high 8 bits is buf[i]
109 |   unsigned char* buf=(unsigned char*)calloc(BUFSIZE, 1+(cmd=='c'));  // 2*BUFSIZE bytes to compress, BUFSIZE to decompress
110 |   int* ht=(int*)(cmd=='c'?calloc(HTSIZE+16, sizeof(int)):buf);  // the decode branch below never dereferences ht
111 |   if (!buf || !ht) return fprintf(stderr, "Out of memory\n"), 1;
112 |   ht+=16-((ht-(int*)0)&15);  // align on 64 byte address
113 |   int h[HASHES]={0};  // context hashes of buf[i..]
114 | 
115 |   // Compress
116 |   while (cmd=='c') {
117 | 
118 |     // Read block into second half of buf
119 |     const int n=fread(buf+BUFSIZE, 1, BUFSIZE, in)+BUFSIZE;  // n = end index of the data just read
120 |     if (n<=BUFSIZE) break;  // nothing was read: input exhausted
121 | 
122 |     // Scan the block just read in. ht may point to previous block
123 |     int lit=0;  // number of output literals pending
124 |     for (int i=BUFSIZE; i<n;) {
125 | 
126 |       // Search for longest match, or pick closest in case of tie
127 |       // Try the longest context orders first. If a match is found, then
128 |       // skip the lower orders as a speed optimization.
129 |       int blen=0, bp=0, len=0;
130 |       for (int j=0; j<HASHES; ++j) {
131 |         for (int k=0; k<int(HASHBUCKET[j]); ++k) {
132 |           int p=ht[h[j]+k];
133 |           if ((p>>24&255)==buf[i]) {  // compare in ht first
134 |             p=(p&BUFSIZE-1)+BUFSIZE;
135 |             if (p>=i) p-=BUFSIZE;
136 |             if (p>0 && p<i && p+(1<<24)>i) {
137 |               for (len=0; i+len<n && buf[p+len]==buf[i+len]; ++len);
138 |               if (len>blen || len==blen && p>bp) blen=len, bp=p;
139 |             }
140 |           }
141 |           if (blen>=128) break;
142 |         }
143 |         if (blen>=HASHORDER[j]) break;
144 |       }
145 | 
146 |       // If match is long enough, then output any pending literals first,
147 |       // and then the match. blen is the length of the match.
148 |       const int off=i-bp;  // offset
149 |       if (blen>=5+(off>=2048)+(off>=65536) && off>0 && off<(1<<24)) {  // minimum length is 5, 6 or 7 depending on the offset range
150 |         write_literal(buf, i, lit, out);
151 |         write_match(blen, off, out);
152 |       }
153 | 
154 |       // Otherwise add to literal length
155 |       else {
156 |         blen=1;  // advance by a single byte below
157 |         ++lit;
158 |       }
159 | 
160 |       // Update index, advance blen bytes
161 |       while (blen--) {
162 |         for (int j=0; j<HASHES; ++j) {
163 |           ht[h[j]+(i&HASHBUCKET[j]-1)]=(i&BUFSIZE-1)+(buf[i]<<24);
164 |         }
165 |         ++i;
166 |         for (int j=0; j<HASHES; ++j) {
167 |           if (i+HASHORDER[j]<=n) {
168 |             h[j]/=HASHBUCKET[j];
169 |             h[j]*=HASHMUL[j];
170 |             h[j]+=buf[i+HASHORDER[j]-1]+1;
171 |             h[j]*=HASHBUCKET[j];
172 |             h[j]&=HTSIZE-1;
173 |           }
174 |         }
175 |       }
176 |     }
177 | 
178 |     // Write pending literals at end of block
179 |     write_literal(buf, n, lit, out);
180 | 
181 |     // Move data from second half of buf to first half if more input
182 |     // is expected.
183 |     if (n==BUFSIZE*2) memmove(buf, buf+BUFSIZE, BUFSIZE);
184 |   }
185 | 
186 |   // Decode. state is as follows:
187 |   // 0 = expecting a literal or match code.
188 |   // 1 = decoding a literal with len bytes remaining.
189 |   // 2 = expecting last offset byte of a match code.
190 |   // 3,4 = expecting 2,3 match offset bytes.
191 |   if (cmd=='d') {
192 |     int c, i=0, state=0, len=0, off=0;
193 |     while ((c=getc(in))!=EOF) {
194 |       if (state==0) {
195 |         state=1+(c>>6);
196 |         if (state==1) off=0, len=c+1;  // literal
197 |         else if (state==2) off=c&7, len=(c>>3)-3;  // short match
198 |         else off=0, len=(c&63)+1;  // match
199 |       }
200 |       else if (state==1) { // literal
201 |         putc(buf[i++&BUFSIZE-1]=c, out);
202 |         if (--len==0) state=0;
203 |       }
204 |       else if (state>2) {  // reading offset
205 |         off=off<<8|c;
206 |         --state;
207 |       }
208 |       else { // state==2, match
209 |         off=off<<8|c;
210 |         off=i-off-1;  // off now is the position to copy from, no longer a distance
211 |         while (len--)
212 |           putc(buf[i++&BUFSIZE-1]=buf[off++&BUFSIZE-1], out);
213 |         state=0;
214 |       }
215 |     }
216 |   }
217 | 
218 |   // Print compression statistics
219 |   printf("%ld -> %ld in %1.2f sec\n", ftell(in), ftell(out),
220 |     double(clock()-start)/CLOCKS_PER_SEC);
221 |   return 0;  // NOTE(review): in and out are never fclose()d explicitly in this function
222 | }
223 | 
224 | 


--------------------------------------------------------------------------------
/test/mfast.cfg:
--------------------------------------------------------------------------------
comp 2 2 0 0 4 (hh hm ph pm n)
                    (where H gets the size of 2^hh in hcomp or 2^ph in pcomp,
                     M 2^hm or 2^pm and n is the number of
                     context-mixing components)
  0 cm 19 4   (will get an order 1 context)
  1 icm 16    (order 2, chained to isse)
  2 isse 19 1 (order 4, has reference to ICM component 1)
  3 mix2 0 0 2 24 0 (moderate adapting mixer between CM and ISSE
                     based on which predicts better, no contexts even for bits)
  (ICM and ISSE part adapted from fast.cfg)
hcomp
  r=a 2 (R2 = A, input byte in R2)
  d=0
  a<<= 9 *d=a (H[D] = A) (set context to actual byte)
  (leaving first 9 bits free for the partially decoded byte)
  a=r 2 (A = R2)
  *b=a (M[B] = A) (save input byte in rotating buffer)
                  (full M is used with pointer b)
  a=0 hash (shortcut for A = (A + M[B] + 512) * 773)
  b-- hash
  d= 1 *d=a (order 2 hash for H[1])
  b-- hash b-- hash
  d= 2 *d=a (order 4 hash for H[2])
  (b has now been decremented three times for this input byte. M holds
   2^hm = 4 bytes, so, assuming B addresses M modulo its size, this equals
   one step forward and the next input byte replaces the oldest stored one)
  (H[3] stays 0 as fixed context for MIX2)
  halt (execution stops here for this input byte)
end
27 | 
28 | 


--------------------------------------------------------------------------------
/test/min.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/min.pnm


--------------------------------------------------------------------------------
/test/mixedpi2.cfg:
--------------------------------------------------------------------------------
 1 | (mixed_pi2.cfg
 2 | use the next expected digit as context for CM or a general text model of fast.cfg
 3 | a MIX2 will select between them
 4 | To compress: zpaqd c mixed_pi2.cfg text_pi.zpaq text_with_appearance_of_pi.txt
 5 |   Copyright 2016 Kai Lüke kailueke@riseup.net
 6 |   This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
 7 |   under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
 8 | )
 9 | 
comp 2 14 0 0 4  (2^14 > 10000)
  0 cm 18 0   (order 1 or digits of pi)
  1 icm 16    (order 2, chained to isse)
  2 isse 19 1 (order 4)
  3 mix2 0 0 2 24 0 (moderate adapting mixer between CM and ISSE based on which predicts better)
hcomp
  r=a 2 (keep the input byte in R2)
  a=r 0
  a== 0 if (only first run: R0 is still 0 here and is set to 1 before the halt below)
    (Compute pi to 10000 digits using the formula:
       pi=4; for (d=r1*20/3;d>0;--d) pi=pi*d/(2*d+1)+2;
     where r1 is the number of base 100 digits.
     The precision is 1 bit per iteration so 20/3
     is slightly more than the log2(100) we need.)
    (NOTE for review: the code below sets r1 = 100*100 = 10000, runs
     d = 7*r1 iterations and stores one base 10 digit per byte of M, see
     the a%= 10 and a/= 10 steps. The formula comment above seems to
     describe an earlier base 100 variant.)
    a= 100 a*=a r=a 1 (r1 = digits base 100)
    a*= 7 d=a (d = n iterations)
    *b= 4 (M=4)
    do

      (multiply M *= d, carry in c)
      b=r 1 c=0
      do
        b--
        a=*b a*=d a+=c c=a a%= 10 *b=a
        a=c a/= 10 c=a
      a=b a> 0 while

      (divide M /= (2d+1), remainder in c)
      a=d a+=d a++ d=a
      do
        a=c a*= 10 a+=*b c=a a/=d *b=a
        a=c a%=d c=a
      a=r 1 b++ a>b while
      a=d a>>= 1 d=a

      (add 2)
      b=0 a= 2 a+=*b *b=a
    d-- a=d a== 0 until
    c= 2 (point to 4 of 3.14)
    a= 1
    r=a 0 (R0 = 1 marks the digits as computed, so this branch is skipped from now on)
    a<<= 14 a-- (last element of ring buffer)
    b=a
    a-= 4 (first element of ring buffer, pointer in r3)
    r=a 3
    halt (input 0 came from pcomp, also to restart c=2 is enough)
  endif
  (CM part)
  d=0
  a=r 2  a-= 48 (input byte minus 48, the character code of the digit 0)
  c--
  a==*c (compare with the digit stored in M just before C)
  c++
  if (pi: set context for expected digit)
    a=*c c++ a<<= 1 a++ a<<= 9 *d=a (distinguish between pi number context and character context by 1 bit for sure)
  else (other:)
    a=r 2 a<<= 10 *d=a c= 2 (set context to actual byte)
  endif
  
  (a in r2, lower border of ring buffer in r3)
  (ICM and ISSE part adapted from fast.cfg)
  (after every b-- below, b is moved up by 4 again if it fell below the
   border held in r3; the running hash is parked in d during that check)
  a=r 2
  *b=a a=0 (save in rotating buffer M)
  hash b--
    d=a (save hash) a=r 3 a>b if b++ b++ b++ b++ endif a=d
  hash d= 1 *d=a
  b--
    d=a (save hash) a=r 3 a>b if b++ b++ b++ b++ endif a=d
  hash b--
    d=a (save hash) a=r 3 a>b if b++ b++ b++ b++ endif a=d
  hash d= 2 *d=a

  halt
end
84 | 


--------------------------------------------------------------------------------
/test/monarch.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/monarch.bmp


--------------------------------------------------------------------------------
/test/monarch.flif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/monarch.flif


--------------------------------------------------------------------------------
/test/monarch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/monarch.png


--------------------------------------------------------------------------------
/test/monarch.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/monarch.pnm


--------------------------------------------------------------------------------
/test/monarch.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/monarch.webp


--------------------------------------------------------------------------------
/test/peppers.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/peppers.bmp


--------------------------------------------------------------------------------
/test/peppers.flif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/peppers.flif


--------------------------------------------------------------------------------
/test/peppers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/peppers.png


--------------------------------------------------------------------------------
/test/peppers.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/peppers.pnm


--------------------------------------------------------------------------------
/test/peppers.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/peppers.webp


--------------------------------------------------------------------------------
/test/pi10k.cfg:
--------------------------------------------------------------------------------
 1 | (pi10k.cfg 2016 Kai Lüke and Matt Mahoney
 2 | instead of generating the digits in pcomp phase, use the next expected digit as context
 3 | To compress: zpaqd cinst pi10k.cfg pi10k.zpaq pi10000.txt)
 4 | 
comp 0 14 0 0 1  (2^14 > 10000)
  0 cm 13 0
hcomp
  ifnot (only first run)
    (NOTE for review: ifnot tests the condition flag and not the input in A.
     This presumably relies on the flag being false on the very first call
     and staying true afterwards, as left by the final "a== 0 until" below,
     because the code after endif performs no comparison. Confirm against
     the ZPAQ specification.)
    (Compute pi to 10000 digits in M using the formula:
       pi=4; for (d=r1*20/3;d>0;--d) pi=pi*d/(2*d+1)+2;
     where r1 is the number of base 100 digits.
     The precision is 1 bit per iteration so 20/3
     is slightly more than the log2(100) we need.)
    (NOTE for review: the code below sets r1 = 100*100 = 10000, runs
     d = 7*r1 iterations and stores one base 10 digit per byte of M, see
     the a%= 10 and a/= 10 steps. The formula comment above seems to
     describe an earlier base 100 variant.)
    a= 100 a*=a r=a 1 (r1 = digits base 100)
    a*= 7 d=a (d = n iterations)
    *b= 4 (M=4)
    do

      (multiply M *= d, carry in c)
      b=r 1 c=0
      do
        b--
        a=*b a*=d a+=c c=a a%= 10 *b=a
        a=c a/= 10 c=a
      a=b a> 0 while

      (divide M /= (2d+1), remainder in c)
      a=d a+=d a++ d=a
      do
        a=c a*= 10 a+=*b c=a a/=d *b=a
        a=c a%=d c=a
      a=r 1 b++ a>b while
      a=d a>>= 1 d=a

      (add 2)
      b=0 a= 2 a+=*b *b=a
    d-- a=d a== 0 until
    (the loop leaves b=0 and d=0, so the code after endif starts at the
     first digit in M and writes the context to H[0])
    halt
  endif
  a=*b a<<= 9 *d=a b++ (set context for expected digit taken from M)
  halt
end
43 | 


--------------------------------------------------------------------------------
/test/pnm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (C) 2016 Kai Lüke kailueke@riseup.net
  3 | # This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
  4 | # under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
  5 | ### BEGIN OF EDITABLE SECTION - do not remove this marker or place something before and after it
  6 | 
  7 | # definition of the array sizes and the context mixing linear tree
hh = 4  # size 2**hh of hH[] is enough for the 13 context entries
hm = 17  # increase for image width > (2**17)/3
ph = 0  # pH[] gets 2**ph entries, i.e. a single one
pm = 0  # pM[] gets 2**pm entries, i.e. a single one
# n is the number of components. Each dict key is a component index and each
# value its definition; the text in parentheses names the context that
# hcomp() stores in the hH[] entry of the same index.
n = len({
0: "cm 19 22 (L, color)",  # SIZE 2**19, COUNTLIMIT 22
1: "cm 19 22 (U, color)",
2: "cm 19 22 (UL, color)",
3: "cm 19 22 (UR, color)",
4: "cm 19 22 (avg L UL U UR, color)",
5: "icm 10 (L, color)",
6: "icm 10 (U, color)",
7: "cm 19 22 (c)",
8: "const 160",
9: "mix 11 0 9 60 255", # sizebits j m rate mask. adaptive mixing of predictions 0 to 9
10: "icm 10 (avg l u, color)",
11: "isse 24 10 (hash of UL U L c color)",
12: "mix2 2 9 11 90 0 (color)",  # adaptive mixing of first mixer and isse
})
pcomp_invocation = "./subtract_green"  # information for zpaqd about preprocessor invocation,
#                        like you would execute it in the shell, passed additional parameters
#                        at the end will be <inputfile> <outputfile>
 30 | 
 31 | 
 32 | # put commonly used functions and variables here
 33 | # as pcomp and hcomp do only include their own definitions
 34 | 
 35 | 
 36 | ### END OF EDITABLE SECTION - do not remove this marker or place something before and after it
 37 | import sys, array, argparse
 38 | from collections import deque
 39 | input_buf = []
 40 | output = deque([])
 41 | NONE = 4294967295
 42 | input_c = NONE-1
 43 | 
 44 | def out(a):
 45 |   if cmpbuf is not None:
 46 |     expected = cmpbuf.popleft()
 47 |     if a != expected:
 48 |       import ipdb; ipdb.set_trace()
 49 |   output.append(a)
 50 | 
 51 | def read_b():
 52 |   global input_c, input_buf, input_last_a
 53 |   if input_c == NONE-1:  # was already consumed
 54 |     if len(input_buf) == 0:
 55 |       raise WouldNotBeReached
 56 |     a = input_buf.popleft()
 57 |     print_hcomp_status()
 58 |     input_last_a = a
 59 |     return a
 60 |   else:
 61 |     tmp = input_c
 62 |     input_c = NONE-1
 63 |     return tmp
 64 | 
 65 | 
 66 | def peek_b():
 67 |   global input_c
 68 |   if input_c == NONE-1:
 69 |     push_b(read_b())
 70 |   return input_c
 71 | 
 72 | def push_b(c):
 73 |   """can only be executed once and will overwrite otherwise"""
 74 |   global input_c
 75 |   input_c = c
 76 | 
 77 | def error():
 78 |   """zpaq execution will fail with: Bad ZPAQL opcode"""
 79 |   raise Exception
 80 | 
 81 | hH = array.array('L', [0 for x in range(0, 2**hh)])
 82 | hM = array.array('B', [0 for x in range(0, 2**hm)])
 83 | 
 84 | pH = array.array('L', [0 for x in range(0, 2**ph)])
 85 | pM = array.array('B', [0 for x in range(0, 2**pm)])
 86 | 
 87 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions
 88 | 
 89 | 
# Return the next input byte that is neither whitespace nor part of a comment
# line starting with '#'. Everything that is skipped is discarded.
def read_after_whitespace():
  while True:
    c = read_b()
    if c == 0x20 or c == 9 or c == 10 or c == 13: # skip whitespace (space, tab, LF, CR)
      continue
    elif c == 0x23: # '#': skip comment line up to and including its LF or CR
     while c != 10 and c != 13:
       c = read_b()
    else:
      return c
100 | 
line_len = 0  # (1+width)*3 for all three colors
total_color_bytes = 0  # width*height*3, computed by read_after_header()

# Parse the three decimal numbers that follow the "P6" magic: width, height
# and maximum color value. Sets line_len and total_color_bytes, calls error()
# if the maximum color value exceeds 255, and returns the byte read after the
# single delimiter that ends the maximum color value.
def read_after_header():
  global line_len, total_color_bytes
  c = read_after_whitespace() # skip over first delimiter
  z = c - 0x30  # value of the digit if c is one of '0'..'9'
  line_len = 0
  while z < 10 and z >= 0:  # read width
    line_len *= 10
    line_len += z
    z = read_b() - 0x30
  total_color_bytes = line_len * 3  # line_len still holds the plain width here
  line_len += 1  # increase by one to also have space for the upper left pixel
  line_len *= 3
  c = read_after_whitespace()
  z = c - 0x30
  height = 0
  while z < 10 and z >= 0:  # read height
    height *= 10
    height += z
    z = read_b() - 0x30
  total_color_bytes *= height
  c = read_after_whitespace()
  z = c - 0x30
  maxc = 0
  while z < 10 and z >= 0:  # read maximum color value
    maxc *= 10
    maxc += z
    z = read_b() - 0x30
  if maxc > 255:
    error() # only three 8-bit RGB channels supported
  # the single whitespace after the number was already consumed into z
  return read_b() # read first color byte
135 | 
buf_pos = 0  # index in hM[] where the next byte is stored
color = 0  # counter modulo 3, reset to 0 for the first color byte after a header
reading_header = True  # True while a "P6" header may start
color_bytes_read = 0  # bytes counted since the last detected header
first_run = True  # True only for the very first call of hcomp()

# Called once per input byte c; stores the contexts for the components in
# hH[0] .. hH[12]. The last line_len bytes are kept in hM[] as a ring buffer
# so that neighbouring pixel values can be looked up.
def hcomp(c):  # pcomp bytecode is passed first (or 0 if there is none)
  global buf_pos, reading_header, color, color_bytes_read, first_run
  if first_run:
    first_run = False  # skip pcomp bytecode
    if c == 0:
      return
    if c == 1:
      c = read_b()
      c += read_b()*256 # read length (two bytes, low byte first)
      while c > 0:
        hH[0] = read_b() << 9  # try to save some space when encoding pcomp bytecode
        hH[5] = hH[0]
        c -= 1
      return
  color = (color + 1) % 3
  if reading_header and c == 0x50:  # 'P'
    c = read_b()
    if c == 0x36: # detected header P6
      c = read_after_header()
      color = 0  # red
      color_bytes_read = 0
      reading_header = False
  color_bytes_read += 1
  # NOTE(review): line_len stays 0 until a P6 header was parsed, so the modulo
  # operations below raise ZeroDivisionError in Python for other input.
  # Before c is stored, hM[buf_pos] holds the byte seen line_len bytes ago.
  # The offsets +4, +1, +7 and -2 therefore address the bytes 3*width,
  # 3*width+3, 3*width-3 and 3 positions before the byte that follows c.
  upper = hM[(buf_pos+4) % line_len]
  upper_left = hM[(buf_pos+1)%line_len]
  upper_right = hM[(buf_pos+7) % line_len]
  left = hM[(line_len+buf_pos-2) % line_len]  # fetch left pixel's relevant color part
  hM[buf_pos] = c
  buf_pos = (buf_pos + 1) % line_len # move history buffer pointer
  # contexts are shifted left by 9, the two bits below the value carry color
  hH[0] = ((left << 2) + color) << 9
  hH[1] = ((upper << 2) + color) << 9
  hH[2] = ((upper_left << 2) + color) << 9
  hH[3] = ((upper_right << 2) + color) << 9
  hH[4] = (((upper_left + upper + upper_right + left)//4 << 2) + color) << 9
  hH[5] = ((left << 2) + color) << 9
  hH[6] = ((upper << 2) + color) << 9
  hH[7] = c << 9
  hH[10] = (((left+upper)//2 << 2) + color) << 9
  # NOTE(review): this product can exceed 32 bits in Python; confirm that the
  # wider value is acceptable for the array typecode and the compiled code.
  hH[11] = (((((((upper_left + 512) * 773) + upper + 512) * 773) + left + 512) * 773) + color + 512) * 773 + c # 4 order hash
  hH[12] = color
  # dH = |upper - upper_left| and dV = |left - upper_left|
  if upper > upper_left:
    dH = upper - upper_left
  else:
    dH = upper_left - upper
  if left > upper_left:
    dV = left - upper_left
  else:
    dV = upper_left - left
  if (dH + dV) < 17: # threshold for simple smooth region detection
    hH[9] = (4 + color) << 8
  else:
    hH[9] = color << 8
  if color_bytes_read == total_color_bytes: # await next PNM image
    reading_header = True
196 | 
197 | pass
198 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
199 | 
200 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions + read_b(), peek_b(), push_b(), out(b)
201 | 
# Copy whitespace and comment lines starting with '#' to the output and
# return the first byte that belongs to neither. The returned byte itself is
# not written by this function.
def pcomp_read_after_whitespace():
  while True:
    c = read_b()
    if c == 0x20 or c == 9 or c == 10 or c == 13: # copy whitespace (space, tab, LF, CR)
      out(c)
      continue
    elif c == 0x23: # '#': copy comment line
     while c != 10 and c != 13:
       out(c)
       c = read_b()
     out(c)  # the LF or CR that ended the comment line
    else:
      return c
215 | 
# Copy the three decimal numbers after the "P6" magic (width, height, maximum
# color value) to the output unchanged, including the byte that terminates
# each number. Returns the byte read after that, which is not written here.
def pcomp_read_after_header():
  c = pcomp_read_after_whitespace() # skip over first delimiter
  out(c)
  z = c - 0x30  # value of the digit if c is one of '0'..'9'
  while z < 10 and z >= 0:  # copy width
    c = read_b()
    out(c)
    z = c - 0x30
  c = pcomp_read_after_whitespace()
  out(c)
  z = c - 0x30
  while z < 10 and z >= 0:  # copy height
    c = read_b()
    out(c)
    z = c - 0x30
  c = pcomp_read_after_whitespace()
  out(c)
  z = c - 0x30
  while z < 10 and z >= 0:  # copy maximum color value
    c = read_b()
    out(c)
    z = c - 0x30
  # the single whitespace after the number was already read and written above
  return read_b() # read first color byte
240 | 
pcomp_reading_header = True  # True while a "P6" header may start
pcomp_color = 0  # counter modulo 3, see the branches at the end of pcomp()
g = 0  # last second-channel byte, added to the third channel in pcomp()

# Called once per byte of the preprocessed data. Header bytes are copied
# unchanged; for pixel data the second byte of every triple is added
# (modulo 256) to the first and third byte before they are written.
def pcomp(c):  # passing c is like having c = read_b() as first line
  global pcomp_reading_header, pcomp_color, g
  if c == NONE:  # end of segment: expect a header again
    pcomp_reading_header = True
    return
  pcomp_color = (pcomp_color + 1) % 3
  if pcomp_reading_header and c == 0x50:  # 'P'
    out(c)
    c = read_b()
    if c == 0x36: # detected header P6
      out(c)
      c = pcomp_read_after_header()
      pcomp_color = 0  # red
      pcomp_reading_header = False
  if pcomp_color == 1:
    # wrong data: presumably only reached while no P6 header was detected
    # (after a header the counter is at 0 or 2 here); the byte is copied
    # unchanged and the counter is reset
    out(c)
    pcomp_color = 0
  elif pcomp_color == 0:
    # c is the first byte of a triple; read the second one right away
    r = c
    c = read_b()
    out((r+c) % 256)
    out(c)
    g = c
    pcomp_color = 1  # the next call increments this to 2
  elif pcomp_color == 2:
    out((c+g) % 256)  # third byte of the triple
272 | 
273 | pass
274 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
275 | 
class WouldNotBeReached(Exception):
  """Raised by read_b() at end of input.

  Execution does not continue after the last byte (or the end-of-segment in
  pcomp) is consumed, so the code following such a read_b() call is skipped.
  """
279 | 
def finish_output():
  """Write the collected output bytes to the current output file.

  The pending output is cleared afterwards. If more than one output file was
  given, the current one is dropped from args.output so that the next call
  writes to the following file; the last file is kept and reused.

  Errors raised by the write (for example OSError) are passed on to the
  caller instead of being hidden by a fallback attempt.
  """
  global output
  stream = args.output[0]
  # A text stream such as sys.stdout rejects bytes but exposes its binary
  # stream as .buffer; binary files have no such attribute and are used
  # directly.
  getattr(stream, 'buffer', stream).write(bytes(output))
  output = deque([])
  if len(args.output) > 1:
    args.output.pop(0)
289 | 
import argparse
# Command line: <method> [input] [output ...] [--append FILE]... [--compare EXPECTEDFILE]
parser = argparse.ArgumentParser()
parser.add_argument('method', help='run either hcomp or pcomp on each byte of the input\nfor hcomp output will be pairs of input and contexts', choices=['hcomp', 'pcomp'])
# The default must be the binary stream behind stdin: reading the text stream
# would yield one-character strings instead of the byte values (ints) that
# hcomp() and pcomp() compare against.
parser.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=getattr(sys.stdin, 'buffer', sys.stdin), help='input file')
parser.add_argument('--append', type=argparse.FileType('rb'), dest='addseg', metavar='FILE', default=[], action='append', help='additional input files')
parser.add_argument('--compare', type=argparse.FileType('rb'), dest='compare', default=None, metavar='EXPECTEDFILE', help='compare pcomp output and run ipdb for mismatch')
parser.add_argument('output', nargs='*', type=argparse.FileType('wb'), default=[sys.stdout], help='output file')
args = parser.parse_args()
# Expected output bytes for out() to compare against, or None if not requested
cmpbuf = None
if args.compare:
  cmpbuf = deque(args.compare.read())
# All input bytes are queued up front; in pcomp mode every segment (the main
# input and each --append file) is followed by the NONE marker.
input_buf = deque(args.input.read())
if args.method == 'pcomp':
  input_buf.append(NONE)  # end of segment
for additional_segment in args.addseg:
  input_buf.extend(additional_segment.read())
  if args.method == 'pcomp':
    input_buf.append(NONE)
# Byte most recently taken from input_buf whose status was not reported yet
input_last_a = None
309 | 
def print_hcomp_status():
  """Report the byte that was consumed last, then mark it as reported.

  Does nothing if no byte is pending. In pcomp mode the collected output is
  flushed with finish_output() when the pending byte is the end-of-segment
  marker NONE. In hcomp mode one line "<byte>: <first n entries of hH>" is
  written to the current output, as text if the stream accepts text and
  UTF-8 encoded otherwise.
  """
  global input_last_a
  if input_last_a is None:
    return
  reported = input_last_a
  if args.method == 'pcomp' and reported == NONE:
    finish_output()
  input_last_a = None
  if args.method == 'hcomp':
    line = '{}: {}\n'.format(reported, list(hH[:n]))
    stream = args.output[0]
    try:  # stdout
      stream.write(line)
    except TypeError:
      # Binary streams reject str. Only this case is retried, so that real
      # write errors are raised once instead of triggering a second attempt.
      stream.write(bytes(line, 'utf-8'))
323 | 
# Feed the queued input to the selected function, one call per byte. Bytes
# that the function fetches itself through read_b() are taken from the same
# queue and therefore do not trigger a call of their own.
if args.method in ('hcomp', 'pcomp'):
  while input_buf:
    input_c = NONE-1  # no pushed-back byte at the start of a call
    input_last_a = input_buf.popleft()
    try:
      if args.method == 'hcomp':
        hcomp(input_last_a)
      else:
        pcomp(input_last_a)
    except WouldNotBeReached:
      pass  # read_b() ran out of input in the middle of the call
    print_hcomp_status()
342 | 


--------------------------------------------------------------------------------
/test/rafale.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/rafale.bmp


--------------------------------------------------------------------------------
/test/rafale.flif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/rafale.flif


--------------------------------------------------------------------------------
/test/rafale.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/rafale.png


--------------------------------------------------------------------------------
/test/rafale.pnm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/rafale.pnm


--------------------------------------------------------------------------------
/test/rafale.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pothos/zpaqlpy/487fe0f544c6c63db8638525706a2c86c5b98ba3/test/rafale.webp


--------------------------------------------------------------------------------
/test/rle:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # c in out
 3 | # d in out
 4 | import sys
 5 | 
 6 | mode = sys.argv[1]
 7 | input = sys.argv[2]
 8 | output = sys.argv[3]
 9 | 
10 | if mode == 'c':
11 |   with open(input, mode='rb') as fi:
12 |     with open(output, mode='wb') as fo:
13 |       last = None
14 |       count = 0
15 |       data = []
16 |       for a in fi.read():
17 |         if a != last or count == 255:
18 |           if last != None:
19 |             data.append(last)
20 |             data.append(count)
21 |           last = a
22 |           count = 1
23 |         else:
24 |           count += 1
25 |       if last != None:
26 |         data.append(last)
27 |         data.append(count)
28 |       fo.write(bytes(data))
29 | 
30 | elif mode == 'd':
31 |   with open(input, mode='rb') as fi:
32 |     with open(output, mode='wb') as fo:
33 |       data = []
34 |       ichar = False
35 |       charv = None
36 |       for a in fi.read():
37 |         if not ichar:
38 |           charv = a
39 |           ichar = True
40 |           continue
41 |         else:
42 |           for i in range(0, a):
43 |             data.append(charv)
44 |           ichar = False
45 |       fo.write(bytes(data))
46 | 


--------------------------------------------------------------------------------
/test/rle_cm.cfg:
--------------------------------------------------------------------------------
  1 | comp 21 0 21 24 1 (hh hm ph pm n)
  2 |   0 cm 18 255
  3 | hcomp
  4 |   r=a 255        (0)
  5 |   (t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255)        (2)
  6 |   (         ifN t0 goto init_code)        (2)
  7 |   a=r 0        (2)
  8 |   a== 0        (4)
  9 |   jf 3        (6)
 10 |   lj 23        (8)
 11 |   (         if t254 goto cont_reading)        (11)
 12 |   a=r 254        (11)
 13 |   a== 0        (13)
 14 |   jt 3        (15)
 15 |   lj 68        (17)
 16 |   (         goto call_next)        (20)
 17 |   lj 178        (20)
 18 |   (        :init_code:)        (23)
 19 |   (         t0 = 1)        (23)
 20 |   a= 1        (23)
 21 |   r=a 0        (25)
 22 |   (         t252 = t0)        (27)
 23 |   r=a 252        (27)
 24 |   (         goto read_b_end~)        (29)
 25 |   lj 92        (29)
 26 |   (        :read_b:)        (32)
 27 |   (         t1 = t253 == 4294967294)        (32)
 28 |   c=0        (32)
 29 |   c--        (33)
 30 |   c--        (34)
 31 |   a=r 253        (35)
 32 |   a==c        (37)
 33 |   a=0        (38)
 34 |   jf 1        (39)
 35 |   a++        (41)
 36 |   r=a 1        (42)
 37 |   (         if t1 goto do_read_in)        (44)
 38 |   a== 0        (44)
 39 |   jt 3        (46)
 40 |   lj 63        (48)
 41 |   (         t255 = t253)        (51)
 42 |   a=r 253        (51)
 43 |   r=a 255        (53)
 44 |   (         t253 = 4294967294)        (55)
 45 |   a=0        (55)
 46 |   a--        (56)
 47 |   a--        (57)
 48 |   r=a 253        (58)
 49 |   (         goto cont_reading)        (60)
 50 |   lj 68        (60)
 51 |   (        :do_read_in:)        (63)
 52 |   (         t254 = 1)        (63)
 53 |   a= 1        (63)
 54 |   r=a 254        (65)
 55 |   (         halt)        (67)
 56 |   halt        (67)
 57 |   (        :cont_reading:)        (68)
 58 |   (         t254 = 0)        (68)
 59 |   a=0        (68)
 60 |   r=a 254        (69)
 61 |   (         t1 = t255)        (71)
 62 |   a=r 255        (71)
 63 |   r=a 1        (73)
 64 |   (         t2 = H[t0]〈〉)        (75)
 65 |   d=r 0        (75)
 66 |   a=*d        (77)
 67 |   r=a 2        (78)
 68 |   (         t0 = t0 - 1)        (80)
 69 |   a=r 0        (80)
 70 |   a--        (82)
 71 |   r=a 0        (83)
 72 |   (         t0 = H[t0]〈〉)        (85)
 73 |   d=a        (85)
 74 |   a=*d        (86)
 75 |   r=a 0        (87)
 76 |   (         goto find_label_ret_id)        (89)
 77 |   lj 208        (89)
 78 |   (        :read_b_end~:)        (92)
 79 |   (                    68: mode = 0)        (92)
 80 |   (         H[t0+1]〈mode〉 = 0)        (92)
 81 |   a=r 0        (92)
 82 |   a++        (94)
 83 |   d=a        (95)
 84 |   *d=0        (96)
 85 |   (                    70: def hcomp〈c〉:)        (97)
 86 |   (         goto hcomp_end~)        (97)
 87 |   lj 178        (97)
 88 |   (        :hcomp:)        (100)
 89 |   (         MarkTempVarStart)        (100)
 90 |   (Arg c at t0 + 1)        (100)
 91 |   (                    71:   global mode)        (100)
 92 |   (Global mode via H[t252+1])        (100)
 93 |   (                    72:   c <<= 1)        (100)
 94 |   (         H[t0+1]〈c〉 = H[t0+1]〈c〉 << 1)        (100)
 95 |   c= 1        (100)
 96 |   a=r 0        (102)
 97 |   a++        (104)
 98 |   d=a        (105)
 99 |   a=*d        (106)
100 |   a<<=c        (107)
101 |   c=a        (108)
102 |   *d=c        (109)
103 |   (                    73:   if not mode:)        (110)
104 |   (         t1 = ! H[t252+1]〈mode〉)        (110)
105 |   a=r 252        (110)
106 |   a++        (112)
107 |   d=a        (113)
108 |   a=*d        (114)
109 |   a++        (115)
110 |   a== 1        (116)
111 |   jt 1        (118)
112 |   a=0        (120)
113 |   r=a 1        (121)
114 |   (         ifN t1 goto else_1)        (123)
115 |   a== 0        (123)
116 |   jf 3        (125)
117 |   lj 135        (127)
118 |   (                    74:     mode = 1)        (130)
119 |   (         H[t252+1]〈mode〉 = 1)        (130)
120 |   *d= 1        (130)
121 |   (         goto endif_2)        (132)
122 |   lj 145        (132)
123 |   (        :else_1:)        (135)
124 |   (                    76:     c += 1)        (135)
125 |   (         H[t0+1]〈c〉 = H[t0+1]〈c〉 + 1)        (135)
126 |   a=r 0        (135)
127 |   a++        (137)
128 |   d=a        (138)
129 |   *d++        (139)
130 |   (                    77:     mode = 0)        (140)
131 |   (         H[t252+1]〈mode〉 = 0)        (140)
132 |   a=r 252        (140)
133 |   a++        (142)
134 |   d=a        (143)
135 |   *d=0        (144)
136 |   (        :endif_2:)        (145)
137 |   (                    78:   c<<= 9)        (145)
138 |   (         H[t0+1]〈c〉 = H[t0+1]〈c〉 << 9)        (145)
139 |   c= 9        (145)
140 |   a=r 0        (147)
141 |   a++        (149)
142 |   d=a        (150)
143 |   a=*d        (151)
144 |   a<<=c        (152)
145 |   c=a        (153)
146 |   *d=c        (154)
147 |   (                    79:   hH[0] = c)        (155)
148 |   (         t1 = 0)        (155)
149 |   a=0        (155)
150 |   r=a 1        (156)
151 |   (         H[t1]〈〉 = H[t0+1]〈c〉)        (158)
152 |   d=r 1        (158)
153 |   *d=c        (160)
154 |   (insert return as it might not be done by the function:)        (161)
155 |   (         t2 = H[t0]〈〉)        (161)
156 |   d=r 0        (161)
157 |   a=*d        (163)
158 |   r=a 2        (164)
159 |   (         t0 = t0 - 1)        (166)
160 |   a=r 0        (166)
161 |   a--        (168)
162 |   r=a 0        (169)
163 |   (         t0 = H[t0]〈〉)        (171)
164 |   d=a        (171)
165 |   a=*d        (172)
166 |   r=a 0        (173)
167 |   (         goto find_label_ret_id)        (175)
168 |   lj 208        (175)
169 |   (         MarkTempVarEnd)        (178)
170 |   (        :hcomp_end~:)        (178)
171 |   (                    81: pass)        (178)
172 |   (        :call_next:)        (178)
173 |   (         t253 = 4294967294)        (178)
174 |   a=0        (178)
175 |   a--        (179)
176 |   a--        (180)
177 |   r=a 253        (181)
178 |   (         H[t0+2]〈〉 = t0)        (183)
179 |   a=r 0        (183)
180 |   a+= 2        (185)
181 |   d=a        (187)
182 |   a=r 0        (188)
183 |   *d=a        (190)
184 |   (saved bsp, return id:)        (191)
185 |   (         H[t0+3]〈〉 = 0)        (191)
186 |   d++        (191)
187 |   *d=0        (192)
188 |   (push arguments:)        (193)
189 |   (         H[t0+4]〈〉 = t255)        (193)
190 |   d++        (193)
191 |   a=r 255        (194)
192 |   *d=a        (196)
193 |   (         t0 = t0 + 3)        (197)
194 |   c= 3        (197)
195 |   a=r 0        (199)
196 |   a+=c        (201)
197 |   r=a 0        (202)
198 |   (         goto hcomp)        (204)
199 |   lj 100        (204)
200 |   (        :return_id_0:)        (207)
201 |   (         halt)        (207)
202 |   halt        (207)
203 |   (        :find_label_ret_id:)        (208)
204 |   (         t4 = t0 > 1048377)        (208)
205 |   a= 15        (208)
206 |   a<<= 8        (210)
207 |   a+= 255        (212)
208 |   a<<= 8        (214)
209 |   a+= 57        (216)
210 |   c=a        (218)
211 |   a=r 0        (219)
212 |   a>c        (221)
213 |   a=0        (222)
214 |   jf 1        (223)
215 |   a++        (225)
216 |   r=a 4        (226)
217 |   (         if t4 goto throw_error)        (228)
218 |   a== 0        (228)
219 |   jt 3        (230)
220 |   lj 244        (232)
221 |   (         ifEq t2 0 goto return_id_0)        (235)
222 |   c=r 2        (235)
223 |   a=0        (237)
224 |   a==c        (238)
225 |   jf 3        (239)
226 |   lj 207        (241)
227 |   (        :throw_error:)        (244)
228 |   (         error)        (244)
229 |   error        (244)
230 |   (         halt)        (245)
231 |   halt        (245)
232 | pcomp ./rle c ;
233 |   r=a 255        (0)
234 |   (t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255)        (2)
235 |   (         ifN t0 goto init_code)        (2)
236 |   a=r 0        (2)
237 |   a== 0        (4)
238 |   jf 3        (6)
239 |   lj 23        (8)
240 |   (         if t254 goto cont_reading)        (11)
241 |   a=r 254        (11)
242 |   a== 0        (13)
243 |   jt 3        (15)
244 |   lj 68        (17)
245 |   (         goto call_next)        (20)
246 |   lj 191        (20)
247 |   (        :init_code:)        (23)
248 |   (         t0 = 1)        (23)
249 |   a= 1        (23)
250 |   r=a 0        (25)
251 |   (         t252 = t0)        (27)
252 |   r=a 252        (27)
253 |   (         goto read_b_end~)        (29)
254 |   lj 92        (29)
255 |   (        :read_b:)        (32)
256 |   (         t1 = t253 == 4294967294)        (32)
257 |   c=0        (32)
258 |   c--        (33)
259 |   c--        (34)
260 |   a=r 253        (35)
261 |   a==c        (37)
262 |   a=0        (38)
263 |   jf 1        (39)
264 |   a++        (41)
265 |   r=a 1        (42)
266 |   (         if t1 goto do_read_in)        (44)
267 |   a== 0        (44)
268 |   jt 3        (46)
269 |   lj 63        (48)
270 |   (         t255 = t253)        (51)
271 |   a=r 253        (51)
272 |   r=a 255        (53)
273 |   (         t253 = 4294967294)        (55)
274 |   a=0        (55)
275 |   a--        (56)
276 |   a--        (57)
277 |   r=a 253        (58)
278 |   (         goto cont_reading)        (60)
279 |   lj 68        (60)
280 |   (        :do_read_in:)        (63)
281 |   (         t254 = 1)        (63)
282 |   a= 1        (63)
283 |   r=a 254        (65)
284 |   (         halt)        (67)
285 |   halt        (67)
286 |   (        :cont_reading:)        (68)
287 |   (         t254 = 0)        (68)
288 |   a=0        (68)
289 |   r=a 254        (69)
290 |   (         t1 = t255)        (71)
291 |   a=r 255        (71)
292 |   r=a 1        (73)
293 |   (         t2 = H[t0]〈〉)        (75)
294 |   d=r 0        (75)
295 |   a=*d        (77)
296 |   r=a 2        (78)
297 |   (         t0 = t0 - 1)        (80)
298 |   a=r 0        (80)
299 |   a--        (82)
300 |   r=a 0        (83)
301 |   (         t0 = H[t0]〈〉)        (85)
302 |   d=a        (85)
303 |   a=*d        (86)
304 |   r=a 0        (87)
305 |   (         goto find_label_ret_id)        (89)
306 |   lj 221        (89)
307 |   (        :read_b_end~:)        (92)
308 |   (                    86: case = 0)        (92)
309 |   (         H[t0+1]〈case〉 = 0)        (92)
310 |   a=r 0        (92)
311 |   a++        (94)
312 |   d=a        (95)
313 |   *d=0        (96)
314 |   (                    87: last = 0)        (97)
315 |   (         H[t0+2]〈last〉 = 0)        (97)
316 |   d++        (97)
317 |   *d=0        (98)
318 |   (                    89: def pcomp〈c〉:  # passing c is like having c = read_b〈〉 as first line)        (99)
319 |   (         goto pcomp_end~)        (99)
320 |   lj 191        (99)
321 |   (        :pcomp:)        (102)
322 |   (         MarkTempVarStart)        (102)
323 |   (Arg c at t0 + 1)        (102)
324 |   (                    90:   global case, last)        (102)
325 |   (Global case via H[t252+1])        (102)
326 |   (Global last via H[t252+2])        (102)
327 |   (                    91:   if case == 0:  # c is byte to load)        (102)
328 |   (         t1 = H[t252+1]〈case〉 == 0)        (102)
329 |   c=0        (102)
330 |   a=r 252        (103)
331 |   a++        (105)
332 |   d=a        (106)
333 |   a=*d        (107)
334 |   a==c        (108)
335 |   a=0        (109)
336 |   jf 1        (110)
337 |   a++        (112)
338 |   r=a 1        (113)
339 |   (         ifN t1 goto else_2)        (115)
340 |   a== 0        (115)
341 |   jf 3        (117)
342 |   lj 138        (119)
343 |   (                    92:     case = 1)        (122)
344 |   (         H[t252+1]〈case〉 = 1)        (122)
345 |   *d= 1        (122)
346 |   (                    93:     last = c)        (124)
347 |   (         H[t252+2]〈last〉 = H[t0+1]〈c〉)        (124)
348 |   a=r 0        (124)
349 |   a++        (126)
350 |   d=a        (127)
351 |   c=*d        (128)
352 |   a=r 252        (129)
353 |   a+= 2        (131)
354 |   d=a        (133)
355 |   *d=c        (134)
356 |   (         goto endif_3)        (135)
357 |   lj 174        (135)
358 |   (        :else_2:)        (138)
359 |   (                    95:     case = 0)        (138)
360 |   (         H[t252+1]〈case〉 = 0)        (138)
361 |   a=r 252        (138)
362 |   a++        (140)
363 |   d=a        (141)
364 |   *d=0        (142)
365 |   (                    96:     while c > 0:)        (143)
366 |   (        :while_4:)        (143)
367 |   (         t1 = H[t0+1]〈c〉 > 0)        (143)
368 |   c=0        (143)
369 |   a=r 0        (144)
370 |   a++        (146)
371 |   d=a        (147)
372 |   a=*d        (148)
373 |   a>c        (149)
374 |   a=0        (150)
375 |   jf 1        (151)
376 |   a++        (153)
377 |   r=a 1        (154)
378 |   (         ifN t1 goto whileend_6)        (156)
379 |   a== 0        (156)
380 |   jf 3        (158)
381 |   lj 174        (160)
382 |   (                    97:       c-= 1)        (163)
383 |   (         H[t0+1]〈c〉 = H[t0+1]〈c〉 - 1)        (163)
384 |   *d--        (163)
385 |   (                    98:       out〈last〉)        (164)
386 |   (         out H[t252+2]〈last〉)        (164)
387 |   a=r 252        (164)
388 |   a+= 2        (166)
389 |   d=a        (168)
390 |   a=*d        (169)
391 |   out        (170)
392 |   (         goto while_4)        (171)
393 |   lj 143        (171)
394 |   (        :whileend_6:)        (174)
395 |   (        :endif_3:)        (174)
396 |   (insert return as it might not be done by the function:)        (174)
397 |   (         t2 = H[t0]〈〉)        (174)
398 |   d=r 0        (174)
399 |   a=*d        (176)
400 |   r=a 2        (177)
401 |   (         t0 = t0 - 1)        (179)
402 |   a=r 0        (179)
403 |   a--        (181)
404 |   r=a 0        (182)
405 |   (         t0 = H[t0]〈〉)        (184)
406 |   d=a        (184)
407 |   a=*d        (185)
408 |   r=a 0        (186)
409 |   (         goto find_label_ret_id)        (188)
410 |   lj 221        (188)
411 |   (         MarkTempVarEnd)        (191)
412 |   (        :pcomp_end~:)        (191)
413 |   (                    100: pass)        (191)
414 |   (        :call_next:)        (191)
415 |   (         t253 = 4294967294)        (191)
416 |   a=0        (191)
417 |   a--        (192)
418 |   a--        (193)
419 |   r=a 253        (194)
420 |   (         H[t0+3]〈〉 = t0)        (196)
421 |   a=r 0        (196)
422 |   a+= 3        (198)
423 |   d=a        (200)
424 |   a=r 0        (201)
425 |   *d=a        (203)
426 |   (saved bsp, return id:)        (204)
427 |   (         H[t0+4]〈〉 = 0)        (204)
428 |   d++        (204)
429 |   *d=0        (205)
430 |   (push arguments:)        (206)
431 |   (         H[t0+5]〈〉 = t255)        (206)
432 |   d++        (206)
433 |   a=r 255        (207)
434 |   *d=a        (209)
435 |   (         t0 = t0 + 4)        (210)
436 |   c= 4        (210)
437 |   a=r 0        (212)
438 |   a+=c        (214)
439 |   r=a 0        (215)
440 |   (         goto pcomp)        (217)
441 |   lj 102        (217)
442 |   (        :return_id_0:)        (220)
443 |   (         halt)        (220)
444 |   halt        (220)
445 |   (        :find_label_ret_id:)        (221)
446 |   (         t4 = t0 > 1048377)        (221)
447 |   a= 15        (221)
448 |   a<<= 8        (223)
449 |   a+= 255        (225)
450 |   a<<= 8        (227)
451 |   a+= 57        (229)
452 |   c=a        (231)
453 |   a=r 0        (232)
454 |   a>c        (234)
455 |   a=0        (235)
456 |   jf 1        (236)
457 |   a++        (238)
458 |   r=a 4        (239)
459 |   (         if t4 goto throw_error)        (241)
460 |   a== 0        (241)
461 |   jt 3        (243)
462 |   lj 257        (245)
463 |   (         ifEq t2 0 goto return_id_0)        (248)
464 |   c=r 2        (248)
465 |   a=0        (250)
466 |   a==c        (251)
467 |   jf 3        (252)
468 |   lj 220        (254)
469 |   (        :throw_error:)        (257)
470 |   (         error)        (257)
471 |   error        (257)
472 |   (         halt)        (258)
473 |   halt        (258)
474 | end
475 | 


--------------------------------------------------------------------------------
/test/rle_cm.manual.cfg:
--------------------------------------------------------------------------------
 1 | (ABCD registers, A is accumulator and holds input/output for OUT cmd
 2 | R0-255: 32 bit array of length 256
 3 | F: condition flag
 4 | H: 32 bit array of length 2^h. In HCOMP, H[i] is the input to COMP[i]
 5 | M: 8 bit array of length 2^m
 6 | )
 7 | 
 8 | comp 20 0 20 24 1 (hh hm ph pm n)  (n is the number of components in the comp section)
 9 |   0 cm 18 255
10 | hcomp
11 |   a<<= 1 (make room for the mode bit in bit 0)
12 |   ifnot (F doubles as the mode variable of rle_cm.py, this relies on F keeping its value between hcomp calls)
13 |     a==a (always true: sets F, so the next call takes the else branch)
14 |   else
15 |     a++ (mode bit 1)
16 |     a<a (always false: clears F again for the next call)
17 |   endif
18 |   a<<= 9
19 |   *d=a (store the context, D is never changed in this section, presumably still 0 so this is H[0], confirm)
20 |   halt
21 | pcomp ./rle c ; (information for zpaqd to call the python script)
22 |   b<>a (==N comparisons are only available for A)
23 |   a== 0 if (b is ichar)
24 |     b<>a (swap back)
25 |     b++ (case: load byte)
26 |     c=a (c is charv)
27 |   else
28 |     b<>a (swap back)
29 |     b-- (case: write out bytes c times)
30 |     do a> 0 if
31 |       c<>a (now c holds the count, a the byte)
32 |       out
33 |       c--
34 |       c<>a (bring byte to c again)
35 |     forever endif
36 |   endif
37 |   halt
38 | end
39 | 


--------------------------------------------------------------------------------
/test/rle_cm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # simple features: if-else, while, for
  4 | # extra features: functions, global vs local function variables, own arrays and hcomp with return instead of hH or pM etc, dict?
  5 | 
  6 | ### BEGIN OF EDITABLE SECTION - do not remove this marker or place something before and after it
  7 | 
  8 | hh = 0  # size of hH[] is 2**hh
  9 | hm = 0  # size of hM[] is 2**hm
 10 | ph = 0  # size of pH[] is 2**ph
 11 | pm = 24  # size of pM[] is 2**pm
 12 | n = len({  # n is the number of components listed in this dict
 13 | 0: "cm 18 255"  # SIZE 2^18, LIMIT 255
 14 | })
 15 | pcomp_invocation = "./rle c"  # information for zpaqd about preprocessor invocation
 16 | 
 17 | # put commonly used functions and variables here
 18 | # as pcomp and hcomp do only include their own definitions
 19 | 
 20 | 
 21 | ### END OF EDITABLE SECTION - do not remove this marker or place something before and after it
 22 | import sys, array, argparse
 23 | input_buf = []
 24 | output = []
 25 | NONE = 4294967295
 26 | input_c = NONE
 27 | 
 28 | def out(a):  # emit one byte: it is collected in `output`, which the driver writes after the last pcomp call
 29 |   output.append(a)
 30 | 
 31 | def read_b():
 32 |   global input_c, input_buf
 33 |   if input_c == NONE:  # was already consumed
 34 |     if len(input_buf) == 0:
 35 |       return NONE
 36 |     a = input_buf.pop(0)
 37 |     if args.method == 'hcomp':
 38 |       print_hcomp_status()
 39 |       global input_last_a
 40 |       input_last_a = a
 41 |     return a
 42 |   else:
 43 |     tmp = input_c
 44 |     input_c = NONE
 45 |     return tmp
 46 | 
 47 | 
 48 | def peek_b():
 49 |   global input_c
 50 |   if input_c == NONE:
 51 |     push_b(read_b())
 52 |   return input_c
 53 | 
 54 | def push_b(c):
 55 |   """Push back one byte so that the next read_b()/peek_b() returns it; only one byte can be pending, pushing again overwrites it."""
 56 |   global input_c
 57 |   input_c = c
 58 | 
 59 | 
 60 | hH = array.array('L', [0 for x in range(0, 2**hh)])
 61 | hM = array.array('B', [0 for x in range(0, 2**hm)])
 62 | 
 63 | pH = array.array('L', [0 for x in range(0, 2**ph)])
 64 | pM = array.array('B', [0 for x in range(0, 2**pm)])
 65 | 
 66 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions
 67 | 
 68 | mode = 0
 69 | 
 70 | def hcomp(c):  # sets hH[0] to ((c << 1) + old mode) << 9 and flips mode on every call
 71 |   global mode
 72 |   c <<= 1  # bit 0 is reserved for the mode
 73 |   if not mode:
 74 |     mode = 1
 75 |   else:
 76 |     c += 1
 77 |     mode = 0
 78 |   c<<= 9
 79 |   hH[0] = c
 80 | 
 81 | pass
 82 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
 83 | 
 84 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions + read_b(), peek_b(), push_b(), out(b)
 85 | 
 86 | case = 0
 87 | last = 0
 88 | 
 89 | def pcomp(c):  # passing c is like having c = read_b() as first line
 90 |   global case, last
 91 |   if case == 0:  # c is byte to load
 92 |     case = 1
 93 |     last = c
 94 |   else:  # write out content of last c times
 95 |     case = 0
 96 |     while c > 0:  # NOTE(review): the driver ends with pcomp(NONE); if that call lands in this branch (odd number of input bytes) the loop runs 4294967295 times
 97 |       c-= 1
 98 |       out(last)
 99 | 
100 | pass
101 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
102 | 
103 | 
104 | import argparse
105 | parser = argparse.ArgumentParser()
106 | parser.add_argument('method', help='run either hcomp or pcomp on each byte of the input\nfor hcomp output will be pairs of input and contexts', choices=['hcomp', 'pcomp'])
107 | parser.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin, help='input file')
108 | parser.add_argument('output', nargs='?', type=argparse.FileType('wb'), default=sys.stdout, help='output file')
109 | args = parser.parse_args()
110 | input_buf = list(args.input.read())  # @TODO: more than one input file, separated with NONE (only pcomp?)
111 | input_last_a = None
112 | def print_hcomp_status():
113 |   global input_last_a
114 |   if input_last_a is None:
115 |     return
116 |   line = '{}: {}\n'.format(input_last_a, list(hH[:n]))
117 |   input_last_a = None
118 |   try:  # stdout
119 |     args.output.write(line)
120 |   except:
121 |     args.output.write(bytes(line, 'utf-8'))
122 | if args.method == 'hcomp':
123 |   while len(input_buf) > 0:
124 |     input_c = NONE
125 |     input_last_a = input_buf.pop(0)
126 |     hcomp(input_last_a)  # @TODO: hcomp also gets -1 after EOS like pcomp?
127 |     print_hcomp_status()
128 | elif args.method == 'pcomp':
129 |   while len(input_buf) > 0:
130 |     input_c = NONE
131 |     pcomp(input_buf.pop(0))
132 |   input_c = NONE
133 |   pcomp(NONE)
134 |   try:
135 |     args.output.write(bytes(output))
136 |   except:  # stdout
137 |     args.output.buffer.write(bytes(output))
138 | 


--------------------------------------------------------------------------------
/test/rle_model.ir:
--------------------------------------------------------------------------------
  1 | comp 21 0 21 0 3 (hh hm ph pm n)
  2 |   0 cm 19 22
  3 |   1 cm 19 22
  4 |   2 mix2 1 0 1 30 0
  5 | hcomp
  6 |     # t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255
  7 |    ifN t0 goto init_code
  8 |    if t254 goto cont_reading
  9 |    goto call_next
 10 |   :init_code:
 11 |    t0 = 4
 12 |    t252 = t0
 13 |    goto read_b_end~
 14 |   :read_b:
 15 |    t1 = t253 == 4294967294
 16 |    if t1 goto do_read_in
 17 |    t255 = t253
 18 |    t253 = 4294967294
 19 |    goto cont_reading
 20 |   :do_read_in:
 21 |    t254 = 1
 22 |    halt
 23 |   :cont_reading:
 24 |    t254 = 0
 25 |       t1 = t255
 26 |      t2 = H[t0]()
 27 |      t0 = t0 - 1
 28 |      t0 = H[t0]()
 29 |      goto find_label_ret_id
 30 |   :read_b_end~:
 31 |  #                     21: pass
 32 |  #                     159: at_counter = False  # if false, then c is byte, otherwise c is a counter
 33 |  H[t0+1](at_counter) = 0
 34 |  #                     160: last_value = 0
 35 |  H[t0+2](last_value) = 0
 36 |  #                     161: last_counter = 0
 37 |  H[t0+3](last_counter) = 0
 38 |  #                     163: def hcomp(c):  # pcomp bytecode is passed first (or 0 if there is none)
 39 |  goto hcomp_end~
 40 | :hcomp:
 41 |  MarkTempVarStart
 42 |  # Arg c at t0 + 1
 43 |  #                     164:   global at_counter, last_value, last_counter
 44 |  # Global at_counter via H[t252+1]
 45 |  # Global last_value via H[t252+2]
 46 |  # Global last_counter via H[t252+3]
 47 |  #                     165:   if at_counter:
 48 |  ifN H[t252+1](at_counter) goto else_1
 49 |  #                     166:     last_counter = c
 50 |  H[t252+3](last_counter) = H[t0+1](c)
 51 |  goto endif_2
 52 | :else_1:
 53 |  #                     168:     last_value = c
 54 |  H[t252+2](last_value) = H[t0+1](c)
 55 | :endif_2:
 56 |  #                     171:   hH[0] = (last_value << 1) + at_counter  # at_counter will occupy one bit, therefore shift
 57 |  t2 = H[t252+2](last_value) << 1
 58 |  t1 = t2 + H[t252+1](at_counter)
 59 |  t2 = 0
 60 |  H[t2]() = t1
 61 |  #                     172:   hH[0] <<= 9  # again shift to the side because of the xor with the partially decoded byte
 62 |  t1 = 0
 63 |  H[t1]() = H[t1]() << 9
 64 |  #                     174:   hH[1] = (last_counter << 1) + at_counter
 65 |  t2 = H[t252+3](last_counter) << 1
 66 |  t1 = t2 + H[t252+1](at_counter)
 67 |  t2 = 1
 68 |  H[t2]() = t1
 69 |  #                     175:   hH[1] <<= 9
 70 |  t1 = 1
 71 |  H[t1]() = H[t1]() << 9
 72 |  #                     176:   hH[2] = at_counter + 0  # context for mixer: is at counter (1) or not (0)
 73 |  t1 = H[t252+1](at_counter) + 0
 74 |  t2 = 2
 75 |  H[t2]() = t1
 76 |  #                     177:   at_counter = not at_counter
 77 |  t1 = ! H[t252+1](at_counter)
 78 |  H[t252+1](at_counter) = t1
 79 |  # insert return as it might not be done by the function:
 80 |     t2 = H[t0]()
 81 |    t0 = t0 - 1
 82 |    t0 = H[t0]()
 83 |    goto find_label_ret_id
 84 |  MarkTempVarEnd
 85 | :hcomp_end~:
 86 |  #                     181: pass
 87 | :call_next:
 88 |  t253 = 4294967294
 89 |     H[t0+4]() = t0
 90 |    # saved bsp, return id:
 91 |    H[t0+5]() = 0
 92 |    # push arguments:
 93 |    H[t0+6]() = t255
 94 |    t0 = t0 + 5
 95 |    goto hcomp
 96 |   :return_id_0:
 97 |     halt
 98 |   :find_label_ret_id:
 99 |    t4 = t0 > 1048380
100 |    if t4 goto throw_error
101 |    ifEq t2 0 goto return_id_0
102 |   :throw_error:
103 |    error
104 |    halt
105 | pcomp
106 |     # t255 holds the inital value passed into the A register, first ZPAQL instruction must thus be r=a 255
107 |    ifN t0 goto init_code
108 |    if t254 goto cont_reading
109 |    goto call_next
110 |   :init_code:
111 |    t0 = 1
112 |    t252 = t0
113 |    goto read_b_end~
114 |   :read_b:
115 |    t1 = t253 == 4294967294
116 |    if t1 goto do_read_in
117 |    t255 = t253
118 |    t253 = 4294967294
119 |    goto cont_reading
120 |   :do_read_in:
121 |    t254 = 1
122 |    halt
123 |   :cont_reading:
124 |    t254 = 0
125 |       t1 = t255
126 |      t2 = H[t0]()
127 |      t0 = t0 - 1
128 |      t0 = H[t0]()
129 |      goto find_label_ret_id
130 |   :read_b_end~:
131 |  #                     21: pass
132 |  #                     189: case_loading = False
133 |  H[t0+1](case_loading) = 0
134 |  #                     190: last = NONE
135 |  H[t0+2](last) = 4294967295
136 |  #                     192: def pcomp(c):
137 |  goto pcomp_end~
138 | :pcomp:
139 |  MarkTempVarStart
140 |  # Arg c at t0 + 1
141 |  #                     193:   global case_loading, last
142 |  # Global case_loading via H[t252+1]
143 |  # Global last via H[t252+2]
144 |  #                     194:   if c == NONE:  # start of new segment, so restart our code
145 |  t1 = H[t0+1](c) == 4294967295
146 |  ifN t1 goto else_2
147 |  #                     195:     case_loading = False
148 |  H[t252+1](case_loading) = 0
149 |  #                     196:     last = NONE
150 |  H[t252+2](last) = 4294967295
151 |  #                     197:     return
152 |     t2 = H[t0]()
153 |    t0 = t0 - 1
154 |    t0 = H[t0]()
155 |    goto find_label_ret_id
156 |  goto endif_3
157 | :else_2:
158 | :endif_3:
159 |  #                     198:   if not case_loading:  # c is byte to load
160 |  t1 = ! H[t252+1](case_loading)
161 |  ifN t1 goto else_4
162 |  #                     199:     case_loading = True
163 |  H[t252+1](case_loading) = 1
164 |  #                     200:     last = c
165 |  H[t252+2](last) = H[t0+1](c)
166 |  goto endif_5
167 | :else_4:
168 |  #                     202:     case_loading = False
169 |  H[t252+1](case_loading) = 0
170 |  #                     203:     while c > 0:
171 | :while_6:
172 |  t1 = H[t0+1](c) > 0
173 |  ifN t1 goto whileend_8
174 |  #                     204:       c-= 1
175 |  H[t0+1](c) = H[t0+1](c) - 1
176 |  #                     205:       out(last)
177 |  out H[t252+2](last)
178 |  goto while_6
179 | :whileend_8:
180 | :endif_5:
181 |  # insert return as it might not be done by the function:
182 |     t2 = H[t0]()
183 |    t0 = t0 - 1
184 |    t0 = H[t0]()
185 |    goto find_label_ret_id
186 |  MarkTempVarEnd
187 | :pcomp_end~:
188 |  #                     208: pass
189 | :call_next:
190 |  t253 = 4294967294
191 |     H[t0+3]() = t0
192 |    # saved bsp, return id:
193 |    H[t0+4]() = 0
194 |    # push arguments:
195 |    H[t0+5]() = t255
196 |    t0 = t0 + 4
197 |    goto pcomp
198 |   :return_id_0:
199 |     halt
200 |   :find_label_ret_id:
201 |    t4 = t0 > 1048377
202 |    if t4 goto throw_error
203 |    ifEq t2 0 goto return_id_0
204 |   :throw_error:
205 |    error
206 |    halt
207 | end
208 | 


--------------------------------------------------------------------------------
/test/rle_model.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (C) 2016 Kai Lüke kailueke@riseup.net
  3 | # This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
  4 | # under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
  5 | ### BEGIN OF EDITABLE SECTION - do not remove this marker or place something before and after it
  6 | 
  7 | # definition of the array sizes and the context mixing linear tree
  8 | hh = 2  # size of hH[] is 2**hh
  9 | hm = 0
 10 | ph = 0
 11 | pm = 0
 12 | n = len({  # can also be an empty {}, then hcomp won't be included and (preprocessed) data is just stored and not arithmetically coded
 13 | 0: "cm 19 22",  # context table size 2**19 with partly decoded byte as 9 bit hash xored with the context, count limit 22
 14 | 1: "cm 19 22",
 15 | 2: "mix2 1 0 1 30 0",  # will mix 0 and 1 together, context table size 2**1 with and-0 masking of the partly decoded byte which is added to the context, learning rate 30
 16 | })
 17 | pcomp_invocation = "./simple_rle"  # information for zpaqd about preprocessor invocation,
 18 | #                        like you would execute it in the shell, passed additional parameters
 19 | #                        at the end will be <inputfile> <outputfile>
 20 | 
 21 | # put shared functions and variables of pcomp and hcomp here,
 22 | # then they are copied into the hcomp and pcomp section before compilation
 23 | 
 24 | pass
 25 | ### END OF EDITABLE SECTION - do not remove this marker or place something before and after it
 26 | # ***Exposed API***
 27 | # c = read_b()
 28 | # push_b(c)
 29 | # c = peek_b()
 30 | # out(c)
 31 | # error()
 32 | # hH, pH, hM, pM as 32- and 8-bit arrays with the defined size 2**hh, … and support for len(hH), …
 33 | # hh, hm, ph, pm and n are also available as constants
 34 | # arrayvar = alloc_pH(arraysize)  # if backend implementations addr_alloc_pH(size), addr_free_pH(addr) … are defined
 35 | # arrayvar = array_pH(numbervar)  # cast needed when passed between functions
 36 | # len_pH(arrayvar)
 37 | # free_pH(arrayvar)
 38 | # … analog for pM, hH, hM
 39 | 
 40 | import sys, array, argparse
 41 | from collections import deque
 42 | input_buf = []
 43 | output = deque([])
 44 | NONE = 4294967295
 45 | input_c = NONE-1
 46 | 
 47 | def out(a):
 48 |   if cmpbuf is not None:
 49 |     expected = cmpbuf.popleft()
 50 |     if a != expected:
 51 |       import ipdb; ipdb.set_trace()
 52 |   output.append(a)
 53 | 
 54 | def read_b():
 55 |   global input_c, input_buf, input_last_a
 56 |   if input_c == NONE-1:  # was already consumed
 57 |     if len(input_buf) == 0:
 58 |       raise WouldNotBeReached
 59 |     a = input_buf.popleft()
 60 |     print_hcomp_status()
 61 |     input_last_a = a
 62 |     return a
 63 |   else:
 64 |     tmp = input_c
 65 |     input_c = NONE-1
 66 |     return tmp
 67 | 
 68 | 
 69 | def peek_b():
 70 |   global input_c
 71 |   if input_c == NONE-1:
 72 |     push_b(read_b())
 73 |   return input_c
 74 | 
 75 | def push_b(c):
 76 |   """Push back one byte so that the next read_b()/peek_b() returns it; only one byte can be pending, pushing again overwrites it."""
 77 |   global input_c
 78 |   input_c = c
 79 | 
 80 | def error():  # abort the Python run; the message names the failure the zpaq execution shows instead
 81 |   raise Exception("error() invoked (zpaq execution will fail with: Bad ZPAQL opcode)")
 82 | 
 83 | hH = array.array('L', [0 for x in range(0, 2**hh)])
 84 | hM = array.array('B', [0 for x in range(0, 2**hm)])
 85 | 
 86 | pH = array.array('L', [0 for x in range(0, 2**ph)])
 87 | pM = array.array('B', [0 for x in range(0, 2**pm)])
 88 | 
 89 | 
 90 | def alloc_pM(size):  # reserve `size` cells of pM via the backend addr_alloc_pM(size) (must be defined, see the API notes above) and wrap them in a VirtArray
 91 |   return VirtArray(pM, addr_alloc_pM(size), size)
 92 | def alloc_pH(size):  # same for pH, using addr_alloc_pH
 93 |   return VirtArray(pH, addr_alloc_pH(size), size)
 94 | def alloc_hH(size):  # same for hH, using addr_alloc_hH
 95 |   return VirtArray(hH, addr_alloc_hH(size), size)
 96 | def alloc_hM(size):  # same for hM, using addr_alloc_hM
 97 |   return VirtArray(hM, addr_alloc_hM(size), size)
 98 | def free_pM(va):
 99 |   if va.addr == NONE:
100 |     raise Exception("double free (not visible in zpaq execution)")
101 |   if va.array is not pM:
102 |     raise Exception("wrong type")
103 |   addr_free_pM(va.addr)
104 |   va.addr = NONE
105 | def free_pH(va):
106 |   if va.addr == NONE:
107 |     raise Exception("double free (not visible in zpaq execution)")
108 |   if va.array is not pH:
109 |     raise Exception("wrong type")
110 |   addr_free_pH(va.addr)
111 |   va.addr = NONE
112 | def free_hH(va):
113 |   if va.addr == NONE:
114 |     raise Exception("double free (not visible in zpaq execution)")
115 |   if va.array is not hH:
116 |     raise Exception("wrong type")
117 |   addr_free_hH(va.addr)
118 |   va.addr = NONE
119 | def free_hM(va):
120 |   if va.addr == NONE:
121 |     raise Exception("double free (not visible in zpaq execution)")
122 |   if va.array is not hM:
123 |     raise Exception("wrong type")
124 |   addr_free_hM(va.addr)
125 |   va.addr = NONE
126 | 
127 | # casting addresses which were themselves written into a pH/hH entry back to array objects; an argument that already is a VirtArray is passed through after a check of its backing array
128 | array_pH = lambda addr: (addr if addr.array is pH else error()) if type(addr) is VirtArray else VirtArray(pH, addr, pH[addr-2])  # wrong type error? (for a plain address the size is read from pH[addr-2])
129 | array_pM = lambda addr: (addr if addr.array is pM else error()) if type(addr) is VirtArray else VirtArray(pM, addr, get32_pM(addr-5))  # wrong type error? (size is read via get32_pM(addr-5))
130 | array_hH = lambda addr: (addr if addr.array is hH else error()) if type(addr) is VirtArray else VirtArray(hH, addr, hH[addr-2])  # wrong type error? (size is read from hH[addr-2])
131 | array_hM = lambda addr: (addr if addr.array is hM else error()) if type(addr) is VirtArray else VirtArray(hM, addr, get32_hM(addr-5))  # wrong type error? (size is read via get32_hM(addr-5))
132 | len_hM = lambda va: va.size if va.array is hM else error() # wrong type: error() is called when va does not live in hM
133 | len_pM = lambda va: va.size if va.array is pM else error() # wrong type: error() is called when va does not live in pM
134 | len_pH = lambda va: va.size if va.array is pH else error() # wrong type: error() is called when va does not live in pH
135 | len_hH = lambda va: va.size if va.array is hH else error() # wrong type: error() is called when va does not live in hH
136 | 
137 | class VirtArray:
138 |   addr = None  # addr in array for index 0
139 |   array = None  # one of hH, hM, pH, pM
140 |   size = None
141 |   def __init__(self, array, addr, size):
142 |     self.array = array
143 |     self.addr = addr
144 |     self.size = size
145 |     assert self.size < 2147483648, "address too big, 32. bit is used to distinguish between H and M"
146 |   def __getitem__(self, key):
147 |     return self.array[self.addr+key]
148 |   def __setitem__(self, key, item):
149 |     self.array[self.addr+key] = item.addr if type(item) is VirtArray else item
150 |   def __len__(self):
151 |     raise Exception("instead of len() use one of len_hM, len_pM, len_pH or len_hH")
152 |   def __str__(self):
153 |     return str(self.array[self.addr:self.addr+self.size])
154 | 
155 | 
156 | pass
157 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions beside API and those of the first section
158 | 
159 | 
160 | # place global variables of hcomp and custom functions into this section
161 | 
162 | at_counter = False  # if false, then c is byte, otherwise c is a counter
163 | last_value = 0
164 | last_counter = 0
165 | 
166 | def hcomp(c):  # pcomp bytecode is passed first (or 0 if there is none)
167 |   global at_counter, last_value, last_counter
168 |   if at_counter:  # c is a counter
169 |     last_counter = c
170 |   else:  # c is a byte value
171 |     last_value = c
172 |   # the first part of the context for the first CM is the last byte value and
173 |   # the second part is whether we are at a counter (then we predict for a byte) or vice versa
174 |   hH[0] = (last_value << 1) + at_counter  # at_counter will occupy one bit, therefore shift
175 |   hH[0] <<= 9  # again shift to the side because of the xor with the partially decoded byte
176 |   # second CM same but uses the counter for prediction
177 |   hH[1] = (last_counter << 1) + at_counter
178 |   hH[1] <<= 9
179 |   hH[2] = at_counter + 0  # context for mixer: is at counter (1) or not (0)
180 |   at_counter = not at_counter  # flip for the next call
181 | 
182 | 
183 | 
184 | pass
185 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
186 | 
187 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions beside API and those of the first section
188 | 
189 | 
190 | # place global variables of pcomp and custom functions into this section
191 | 
192 | case_loading = False
193 | last = NONE
194 | 
195 | def pcomp(c):  # c alternates between a byte to load and the number of times to output it; NONE resets the state
196 |   global case_loading, last
197 |   if c == NONE:  # start of new segment, so restart our code
198 |     case_loading = False
199 |     last = NONE
200 |     return
201 |   if not case_loading:  # c is byte to load
202 |     case_loading = True
203 |     last = c
204 |   else:  # write out content of last c times
205 |     case_loading = False
206 |     while c > 0:
207 |       c-= 1
208 |       out(last)
209 | 
210 | 
211 | pass
212 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
213 | 
class WouldNotBeReached(Exception):
  """Raised to abandon the current hcomp/pcomp invocation at end of input.

  Used for EOF handling in read_b(): once the last byte (or the end-of-segment
  marker in pcomp) has been consumed, execution of the handler must not continue.
  """
217 | 
def finish_output():
  """Flush the collected pcomp output bytes to the current output file.

  Writes everything accumulated in the global `output` to `args.output[0]`,
  resets `output` to an empty deque and, when further output files are queued,
  advances to the next one so the following segment goes to its own file.

  Raises whatever the conversion to bytes or the write raises (e.g. ValueError
  for a value outside 0..255, OSError for I/O failures).
  """
  global output
  target = args.output[0]
  # A text stream such as sys.stdout exposes its binary layer as `.buffer`;
  # files opened in binary mode have no such attribute and are written directly.
  # Choosing explicitly (instead of a bare except) keeps real errors visible.
  getattr(target, 'buffer', target).write(bytes(output))
  output = deque([])
  if len(args.output) > 1:
    args.output.pop(0)
227 | 
228 | import argparse
# Command line interface of the debugging runtime: choose hcomp or pcomp,
# one input file (default stdin), optional extra segments, optional expected
# output for comparison and any number of output files (default stdout).
parser = argparse.ArgumentParser()
parser.add_argument('method', help='run either hcomp or pcomp on each byte of the input\nfor hcomp output will be pairs of input and contexts', choices=['hcomp', 'pcomp'])
parser.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin, help='input file')
parser.add_argument('--append', type=argparse.FileType('rb'), dest='addseg', default=[], metavar='FILE', action='append', help='additional input files')
parser.add_argument('--compare', type=argparse.FileType('rb'), dest='compare', default=None, metavar='EXPECTEDFILE', help='compare pcomp output and run ipdb for mismatch')
parser.add_argument('output', nargs='*', type=argparse.FileType('wb'), default=[sys.stdout], help='output file')
args = parser.parse_args()
cmpbuf = None
if args.compare:
  cmpbuf = deque(args.compare.read())
# The non-string default sys.stdin is not passed through FileType('rb'), so it
# is a text stream whose read() yields str.  Always read from the binary layer
# (`.buffer` on text streams, the object itself on binary files) so that the
# buffer holds integer byte values.
input_buf = deque(getattr(args.input, 'buffer', args.input).read())
if args.method == 'pcomp':
  input_buf.append(NONE)  # end of segment
for additional_segment in args.addseg:
  input_buf.extend(getattr(additional_segment, 'buffer', additional_segment).read())
  if args.method == 'pcomp':
    input_buf.append(NONE)
# value most recently handed to hcomp/pcomp, or None when nothing is pending
input_last_a = None
247 | 
def print_hcomp_status():
  """Report the result of the value that was processed last, exactly once.

  Does nothing when no value is pending (`input_last_a is None`).  In pcomp
  mode an end-of-segment marker (NONE) triggers finish_output().  In hcomp
  mode a line "<value>: [<first n contexts>]" is written to the current
  output, as text when the stream accepts str and UTF-8 encoded otherwise.
  """
  global input_last_a
  if input_last_a is None:
    return
  line = '{}: {}\n'.format(input_last_a, list(hH[:n]))
  if args.method == 'pcomp' and input_last_a == NONE:
    finish_output()
  input_last_a = None  # mark as reported so a second call is a no-op
  if args.method == 'hcomp':
    try:  # stdout
      args.output[0].write(line)
    except TypeError:
      # binary file: it rejects str, so hand over encoded bytes instead;
      # any other failure (I/O error, interrupt) is left to propagate
      args.output[0].write(bytes(line, 'utf-8'))
261 | 
# Driver: feed the buffered input value by value to the selected handler.
if args.method in ('hcomp', 'pcomp'):
  process_value = hcomp if args.method == 'hcomp' else pcomp
  while input_buf:
    input_c = NONE-1
    input_last_a = input_buf.popleft()
    try:
      process_value(input_last_a)
    except WouldNotBeReached:
      # the handler ran out of input; nothing more to do for this value
      pass
    print_hcomp_status()
280 | 
281 | 


--------------------------------------------------------------------------------
/test/simple_rle:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2016 Kai Lüke kailueke@riseup.net
 3 | # This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
 4 | # under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
 5 | 
 6 | import sys
 7 | input = sys.argv[1]
 8 | output = sys.argv[2]
 9 | with open(input, mode='rb') as fi:
10 |   with open(output, mode='wb') as fo:
11 |       last = None
12 |       count = 0
13 |       data = []
14 |       for a in fi.read():
15 |         if a != last or count == 255:  # count only up to 255 to use one byte
16 |           if last != None:  # write out the pair
17 |             data.append(last)
18 |             data.append(count)
19 |           last = a  # start counting
20 |           count = 1
21 |         else:
22 |           count += 1  # continue counting
23 |       if last != None:
24 |         data.append(last)
25 |         data.append(count)
26 |       fo.write(bytes(data))
27 | 


--------------------------------------------------------------------------------
/test/stress.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # simple features: if-else, while, for
  4 | # extra features: functions, global vs local function variables, own arrays and hcomp with return instead of hH or pM etc, dict?
  5 | 
  6 | ### BEGIN OF EDITABLE SECTION - do not remove this marker or place something before and after it
  7 | 
  8 | hh = 0
  9 | hm = 15
 10 | ph = 0
 11 | pm = 0
 12 | n = len({
 13 | 0: "cm 16 22"  # SIZE 2^16, LIMIT 255
 14 | })
 15 | pcomp_invocation = ""  # information for zpaqd about preprocessor invocation
 16 | 
 17 | # put commonly used functions and variables here
 18 | # as pcomp and hcomp do only include their own definitions
 19 | 
 20 | 
 21 | ### END OF EDITABLE SECTION - do not remove this marker or place something before and after it
 22 | import sys, array, argparse
 23 | from collections import deque
 24 | input_buf = []
 25 | output = deque([])
 26 | NONE = 4294967295
 27 | input_c = NONE
 28 | 
 29 | def out(a):
 30 |   output.append(a)
 31 | 
 32 | def read_b():
 33 |   global input_c, input_buf
 34 |   if input_c == NONE:  # was already consumed
 35 |     if len(input_buf) == 0:
 36 |       return NONE
 37 |     a = input_buf.popleft()
 38 |     if args.method == 'hcomp':
 39 |       print_hcomp_status()
 40 |       global input_last_a
 41 |       input_last_a = a
 42 |     return a
 43 |   else:
 44 |     tmp = input_c
 45 |     input_c = NONE
 46 |     return tmp
 47 | 
 48 | 
 49 | def peek_b():
 50 |   global input_c
 51 |   if input_c == NONE:
 52 |     push_b(read_b())
 53 |   return input_c
 54 | 
 55 | def push_b(c):
 56 |   """can only be executed once and will overwrite otherwise"""
 57 |   global input_c
 58 |   input_c = c
 59 | 
 60 | def error():
 61 |   raise Exception
 62 | 
 63 | hH = array.array('L', [0 for x in range(0, 2**hh)])
 64 | hM = array.array('B', [0 for x in range(0, 2**hm)])
 65 | 
 66 | pH = array.array('L', [0 for x in range(0, 2**ph)])
 67 | pM = array.array('B', [0 for x in range(0, 2**pm)])
 68 | 
 69 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions
 70 | 
 71 | line_len = 8
 72 | buf_pos = 110
 73 | 
 74 | reading_header = True
 75 | 
# Exercises read_b(), peek_b() and push_b() together and returns the value of
# the final read_b().  Calls error() whenever a peeked or pushed-back value
# does not come back unchanged from the following read.
# When no value is parked on entry, five values are taken from the input in
# total: three discarded, one checked via peek/push, one returned.
def read_5_times():
  read_b()
  read_b()
  read_b()
  y=peek_b()
  x=read_b()  # must return the value that was just peeked
  if x!=y:
    error()
  push_b(x)  # park it again ...
  if x!= peek_b():  # ... peeking must show the parked value
    error()
  x=read_b()  # ... and reading must return it once more
  if x!=y:
    error()
  return read_b()
 91 | 
# Returns v shifted left by 9 bits (same shift as the `<< 9` / `<<= 9` in calc).
def w(v):
  return v << 9
 94 | 
# Returns (up//4 + left//4) shifted left by 9 bits.
# Both branches compute the same value by different routes.
# NOTE(review): the redundant conditions and round-trip arithmetic look like
# deliberate stress-test constructs for the compiler - keep them as they are.
def calc(up, left):
  if True and left != left:  # compares a value with itself
    error()
  if False or up != up:  # compares a value with itself
    error()
  if left:
    # square and divide again: leaves `left` unchanged for integers (left != 0 here)
    l = left ** 2
    left = l // left
  else:
    # multiply and divide again: leaves `up` unchanged for integers
    up *= 87
    up //= 87
    if True:
      return w(up//4 + left//4)
    else:  # never taken, same value written with an explicit shift
      return (up//4 + left//4) << 9
  v = up//4 + left//4
  v <<= 9
  return v
113 | 
114 | 
# Context computation of the stress test.  After a few self-checks it may
# parse a decimal number from the input into line_len (first call only, see
# reading_header), then stores c in the ring buffer hM and sets hH[0] from two
# previously stored values via calc().
def hcomp(c):
  # self-checks of len(), ** and the not/~ operators; error() on any mismatch
  if len(hH) != 2**hh:
    error()
  if 2**26 != 67108864:
    error()
  x = 0
  x = ~x
  x = not x
  if not x and x:
    error()
  global line_len, buf_pos, reading_header
  if reading_header:
    # NOTE(review): 0x50 is ASCII 'P'; the header is only parsed when c is NOT
    # 'P' - unclear from here whether that is intended, confirm.
    if c != 0x50 and 0 < read_b():
      read_b()
      z = read_b() - 0x30  # 0x30 is ASCII '0': z is the digit value
      while z < 10 and z >= 0:  # append decimal digits to line_len
        line_len *= 10
        line_len += z
        z = read_b() - 0x30
      line_len *= 3
      z = read_b() - 0x30
      c = read_5_times()
      reading_header = False
  up = hM[buf_pos]  # value stored at this position one buffer round earlier
  left = hM[(line_len+buf_pos-3) % line_len]  # value stored three positions back (wrapping)
  hM[buf_pos] = c
  buf_pos = (buf_pos + 1) % line_len  # advance, wrapping at line_len
  hH[0] = calc(up, left)
143 | 
144 | pass
145 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
146 | 
147 | ### BEGIN OF EDITABLE SECTION - do not remove this marker, may only use own variables and functions + read_b(), peek_b(), push_b(), out(b)
148 | 
# Postprocessor stub of the stress test: it ignores its input and writes nothing.
def pcomp(c):  # passing c is like having c = read_b() as first line
  pass
151 | 
152 | pass
153 | ### END OF EDITABLE SECTION - do not remove this marker and the pass statement before
154 | 
155 | 
156 | import argparse
# Command line interface: choose hcomp or pcomp, an optional input file
# (default stdin) and an optional output file (default stdout).
parser = argparse.ArgumentParser()
parser.add_argument('method', help='run either hcomp or pcomp on each byte of the input\nfor hcomp output will be pairs of input and contexts', choices=['hcomp', 'pcomp'])
parser.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin, help='input file')
parser.add_argument('output', nargs='?', type=argparse.FileType('wb'), default=sys.stdout, help='output file')
args = parser.parse_args()
# The non-string default sys.stdin is not passed through FileType('rb'), so it
# is a text stream whose read() yields str.  Read from the binary layer
# (`.buffer` on text streams, the object itself on binary files) so that the
# buffer holds integer byte values.
input_buf = deque(getattr(args.input, 'buffer', args.input).read())  # @TODO: more than one input file, separated with NONE (only pcomp?)
# value most recently handed to hcomp, or None when nothing is pending
input_last_a = None
def print_hcomp_status():
  """Write the line "<value>: [<first n contexts>]" for the last value, once.

  Does nothing when no value is pending (`input_last_a is None`).  The line is
  written as text when the output accepts str and UTF-8 encoded otherwise.
  """
  global input_last_a
  if input_last_a is None:
    return
  line = '{}: {}\n'.format(input_last_a, list(hH[:n]))
  input_last_a = None  # mark as reported so a second call is a no-op
  try:  # stdout
    args.output.write(line)
  except TypeError:
    # binary file: it rejects str, so hand over encoded bytes instead;
    # any other failure (I/O error, interrupt) is left to propagate
    args.output.write(bytes(line, 'utf-8'))
# Driver: feed the buffered input value by value to the selected handler.
if args.method == 'hcomp':
  while len(input_buf) > 0:
    input_c = NONE
    input_last_a = input_buf.popleft()
    hcomp(input_last_a)
    print_hcomp_status()
elif args.method == 'pcomp':
  while len(input_buf) > 0:
    input_c = NONE
    pcomp(input_buf.popleft())
  input_c = NONE
  pcomp(NONE)  # final call with the end marker
  # A text stream such as sys.stdout exposes its binary layer as `.buffer`;
  # files opened in binary mode have no such attribute and are written directly.
  # Choosing explicitly (instead of a bare except) keeps real errors visible.
  getattr(args.output, 'buffer', args.output).write(bytes(output))
190 | 


--------------------------------------------------------------------------------
/test/subtract_green:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (C) 2016 Kai Lüke kailueke@riseup.net
  3 | # This program comes with ABSOLUTELY NO WARRANTY and is free software, you are welcome to redistribute it
  4 | # under certain conditions, see https://www.gnu.org/licenses/gpl-3.0.en.html
  5 | 
  6 | # c in out
  7 | # d in out
  8 | import sys
  9 | from collections import deque
 10 | 
 11 | input = sys.argv[1]
 12 | output = sys.argv[2]
 13 | 
 14 | def out(c):
 15 |   data.append(c)
 16 | 
 17 | def read_after_whitespace():
 18 |   while True:
 19 |     c = read_b()
 20 |     if c == 0x20 or c == 9 or c == 10 or c == 13: # skip whitespace
 21 |       out(c)
 22 |       continue
 23 |     elif c == 0x23: # skip comment line
 24 |      while c != 10 and c != 13:
 25 |        out(c)
 26 |        c = read_b()
 27 |      out(c)
 28 |     else:
 29 |       return c
 30 | 
 31 | def read_after_header():
 32 |   c = read_after_whitespace() # skip over first delimiter
 33 |   out(c)
 34 |   z = c - 0x30
 35 |   while z < 10 and z >= 0:  # read width
 36 |     c = read_b()
 37 |     out(c)
 38 |     z = c - 0x30
 39 |   c = read_after_whitespace()
 40 |   out(c)
 41 |   z = c - 0x30
 42 |   while z < 10 and z >= 0:  # skip height
 43 |     c = read_b()
 44 |     out(c)
 45 |     z = c - 0x30
 46 |   c = read_after_whitespace()
 47 |   out(c)
 48 |   z = c - 0x30
 49 |   maxc = 0
 50 |   while z < 10 and z >= 0:
 51 |     maxc *= 10
 52 |     maxc += z
 53 |     c = read_b()
 54 |     out(c)
 55 |     z = c - 0x30
 56 |   if maxc > 255:
 57 |     raise Exception("Only three 8-bit RGB channels supported")
 58 |   c = read_after_whitespace() # read first color byte
 59 |   return c
 60 | 
 61 | input_buf = []
 62 | data = deque([])
 63 | def read_b():
 64 |   if len(input_buf) == 0:
 65 |     return None
 66 |   return input_buf.popleft()
 67 | 
 68 | def convert():
 69 |   global input_buf
 70 |   reading_header = True
 71 |   color = 0
 72 |   with open(input, mode='rb') as fi:
 73 |     with open(output, mode='wb') as fo:
 74 |       input_buf = deque(fi.read())
 75 |       while True:
 76 |         c = read_b()
 77 |         if c is None:
 78 |           break
 79 |         color = (color + 1) % 3
 80 |         if reading_header and c == 0x50:
 81 |           out(c)
 82 |           c = read_b()
 83 |           if c == 0x36: # detected header P6
 84 |             out(c)
 85 |             c = read_after_header()
 86 |             color = 0  # red
 87 |             reading_header = False
 88 |         if color == 1:
 89 |           # wrong data
 90 |           out(c)
 91 |           color = 0
 92 |         elif color == 0:
 93 |           r = c
 94 |           c = read_b()
 95 |           if c is None:
 96 |             out(r)
 97 |             break
 98 |           out((256+r-c) % 256)
 99 |           out(c)
100 |           g = c
101 |           color = 1
102 |         elif color == 2:
103 |           out((256+c-g) % 256)
104 |       # write out
105 |       fo.write(bytes(data))
106 | 
107 | convert()
108 | 


--------------------------------------------------------------------------------
/test/testcase:
--------------------------------------------------------------------------------
1 | Other researchers remain unconvinced (see references below), including Dr. Chris Johnson, a professor of pediatrics at the University of Texas Health Sciences Center at [[San Antonio]] and cochair of the [[American Academy of Pediatrics]] Autism Expert Panel, who says, &quot;There is a chance we're seeing a true rise, but right now I don't think anybody can answer that question for sure.&quot; ([[Newsweek]] reference below).
2 | 
3 | * {{cite journal | author=Volkmar RM and Rutter M. | title=Childhood disintegrative disorder: Results of the DSM-IV autism field trial | journal=Journal of the American Academy of Child and Adolescent Psychiatry | year=1995 | volume=34 | pages=1092-1095}}
4 | 
5 | In 1939, Sheikh [[Shakhbut Bin-Sultan Al Nahyan]] granted [[Petroleum]] concessions, and oil was first found in 1958.  At first, oil money had a marginal impact.  A few lowrise concete buildings were erected, and the first paved road was completed in 1961, but Sheikh Shakbut, uncertain whether the new oil royalties would last, took a cautious approach, prefering to save the revenue rather than investing it in development.  His brother, [[Zayed bin Sultan Al Nahayan]], saw that oil wealth had the potential to transform Abu Dhabi.  The ruling Al Nahayan family decided that Sheikh Zayed should replace his brother as Ruler and carry out his vision of developing the country.  On [[August 6]], [[1966]], with the assistance of the British, Sheikh Zayed became the new ruler.  See generally, Al-Fahim, M, ''From Rags to Riches: A Story of Abu Dhabi'', Chapter Six (London Centre of Arab Studies, 1995), ISBN 1 900404 00 1.
6 | aaaabbbbbbbdddddddbccccccbcababcbcbccccbbbbccccbbbbccccddddbbbbcccc
7 | 


--------------------------------------------------------------------------------