├── LICENSE
├── Makefile
├── README.md
├── address.c
├── address.h
├── compile
├── operations.c
├── operations.h
├── par.sed
├── read.c
├── read.h
├── samples
    ├── binary-add.sed
    ├── generate-table-of-contents.sed
    └── tic-tac-toe.sed
├── sed
├── sed-bin.c
├── status.h
└── test


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Quentin L'Hours
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | objs = address.o operations.o read.o sed-bin.o
 2 | BIN ?= sed-bin
 3 | 
 4 | $(BIN): $(objs)
 5 | 	@# the line below is implicit with GNU make, add it for BSD compatibility
 6 | 	$(CC) $(objs) -o $@
 7 | 
 8 | sed-bin.o: generated.c generated-init.c
 9 | 
10 | .PHONY: clean
11 | 
12 | clean:
13 | 	rm -f *.o $(BIN)
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # sed-bin: Compile a sed script
  2 | 
  3 | This project translates **sed** to **C** so that you can compile the
  4 | result and generate a binary that will have the exact same behavior as the
  5 | original sed script, for example `echo foo | sed s/foo/bar/` will be replaced
  6 | by `echo foo | ./sed-bin`.
  7 | 
  8 | # Table of contents
  9 | 
 10 | * [sed-bin: Compile a sed script](#sed-bin-Compile-a-sed-script)
 11 | * [Quick start](#Quick-start)
 12 |   * [Setup](#Setup)
 13 |   * [How to use](#How-to-use)
 14 |   * [Quick step-by-step](#Quick-step-by-step)
 15 |   * [Full walk-through with a bigger script](#Full-walk-through-with-a-bigger-script)
 16 | * [Sample scripts](#Sample-scripts)
 17 | * [How it works](#How-it-works)
 18 |   * [Some generated code](#Some-generated-code)
 19 | * [Why](#Why)
 20 | * [Translating the translator](#Translating-the-translator)
 21 | * [Using this project as a sed alternative](#Using-this-project-as-a-sed-alternative)
 22 | * [Notes](#Notes)
 23 | 
 24 | # Quick start
 25 | 
 26 | ## Setup
 27 | Clone the repo and move inside its directory, you'll need the usual UNIX
 28 | core and build utils (sed, libc, C compiler, shell, make).
 29 | 
 30 | *Note: this project is currently tested with the GNU libc (2.31), GNU sed (4.5)
 31 | and GCC (10.1.1) on Fedora 32. Some additional tests have been done on FreeBSD 12.1*
 32 | 
 33 | ## How to use
 34 | 
 35 | ### Quick step-by-step
 36 | 
 37 | Let's take a simple example:
 38 | ```bash
 39 | sh$ echo foo | sed s/foo/bar/
 40 | bar
 41 | ```
 42 | Assuming you want to compile `s/foo/bar/`:
 43 | 
 44 | - Use the provided [compile](./compile) shell script, this takes care of the C
 45 |   translation and compilation steps:
 46 | ```bash
 47 | sh$ echo s/foo/bar/ | ./compile
 48 | + cat
 49 | + ./par.sed
 50 | + make
 51 | cc    -c -o sed-bin.o sed-bin.c
 52 | cc    -c -o address.o address.c
 53 | cc    -c -o operations.o operations.c
 54 | cc    -c -o read.o read.c
 55 | cc   sed-bin.o address.o operations.o read.o   -o sed-bin
 56 | Compiled sed script available: ./sed-bin
 57 | ```
 58 | 
 59 | - Once the generated C code is compiled, you can use the resulting `sed-bin`
 60 |   binary in place of `sed s/foo/bar/`:
 61 | ```bash
 62 | sh$ echo foo | ./sed-bin
 63 | bar
 64 | ```
 65 | 
 66 | That's about it!
 67 | 
 68 | ### Full walk-through with a bigger script
 69 | 
 70 | Say you want to compile the following sed script which is used to generate the
 71 | table of contents of this project's README (can also be found in the [samples
 72 | directory](./samples)):
 73 | 
 74 | ```sed
 75 | #!/bin/sed -f
 76 | 
 77 | # Generate table of contents with links for markdown files
 78 | # Usage: sed -f <this-script> <mardown file>
 79 | 
 80 | # ignore code blocks
 81 | /^```/,/^```/d
 82 | 
 83 | # no need to index ourselves
 84 | /^# Table of contents/d
 85 | 
 86 | # found heading
 87 | /^#/{
 88 |   # save our line and first work on the actual URI
 89 |   h
 90 |   # strip leading blanks
 91 |   s/^#*[[:blank:]]*//
 92 |   s/[[:blank:]]/-/g
 93 |   # punctuation and anything funky gets lost
 94 |   s/[^-[:alnum:]]//g
 95 |   # swap with hold and work on the displayed title
 96 |   x
 97 |   # get rid of last leading # and potential white spaces
 98 |   s/^\(#\)*#[[:blank:]]*/\1/
 99 |   # the remaining leading # (if any) will be used for indentation
100 |   s/#/  /g
101 |   # prepare the first half of the markdown
102 |   s/\( *\)\(.*\)/\1* [\2](#/
103 |   # append the link kept and remove the newline
104 |   G
105 |   s/\(.*\)[[:space:]]\(.*\)/\1\2)/p
106 | }
107 | d
108 | ```
109 | 
110 | Let's use the provided translator [par.sed](./par.sed) which is a big sed
111 | script translating other sed scripts to C code. Redirect the output to a file
112 | named `generated.c`. Another file with some declarations called
113 | `generated-init.c` will be created by the translator automatically. You'll need
114 | those two files to generate a working binary.
115 | 
116 | ```sh
117 | sh$ sed -f par.sed < samples/generate-table-of-contents.sed > generated.c
118 | ```
119 | 
120 | If you take a peek at `generated.c`, you'll note that for simplicity and
121 | readability the generated code is mostly function calls; the actual C code
122 | doing the work is not generated but mostly found in
123 | [operations.c](./operations.c). Now we're ready to compile the generated code:
124 | 
125 | ```sh
126 | sh$ make
127 | cc    -c -o sed-bin.o sed-bin.c
128 | cc    -c -o address.o address.c
129 | cc    -c -o operations.o operations.c
130 | cc    -c -o read.o read.c
131 | cc   sed-bin.o address.o operations.o read.o   -o sed-bin
132 | ```
133 | 
134 | A binary named `sed-bin` has been generated, it should have the exact same
135 | behavior as the sed script:
136 | 
137 | ```sh
138 | sh$ ./sed-bin < README.md
139 | * [sed-bin: Compile a sed script](#sed-bin-Compile-a-sed-script)
140 | * [Quick start](#Quick-start)
141 |   * [Setup](#Setup)
142 |   * [How to use](#How-to-use)
143 |   * [Quick step-by-step](#Quick-step-by-step)
144 |   * [Full walk-through with a bigger script](#Full-walk-through-with-a-bigger-script)
145 | * [Sample scripts](#Sample-scripts)
146 | * [How it works](#How-it-works)
147 |   * [Some generated code](#Some-generated-code)
148 | * [Why](#Why)
149 | * [Translating the translator](#Translating-the-translator)
150 | * [Using this project as a sed alternative](#Using-this-project-as-a-sed-alternative)
151 | * [Notes](#Notes)
152 | ```
153 | 
154 | # Sample scripts
155 | 
156 | Some example sed scripts are available in the [samples](./samples) directory:
157 | 
158 | - [samples/binary-add.sed](./samples/binary-add.sed)
159 | - [samples/generate-table-of-contents.sed](./samples/generate-table-of-contents.sed)
160 | - [samples/tic-tac-toe.sed](./samples/tic-tac-toe.sed)
161 | - [par.sed (sed to C translator)](./par.sed)
162 | 
163 | Other notable sed scripts tested with this project:
164 | 
165 | - [sokoban.sed](https://github.com/aureliojargas/sokoban.sed), a
166 |   [sokoban](https://en.wikipedia.org/wiki/Sokoban) game written by Aurelio
167 |   Jargas
168 | - [dc.sed](http://sed.sourceforge.net/grabbag/scripts/dc.sed), an arbitrary
169 |   precision reverse polish notation calculator written by Greg Ubben
170 | - [chainlint.sed](https://github.com/git/git/blob/master/t/chainlint.sed), a git
171 |   internal tool to detect broken "&&" chains in shell scripts.
172 | 
173 | # How it works
174 | 
175 | ## Some generated code
176 | The translator [par.sed](./par.sed) (which is written in sed itself) converts
177 | sed commands to valid C code:
178 | 
179 | ```bash
180 | sh$ echo y/o/u/ | sed -f ./par.sed
181 | ```
182 | 
183 | Will output:
184 | 
185 | ```c
186 | y(&status, "o", "u");
187 | ```
188 | 
189 | The actual logic to handle `y` (and most other commands) is not generated, we
190 | just need to translate the sed syntax to valid C code, which here stays fairly
191 | readable.
192 | 
193 | Let's look at a slightly more complex example:
194 | 
195 | ```sed
196 | /foo/{
197 |   p;x
198 | }
199 | ```
200 | 
201 | Translates to:
202 | ```c
203 | static Regex reg_1 = {.compiled = false, .str = "foo"};
204 | if (addr_r(&status, &reg_1))
205 | {
206 | 
207 | p(&status);
208 | x(&status);
209 | 
210 | }
211 | ```
212 | 
213 | And an example of how labels are handled:
214 | 
215 | ```sed
216 | b end
217 | 
218 | # some comment
219 | i \
220 | Doesn't look like this\
221 | code is reachable
222 | 
223 | : end
224 | ```
225 | 
226 | Translates to:
227 | ```c
228 | goto end_label;
229 | 
230 | 
231 | // some comment
232 | i("Doesn't look like this\ncode is reachable");
233 | 
234 | end_label:;
235 | ```
236 | 
237 | # Why
238 | 
239 | Not much practical use to this, here are some thoughts:
240 | 
241 | - Debugging a sed script is hard, one possible way is to run `sed` in gdb,
242 |   but this assumes some familiarity with the implementation. Here the generated
243 |   C code is rather close to the original sed script, which should allow gdb to
244 |   be easier to use (`make -B CFLAGS=-g` for symbols).
245 | - Might be useful for obfuscation or maybe to limit the scope of sed? Resulting
246 |   binaries are usually smaller than a full `sed` binary as well.
247 | - Better speed? Since the generated code is specific to a script, one might
248 |   expect it to be much faster than using `sed`, since we can skip parsing,
249 |   walking the AST etc. I didn't do any serious measurements yet, but so far it
250 |   seems slightly faster than GNU sed (around 20% faster to translate the
251 |   translator for instance).
252 | 
253 | # Translating the translator
254 | 
255 | The basic idea of this project is to translate **sed** code to **C** code, to
256 | compile it and have a resulting binary with the same behavior as the original
257 | script.
258 | 
259 | Now, since the translator from sed to C is written in sed, we should be able to
260 | translate the translator, compile it, and then be able to use the compiled
261 | version to translate other sed scripts.
262 | 
263 | Translate the translator (`par.sed`) with itself:
264 | 
265 | ```sh
266 | sh$ ./par.sed < ./par.sed > generated.c
267 | ```
268 | 
269 | ```sh
270 | sh$ make
271 | cc    -c -o sed-bin.o sed-bin.c
272 | cc    -c -o address.o address.c
273 | cc    -c -o operations.o operations.c
274 | cc    -c -o read.o read.c
275 | cc   sed-bin.o address.o operations.o read.o   -o sed-bin
276 | ```
277 | 
278 | We now have a binary that should be able to translate sed code, let's try to
279 | translate the translator with it:
280 | 
281 | ```sh
282 | sh$ ./sed-bin < ./par.sed | diff -s generated.c -
283 | Files generated.c and - are identical
284 | ```
285 | 
286 | Generated code is identical, which means that at this point we have a
287 | standalone binary that is able to translate other sed scripts to C. We no
288 | longer need another sed implementation as a starting point to make the
289 | translation.
290 | 
291 | # Using this project as a sed alternative
292 | 
293 | A shell script named [sed](./sed) is available in this repository, providing
294 | the same interface as a POSIX sed implementation.
295 | 
296 | ```sh
297 | sh$ echo foo | ./sed s/foo/bar/
298 | bar
299 | ```
300 | 
301 | Here `./sed` automates argument parsing, translation, compilation and execution
302 | of the resulting binary. On one hand this is much heavier than the usual sed
303 | implementation, but on the other hand it provides an easy way to quickly test
304 | and compare this project with other implementations.
305 | 
306 | The default translation is done with the `./par.sed` translator script, which
307 | will use the default **sed** binary available on the system. So that means we
308 | need to use a full **sed** implementation to provide another **sed**
309 | implementation which doesn't make much sense. To get rid of this initial
310 | **sed** dependency simply translate and compile `par.sed`, save the generated
311 | binary and then use the **sed** shell script with `SED_TRANSLATOR` environment
312 | variable set to the newly created binary.
313 | 
314 | For example:
315 | 
316 | ```sh
317 | sh$ BIN=compiled-translator ./compile ./par.sed
318 | + cat ./par.sed
319 | + ./par.sed
320 | + make
321 | cc    -c -o address.o address.c
322 | cc    -c -o operations.o operations.c
323 | cc    -c -o read.o read.c
324 | cc    -c -o sed-bin.o sed-bin.c
325 | cc address.o operations.o read.o sed-bin.o -o compiled-translator
326 | Compiled sed script available: compiled-translator
327 | sh$ echo foo | SED_TRANSLATOR=./compiled-translator ./sed 's/foo/bar/'
328 | bar
329 | ```
330 | 
331 | # Notes
332 | 
333 | - Incomplete features / known issues:
334 |   - with 2 addresses, the `c` command will be executed every time for each
335 |   matching line instead of only once when leaving the range
336 |   - no pattern/hold space overflow checks, currently both limited to 8192 bytes
337 |   as per the minimum POSIX spec requirement. Going over that limit will most
338 |   likely cause a segfault.
339 | 
340 | - The translator does not handle invalid sed scripts, it will just generate
341 |   invalid C code (and potentially loop endlessly) which will probably fail to
342 |   compile, make sure you can run your script with an actual `sed` implementation
343 |   before attempting to translate it.
344 | 
345 | - Non-POSIX support is currently not planned; if you are using GNU sed, you can
346 |   try to see what is not supported by running your script with the `--posix`
347 |   option. Also check out the [POSIX specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html).
348 | 
349 | - Only `-n` (suppress the default output) is accepted as a command line
350 |   argument of the resulting binary.
351 | 
352 | - The generated binaries currently only accept data from stdin:
353 |   `./sed-bin < file` not `./sed-bin file`. If you have multiple files use
354 |   `cat file1 file2 file3 | ./sed-bin`.
355 | 
356 | - The C code is very rough around the edges (by that I mean dirty and unsafe,
357 |   for instance allocating everything on the stack without checking any
358 |   overflow), I'm still working on it, but contributions (issues/comments/pull
359 |   requests) are also welcome :)
360 | 


--------------------------------------------------------------------------------
/address.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <regex.h>
  3 | #include <stdbool.h>
  4 | #include <stddef.h>
  5 | 
  6 | #include "address.h"
  7 | #include "status.h"
  8 | 
  9 | /*
 10 |  * How to handle ranges is a bit subjective, the POSIX spec doesn't say much.
 11 |  * For instance if we're looping without reading any new input, should the
 12 |  * range match again (assuming the range addresses are still matching)?
 13 |  * GNU sed discards matches only once when the exit condition is reached, even
 14 |  * in a loop, why would a numeric range stop matching if we're still on the same
 15 |  * line? Probably because that's the only way with regexes.
 16 |  *
 17 |  * The current implementation only uses a suppress list in the <line-nb>,/regex/
 18 |  * address because it's the only way to make it work for this, I feel like for
 19 |  * the other combinations it makes more sense to allow to match again, even if
 20 |  * it is less consistent. Implicitly /regex/,/regex/ will never match twice in
 21 |  * the end case as well.
 22 |  *
 23 |  * Should a range number end once it is equal (similarly to regex addresses) or
 24 |  * once it goes over? What should 2!{1,2=} print for line 3,4,5... of input?
 25 |  * busybox seems to have a bug there, we'll follow the GNU way, that is check if
 26 |  * we went over.
 27 | */
 28 | 
 29 | bool addr_nn(
 30 |   const Status *const status,
 31 |   const size_t start,
 32 |   const size_t end
 33 | ) {
 34 |   const size_t line_nb = status->line_nb;
 35 |   return (line_nb >= start && line_nb <= end)
 36 |     || line_nb == start; // covers start > end
 37 | }
 38 | 
 39 | bool addr_nr(
 40 |   Status *const status,
 41 |   const size_t start,
 42 |   Regex *const end,
 43 |   const size_t id
 44 | ) {
 45 |   /*
 46 |    * Since we systematically match if line nb >= start we need to remember if we
 47 |    * reached the end address, which is the task of the suppressed array.
 48 |   */
 49 |   const size_t line_nb = status->line_nb;
 50 |   size_t *const range_ids = status->range_ids;
 51 |   size_t *const suppressed_range_ids = status->suppressed_range_ids;
 52 |   size_t *free_slot = NULL;
 53 |   size_t i;
 54 |   for (i = 0; i < MAX_ACTIVE_RANGES; ++i) {
 55 |     if (suppressed_range_ids[i] == id) {
 56 |       return false;
 57 |     } else if (range_ids[i] == id) {
 58 |       break;
 59 |     } else if (free_slot == NULL && range_ids[i] == 0) {
 60 |       free_slot = range_ids + i;
 61 |     }
 62 |   }
 63 |   if (i == MAX_ACTIVE_RANGES) {
 64 |     // Could not find active range, let's check if we can start a new one
 65 |     if (line_nb >= start) {
 66 |       if (addr_r(status, end)) {
 67 |         suppressed_range_ids[i] = id;
 68 |       } else {
 69 |         assert(free_slot);
 70 |         *free_slot = id;
 71 |       }
 72 |       return true;
 73 |     }
 74 |   } else {
 75 |     // inside active regex range, need to check if we can free it.
 76 |     if (addr_r(status, end)) {
 77 |       suppressed_range_ids[i] = id;
 78 |       range_ids[i] = 0;
 79 |     }
 80 |     return true;
 81 |   }
 82 |   return false;
 83 | }
 84 | 
 85 | bool addr_rn(
 86 |   Status *const status,
 87 |   Regex *const start,
 88 |   const size_t end,
 89 |   const size_t id
 90 | ) {
 91 |   const size_t line_nb = status->line_nb;
 92 |   size_t *const range_ids = status->range_ids;
 93 |   size_t *free_slot = NULL;
 94 |   size_t i;
 95 |   for (i = 0; i < MAX_ACTIVE_RANGES; ++i) {
 96 |     if (range_ids[i] == id) {
 97 |       break;
 98 |     } else if (free_slot == NULL && range_ids[i] == 0) {
 99 |       free_slot = range_ids + i;
100 |     }
101 |   }
102 |   if (i == MAX_ACTIVE_RANGES) {
103 |     // Could not find active range, let's check if we can start a new one
104 |     if (addr_r(status, start)) {
105 |       if (line_nb < end) {
106 |         assert(free_slot);
107 |         *free_slot = id;
108 |       }
109 |       return true;
110 |     }
111 |   } else {
112 |     // inside active range, need to check if we can free it.
113 |     if (line_nb >= end) {
114 |       range_ids[i] = 0;
115 |     }
116 |     return line_nb <= end;
117 |   }
118 |   return false;
119 | }
120 | 
121 | bool addr_rr(
122 |   Status *const status,
123 |   Regex *const start,
124 |   Regex *const end,
125 |   const size_t id
126 | ) {
127 |   size_t *const range_ids = status->range_ids;
128 |   size_t *free_slot = NULL;
129 |   size_t i;
130 |   for (i = 0; i < MAX_ACTIVE_RANGES; ++i) {
131 |     if (range_ids[i] == id) {
132 |       break;
133 |     } else if (free_slot == NULL && range_ids[i] == 0) {
134 |       free_slot = range_ids + i;
135 |     }
136 |   }
137 |   if (i == MAX_ACTIVE_RANGES) {
138 |     // Could not find active range, let's check if we can start a new one
139 |     if (addr_r(status, start)) {
140 |       assert(free_slot);
141 |       *free_slot = id;
142 |       return true;
143 |     }
144 |   } else {
145 |     // inside active range, need to check if we can free it.
146 |     if (addr_r(status, end)) {
147 |       range_ids[i] = 0;
148 |     }
149 |     return true;
150 |   }
151 |   return false;
152 | }
153 | 
154 | bool addr_n(const Status *const status, const size_t line_nb) {
155 |   return status->line_nb == line_nb;
156 | }
157 | 
158 | bool addr_r(Status *const status, Regex *const regex) {
159 |   status->last_regex = regex;
160 |   regex_t *const regex_obj = &regex->obj;
161 | 
162 |   if (!regex->compiled) {
163 |     assert(regcomp(regex_obj, regex->str, 0) == 0);
164 |     regex->compiled = true;
165 |   }
166 | 
167 |   const char *const pattern_space = status->pattern_space;
168 | 
169 |   return !regexec(regex_obj, pattern_space, 0, NULL, 0);
170 | }
171 | 


--------------------------------------------------------------------------------
/address.h:
--------------------------------------------------------------------------------
#ifndef ADDRESS_H
#define ADDRESS_H

#include <stdbool.h>

#include "status.h"

/*
 * Two-address (range) matchers. The suffix tells the kind of each bound:
 * n = line number, r = regex. Each returns true when the current line is
 * selected. `id` identifies the range in the bookkeeping arrays of Status
 * (range_ids, plus suppressed_range_ids for addr_nr).
 */
bool addr_nn(const Status *const status, const size_t start, const size_t end);
bool addr_nr(Status *const status, const size_t start, Regex *const end, const size_t id);
bool addr_rn(Status *const status, Regex *const start, const size_t end, const size_t id);
bool addr_rr(Status *const status, Regex *const start, Regex *const end, const size_t id);

/*
 * Single-address matchers: addr_n compares the current line number with
 * `line`, addr_r matches the pattern space against `regex` (compiling it on
 * first use and recording it as status->last_regex).
 */
bool addr_n(const Status *const status, const size_t line);
bool addr_r(Status *const status, Regex *const regex);

#endif /* ADDRESS_H */
17 | 


--------------------------------------------------------------------------------
/compile:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | usage() {
 4 |   cat <<EOF
 5 | usage: $basename [sed script file]
 6 | 
 7 | Translate a sed script to C and compile the result to produce a standalone
 8 | binary reproducing the script behavior.
 9 | 
10 | If no file is provided the sed code to be translated will be read from stdin.
11 | EOF
12 | 
13 |   exit "${1-0}"
14 | }
15 | 
16 | readonly basename="${0##*/}"
17 | 
18 | for arg do
19 |   case "$arg" in
20 |     --help|-h)
21 |         usage
22 |         ;;
23 |       *)
24 |         if ! [ -e "$arg" ]; then
25 |           printf '%s: %s: No such file or directory\n' "$basename" "$arg"
26 |           usage 1
27 |         fi >&2
28 |         ;;
29 |   esac
30 | done
31 | 
32 | readonly translator=./par.sed
33 | readonly generated_file=generated.c
34 | readonly bin="${BIN-./sed-bin}"
35 | 
36 | set -x
37 | 
38 | cat "$@" | "$translator" > "$generated_file"
39 | make
40 | 
41 | { set +x; } 2> /dev/null
42 | 
43 | echo Compiled sed script available: "$bin"
44 | 


--------------------------------------------------------------------------------
/operations.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <ctype.h>
  3 | #include <regex.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | 
  8 | #include "operations.h"
  9 | #include "read.h"
 10 | #include "status.h"
 11 | 
 12 | static size_t expand_replace(
 13 |   char *const replace_expanded,
 14 |   const char *const pattern_space,
 15 |   const char *const replace,
 16 |   const regmatch_t *pmatch
 17 | ) {
 18 |   const size_t replace_len = strlen(replace);
 19 |   bool found_backslash = false;
 20 |   size_t replace_expanded_index = 0;
 21 |   for (size_t replace_index = 0; replace_index < replace_len; ++replace_index) {
 22 |     const char replace_char = replace[replace_index];
 23 |     switch (replace_char) {
 24 |       case '\\':
 25 |         // double backslash case
 26 |         if (found_backslash) {
 27 |           replace_expanded[replace_expanded_index++] = '\\';
 28 |         }
 29 |         found_backslash = !found_backslash;
 30 |         break;
 31 |       case '&':
 32 |         if (!found_backslash) {
 33 |           const regoff_t so = pmatch[0].rm_so;
 34 |           const regoff_t eo = pmatch[0].rm_eo;
 35 |           memmove(
 36 |             replace_expanded + replace_expanded_index,
 37 |             pattern_space + so,
 38 |             eo
 39 |           );
 40 |           replace_expanded_index += eo - so;
 41 |         } else {
 42 |           replace_expanded[replace_expanded_index++] = replace_char;
 43 |           found_backslash = false;
 44 |         }
 45 |         break;
 46 |       case '1':
 47 |       case '2':
 48 |       case '3':
 49 |       case '4':
 50 |       case '5':
 51 |       case '6':
 52 |       case '7':
 53 |       case '8':
 54 |       case '9':
 55 |         if (found_backslash) {
 56 |           const size_t back_ref_index = replace_char - '0';
 57 |           const regoff_t so = pmatch[back_ref_index].rm_so;
 58 |           // case when there is match but the capture group is empty:
 59 |           //   echo foo | sed 's/\(x\)*foo/\1bar/'
 60 |           // here the substitution is done but \1 is empty
 61 |           if (so != -1) {
 62 |             const regoff_t eo = pmatch[back_ref_index].rm_eo;
 63 |             memmove(
 64 |               replace_expanded + replace_expanded_index,
 65 |               pattern_space + so,
 66 |               eo
 67 |             );
 68 |             replace_expanded_index += eo - so;
 69 |           }
 70 |           found_backslash = false;
 71 |         } else {
 72 |           replace_expanded[replace_expanded_index++] = replace_char;
 73 |         }
 74 |         break;
 75 |       default:
 76 |         if (found_backslash) {
 77 |           found_backslash = false;
 78 |           if (replace_char == 'n') {
 79 |             replace_expanded[replace_expanded_index++] = '\n';
 80 |           }
 81 |         } else {
 82 |           replace_expanded[replace_expanded_index++] = replace_char;
 83 |         }
 84 |         break;
 85 |     }
 86 |   }
 87 |   return replace_expanded_index;
 88 | }
 89 | 
/*
 * Run `regex` once against pattern_space and, when it matches, rewrite the
 * matched part in place with the expansion of `replace`.
 *
 *   sub_nb  counter of matches found so far, incremented on every match; a
 *           non-zero value on entry makes the regex run with REG_NOTBOL
 *   nth     the match is only replaced once *sub_nb has reached nth, earlier
 *           matches are left untouched
 *
 * Returns an offset relative to pattern_space (presumably where the caller
 * resumes searching -- caller not visible here), or 0 when there is no match.
 * No overflow check is done on pattern_space or on the expansion buffer.
 */
static size_t substitution(
  regex_t *const regex,
  char *pattern_space,
  const char *const replace,
  size_t *const sub_nb,
  const size_t nth
) {
  regmatch_t pmatch[MAX_MATCHES];
  if (regexec(
        regex,
        pattern_space,
        MAX_MATCHES,
        pmatch,
        *sub_nb > 0 ? REG_NOTBOL : 0
  )) {
    // Can return 0 later as well in cases like s/^//, rely on sub_nb value to
    // check if substitution happened
    return 0;
  }

  (*sub_nb)++;

  const regoff_t so = pmatch[0].rm_so; // start offset
  assert(so != -1);
  const regoff_t eo = pmatch[0].rm_eo; // end offset
  if (nth > *sub_nb) {
    // not the requested occurrence yet, skip over this match unchanged
    return eo;
  }
  // TODO arbitrary size, might be too small
  char replace_expanded[PATTERN_SIZE];
  const size_t replace_expanded_len =
    expand_replace(replace_expanded, pattern_space, replace, pmatch);

  const size_t pattern_space_len = strlen(pattern_space);
  // empty match, s/^/foo/ for instance
  if (eo == 0) {
    if (*sub_nb == 1) {
      // first match: make room at the front, then insert the replacement
      memmove(
        pattern_space + replace_expanded_len,
        pattern_space,
        pattern_space_len + 1 // include \0
      );
      memmove(pattern_space, replace_expanded, replace_expanded_len);
      return replace_expanded_len;
    } else if (pattern_space_len == 1) {
      // case:  echo 'Hello ' | sed 's|[^ ]*|yo|g'
      pattern_space++;
      memmove(pattern_space, replace_expanded, replace_expanded_len);
      pattern_space[replace_expanded_len] = '\0';
      return replace_expanded_len + 1; // +1 since we did pattern_space++
    }
    // empty match that is not replaced: step over one character
    return 1;
  }

  size_t po = 0; // offset in pattern_space
  size_t ro = 0; // offset in replace_expanded

  // overwrite the matched text with as much of the replacement as fits
  for (po = so; po < eo && ro < replace_expanded_len; ++po, ++ro) {
    pattern_space[po] = replace_expanded[ro];
  }

  if (po < eo) {
    // Matched part was longer than replaced part, let's shift the rest to the
    // left.
    // NOTE(review): the length moved is pattern_space_len - po, which covers
    // the terminating \0 but also reads up to eo - po - 1 bytes past it.
    memmove(
      pattern_space + po,
      pattern_space + eo,
      pattern_space_len - po
    );
    return po;
  } else if (ro < replace_expanded_len) {
    // Replacement is longer than the match: shift the tail to the right, then
    // copy the remaining part of the replacement into the gap.
    memmove(
      pattern_space + eo + replace_expanded_len - ro,
      pattern_space + eo,
      pattern_space_len - eo
    );
    memmove(
      pattern_space + eo,
      replace_expanded + ro,
      replace_expanded_len - ro
    );

    // the tail was moved without its \0, terminate at the new length
    pattern_space[pattern_space_len + replace_expanded_len - (eo - so)] = 0;
    return so + replace_expanded_len;
  }
  // replacement and match have the same length, nothing to shift
  return eo;
}
177 | 
178 | void a(Status *const status, const char *const output) {
179 |   Pending_output *const p =
180 |     &status->pending_outputs[status->pending_output_counter++];
181 |   p->is_filepath = false;
182 |   p->direct_output = output;
183 | }
184 | 
185 | void c(Status *const status, const char *const output) {
186 |   char *const pattern_space = status->pattern_space;
187 |   pattern_space[0] = '\0';
188 |   puts(output);
189 | }
190 | 
191 | void d(Status *const status) {
192 |   status->pattern_space[0] = '\0';
193 | }
194 | 
195 | operation_ret D(Status *const status) {
196 |   char *const pattern_space = status->pattern_space;
197 |   const char *const newline_location = strchr(pattern_space, '\n');
198 |   if (newline_location == NULL) {
199 |     pattern_space[0] = '\0';
200 |     return CONTINUE;
201 |   }
202 | 
203 |   // Backward memmove instead of moving the pattern space ptr forward because
204 |   // this would mean losing part of the limited stack space that we have
205 |   memmove(
206 |     pattern_space,
207 |     newline_location + 1, // + 1 to start copying after the newline
208 |     strlen(newline_location + 1) + 1 // last +1 to move \0 as well
209 |   );
210 |   status->skip_read = true;
211 |   return CONTINUE;
212 | }
213 | 
214 | void equal(const Status *const status) {
215 |   const size_t line_nb = status->line_nb;
216 |   printf("%zu\n", line_nb);
217 | }
218 | 
219 | void g(Status *const status) {
220 |   char *const pattern_space = status->pattern_space;
221 |   const char *const hold_space = status->hold_space;
222 |   memcpy(
223 |     pattern_space,
224 |     hold_space,
225 |     strlen(hold_space) + 1 // include \0
226 |   );
227 | }
228 | 
229 | void G(Status *status) {
230 |   char *const pattern_space = status->pattern_space;
231 |   const char *const hold_space = status->hold_space;
232 |   const size_t pattern_space_len = strlen(pattern_space);
233 |   memcpy(
234 |     pattern_space + pattern_space_len + 1, // we'll place the \n in between
235 |     hold_space,
236 |     strlen(hold_space) + 1 // include \0
237 |   );
238 |   pattern_space[pattern_space_len] = '\n';
239 | }
240 | 
241 | void h(Status *status) {
242 |   // "h": overwrite the hold space with a copy of the pattern space.
243 |   // No bounds check: the destination is assumed to be large enough.
244 |   const char *const src = status->pattern_space;
245 |   char *const dst = status->hold_space;
246 |   const size_t nbytes = strlen(src) + 1; // + 1 for the terminating \0
247 | 
248 |   memcpy(dst, src, nbytes);
249 | }
250 | 
251 | void H(Status *status) {
252 |   // "H": append a newline followed by the pattern space to the hold space.
253 |   // No bounds check: the destination is assumed to be large enough.
254 |   const char *const src = status->pattern_space;
255 |   char *const hs = status->hold_space;
256 |   char *const old_end = hs + strlen(hs); // currently the terminating \0
257 | 
258 |   // Copy first (\0 included), then turn the old \0 into the separator
259 |   memcpy(old_end + 1, src, strlen(src) + 1);
260 |   *old_end = '\n';
261 | }
262 | 
263 | void i(const char *const output) {
264 |   printf("%s\n", output); // "i": the text goes to stdout right away
265 | }
266 | 
267 | void l(const Status *const status) {
268 |   // "l": write the pattern space to stdout in a visually unambiguous form
269 |   const char *const pattern_space = status->pattern_space;
270 |   for (size_t i = 0, fold_counter = 0; pattern_space[i]; ++i, ++fold_counter) {
271 |     const char c = pattern_space[i];
272 |     if (fold_counter > 80) {
273 |       puts("\\"); // fold long lines: backslash then newline
274 |       fold_counter = 0;
275 |     }
276 |     // cast needed: isprint() is undefined for negative char values
277 |     if (isprint((unsigned char)c)) {
278 |       if (c == '\\') { // needs to be doubled
279 |         putchar('\\');
280 |         fold_counter++;
281 |       }
282 |       putchar(c);
283 |     } else {
284 |       fold_counter++;
285 |       switch (c) {
286 |         case '\n':
287 |           // POSIX: "the '\n' in that table is not applicable". toybox and
288 |           // gnu sed still print newlines as "\n", here the current output
289 |           // line is closed with "$" instead.
290 |           puts("$");
291 |           fold_counter = 0;
292 |           break; // without it "\a" would be printed after each newline
293 |         case '\a':
294 |           printf("\\a");
295 |           break;
296 |         case '\b':
297 |           printf("\\b");
298 |           break;
299 |         case '\f':
300 |           printf("\\f");
301 |           break;
302 |         case '\r':
303 |           printf("\\r");
304 |           break;
305 |         case '\t':
306 |           printf("\\t");
307 |           break;
308 |         case '\v':
309 |           printf("\\v");
310 |           break;
311 |         default:
312 |           fold_counter += 2; // 3 counting the beginning of the else branch
313 |           printf("\\%03hho", (unsigned char)c);
314 |           break;
315 |       }
316 |     }
317 |   }
318 |   puts("$");
319 | }
320 | 
321 | operation_ret n(Status *const status) {
322 |   // "n": print the pattern space unless default output is suppressed, then
323 |   // replace it with the next input line. BREAK when no line could be read.
324 |   char *const ps = status->pattern_space;
325 |   if (status->suppress_default_output == false) {
326 |     puts(ps);
327 |   }
328 |   return read_pattern(status, ps, PATTERN_SIZE) ? 0 : BREAK;
329 | }
330 | 
331 | operation_ret N(Status *const status) {
332 |   // "N": append a newline and the next input line to the pattern space. The
333 |   // line is read behind the current \0, replaced by '\n' only on success.
334 |   char *const ps = status->pattern_space;
335 |   const size_t used = strlen(ps);
336 |   char *const dest = ps + used + 1;
337 |   const size_t room = PATTERN_SIZE - used - 1;
338 |   if (!read_pattern(status, dest, room)) {
339 |     return BREAK;
340 |   }
341 |   ps[used] = '\n';
342 |   return 0;
343 | }
344 | 
345 | void p(const Status *const status) {
346 |   // "p": write the pattern space and a newline to stdout
347 |   printf("%s\n", status->pattern_space);
348 | }
349 | 
350 | void P(const Status *const status) {
351 |   // "P": print the pattern space up to (not including) its first newline
352 |   const char *const pattern_space = status->pattern_space;
353 |   const char *const first_nl = strchr(pattern_space, '\n');
354 |   if (first_nl == NULL) {
355 |     p(status); // single line: same output as the "p" command
356 |     return;
357 |   }
358 |   // the precision consumed by "%.*s" has to be an int, not an unsigned int
359 |   printf("%.*s\n", (int)(first_nl - pattern_space), pattern_space);
360 | }
361 | 
362 | void q(const Status *const status) { // "q": optional print, then exit
363 |   if (status->suppress_default_output == false) {
364 |     p(status);
365 |   }
366 |   exit(0);
367 | }
368 | 
369 | void r(Status *const status, const char *const filepath) {
370 |   // "r": queue filepath as a pending output in the next free slot
371 |   const size_t slot = status->pending_output_counter++;
372 |   status->pending_outputs[slot].is_filepath = true;
373 |   status->pending_outputs[slot].filepath = filepath;
374 | }
375 | 
376 | void s(
377 |   Status *const status,
378 |   Regex *const regex,
379 |   const char *const replace,
380 |   const size_t opts,
381 |   const size_t nth,
382 |   FILE *const f
383 | ) {
384 |   // "s": replace matches of regex in the pattern space with replace.
385 |   // opts is a mask of S_OPT_G / S_OPT_P, nth the occurrence to replace and
386 |   // f a file that receives the result on success (ignored by w() if NULL).
387 |   status->last_regex = regex;
388 |   regex_t *const regex_obj = &regex->obj;
389 | 
390 |   if (!regex->compiled) {
391 |     // regcomp must stay out of assert(): with NDEBUG the whole expression
392 |     // would vanish and the regex would be used without being compiled
393 |     const int regcomp_ret = regcomp(regex_obj, regex->str, 0);
394 |     assert(regcomp_ret == 0);
395 |     if (regcomp_ret != 0) {
396 |       abort(); // still fatal when assert is compiled out
397 |     }
398 |     regex->compiled = true;
399 |   }
400 | 
401 |   const bool opt_g = opts & S_OPT_G;
402 |   const bool opt_p = opts & S_OPT_P;
403 |   char *pattern_space = status->pattern_space;
404 |   size_t sub_nb = 0;
405 |   do {
406 |     const size_t initial_sub_nb = sub_nb;
407 |     const size_t pattern_offset =
408 |       substitution(regex_obj, pattern_space, replace, &sub_nb, nth);
409 |     if (initial_sub_nb == sub_nb) {
410 |       break; // the counter did not move, stop looping
411 |     }
412 |     pattern_space += pattern_offset;
413 |   } while ((opt_g || nth > sub_nb) && pattern_space[0]);
414 | 
415 |   if (sub_nb >= nth) {
416 |     status->sub_success = true;
417 |     if (opt_p) {
418 |       puts(status->pattern_space);
419 |     }
420 |     w(status, f);
421 |   }
422 | }
423 | 
424 | void w(const Status *const status, FILE *const f) {
425 |   // "w": write the pattern space and a newline to f, no-op when f is NULL
426 |   if (f == NULL) {
427 |     return;
428 |   }
429 |   fprintf(f, "%s\n", status->pattern_space);
430 |   // Flush right away: later reads of the same file from within the same sed
431 |   // script must see up-to-date content (tests rely on this to avoid
432 |   // external checks, GNU sed handles it correctly as well)
433 |   fflush(f);
434 | }
435 | 
436 | void x(Status *const status) {
437 |   // "x": exchange pattern and hold spaces by swapping the two pointers
438 |   char *const old_pattern_space = status->pattern_space;
439 |   status->pattern_space = status->hold_space;
440 |   status->hold_space = old_pattern_space;
441 | }
442 | 
443 | void y(Status *const status, const char *const set1, const char *const set2) {
444 |   // "y": map every pattern space char found in set1 to its peer in set2
445 |   for (char *cur = status->pattern_space; *cur; ++cur) {
446 |     for (size_t set_index = 0; set1[set_index] && set2[set_index]; ++set_index) {
447 |       if (*cur == set1[set_index]) {
448 |         *cur = set2[set_index];
449 |         break; // the new char must not be translated again
450 |       }
451 |     }
452 |   }
453 | }
454 | 


--------------------------------------------------------------------------------
/operations.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPERATIONS_H
 2 | #define OPERATIONS_H
 3 | // One function per sed command, called by the C code that par.sed generates
 4 | #include <stdbool.h>
 5 | 
 6 | #include "status.h"
 7 | // Flags that can be or-ed into the opts argument of s()
 8 | #define S_OPT_G 0x01
 9 | #define S_OPT_P 0x02
10 | 
11 | void a(Status *const status, const char *const output);
12 | void c(Status *const status, const char *const output);
13 | void d(Status *const status); // empties the pattern space
14 | operation_ret D(Status *const status); // removes up to the first newline
15 | void equal(const Status *const status); // "=": prints the line number
16 | void g(Status *const status); // pattern space = hold space
17 | void G(Status *const status); // pattern space += newline + hold space
18 | void h(Status *const status); // hold space = pattern space
19 | void H(Status *const status); // hold space += newline + pattern space
20 | void i(const char *const output); // prints output and a newline
21 | void l(const Status *const status); // prints the pattern space escaped
22 | operation_ret n(Status *const status); // BREAK when no line could be read
23 | operation_ret N(Status *const status); // BREAK when no line could be read
24 | void p(const Status *const status); // prints the pattern space
25 | void P(const Status *const status); // prints up to the first newline
26 | void q(const Status *const status); // exits the program, never returns
27 | void r(Status *const status, const char *const filepath); // queues a file
28 | void s(
29 |   Status *const status,
30 |   Regex *const regex,
31 |   const char *const replace,
32 |   const size_t opts, // mask of S_OPT_* flags
33 |   const size_t nth, // number of the occurrence to replace
34 |   FILE *const f // file receiving the result, may be NULL
35 | );
36 | void w(const Status *const status, FILE *const f); // no-op when f is NULL
37 | void x(Status *const status); // swaps pattern and hold spaces
38 | void y(Status *const status, const char *const set1, const char *const set2);
39 | 
40 | #endif /* OPERATIONS_H */
41 | 


--------------------------------------------------------------------------------
/par.sed:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/sed -f
  2 | 
  3 | # The first line of the hold space is used for temporary storage in this script,
  4 | # never use it to store data longer than a single command.
  5 | # Second line will act as an id to create unique variable names for w cmd files
  6 | # Same for the third but for regexes
  7 | # 2nd and 3rd lines should be located from the bottom since the hold might grow
  8 | # between the current first and second line.
  9 | 
 10 | 1{
 11 |   x
 12 |   s/.*/\
 13 | 0\
 14 | 0&/
 15 |   x
 16 |   /^#n/{
 17 |     s/.*/status.suppress_default_output = true;/w generated-init.c
 18 |     d
 19 |   }
 20 | }
 21 | 
 22 | : start
 23 | 
 24 | # remove ; and spaces
 25 | s/^[;[:blank:]][;[:blank:]]*//g
 26 | t start
 27 | 
 28 | # If empty line, read the next one
 29 | /^$/{
 30 |   n
 31 |   b start
 32 | }
 33 | 
 34 | # curly braces need to be printed, and then removed
 35 | s/^[{}]/&\
 36 | /
 37 | t curly_bracket_found
 38 | # if this last check failed then we can assume the next chars will be an actual
 39 | # command
 40 | b cmd_check
 41 | 
 42 | : curly_bracket_found
 43 | # print and remove
 44 | P
 45 | s/^..//
 46 | t start
 47 | 
 48 | : cmd_check
 49 | s|^#|//|; t comment
 50 | /^[bt]/{
 51 |   # All labels will be suffixed by "_label" to prevent C reserved words
 52 |   # conflicts. For instance naming a label "break", "continue" or "case" is fine
 53 |   # in sed but not in C.
 54 |   s/^b[[:blank:]]*\([^[:blank:];}][^[:blank:];}]*\)/goto \1_label;\
 55 | /
 56 |   t label_cmds
 57 |   s/^t[[:blank:]]*\([^[:blank:];}][^[:blank:];}]*\)/if (status.sub_success) { status.sub_success = false; goto \1_label; }\
 58 | /
 59 |   t label_cmds
 60 | 
 61 |   s/./&{ if (!status.suppress_default_output) puts(status.pattern_space); continue; }\
 62 | /
 63 |   s/^t/&if (status.sub_success) /
 64 |   s/.//
 65 |   t label_cmds
 66 | }
 67 | # semi-colon needed since declarations cannot directly follow a label in C
 68 | s/^:[[:blank:]]*\([^;}][^[:blank:];}]*\)/\1_label:;\
 69 | /; t label_cmds
 70 | s/^r[[:blank:]]*//; t r_cmd
 71 | s/^w[[:blank:]]*//; t w_cmd
 72 | s/^s//; t s_cmd
 73 | s/^y//; t y_cmd
 74 | s/^[hHgGlpPqx]/&(\&status);\
 75 | /
 76 | t single_char_cmd
 77 | s/^=/equal(\&status);\
 78 | /
 79 | t single_char_cmd
 80 | s/^d/{ &(\&status); continue; }\
 81 | /
 82 | t single_char_cmd
 83 | s/^D/if (&(\&status) == CONTINUE) continue;\
 84 | /
 85 | t single_char_cmd
 86 | s/^[Nn]/if (&(\&status) == BREAK) break;\
 87 | /
 88 | t single_char_cmd
 89 | 
 90 | s/^\([aci]\)[[:blank:]]*\\$/\1/; t aci_cmds
 91 | 
 92 | : address_check
 93 | s|^/|&|; t addr_regex
 94 | s|^\\\(.\)|\1|; t addr_regex
 95 | s/^[0-9]/&/; t addr_number
 96 | s/^\$//; t addr_last_line
 97 | s/^./invalid command: &/
 98 | t fail
 99 | s/.*/Missing command: &/
100 | t fail
101 | 
102 | : comment
103 | p
104 | d
105 | 
106 | : single_char_cmd
107 | P
108 | s/.*\n//
109 | t start
110 | s/.*/single char cmd cleanup: &/
111 | b fail
112 | 
113 | : addr_last_line
114 | H
115 | s/.*/status.last_line_addr_present = true;/w generated-init.c
116 | g
117 | s/.*\n//
118 | x
119 | s/\(.*\)\n.*/\1/
120 | # work on the hold, if second address, do not add a newline (we've already built
121 | # the start of the C code on a new line)
122 | /^[^rn]/s/$/\
123 | /
124 | x
125 | # add address followed by a newline, the pattern should never have more than one
126 | # newline
127 | s/.*/status.last_line_nb\
128 | &/
129 | # save address to hold and strip it from pattern, only leaving rest of the line
130 | H
131 | s/^.*\n//
132 | x
133 | # back to hold
134 | # remove H call newline and the very last line which is the rest of the current
135 | # line.
136 | # include address type at the very top (n).
137 | s/^\([rn]*\)\(.*\)\n\(.*\)\n.*/\1n\2\3/
138 | t valid_s_or_addr_parsing
139 | b fail
140 | 
141 | : addr_number
142 | x
143 | # work on the hold, if second address, do not add a newline (we've already built
144 | # the start of the C code on a new line)
145 | /^[^rn]/s/$/\
146 | /
147 | x
148 | # save address to hold and strip it from pattern, only leaving rest of the line
149 | H
150 | s/^[0-9]*//
151 | x
152 | # back to hold
153 | # remove H call newline and the rest of the line (only keep the number), also
154 | # include address type at the very top (n).
155 | s/^\([rn]*\)\(.*\)\n\([0-9][0-9]*\).*/\1n\2\3/
156 | t valid_s_or_addr_parsing
157 | b fail
158 | 
159 | : addr_regex
160 | x
161 | # s/^/r/ triggers a bug on FreeBSD where the pattern space gets flushed
162 | # entirely if it starts with a newline before executing a s/^/foo/
163 | # substitution. Workaround is to use s/.*/preceding text&/
164 | s/.*/r&/
165 | t regex_start_process
166 | 
167 | : r_cmd
168 | s/["\]/\\&/g
169 | s/.*/r(\&status, "&");/
170 | n
171 | b start
172 | 
173 | : w_cmd
174 | s/["\]/\\&/g
175 | H
176 | x
177 | 
178 | s/\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)/\1\
179 | \2x\
180 | \3\
181 | w(\&status, wfile_\2);\
182 | FILE *const wfile_\2 = open_file(open_file_paths, open_file_handles, "\4");/
183 | 
184 | h
185 | s/\(.*\)\n.*/\1/
186 | x
187 | s/.*\n\(.*\)/\1/
188 | w generated-init.c
189 | g
190 | s/\(.*\)\n.*/\1/
191 | x
192 | s/.*\n\(.*\)/\1/
193 | n
194 | b start
195 | 
196 | : s_cmd
197 | # s cmd needs a scope since for the case:
198 | #   /foo/s/bar/baz/
199 | #   if addr("foo") static reg = ...; s(reg);
200 | # Here static ends up alone in the if, which is no good, so we add a scope:
201 | #   if addr("foo") { static reg = ...; s(reg); }
202 | # This issue cannot happen with addresses since they cannot be chained without
203 | # brackets: /foo//bar/p -> invalid but /foo/{/bar/p} -> valid and not an issue.
204 | i \
205 | {
206 | 
207 | x
208 | # at the top of the hold, track the number of delimiters encountered:
209 | # s/foo -> s0 but s/foo/bar -> s1
210 | s/.*/s0&/
211 | 
212 | t regex_start_process
213 | 
214 | : y_cmd
215 | x
216 | s/.*/y0&/
217 | t regex_start_process
218 | 
219 | : regex_start_process
220 | # insert start of s C code at the bottom of the hold (we omit the name of the
221 | # function since we don't know here if we are currently processing the s command
222 | # or a regex address)
223 | # If we are processing the second address in a range, we want to avoid adding a
224 | # newline since we have the beginning of the C code for this range at the bottom
225 | # of the hold.
226 | /^[rn][rn]/!s/$/\
227 | /
228 | 
229 | # check if this is an empty pattern, in which case we want to use the last one
230 | 
231 | /^[rs]/{
232 |   x
233 |   /^\(.\)\1/{
234 |     s//\1/
235 |     x
236 |     s/$/status.last_regex/
237 |     t regex_valid_delim_eaten
238 |   }
239 |   x
240 | }
241 | /^[srn]/s/$/\
242 | /
243 | s/$/"/
244 | # reset sub success value
245 | t regex_insert_c_start
246 | : regex_insert_c_start
247 | x
248 | 
249 | : regex_eat_next
250 | 
251 | # Nothing on the line except our saved delimiter, meaning we reached the end of
252 | # the line without finding a backslash or the closing delimiter, which is
253 | # invalid
254 | /^.$/{
255 |   s/.*/Missing closing delimiter/
256 |   b fail
257 | }
258 | # Case where we only have our delimiter and a backslash on the line, meaning
259 | # there's a newline in the s command
260 | /^.\\$/{
261 |   # remove escape
262 |   s/\\$//
263 |   x
264 |   # insert literal \n in C code
265 |   s/$/\\n/
266 |   x
267 |   # read next line and remove automatic newline between delim and next line
268 |   N
269 |   s/^\(.\)\n/\1/
270 |   t regex_eat_next
271 | }
272 | 
273 | x
274 | /^\[/{
275 |   x
276 |   s/^\(.\)\(\[:[[:alpha:]][[:alpha:]]*:]\)/\2\
277 | \1/
278 |   t regex_save_char
279 |   /^\(.\)]/{
280 |     s//]\
281 | \1/
282 |     x
283 |     # found end of range, remove our hold mark
284 |     s/\[//
285 |     x
286 |     t regex_save_char
287 |   }
288 |   # Literal double quotes and backslashes must be escaped in the C code
289 |   s/^\(.\)\([\"]\)/\\\2\
290 | \1/
291 |   t regex_save_char
292 |   # any char in a range is literal
293 |   s/^\(.\)\(.\)/\2\
294 | \1/
295 |   t regex_save_char
296 | }
297 | x
298 | 
299 | # Found our delimiter
300 | /^\(.\)\1/{
301 |   s//\1/
302 |   x
303 |   s/$/"/
304 |   t regex_valid_delim_eaten
305 | }
306 | 
307 | x
308 | # [] ranges are only relevant for BREs
309 | /^[rs][^1]/{
310 |   x
311 |   s/^\(.\)\\\[/\\\\[\
312 | \1/
313 |   t regex_save_char
314 |   /^.\[/{
315 |     # special case of leading closing square bracket in range: []...]
316 |     s/^\(.\)\[]/[]\
317 | \1/
318 |     # special case of negative leading closing square bracket in range: [^]...]
319 |     s/^\(.\)\[^]/[^]\
320 | \1/
321 |     s/^\(.\)\[/[\
322 | \1/
323 |     # found range, save this char at the top of the hold to remember to treat
324 |     # every character literally
325 |     x
326 |     s/.*/[&/
327 |     x
328 |     t regex_save_char
329 |   }
330 |   x
331 | }
332 | x
333 | # case of escaped delimiter s/bin\/bash/bin\/sh/, in that case we just remove
334 | # the backslash and process the char as any non delimiter one.
335 | s/^\(.\)\\\1/\1\
336 | \1/
337 | t regex_save_char
338 | s/^\(.\)\\n/\\n\
339 | \1/
340 | t regex_save_char
341 | s/^\(.\)\(\\\\\)/\2\2\
342 | \1/
343 | t regex_save_char
344 | # TODO, backslash should be removed when no op, example \j -> j
345 | # Literal double quotes and backslashes must be escaped in the C code
346 | s/^\(.\)\([\"]\)/\\\2\
347 | \1/
348 | t regex_save_char
349 | 
350 | # Default case, normal character
351 | s/^\(.\)\(.\)/\2\
352 | \1/
353 | t regex_save_char
354 | 
355 | : regex_save_char
356 | H
357 | # get rid of eaten char and newline
358 | s/.*\n//
359 | x
360 | # On the bottom line we have: {chars eaten}<newline>{delim}{rest of line}, we
361 | # just want to append the {chars eaten} to the end of the line before the last,
362 | # the one containing the C code under construction.
363 | s/\(.*\)\n\(.*\)\n.*/\1\2/
364 | x
365 | t regex_eat_next
366 | 
367 | : regex_valid_delim_eaten
368 | 
369 | # Found second delim for the s cmd
370 | s/^s1\(.*\)$/s\1/
371 | t s_cmd_handle_options
372 | 
373 | # case of regex closing a range: swap chars since we insert from the beginning
374 | s/^r\([rn]\)/\1r/
375 | 
376 | /^y/b skip_regex_creation
377 | # At this point if we do not have a string on the last line then that means
378 | # we're in the last_regex case, skip regex creation
379 | /"$/!b skip_regex_creation
380 | # move the id to the top and increment it
381 | s/\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)/\2\
382 | \1\
383 | \3\
384 | \4/
385 | 
386 | s/\n/|&/
387 | t id_inc_start
388 | : id_inc_start
389 | s/^\([0-9]*\)0|/\11/; t id_inc_end
390 | s/^\([0-9]*\)1|/\12/; t id_inc_end
391 | s/^\([0-9]*\)2|/\13/; t id_inc_end
392 | s/^\([0-9]*\)3|/\14/; t id_inc_end
393 | s/^\([0-9]*\)4|/\15/; t id_inc_end
394 | s/^\([0-9]*\)5|/\16/; t id_inc_end
395 | s/^\([0-9]*\)6|/\17/; t id_inc_end
396 | s/^\([0-9]*\)7|/\18/; t id_inc_end
397 | s/^\([0-9]*\)8|/\19/; t id_inc_end
398 | 
399 | : id_inc_loop
400 | s/^\([0-9]*\)9|/\1|0/
401 | t id_inc_loop
402 | s/^|/1/; t id_inc_end
403 | b id_inc_start
404 | : id_inc_end
405 | 
406 | # id is incremented, move it back down and use it
407 | 
408 | s/^\([0-9]*\)\n\(.*\)\n\(.*\)\n\(.*\)/\2\
409 | \1\
410 | \3\&reg_\1\
411 | static Regex reg_\1 = {.compiled = false, .str = \4};/
412 | # save current line we are working on
413 | G
414 | # save everything to hold
415 | h
416 | # only keep regex declaration and print it
417 | s/.*\n\(.*\)\n.*/\1/p
418 | # restore everything
419 | g
420 | # cleanup line we were working on
421 | s/.*\n//
422 | x
423 | # get rid of regex declaration and saved current line
424 | s/\(.*\)\n.*\n.*/\1/
425 | : skip_regex_creation
426 | s/^y1/y/
427 | 
428 | # Found first delim for the s/y cmd
429 | /^[sy]0/{
430 |   s/^\([sy]\)0\(.*\)$/\11\2, "/
431 |   x
432 |   t regex_eat_next
433 | }
434 | 
435 | x
436 | # remove delim, we don't need to keep it anymore
437 | s/.//
438 | x
439 | t valid_s_or_addr_parsing
440 | 
441 | # POSIX specifies that the valid s options are: g, <nth occurrence>, w <file> and p
442 | : s_cmd_handle_options
443 | # At this point we don't know yet if there are any options.
444 | # Prepare 3 lines for options: 1st for g and p, 2nd for nth and 3rd for w.
445 | s/$/\
446 | 0\
447 | 1\
448 | NULL/
449 | x
450 | # remove delim, we don't need to keep it anymore
451 | s/.//
452 | t s_cmd_eat_options
453 | 
454 | : s_cmd_eat_options
455 | /^[gp]/{
456 |   s/^g/G/
457 |   s/^p/P/
458 |   s/./&\
459 | /
460 |   # save to hold and remove processed option from pattern
461 |   H
462 |   s/..//
463 |   x
464 |   # process and clean saved line
465 |   s/\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)\n.*/\1\
466 | \2 | S_OPT_\5\
467 | \3\
468 | \4/
469 |   x
470 |   t s_cmd_eat_options
471 | }
472 | 
473 | /^[0-9]/{
474 |   s/^[0-9]*/&\
475 | /
476 |   H
477 |   # rm nb
478 |   s///
479 |   # rm newline
480 |   s/.//
481 |   x
482 |   s/\(.*\)\n.*\n\(.*\)\n\(.*\)\n.*/\1\
483 | \3\
484 | \2/
485 |   x
486 |   t s_cmd_eat_options
487 | }
488 | 
489 | /^w/{
490 |   s/^w[[:blank:]]*//
491 |   # w_cmd variation
492 |   s/["\]/\\&/g
493 |   H
494 |   x
495 | 
496 |   # 1 - rest of the top of the hold
497 |   # 2 - id for files
498 |   # 3 - id for regexes
499 |   #   - s cmd call in progress
500 |   #   - g/p opts
501 |   #   - nth
502 |   #   - NULL placeholder for the FILE ptr
503 |   # 4 - filepath
504 | 
505 |   s/\(.*\)\n\(.*\)\n\(.*\n.*\n.*\n.*\)\n.*\n\(.*\)/\1\
506 | \2x\
507 | \3\
508 | wfile_\2\
509 | FILE *const wfile_\2 = open_file(open_file_paths, open_file_handles, "\4");/
510 | 
511 |   # we can overwrite everything since the whole rest of the line is part of the
512 |   # filename
513 |   h
514 |   s/\(.*\)\n.*/\1/
515 |   x
516 |   s/.*\n\(.*\)/\1/
517 |   w generated-init.c
518 |   # clean it as this will be considered as the rest of the current line
519 |   s/.*//
520 |   x
521 |   # no options left since w <file> must be last
522 |   t end_of_s_opts
523 | }
524 | 
525 | x
526 | 
527 | : end_of_s_opts
528 | # 1 - rest of the top of the hold
529 | # 2 - s cmd call in progress
530 | # 3 - g/p opts
531 | # 4 - nth
532 | # 5 - FILE ptr
533 | s/\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)\n\(.*\)/\1\
534 | \2, \3, \4, \5/
535 | 
536 | b valid_s_or_addr_parsing
537 | 
538 | : valid_s_or_addr_parsing
539 | /^[rn]/{
540 |   # range, we'll append the LINE macro which will act as an unique id (this is
541 |   # "better" than COUNTER (since LINE is standardized) as long as we can
542 |   # guarantee that we'll never generate two range calls on the same line, that's
543 |   # why using the "=" command is not an option)
544 |   /^.[rn]/{
545 |     # number,number ranges do not need an id since the line number is fixed
546 |     # during each whole cycle
547 |     /^nn/!s/$/, __LINE__/
548 |     t s_or_addr_close_function
549 |   }
550 |   # single address, we need to check if another one follows
551 |   x
552 |   s/^[[:blank:]]*,[[:blank:]]*\([^[:blank:]]\)/\1/
553 |   x
554 |   t append_comma
555 |   b s_or_addr_close_function
556 | 
557 |   : append_comma
558 |   s/$/, /
559 |   x
560 |   t address_check
561 | }
562 | 
563 | : s_or_addr_close_function
564 | # close C function call + add ";" if not an address
565 | s/$/)/
566 | /^[sy]/s/$/;/
567 | x
568 | # negative address
569 | /^[[:blank:]]*!/{
570 |   s///
571 |   x
572 |   # invert result with xor, unfortunately C and sed negation are on the opposite
573 |   # side of the operand, so we'll do with that for now.
574 |   s/$/ ^ true/
575 |   x
576 | }
577 | # push remaining current line on hold
578 | H
579 | # clean hold
580 | # 1st line is hold unrelated to the current processing (except for the leading
581 | # command name)
582 | # 2nd line is the C code that we need to print, we'll swap it last
583 | # 3rd line is the rest of the line on which the s cmd was
584 | g
585 | s/^\(.*\)\n\(.*\)\n\(.*\)$/\1\
586 | \3\
587 | \2/
588 | 
589 | # save result to hold
590 | h
591 | # get rid of everything except C code (which is last), and print it, this is
592 | # also where we actually complete the name and fixed args of the function.
593 | # The very top of the hold contains the info needed to generate the correct
594 | # function name
595 | s/^\([^[:space:]]*\).*\n/\1(\&status, /
596 | s/^[nr].*/if (addr_&)/
597 | /^s/s/$/\
598 | }/
599 | p
600 | # clean the C code from the hold
601 | g
602 | s/^\(.*\)\n.*/\1/
603 | h
604 | # hold still contains current line, we need to remove it from there and also
605 | # have it in the pattern space
606 | s/.*\n//
607 | x
608 | s/\(.*\)\n.*/\1/
609 | # reset sub success value
610 | t valid_hold_reorder
611 | : valid_hold_reorder
612 | 
613 | # clean temp chars at the top of the hold
614 | s/^[^[:space:]]*//
615 | x
616 | t start
617 | b fail
618 | 
619 | : label_cmds
620 | P
621 | s/.*\n//
622 | t start
623 | s/.*/label cmds cleanup: &/
624 | b fail
625 | 
626 | : aci_cmds
627 | N
628 | s/\\$//
629 | t aci_cmds
630 | # remove first newline
631 | s/\n//
632 | # "\n" -> '\n'
633 | s/\\n/\
634 | /g
635 | # \<any char> -> <any char>
636 | s/\\\(.\)/\1/g
637 | # quotes and backslashes must be escaped for the C
638 | s/[\"]/\\&/g
639 | # '\n' -> "\n" for the C
640 | s/\n/\\n/g
641 | s/^i\(.*\)/i("\1");/
642 | s/^a\(.*\)/a(\&status, "\1");/
643 | s/^c\(.*\)/{ c(\&status, "\1"); continue; }/
644 | n
645 | b start
646 | 
647 | : fail
648 | s/.*/#error Translation failure - &/
649 | q
650 | 


--------------------------------------------------------------------------------
/read.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | #include "status.h"
 6 | 
 7 | #define READ_CHUNK_SIZE 512
 8 | 
 9 | static void handle_pending_ouput(Status *const status) {
10 |   // Write out, in queue order, every output queued since the last read
11 |   for (size_t i = 0; i < status->pending_output_counter; ++i) {
12 |     const Pending_output p = status->pending_outputs[i];
13 |     if (!p.is_filepath) {
14 |       puts(p.direct_output);
15 |       continue;
16 |     }
17 |     FILE *const f = fopen(p.filepath, "r");
18 |     if (!f) {
19 |       continue; // unreadable files are silently skipped
20 |     }
21 |     char read_chunk[READ_CHUNK_SIZE]; // byte buffer for the file content
22 |     for (size_t nread; (nread = fread(read_chunk, 1, READ_CHUNK_SIZE, f)) > 0;) {
23 |       fwrite(read_chunk, 1, nread, stdout);
24 |     }
25 |     fclose(f);
26 |   }
27 |   status->pending_output_counter = 0;
28 | }
29 | 
30 | bool read_pattern(Status *const status, char *const buf, const size_t size) {
31 |   // Read the next input line into buf (size bytes available) without its
32 |   // trailing newline. Returns false once no line is left.
33 |   handle_pending_ouput(status);
34 |   if (status->last_line_addr_present && status->line_nb > 0 &&
35 |       status->line_nb == status->last_line_nb) {
36 |     return false;
37 |   }
38 | 
39 |   size_t read_len;
40 |   if (!status->last_line_addr_present || status->line_nb == 0) {
41 |     if (!fgets(buf, size, stdin)) {
42 |       return false;
43 |     }
44 |     read_len = strlen(buf);
45 |   } else {
46 |     // The line was prefetched into next_line by the previous call. Copy the
47 |     // \0 too: buf may still hold a longer previous line, whose tail would
48 |     // leak into this one when the line lacks a trailing newline.
49 |     read_len = strlen(status->next_line);
50 |     memcpy(buf, status->next_line, read_len + 1);
51 |   }
52 |   status->sub_success = false;
53 |   status->line_nb++;
54 | 
55 |   // try to read the next line, if we fail then that means that the current line
56 |   // is the last one
57 |   if (status->last_line_addr_present &&
58 |       !fgets(status->next_line, size, stdin)) {
59 |     status->last_line_nb = status->line_nb;
60 |   }
61 | 
62 |   // fgets includes the newline, remove it
63 |   if (read_len && buf[read_len - 1] == '\n') {
64 |     buf[read_len - 1] = 0;
65 |   }
66 |   return true;
67 | }
68 | 


--------------------------------------------------------------------------------
/read.h:
--------------------------------------------------------------------------------
 1 | #ifndef READ_H
 2 | #define READ_H
 3 | 
 4 | #include <stdbool.h>
 5 | 
 6 | #include "status.h"
 7 | // Reads the next input line into buf (size bytes available), false if none
 8 | bool read_pattern(Status *const status, char *const buf, const size_t size);
 9 | #endif /* READ_H */
10 | 


--------------------------------------------------------------------------------
/samples/binary-add.sed:
--------------------------------------------------------------------------------
 1 | #!/bin/sed -f
 2 | 
 3 | # Example usage:
 4 | # sh$ echo 1 + 10 + 0 + 11 | sed -f ./samples/binary-add.sed
 5 | # 0
 6 | # 1
 7 | # 1
 8 | # (one result digit per line, least significant digit first)
 9 | # bc version:
10 | # sh$ echo 'ibase=2; obase=2; 1 + 10 + 0 + 11' | bc
11 | # 110
12 | # remove blanks and keep a copy of the whole expression in the hold space
13 | s/[[:blank:]]//g
14 | h
15 | : start
16 | s/[01]+/+/g
17 | s/[01]$//g
18 | s/++*/+/g
19 | s/^+*//
20 | s/+*$//
21 | x
22 | s/[01]*\([01]\)+/\1+/g
23 | s/[01]*\([01]\)$/\1/g
24 | # dummy branch: clears the substitution flag set by the commands above
25 | t reduce
26 | : reduce
27 | s/0+\([01]\)/\1/; t reduce
28 | s/\([01]\)+0/\1/; t reduce
29 | # nothing left to add: end the cycle without printing
30 | /^$/d
31 | # 1+1 gives the digit 0 and a carry
32 | s/1+1/0/
33 | t handle_carry
34 | # print the digit found, then go on with the remaining digits kept in hold
35 | p
36 | g
37 | b start
38 | # carry: add a 1 operand to the remaining digits kept in the hold space
39 | : handle_carry
40 | x
41 | s/^/1+/
42 | s/+$//
43 | x
44 | b reduce
45 | 


--------------------------------------------------------------------------------
/samples/generate-table-of-contents.sed:
--------------------------------------------------------------------------------
 1 | #!/bin/sed -f
 2 | 
 3 | # Generate table of contents with links for markdown files
 4 | # Usage: sed -f <this-script> <markdown file>
 5 | 
 6 | # ignore code blocks
 7 | /^```/,/^```/d
 8 | 
 9 | # no need to index ourselves
10 | /^# Table of contents/d
11 | 
12 | # found heading
13 | /^#/{
14 |   # save our line and first work on the actual URI
15 |   h
16 |   # strip the leading #s and blanks
17 |   s/^#*[[:blank:]]*//
18 |   s/[[:blank:]]/-/g
19 |   # punctuation and anything funky gets lost
20 |   s/[^-[:alnum:]]//g
21 |   # swap with hold and work on the displayed title
22 |   x
23 |   # get rid of last leading # and potential white spaces
24 |   s/^\(#\)*#[[:blank:]]*/\1/
25 |   # the remaining leading # (if any) will be used for indentation
26 |   s/#/  /g
27 |   # prepare the first half of the markdown
28 |   s/\( *\)\(.*\)/\1* [\2](#/
29 |   # append the link kept and remove the newline
30 |   G
31 |   s/\(.*\)[[:space:]]\(.*\)/\1\2)/p
32 | }
33 | d
34 | 


--------------------------------------------------------------------------------
/samples/tic-tac-toe.sed:
--------------------------------------------------------------------------------
  1 | #!/bin/sed -f
  2 | # Tic-tac-toe: x and o take turns, every input line is one key press
  3 | 1c \
  4 | Press enter to start playing\
  5 | \
  6 | Controls:\
  7 |   a => move left\
  8 |   d => move right\
  9 |   enter => place your choice
 10 | # 2nd input line: create the board, 1st char = current player, ? = cursor
 11 | 2{
 12 |   s/.*/x\
 13 | -------\
 14 | |?| | |\
 15 | -------\
 16 | | | | |\
 17 | -------\
 18 | | | | |\
 19 | -------/
 20 |   t next
 21 | }
 22 | # dispatch on the key pressed (an empty line means enter)
 23 | s/a//; t left
 24 | s/d//; t right
 25 | s/^$//; t enter
 26 | # any other input is ignored
 27 | d
 28 | # load the board and move the cursor to the previous empty cell, if any
 29 | :left
 30 | g
 31 | s/\(.*\) \([^?]*\)?/\1?\2 /; t next
 32 | d
 33 | # load the board and move the cursor to the next empty cell, if any
 34 | :right
 35 | g
 36 | s/?\([^ ]*\) / \1?/; t next
 37 | d
 38 | # load the board and put the current player's mark where the cursor is
 39 | :enter
 40 | g
 41 | s/^\(.\)\(.*\)?/\1\2\1/; t check_victory
 42 | # the cursor goes to the first empty cell, a full board is a draw
 43 | :placed
 44 | s/ /?/; t switch_player_indicator
 45 | b draw
 46 | # hand over to the other player
 47 | :switch_player_indicator
 48 | s/^x/o/; t next
 49 | s/^o/x/; t next
 50 | # save the state to the hold space, print the board without the indicator
 51 | :next
 52 | h
 53 | s/^.//
 54 | p
 55 | d
 56 | # final board followed by the winner (the leading indicator char), then quit
 57 | :win
 58 | s/^\(.\)\(.*\)/\2\
 59 | \
 60 | Winner: \1/
 61 | q
 62 | # final board followed by the draw announcement, then quit
 63 | :draw
 64 | s/^\(.\)\(.*\)/\2\
 65 | \
 66 | Draw/
 67 | q
 68 | # rows: three identical marks on the same board line
 69 | :check_victory
 70 | /\([xo]\)|\1|\1/{
 71 |   b win
 72 | }
 73 | # first column
 74 | /-------\
 75 | |\([xo]\)|.|.|\
 76 | -------\
 77 | |\1|.|.|\
 78 | -------\
 79 | |\1|.|.|\
 80 | -------/{
 81 |   b win
 82 | }
 83 | # second column
 84 | /-------\
 85 | |.|\([xo]\)|.|\
 86 | -------\
 87 | |.|\1|.|\
 88 | -------\
 89 | |.|\1|.|\
 90 | -------/{
 91 |   b win
 92 | }
 93 | # third column
 94 | /-------\
 95 | |.|.|\([xo]\)|\
 96 | -------\
 97 | |.|.|\1|\
 98 | -------\
 99 | |.|.|\1|\
100 | -------/{
101 |   b win
102 | }
103 | # diagonal from top left to bottom right
104 | /-------\
105 | |\([xo]\)|.|.|\
106 | -------\
107 | |.|\1|.|\
108 | -------\
109 | |.|.|\1|\
110 | -------/{
111 |   b win
112 | }
113 | # diagonal from top right to bottom left
114 | /-------\
115 | |.|.|\([xo]\)|\
116 | -------\
117 | |.|\1|.|\
118 | -------\
119 | |\1|.|.|\
120 | -------/{
121 |   b win
122 | }
123 | # no winner yet, the game goes on
124 | b placed
125 | 


--------------------------------------------------------------------------------
/sed:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Wrapper: translates a sed script to C, builds it with make and runs the resulting binary.
 3 | pwd0=$PWD # directory the wrapper was called from, used to resolve the -f script files
 4 | cd -P -- "${0%/*}/" # work from the directory that contains this script
 5 | # usage [status]: print the help text on stdout and exit with the given status (default 0).
 6 | usage() {
 7 |   basename="${0##*/}" # NOTE(review): not referenced anywhere in the visible script
 8 |   cat << EOF
 9 | An implementation of sed based on C translation.
10 | 
11 | Usage:
12 | 
13 |   sed [-n] script [file...]
14 | 
15 |   sed [-n] -e script [-e script]... [-f script_file]... [file...]
16 | 
17 |   sed [-n] [-e script]... -f script_file [-f script_file]...  [file...]
18 | EOF
19 |   exit "${1-0}" # callers wanting the text on stderr redirect the whole call
20 | }
21 | 
22 | bin="${BIN-./sed-bin}"
23 | default_translator=./par.sed
24 | translator="${SED_TRANSLATOR-$default_translator}"
25 | generated_file=generated.c
26 | # Parse the options; the loop stops at the first input file, leaving the files in "$@"
27 | nb_args="$#"
28 | e_opt_found=false
29 | f_opt_found=false
30 | n_opt_found=false
31 | no_opt_script_found=false
32 | script=
33 | while [ "$nb_args" -gt 0 ]; do
34 |   case "$1" in
35 |     -e)
36 |       e_opt_found=true
37 |       if "$f_opt_found"; then
38 |         usage 1 >&2
39 |       fi
40 |       [ "$nb_args" -ge 2 ] || usage 1 >&2 # -e given without a script
41 |       shift; nb_args="$((nb_args - 1))"
42 |       script="$script
43 | $1"
44 |       ;;
45 |     -f)
46 |       f_opt_found=true
47 |       [ "$nb_args" -ge 2 ] || usage 1 >&2 # -f given without a script file
48 |       shift; nb_args="$((nb_args - 1))"
49 |       script="$script
50 | $(cd "$pwd0"; cat "$1")"
51 |       ;;
52 |     -h|--help)
53 |       usage
54 |       ;;
55 |     -n)
56 |       n_opt_found=true
57 |       ;;
58 |     *)
59 |       if "$e_opt_found" || "$f_opt_found"; then
60 |         break
61 |       else
62 |         no_opt_script_found=true
63 |         script="$1"
64 |         shift
65 |         break
66 |       fi
67 |       ;;
68 |   esac
69 |   shift; nb_args="$((nb_args - 1))"
70 | done
71 | 
72 | if "$e_opt_found" || "$f_opt_found"; then
73 |   # delete extra leading newline, this is important for #n handling
74 |   script="${script#?}"
75 | elif ! "$no_opt_script_found"; then
76 |   usage 1 >&2
77 | fi
78 | # Translate and build here; input files are read from the caller's directory, like -f files
79 | printf '%s\n' "$script" | "$translator" > "$generated_file" &&
80 |   make -s &&
81 |   (cd "$pwd0" && cat "$@") | {
82 |     # build the command line of the binary in "$@"; a relative $bin gets a ./ prefix
83 |     set --
84 |     if "$n_opt_found"; then set -- -n; fi
85 |     case "$bin" in
86 |       /*)
87 |         set -- "$bin" "$@"
88 |         ;;
89 |       *)
90 |         set -- ./"$bin" "$@"
91 |         ;;
92 |     esac
93 |     "$@"
94 |   }
95 | 


--------------------------------------------------------------------------------
/sed-bin.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <limits.h>
 3 | #include <stdbool.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | 
 8 | #include "address.h"
 9 | #include "operations.h"
10 | #include "read.h"
11 | #include "status.h"
12 | 
13 | // Return the write handle for filepath, opening (truncating) it on first use.
14 | // Both tables hold MAX_WFILES slots; a NULL path marks the first unused slot.
15 | static FILE *open_file(const char **const open_file_paths,
16 |                        FILE **const open_file_handles,
17 |                        const char *const filepath) {
18 |   size_t i;
19 |   // Bounded scan; compare contents since equal paths may differ in address
20 |   for (i = 0; i < MAX_WFILES && open_file_paths[i]; ++i) {
21 |     if (strcmp(open_file_paths[i], filepath) == 0) {
22 |       return open_file_handles[i];
23 |     }
24 |   }
25 |   assert(i < MAX_WFILES); // unknown path and no free slot left
26 |   FILE *const file_handle = fopen(filepath, "w");
27 |   assert(file_handle);
28 |   open_file_paths[i] = filepath;
29 |   open_file_handles[i] = file_handle;
30 |   return file_handle;
31 | }
32 | 
33 | int main(int argc, char **argv) { // entry point of the binary built around the generated code
34 |   Status status = {
35 |     .pattern_space = (char[PATTERN_SIZE]){0},
36 |     .hold_space = (char[PATTERN_SIZE]){0},
37 |     .sub_success = false,
38 |     .line_nb = 0,
39 |     .last_line_nb = UINT_MAX,
40 |     .skip_read = false,
41 |     .last_regex = NULL,
42 |     .range_ids = (size_t [MAX_ACTIVE_RANGES]){0},
43 |     .suppressed_range_ids = (size_t [MAX_ACTIVE_RANGES]){0},
44 |     .pending_outputs = (Pending_output[MAX_PENDING_OUTPUT]){{0}},
45 |     .pending_output_counter = 0,
46 |     .next_line = (char[PATTERN_SIZE]){0},
47 |     .last_line_addr_present = false,
48 |     .suppress_default_output = false,
49 |   };
50 |   // The only accepted command line is a single -n, which turns the default output off
51 |   if (argc > 1) {
52 |     assert(strcmp(argv[1], "-n") == 0 && argc == 2);
53 |     status.suppress_default_output = true;
54 |   }
55 |   // Parallel tables of write-file paths and handles; presumably filled by the included init code
56 |   const char *open_file_paths[MAX_WFILES] = {NULL};
57 |   FILE *open_file_handles[MAX_WFILES] = {NULL};
58 |   // Included once, before any input is read
59 |   #include "generated-init.c"
60 |   // One iteration per cycle: read input unless skip_read is set, run the included code, print
61 |   while (true) {
62 |     if (status.skip_read) {
63 |       status.skip_read = false;
64 |     } else if (!read_pattern(&status, status.pattern_space, PATTERN_SIZE)) {
65 |       break;
66 |     }
67 |     status.skip_read = false; // already false on both paths above
68 |     #include "generated.c"
69 |     if (!status.suppress_default_output) {
70 |       puts(status.pattern_space);
71 |     }
72 |   }
73 |   return EXIT_SUCCESS;
74 | }
75 | 


--------------------------------------------------------------------------------
/status.h:
--------------------------------------------------------------------------------
 1 | #ifndef STATUS_H
 2 | #define STATUS_H
 3 | // Fixed capacities used for the buffers and tables of the interpreter state
 4 | #define PATTERN_SIZE 8192 // bytes in the pattern space, hold space and next line buffers
 5 | #define MAX_MATCHES 10 // NOTE(review): presumably the number of regex match slots; confirm in operations.c
 6 | #define MAX_ACTIVE_RANGES 100 // slots in range_ids and suppressed_range_ids
 7 | #define MAX_PENDING_OUTPUT 100 // slots in pending_outputs
 8 | #define MAX_WFILES 10 // slots in the tables of files opened for writing
 9 | // NOTE(review): looks like the result of an operation, telling whether the cycle goes on; confirm
10 | typedef enum {
11 |   CONTINUE,
12 |   BREAK
13 | } operation_ret;
14 | 
15 | #include <stdbool.h>
16 | // One pending output entry; is_filepath presumably selects the valid union member
17 | typedef struct {
18 |   bool is_filepath;
19 |   union {
20 |     const char *direct_output; // resulting from a cmd
21 |     const char *filepath; // resulting from r cmd
22 |   };
23 | } Pending_output;
24 | 
25 | #include <regex.h>
26 | // A regular expression; compiled presumably tells whether obj or str is the active member
27 | typedef struct {
28 |   bool compiled;
29 |   union {
30 |     const char *str;
31 |     regex_t obj;
32 |   };
33 | } Regex;
34 | // State of the running script; the single instance is initialised in main() of sed-bin.c
35 | typedef struct {
36 |   char *pattern_space; // PATTERN_SIZE bytes
37 |   char *hold_space; // PATTERN_SIZE bytes
38 |   bool sub_success;
39 |   size_t line_nb;
40 |   size_t last_line_nb;
41 |   bool skip_read; // true: the next cycle starts without reading input
42 |   Regex *last_regex;
43 |   size_t *const range_ids; // MAX_ACTIVE_RANGES entries
44 |   size_t *const suppressed_range_ids; // MAX_ACTIVE_RANGES entries
45 |   Pending_output *const pending_outputs; // MAX_PENDING_OUTPUT entries
46 |   size_t pending_output_counter;
47 |   char *const next_line; // PATTERN_SIZE bytes
48 |   bool last_line_addr_present;
49 |   bool suppress_default_output; // set when the binary is started with -n
50 | } Status;
51 | 
52 | #endif /* STATUS_H */
53 | 


--------------------------------------------------------------------------------
/test:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | 
  3 | # testlib taken from https://github.com/lhoursquentin/clash
  4 | c_red="$(printf '\033[1;31m')"
  5 | c_green="$(printf '\033[1;32m')"
  6 | c_bold="$(printf '\033[1m')"
  7 | c_reset="$(printf '\033[0m')"
  8 | 
  9 | condition()
 10 | { # run the command formed by the arguments that precede the last '--' and return its status
 11 |   nb_args="$#"
 12 |   set -- "$@" "$@" # two copies: the first one is consumed while locating '--'
 13 |   index=0
 14 |   while [ "$index" -lt "$nb_args" ]; do
 15 |     if [ "$1" = '--' ]; then
 16 |       separator_index="$index" # also read by assert and assert_out after this call
 17 |     fi
 18 |     index="$((index + 1))"
 19 |     shift
 20 |   done
 21 |   index=0 # next loop rotates the command words (those before '--') to the end of "$@"
 22 |   while [ "$index" != "$separator_index" ]; do
 23 |     set -- "$@" "$1"
 24 |     index="$((index + 1))"
 25 |     shift
 26 |   done
 27 |   shift "$(($# - separator_index))" # drop '--' and the words after it, keep the command
 28 |   "$@" || return # return for set -e
 29 | }
 30 | 
 31 | assert() { # args: command... -- printf-style message; passes when [ status $assert_operator 0 ]
 32 |   condition_retval=0
 33 |   condition "$@" || condition_retval="$?" # for set -e
 34 |   if [ "$condition_retval" "$assert_operator" 0 ]; then
 35 |     return
 36 |   fi
 37 |   # failure: print the message on stderr and record the whole assertion for end_tests
 38 |   assertion="$*"
 39 |   shift "$((separator_index + 1))" # keep only the message words that follow '--'
 40 |   {
 41 |     [ -t 2 ] && color=true || unset color
 42 |     printf '%sError%s - ' "${color+$c_red}" "${color+$c_reset}"
 43 |     printf "$@" # the first message word is the printf format
 44 |     echo
 45 |   } >&2
 46 |   failed_assertions="$failed_assertions
 47 | $assertion
 48 | -------------------------------------"
 49 |   return 1
 50 | }
 51 | 
 52 | assert_true() { # log the assertion, then require the command to exit with status 0
 53 |   printf '%s -> true\n' "$*"
 54 |   assert_operator='='
 55 |   assert "$@"
 56 | }
 57 | 
 58 | assert_false() { # log the assertion, then require the command to exit with a non-zero status
 59 |   printf '%s -> false\n' "$*"
 60 |   assert_operator='!='
 61 |   assert "$@"
 62 | }
 63 | 
 64 | assert_out() { # args: command... -- expected words; compares the command's stdout with "$*"
 65 |   printf '%s\n' "$*"
 66 |   out_file="$(mktemp)"
 67 |   condition "$@" > "$out_file"
 68 |   shift "$((separator_index + 1))" # "$@" now holds the expected words only
 69 |   output="$(cat "$out_file"; printf x)" # the x sentinel keeps trailing newlines through $(...)
 70 |   output="${output%x}"
 71 |   rm "$out_file"
 72 |   assert_operator='='
 73 |   assert [ "$output" = "$*" ] -- 'Output differ'
 74 | }
 75 | 
 76 | end_tests() { # print the final summary; returns 1 when at least one assertion was recorded as failed
 77 |   [ -t 1 ] && color=true || unset color # colors only when stdout is a terminal
 78 |   printf '%s================ END ================%s\n' "${color+$c_bold}" "${color+$c_reset}"
 79 |   if [ -z "$failed_assertions" ]; then
 80 |     printf '%sPASS%s\n' "${color+$c_green}" "${color+$c_reset}"
 81 |   else
 82 |     printf '%s\n%sFAIL%s\n' "$failed_assertions" "${color+$c_red}" "${color+$c_reset}"
 83 |     return 1
 84 |   fi
 85 | }
 86 | 
 87 | # end of testlib
 88 | 
 89 | bin=./sed-bin
 90 | translator=./par.sed
 91 | generated_file=generated.c
 92 | generated_init_file=generated-init.c
 93 | # scratch files, removed when this script exits
 94 | trap 'rm -f "$bin_generated_file_output" "$previous_generated_init_file" "$expected_output"' EXIT
 95 | expected_output="$(mktemp)" # receives the output of the reference sed
 96 | bin_generated_file_output="$(mktemp)" # receives the output of the compiled binary
 97 | previous_generated_init_file="$(mktemp)"
 98 | # toybox, busybox, bsd sed and gnu sed are the most common targets
 99 | sed_implementation='sed' # requiring an IFS split rules out default zsh
100 | # --posix is usually for the gnu version
101 | if "$sed_implementation" --posix '' /dev/null 2> /dev/null; then
102 |   sed_implementation="$sed_implementation --posix"
103 | fi
104 | 
105 | compile_translator() { # self-hosting check: succeeds only if every step and both diffs succeed
106 |   # Translate the translator with itself
107 |   "$translator" < "$translator" > "$generated_file" &&
108 |   # So after doing some experiments, it seems that on FreeBSD the inode might
109 |   # be the exact same when re-creating the file very quickly, and in that case
110 |   # if the timestamp is also the same, meaning we recreate the file in less
111 |   # than one second, then make concludes that everything is up-to-date. The
112 |   # workaround here to force make to re-compile sed-bin.c is to remove sed-bin
113 |   # and sed-bin.o, which would have to be built again anyways.
114 |   rm -f sed-bin.o sed-bin &&
115 |   make -s &&
116 |   cp "$generated_init_file" "$previous_generated_init_file" &&
117 |   # Compiled translator should be able to translate the translator and yield the
118 |   # exact same output
119 |   "$bin" < "$translator" > "$bin_generated_file_output" &&
120 |   diff "$generated_file" "$bin_generated_file_output" &&
121 |   diff "$previous_generated_init_file" "$generated_init_file"
122 | }
123 | 
124 | equal_output() { # compare the reference sed with the compiled script; reads $input, $sed_code, $n_opt
125 |   echo --------------------------
126 |   # reference output first, then translate the script, rebuild and run the compiled binary
127 |   printf '%s\n' "$input" | timeout 20 $sed_implementation $n_opt "$sed_code" > "$expected_output" &&
128 |   printf '%s\n' "$sed_code" | timeout 20 "$translator" > "$generated_file" &&
129 |   rm -f sed-bin.o sed-bin &&
130 |   make -s &&
131 |   printf '%s\n' "$input" | timeout 20 "$bin" $n_opt > "$bin_generated_file_output" &&
132 |   diff "$expected_output" "$bin_generated_file_output"
133 | }
134 | 
135 | verify() { # check sed script $1 against input $2, once without and once with -n
136 |   sed_code="$1"
137 |   input="$2"
138 |   n_opt=
139 |   assert_true equal_output "$sed_code" -- "input: $input" # words before -- only feed the log, equal_output reads the globals
140 |   n_opt=-n
141 |   assert_true equal_output "$n_opt" "$sed_code" -- "input: $input" # NOTE(review): on failure this message becomes a printf format, so % or \ in $input would be interpreted
142 |   echo ==========================
143 | }
144 | 
145 | # Replace by smaller
146 | verify 's/Hell/Y/' 'Hello World'
147 | verify 's/llo W/y w/' 'Hello World'
148 | verify 's/ World/!/' 'Hello World'
149 | # Replace by bigger
150 | verify 's/Hello/What in the/' 'Hello World'
151 | verify 's/o/ what is wrong with this/' 'Hello World'
152 | verify 's/ld/k! Oh no :(/' 'Hello World'
153 | # Replace by same size
154 | verify 's/Hello/Yolo!/' 'Hello World'
155 | verify 's/o/a/' 'Hello World'
156 | verify 's/ld/k!/' 'Hello World'
157 | # With regex
158 | verify 's/[hH]/Well h/' 'Hello World'
159 | verify 's/. ./a-w/' 'Hello World'
160 | verify 's/o.*/!/' 'Hello World'
161 | verify 's/.*//' 'Hello World'
162 | verify 's/.*/Bye/' 'Hello World'
163 | verify 's/[Hh]ello.* World*//' 'Hello World'
164 | verify 's/Hello World/Bye/' 'Hello World'
165 | verify 's/Hello World//' 'Hello World'
166 | # Fail
167 | verify 's/xz/nope/' 'Hello World'
168 | # Back refs
169 | verify 's/\([^ ]*\)/\1! \1/' 'Hello World'
170 | verify 's/\([^ ]*\) \(.\)/\1! \2 /' 'Hello World'
171 | verify 's/.*r/& & &/' 'Hello World'
172 | verify 's/Hello/\& literal ampersand/' 'Hello World'
173 | verify 's/\(x\)*foo/b\1ar/' 'foo'
174 | verify 's/\(x\)*foo/b\1ar/' 'xfoo'
175 | verify 's/\(x\)*foo/b\1ar/' 'xxfoo'
176 | verify 's/\(x\)*foo/\1/' 'foo'
177 | verify 's/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\1\2\3\4\5\6\7\8\9/' '123456789'
178 | # g opt
179 | verify 's/o/a/g' 'Hello World'
180 | verify 's/o/abocde/g' 'Hello World'
181 | verify 's/o/ yo/g' 'Hello World'
182 | verify 's/damn/no/g' 'Oh damn god damnit'
183 | verify 's/damn/n/g' 'Oh damn god damnit'
184 | verify 's/.*/Bye/g' 'Hello World'
185 | verify 's/\([^ ]*\)/\1! \1/g' 'Hello World'
186 | verify 's/[^a]/foo/g' 'bar'
187 | verify 's/^a/foo/g' 'aaaa'
188 | verify 's/a//g' 'aaaa'
189 | verify 's/a//g' 'abaca'
190 | 
191 | # p opt
192 | verify 's/foo/bar/p;d' 'padding foo padding'
193 | verify 's/foo/bar/pg;d' 'padding foo padding foo padding'
194 | verify 's/foo/bar/gp;d' 'padding foo padding foo padding'
195 | verify 's/^/hey/p;d' 'foo'
196 | verify 's/^//p;d' 'foo'
197 | 
198 | # s with \n
199 | verify 'N; s/^foo\nbar/yo\nlo/g' 'foo
200 | bar'
201 | 
202 | # s empty match check
203 | verify 's/[^ ]*/yo/g' 'Hello world'
204 | verify 's/[^ ]*/yo/g' 'Hello '
205 | verify 's/[^ ]*/yo/' 'foo'
206 | 
207 | # s insert at beginning and end
208 | verify 's/^/Leading insert/' 'Hello World'
209 | verify 's/$/Trailing insert/' 'Hello World'
210 | 
211 | verify 's/x/y/3' 'axbxcxdxex
212 | fxgxhxixjx'
213 | verify 's/x/y/3g' 'axbxcxdxex
214 | fxgxhxixjx'
215 | verify 's/x/y/p3g' 'axbxcxdxex
216 | fxgxhxixjx'
217 | verify 's/x/y/3pg' 'axbxcxdxex
218 | fxgxhxixjx'
219 | verify 's/x/y/pg3' 'axbxcxdxex
220 | fxgxhxixjx'
221 | verify 's/x/y/4; t
222 | d' 'axbxcxdxex
223 | fxgxhxixjx'
224 | # sub success should not be set if the nth is never reached
225 | verify 's/x/y/40; t
226 | d' 'axbxcxdxex
227 | fxgxhxixjx'
228 | # check we can parse nth over 9
229 | verify 's/x/y/12' 'xxxxxxxxxxxxxxxxxxxx'
230 | 
231 | test_file="$(mktemp)"
232 | rm "$test_file"
233 | 
234 | verify 's/x/y/pg3w '"$test_file"'
235 | r '"$test_file" 'axbxcxdxex
236 | fxgxhxixjx'
237 | rm "$test_file"
238 | 
239 | verify 's/non-matching/y/pg3w '"$test_file"'
240 | r '"$test_file" 'axbxcxdxex
241 | fxgxhxixjx'
242 | rm "$test_file"
243 | 
244 | # t
245 | verify ': start
246 | s/^[^[:blank:]]//; t start' 'Hello World'
247 | 
248 | # Label name not taking the whole line is not supported by FreeBSD.
249 | # Check that we support it anyways.
250 | 
251 | # t, verify that reading a newline resets the substitution success to false
252 | verify 't end; s/foo//; d; : end' 'foo
253 | bar'
254 | verify 't end
255 | s/foo//; d; : end' 'foo
256 | bar'
257 | verify 's/.*/yo/; n; t end
258 | p; : end
259 | d' 'foo
260 | bar'
261 | verify 's/.*/yo/; N; t end
262 | p; : end
263 | d' 'foo
264 | bar'
265 | verify 's/^/x/; t; s/^/y/' 'foo
266 | bar'
267 | verify 's/^/x/; t   ; s/^/y/' 'foo
268 | bar'
269 | verify 's/^/x/; t
270 | s/^/y/' 'foo
271 | bar'
272 | verify 's/non matching pattern/x/; t; s/^/y/' 'foo
273 | bar'
274 | verify 's/non matching pattern/x/; t ; s/^/y/' 'foo
275 | bar'
276 | verify 's/non matching pattern/x/; t
277 | s/^/y/' 'foo
278 | bar'
279 | 
280 | 
281 | # b
282 | verify ':start
283 | s/.//; /^[^[:blank:]]/b start' 'Hello World'
284 | verify ':start; s/.//; /^[^[:blank:]]/b start' 'Hello World'
285 | verify 's/^/x/; b; s/^/y/' 'foo
286 | bar'
287 | verify 's/^/x/; b   ; s/^/y/' 'foo
288 | bar'
289 | verify 's/^/x/; b
290 | s/^/y/' 'foo
291 | bar'
292 | 
293 | # H and G
294 | verify 'H;H;G;' 'Hello World'
295 | 
296 | # p and P
297 | verify 'h; s/ World//; x; s/Hello //; H; s/.*/end/; H; g; p' 'Hello World'
298 | verify 'h; s/ World//; x; s/Hello //; H; s/.*/end/; H; g; P' 'Hello World'
299 | verify P foo
300 | 
301 | # = equal
302 | verify '=' 'Abc'
303 | verify '=' 'This line
304 | is not a
305 | single line'
306 | 
307 | # d and D
308 | verify 's/Hello/foo/; t end
309 | d; s/foo/bar/; :end' 'Hello
310 | World
311 | Hello
312 | You'
313 | verify 's/Hello/foo/; t end
314 | D; s/foo/bar/; :end' 'Hello
315 | World
316 | Hello
317 | You'
318 | verify '/^hey/{=;q;}; N; s/bar/hey/; D; s/.*/unreach/; p' 'foo
319 | bar
320 | baz'
321 | verify '/match fail/d; s/foo/bar/' 'foo'
322 | 
323 | # This one is actually a meaningful use of D (combined with P), kind of an
324 | # unreadable upgrade of `grep -o`
325 | verify 't match
326 | s/[^:]*: \(...\)/\
327 | \1\
328 | /
329 | D
330 | : match
331 | P
332 | D
333 | ' 'stuff: foo, other stuff: bar
334 | some stuff: baz
335 | unrelated
336 | final stuff: fun'
337 | 
338 | # n and N
339 | verify 's/Hello/foo/; n; s/World/bar/; q' 'Hello
340 | World'
341 | verify ': start
342 | s/^.//; N; b start' 'This line
343 | is not a
344 | single line'
345 | 
346 | # ultimate test, compile the translator and verify that the binary can become
347 | # the translator
348 | assert_true compile_translator -- 'translating the translator with a compiled translator yields the original translation'
349 | 
350 | verify 'i \
351 | f"oo  \
352 | b\jr\
353 | b\\az\
354 | b\nuf\
355 | funk
356 | s/Hello/Bye/' 'Hello
357 | World'
358 | 
359 | # empty pattern
360 | verify '/foo/s///' 'foo bar
361 | funk'
362 | verify '/foo/s///' 'foo bar
363 | funk'
364 | verify '/foo/{/funk/s//hey/;}' 'foo funk'
365 | verify 's/foo/bar/; s//funk/' 'foo foo foo'
366 | 
367 | verify '/foo/!s/f/z/' 'foo
368 | bar
369 | foo'
370 | verify '/foo/!s/f/z/' 'foo bar foo'
371 | verify '1!s/f/z/' 'foo
372 | foo'
373 | verify '1!s/f/z/' 'foo bar'
374 | 
375 | # ranges
376 | verify '1,3s/^/x/' 'foo
377 | bar
378 | funk
379 | fax'
380 | verify '2,3s/^/x/' 'foo
381 | bar
382 | funk
383 | fax'
384 | verify '2,2s/^/x/' 'foo
385 | bar
386 | funk
387 | fax'
388 | verify '2,10s/^/x/' 'foo
389 | bar
390 | funk
391 | fax'
392 | verify '2,1s/^/x/' 'foo
393 | bar
394 | funk
395 | fax'
396 | 
397 | verify '1,/bar/s/^/x/' 'foo
398 | bar
399 | funk
400 | fax'
401 | 
402 | verify '/foo/,/funk/s/^/x/' 'foo
403 | bar
404 | funk
405 | fax'
406 | 
407 | verify '\,foo,,/funk/s,^,x,' 'foo
408 | bar
409 | funk
410 | fax'
411 | 
412 | verify '/foo/,\,funk,s,^,x,' 'foo
413 | bar
414 | funk
415 | fax'
416 | 
417 | verify '\,foo,,\,funk,s,^,x,' 'foo
418 | bar
419 | funk
420 | fax'
421 | 
422 | verify '/foo/,/funk/s/^/x/; 1n' 'foo
423 | bar
424 | funk
425 | fax'
426 | 
427 | verify '/foo/,/bar/s/^/x/' 'foo
428 | bar
429 | funk
430 | fax'
431 | 
432 | verify '/foo/,/bar/s/^/x/; 1n' 'foo
433 | bar
434 | funk
435 | fax'
436 | 
437 | verify '/foo/,/non existing/s/^/x/' 'foo
438 | bar
439 | funk
440 | fax'
441 | 
442 | verify '/foo/,3s/^/x/' 'foo
443 | bar
444 | funk
445 | fax'
446 | 
447 | verify '/foo/,3s/^/x/; 1n;' 'foo
448 | bar
449 | funk
450 | fax'
451 | 
452 | # Busybox v1.30.1 fails this one
453 | verify '2!{1,2s/^/> /;}' 'foo
454 | bar
455 | funk
456 | fax'
457 | 
458 | verify 'a \
459 | funk\
460 | punk
461 | s/^/x/
462 | a \
463 | yo\
464 | yeah
465 | s/^/y/
466 | n
467 | s/^/z/' 'foo
468 | bar
469 | baz
470 | funk'
471 | 
472 | verify '$s/^/last line: /' 'foo
473 | bar
474 | baz'
475 | 
476 | verify '${s/^/last line: /; p;}' 'foo
477 | bar
478 | baz'
479 | 
480 | verify '$,1s/^/last line: /' 'foo
481 | bar
482 | baz'
483 | 
484 | verify '2,$s/^/not first: /' 'foo
485 | bar
486 | baz
487 | funk'
488 | 
489 | verify '/bar/,$s/^/after bar: /' 'foo
490 | bar
491 | baz
492 | funk'
493 | 
494 | verify '/bar/,${s/^/after bar: /;}' 'foo
495 | bar
496 | baz
497 | funk'
498 | 
499 | verify '$,/funk/s/^/between last line and funk: /' 'foo
500 | bar
501 | baz
502 | funk'
503 | 
504 | verify '$,/non-existing/s/^/between last line and some non matching regex: /' 'foo
505 | bar
506 | baz
507 | funk'
508 | 
509 | verify 'c \
510 | punk\
511 | dunk
512 | s/^/x/' 'foo
513 | bar
514 | baz
515 | funk'
516 | 
517 | verify 'y/abc/def/' 'abbac'
518 | verify 'y abc def ' 'abbac'
519 | verify 'ya\abcadefa' 'abbac'
520 | verify 'h; y/abc/def/; h' 'abbac'
521 | verify 'y/axb/d\ne/' 'HelloxWorld'
522 | verify 'N; y/a\nb/dxe/' 'Hello
523 | World'
524 | verify 'y///' 'foo'
525 | verify 's/a/b/; y/c/d/; s/e/f/' 'abcdef'
526 | 
527 | verify 'l' 'foo \\ \ '"$(printf '\a\b\f\r\t\v')"' é '"$(printf '\001\020\300')"' bar'
528 | 
529 | verify 'r non-existing-file
530 | s/^/x/' 'bar
531 | baz'
532 | 
533 | test_file="$(mktemp)"
534 | echo foo > "$test_file"
535 | 
536 | verify 'r '"$test_file"'
537 | s/^/x/' 'bar
538 | baz'
539 | 
540 | # like a/c/i the filename takes the whole line
541 | verify 'r '"$test_file"'; s/^/x/' 'bar
542 | baz'
543 | 
544 | # Even if w is not executed, POSIX specifies that files must be created before
545 | # processing
546 | rm "$test_file"
547 | verify '/non matching pattern/w '"$test_file" 'foo'
548 | assert_true [ -f "$test_file" ] -- 'File created without w cmd execution'
549 | 
550 | verify 's/^/x/; /foo/w '"$test_file"'
551 | s/^/y/; r '"$test_file"'
552 | s/^/z/' 'foo'
553 | 
554 | rm "$test_file"
555 | 
556 | verify 's/^$/x/' ''
557 | 
558 | # tabs as a separator
559 | verify '	/f/	s/foo/bar/;	p' 'foo'
560 | 
561 | # #n
562 | verify '#n' 'foo'
563 | verify '#no output' 'foo'
564 | verify '#n
565 | s/^/x/' 'foo
566 | bar'
567 | verify '#n
568 | n
569 | s/^/x/' 'foo
570 | bar'
571 | verify '#n
572 | q' 'foo
573 | bar'
574 | verify '#n
575 | b' 'foo
576 | bar'
577 | verify '#n
578 | t' 'foo
579 | bar'
580 | 
581 | # BRE ranges
582 | verify 's/[/]/x/g' 'a/b/c'
583 | verify 's/a/[\n]/g' 'a/b/c'
584 | verify 's/[x/y]/[xy]/g' 'a/b/c'
585 | verify 'N;N;s/\na[\n]b\n/yo/g' '
586 | anb
587 | 
588 | '
589 | # FreeBSD sed fails this one
590 | verify 'y/[/]/' '[[]]'
591 | verify '/[/]/s/[/]/yo/' '/'
592 | verify 's/hey/[/' 'hey'
593 | # With GNU sed not in POSIX mode [\n] means newline, whereas in POSIX mode
594 | # this means either backslash or n, we'll follow POSIX mode behavior.
595 | verify 's/[\n[:blank:]\n]/foo/g' 'n \ n \ n'
596 | verify 's/\[/]foo/' 'a[b'
597 | verify 's/[[:blank:]/]/foo/g' 'a/b c'
598 | verify 's/[^]/]/x/g' 'a]b/c'
599 | 
600 | end_tests
601 | 


--------------------------------------------------------------------------------