├── .gitignore
├── README.adoc
├── SUMMARY.adoc
├── book.json
├── images
│   ├── check-leak-pkg-path-gc.gnu
│   ├── check-leak-pkg-path-gc.png
│   ├── check-leak-pkg-path.gnu
│   └── check-leak-pkg-path.png
├── testing
│   ├── README.adoc
│   ├── introduction.adoc
│   ├── preparing-tests.adoc
│   ├── running-tests.adoc
│   ├── test-file-layout.adoc
│   ├── test-modes.adoc
│   ├── test-nginx.adoc
│   ├── test-suite-layout.adoc
│   └── testing-erroneous-cases.adoc
└── util
    ├── fmt.pl
    └── word-count.pl

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*~
*.swp
*.swo
*.bak
/_book/

--------------------------------------------------------------------------------
/README.adoc:
--------------------------------------------------------------------------------
= Programming OpenResty

This is an official guide on OpenResty programming written by the OpenResty creator.
This book is still in preparation. Please check back often for updates.

The entire Programming OpenResty book, written by Yichun Zhang, is available here. All content is licensed under the link:http://creativecommons.org/licenses/by-nc-sa/3.0/[Creative Commons Attribution Non Commercial Share Alike 3.0 license]. You can download or browse the rendered book in various different formats on the GitBook website below.

https://openresty.gitbooks.io/programming-openresty/content/

The latest source of the book can be found in the following GitHub repository:

https://github.com/openresty/programming-openresty

Pull requests are always welcome.

--------------------------------------------------------------------------------
/SUMMARY.adoc:
--------------------------------------------------------------------------------
= Summary

. link:README.adoc[Introduction]
. link:testing/README.adoc[Automated Testing]
.. link:testing/introduction.adoc[Introduction]
.. link:testing/test-nginx.adoc[Test::Nginx]
.. link:testing/test-suite-layout.adoc[Test Suite Layout]
.. link:testing/test-file-layout.adoc[Test File Layout]
.. link:testing/running-tests.adoc[Running Tests]
.. link:testing/preparing-tests.adoc[Preparing Tests]
.. link:testing/testing-erroneous-cases.adoc[Testing Erroneous Cases]
.. link:testing/test-modes.adoc[Test Modes]
.. link:testing/advanced-topics.adoc[Advanced Topics]

--------------------------------------------------------------------------------
/book.json:
--------------------------------------------------------------------------------
{
    "author": "Yichun Zhang ",
    "description": "Official Guide on OpenResty Programming",
    "generator": "site",
    "links": {
        "sidebar": {
            "About The Author": "http://agentzh.org/",
            "OpenResty Home": "https://openresty.org"
        }
    }
}

--------------------------------------------------------------------------------
/images/check-leak-pkg-path-gc.gnu:
--------------------------------------------------------------------------------
set terminal pngcairo noenhanced background "#ffffff" fontscale 1.0 size 800, 500 enhanced

set encoding utf8
set boxwidth 1
set grid
set output "images/check-leak-pkg-path-gc.png"

set xlabel "Elapsed Time (sec)"
set ylabel "Memory Footprint (KB)"
set yrange [1500:7700]
set xrange [0:2.1]
set style line 1 lc rgb '#009900' lt 1 lw 2 pt 7 ps 1
plot 'images/check-leak-pkg-path-gc.dat' title '' with linespoints ls 1

--------------------------------------------------------------------------------
/images/check-leak-pkg-path-gc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openresty/programming-openresty/8d56881cd4415822b9634796b9bb7f068ccab8bb/images/check-leak-pkg-path-gc.png

--------------------------------------------------------------------------------
/images/check-leak-pkg-path.gnu:
--------------------------------------------------------------------------------
set terminal pngcairo noenhanced background "#ffffff" fontscale 1.0 size 800, 500 enhanced

set encoding utf8
set boxwidth 1
set grid
set output "images/check-leak-pkg-path.png"

set xlabel "Elapsed Time (sec)"
set ylabel "Memory Footprint (KB)"
set yrange [1500:7700]
set xrange [0:2.1]
set style line 1 lc rgb '#009900' lt 1 lw 2 pt 7 ps 1
plot 'images/check-leak-pkg-path.dat' title '' with linespoints ls 1

--------------------------------------------------------------------------------
/images/check-leak-pkg-path.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openresty/programming-openresty/8d56881cd4415822b9634796b9bb7f068ccab8bb/images/check-leak-pkg-path.png

--------------------------------------------------------------------------------
/testing/README.adoc:
--------------------------------------------------------------------------------
= Automated Testing

Automated testing plays a critical role in software development and maintenance. OpenResty provides a data-driven test scaffold for writing declarative test cases for NGINX C modules, Lua libraries, and even OpenResty applications. The test cases are written in a specification-like format, which is both intuitive to read and write for humans and also easy to handle for machines. The data-driven approach makes it easy to run the same tests in wildly different ways that can help expose issues in different scenarios or with different kinds of external tools.
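
To give a first taste of this declarative format, below is a minimal test case (this particular example reappears in the following sections, where each of its data sections is explained in detail):

[source,test-base]
----
=== TEST 1: hello, world
--- config
location = /t {
    echo "hello, world!";
}
--- request
GET /t
--- response_body
hello, world!
----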

This chapter introduces the `Test::Nginx` test scaffold that has been widely used to organize test suites for almost all the OpenResty components, including the `ngx_http_lua` module, most of the `lua-resty-*` Lua libraries, as well as full-blown business applications like CloudFlare's Lua CDN and Lua SSL.

*Keywords:* Testing, Mocking

--------------------------------------------------------------------------------
/testing/introduction.adoc:
--------------------------------------------------------------------------------
== Introduction

OpenResty itself has been relying on automated testing to maintain high quality over the years. As OpenResty core developers, we embrace the test-driven development (TDD) process all the time. An excellent result of our TDD practices over the years is a huge set of test suites for all the OpenResty components. These test suites are so large as a whole that it is impractical to run all the tests thoroughly on a single machine. A relatively large test cluster is often run on Amazon EC2 to run all these tests in all existing test modes. Lying at the heart of these test suites is usually the `Test::Nginx` test scaffold module developed by the OpenResty team.

The `Test::Nginx` scaffold provides a simple, generic specification language for expressing and organizing test cases in an intuitive way. It also provides various powerful testing modes or "engines" to run the tests in various different ways in the hope of exposing bugs in different settings. The test specification language can also be extended to add custom abstractions for advanced testing needs, usually found in application-level regression testing.

=== Conceptual Roadmap

=== Overview

--------------------------------------------------------------------------------
/testing/preparing-tests.adoc:
--------------------------------------------------------------------------------
== Preparing Tests

As we have seen in the previous sections, `Test::Nginx` provides a simple declarative format to express test cases. Each test case is represented by a test block. A test block consists of a title, an optional description, and several data sections for specifying inputs and expected outputs. In this section, we will have a close look at how to prepare such test cases for different test requirements.

Designing test cases is an art, in many ways. It may, sometimes, take even more time and effort than implementing the feature to be tested, according to our own experience. `Test::Nginx` tries hard to make writing tests as simple as possible but it still cannot automate the whole test case design process. Only you know exactly what to test and how it can be tested anyway. This section will focus on the basic primitives provided by `Test::Nginx` that you can take advantage of to devise clever and effective test cases.

=== Preparing NGINX Configuration

In a test block, we can use different data sections to specify our custom snippets in different positions of the final `nginx.conf` configuration file generated by `Test::Nginx`.

The most common one is the `config` section which is used to insert custom snippets inside the `server {}` configuration block for the default test server.
We can also use the `http_config` section to insert our custom content into the `http {}` configuration block of `nginx.conf`. The `main_config` section can be used to insert content into the top-level scope of the NGINX configuration. Let's consider the following example.

[source,test-base]
----
=== TEST 1:
--- main_config
env MY_ENVIRONMENT;

--- http_config
init_worker_by_lua_block {
    print("init")
}

--- config
location = /t {
    echo ok;
}

--- request
GET /t
--- response_body
ok
----

This test block will generate an `nginx.conf` file with the following basic structure:

[source,nginx]
----
...
env MY_ENVIRONMENT;

http {
    ...

    init_worker_by_lua_block {
        print("init")
    }

    server {
        ...

        location = /t {
            echo ok;
        }
    }
}
----

Please pay attention to how the `main_config`, `http_config`, and `config` data sections' values are mapped into different locations in the NGINX configuration file.

When in doubt, we can always check out the actual `nginx.conf` file generated by the test scaffold at the location `t/servroot/conf/nginx.conf` in the current working directory (usually just the root directory of the current project).

`Test::Nginx` generates a new `nginx.conf` file for each test block, which makes it possible for each test block to become self-contained. By default, the test scaffold automatically starts a new NGINX server before running each test block and shuts down the server immediately after running the block. Fortunately, NGINX is a lightweight server and it is usually very fast to start and stop. Thus, running the test blocks is not as slow as it might seem.

=== Preparing Requests

The simplest way to prepare a request is to use the `request` data section, as in

[source,test-base]
----
--- request
GET /t?a=1&b=2
----

The HTTP/1.1 protocol is used by default. You can explicitly make it use the HTTP/1.0 protocol if desired:

[source,test-base]
----
--- request
GET /t?a=1&b=2 HTTP/1.0
----

Leading spaces or empty lines in the value of the `request` section are automatically discarded. You can even add comments by leading them with a `#` character, as in

[source,test-base]
----
--- request

# this is a simple test:
GET /t
----

You can add some additional request headers at the same time through the `more_headers` section as below.

[source,test-base]
----
--- request
GET /t
--- more_headers
Foo: bar
Bar: baz
----

==== Pipelined Requests

Preparing pipelined HTTP requests is also possible, but you need to use the `pipelined_requests` section instead of `request`.
For instance,

[source,test-base]
----
=== TEST 1: pipelined requests
--- config
location = /t {
    echo ok;
}

--- pipelined_requests eval
["GET /t", "GET /t"]

--- response_body eval
["ok\n", "ok\n"]
----

It is worth noting that we use the `eval` filter with the `pipelined_requests` section to treat the literal value of that section as Perl code. This way we can construct a Perl array of the request strings, which is the expected data format for the `pipelined_requests` section. We need a similar trick for the `response_body` section when checking outputs. With an array of expected response body data, we can check a different value for each individual request in the pipeline. Note, however, that not every data section supports the same array-typed value semantics as `response_body`.

=== Checking Responses

We have already visited the `response_body` and `error_code` data sections for checking the response body data and response status code, respectively.

The `response_body` data section always performs an exact whole-string comparison between the section value and the actual response body. It tries to be clever when long string value comparison fails. Consider the following sample output from `prove`.

....
t/foo.t .. 1/?
# Failed test 'TEST 1: long string test - response_body - response is expected (req 0)'
# at .../test-nginx/lib/Test/Nginx/Socket.pm line 1282.
# got: ..."IT 2.x is enabled.\x{0a}\x{0a}"...
# length: 409
# expected: ..."IT 2.x is not enabled.\x{0a}"...
# length: 412
# strings begin to differ at char 400 (line 1 column 400)
# Looks like you failed 1 test of 2.
/tmp/foo.t .. Dubious, test returned 1 (wstat 256, 0x100)
Failed 1/2 subtests

Test Summary Report
-------------------
/tmp/foo.t (Wstat: 256 Tests: 2 Failed: 1)
Failed test: 2
Non-zero exit status: 1
Files=1, Tests=2, 0 wallclock secs (0.01 usr 0.00 sys + 0.09 cusr 0.03 csys = 0.13 CPU)
Result: FAIL
....

From this test report, we can clearly see that

. it is the test block with the title `TEST 1: long string test` that is failing,
. it is the `response_body` data section check that fails,
. the actual response body data is 409 bytes long while the expected value is 412 bytes, and
. the expected value has an additional `not` word in the string fragment `IT 2.x is enabled` and the difference starts at the offset 400 in the long string.

Behind the scenes, `Test::Nginx` uses the Perl module link:https://metacpan.org/pod/Test::LongString[Test::LongString] to do the long string comparisons. It is also particularly useful while checking response body data in binary formats.

If your response body data is in a multi-line textual format, then you may also want to use a `diff`-style output when the data does not match. To achieve this, we can call the `no_long_string()` Perl function before the `run_tests()` function call in the prologue part of the test file. Below is such an example.

[source,test-base]
----
use Test::Nginx::Socket 'no_plan';

no_long_string();

run_tests();

__DATA__

=== TEST 1:
--- config
location = /t {
    echo "Life is short.";
    echo "Moon is bright.";
    echo "Sun is shining.";
}
--- request
GET /t
--- response_body
Life is short.
Moon is deem.
Sun is shining.
----

Note the `no_long_string()` call in the prologue part. It is important to place it before the `run_tests()` call; otherwise it would be too late for it to take effect, obviously.

Invoking the `prove` utility (or any shell wrappers for it) to run this test file gives the following details about the test failure:

....
# Failed test 'TEST 1: - response_body - response is expected (req 0)'
# at .../test-nginx/lib/Test/Nginx/Socket.pm line 1277.
# @@ -1,3 +1,3 @@
# Life is short.
# -Moon is deem.
# +Moon is bright.
# Sun is shining.
# Looks like you failed 1 test of 2.
....

It is obvious that the second line of the response body output is different.

You can further disable the `diff`-style comparison mode by adding a `no_diff()` Perl function call in the prologue part. Then the failure report will look like this:

....
# Failed test 'TEST 1: - response_body - response is expected (req 0)'
# at .../test-nginx/lib/Test/Nginx/Socket.pm line 1277.
# got: 'Life is short.
# Moon is bright.
# Sun is shining.
# '
# expected: 'Life is short.
# Moon is deem.
# Sun is shining.
# '
# Looks like you failed 1 test of 2.
....

That is, `Test::Nginx` just gives a full listing of the actual response body data and the expected one without any abbreviations or hand-holding.

==== Pattern Matching on Response Bodies

When the response body may change in some ways or you just care about certain key words in a long data string, you can specify a Perl regular expression to do a pattern match against the actual response body data. This is achieved by the `response_body_like` data section. For example,

[source,test-base]
----
--- response_body_like: age: \d+
----

Be careful when you are using the multi-line data section value form. A trailing newline character appended to your section value may make your pattern never match. In this case, the `chomp` filter we introduced in an earlier section can be very helpful. For example,

[source,test-base]
----
--- response_body_like chomp
age: \d+
----

You can also use the `eval` filter to construct a Perl regular expression object with a Perl expression, as in

[source,test-base]
----
--- response_body_like eval
qr/age: \d+/
----

This is the most flexible form to specify a pattern.

NOTE: Perl uses the `qr` quoting structure to explicitly construct regular expression objects. You can use various different quoting forms like `qr/.../`, `qr!...!`, `qr#...#`, and `qr{...}`.

==== Checking Response Headers

The `response_headers` data section can be used to validate response header entries.
For example,

[source,test-base]
----
--- response_headers
Foo: bar
Bar: baz
!Blah
----

This section actually dictates 3 tests:

. The response header `Foo` must appear and must take the value `bar`;
. The response header `Bar` must appear and must take the value `baz`; and
. The response header `Blah` must either not appear or take an empty value.

=== Checking NGINX Error Logs

In addition to responses, the NGINX error log file is also an important output channel for an NGINX server setup.

==== True-False Tests

One immediate testing requirement is to check whether or not a piece of text appears in any error log messages. Such checks can be done via the data sections `error_log` and `no_error_log`, respectively. The former ensures that some lines in the error log file contain the string specified as the section value while the latter tests the opposite: ensuring that no line contains the pattern.

For example,

[source,test-base]
----
--- error_log
Hello world from my server
----

Then the string `Hello world from my server` (without the trailing new-line) must appear in at least one line of the NGINX error log. You can specify multiple strings in separate lines of the section value to perform different checks, for instance,

[source,test-base]
----
--- error_log
This is a dog!
Is it a cat?
----

Then it performs two error log checks: one ensures that the string `This is a dog!` appears in some error log lines, while the other checks the string `Is it a cat?` in the same way. The order of these two string patterns does not matter at all.

If one of the string patterns fails to match any lines in the error log file, then we would get a test failure report from `prove` like below.

....
# Failed test 'TEST 1: simple test - pattern "This is a dog!" matches a line in error.log (req 0)'
....

If you want to specify a Perl regular expression (regex) as one of the patterns, then you should use the `eval` section filter to construct a Perl-array as the section value, as in

[source,test-base]
----
--- error_log eval
[
    "This is a dog!",
    qr/\w+ is a cat\?/,
]
----

As we have seen earlier, Perl regexes can be constructed via the `qr/.../` quoting syntax. Perl string patterns in the Perl array specified by double quotes or single quotes are still treated as plain string patterns, as usual. If the array contains only one regex pattern, then you can omit the array itself, as in

[source,test-base]
----
--- error_log eval
qr/\w+ is a cat\?/
----

`Test::Nginx` puts the error log file of the test NGINX server in the file path `t/servroot/logs/error.log`. As a test writer, we frequently check out this file directly when things go wrong. For example, it is common to make mistakes or typos in the patterns we specify for the `error_log` section. Also, scanning the raw log file can give us insight into the details of NGINX's internal workings when the NGINX debugging logs are enabled in the NGINX build.
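
For instance, one quick way to inspect that raw log right after a test run is with standard shell tools like below (assuming the default `t/servroot/` location mentioned above):

[source,bash]
----
# show the last few entries of the test server's error log
tail -n 50 t/servroot/logs/error.log

# or list only the error-level messages
grep '\[error\]' t/servroot/logs/error.log
----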

The `no_error_log` section is very similar to `error_log` but it checks the nonexistence of the string patterns in the NGINX error log file. One of the most frequent uses of the `no_error_log` section is to ensure that there are _no_ error-level messages in the log file.

[source,test-base]
----
--- no_error_log
[error]
----

If, however, there is a line in the NGINX error log file that contains the string `[error]`, then the test fails. Below is such an example.

....
# Failed test 'TEST 1: simple test - pattern "[error]" should not match any line in error.log but matches line "2016/02/01 11:59:50 [error] 1788\#0: *1 lua entry thread aborted: runtime error: content_by_lua(nginx.conf:42):2: bad"'
....

This is a great way to find the details of the error quickly by just looking at the test report.

Like `error_log`, this section also supports Perl array values and Perl regex values through the `eval` filter.

==== Grep Tests

The `error_log` and `no_error_log` sections are very handy in quickly checking the appearance of certain patterns in the NGINX error log file. But they have serious limitations in that it is impossible to impose stronger constraints on the relative order of the messages containing the patterns or on the number of their occurrences.

To address such limitations, `Test::Nginx::Socket` provides an alternative way to check NGINX error logs that works similarly to the famous UNIX tool, `grep`. The sections `grep_error_log` and `grep_error_log_out` are used for this purpose. The test writer uses the `grep_error_log` section to specify a pattern, with which the test framework scans through the NGINX error log file and collects all the matched parts of the log file lines along the way, forming a final result. This aggregated log data result is then matched against the expected value specified as the value of the `grep_error_log_out` section, in a similar way as with the `response_body` section discussed above.

It is easiest to explain with a simple example.

[source,test-base]
----
=== TEST 1: simple grep test for error logs
--- config
location = /t {
    content_by_lua_block {
        print("it is matched!")
        print("it is matched!")
        print("it is matched!")
    }
}
--- request
GET /t
--- grep_error_log: it is matched!
--- grep_error_log_out
it is matched!
it is matched!
it is matched!
----

Here we use the Lua function `print()` provided by the link:https://github.com/openresty/lua-nginx-module#readme[ngx_http_lua] module to generate NGINX error log messages at the `notice` level. This test case tests the number of the log messages containing the string `it is matched!`. It is important to note that only the _matched_ parts of the log file lines are collected in the final result instead of the whole log lines. This simplifies the comparison a lot since NGINX error log messages can contain varying details like timestamps and connection numbers.

A more useful form of this test is to specify a Perl regex pattern in the `grep_error_log` section. Consider the following example.

[source,test-base]
----
=== TEST 1: simple grep test for error logs
--- config
location = /t {
    content_by_lua_block {
        print("test: before sleeping...")
        ngx.sleep(0.001)  -- sleeping for 1ms
        print("test: after sleeping...")
    }
}
--- request
GET /t
--- grep_error_log eval: qr/test: .*?\.\.\./
--- grep_error_log_out
test: before sleeping...
test: after sleeping...
----

We specify a Perl regex pattern, `test: .*?\.\.\.`, here to filter out all the error log messages starting with `test:` and ending with `...`. Naturally, in this test we also require the relative order of these two messages, that is, `before sleeping` must appear _before_ `after sleeping`. Otherwise, we shall see failure reports like below:

....
# Failed test 'TEST 1: simple grep test for error logs - grep_error_log_out (req 0)'
# at ..../lib/Test/Nginx/Socket.pm line 1048.
# got: "test: after sleeping...\x{0a}test: before sleeping...\x{0a}"
# length: 49
# expected: "test: before sleeping...\x{0a}test: after sleeping...\x{0a}"
# length: 49
# strings begin to differ at char 7 (line 1 column 7)
....

As with the `response_body` section, we can also call the `no_long_string()` Perl function before `run_tests()` in the test file prologue, so as to disable the long string output mode and enable the `diff` mode. Then the test failure would look like this:

....
# Failed test 'TEST 1: simple grep test for error logs - grep_error_log_out (req 0)'
# at .../lib/Test/Nginx/Socket.pm line 1044.
# @@ -1,2 +1,2 @@
# -test: before sleeping...
# test: after sleeping...
# +test: before sleeping...
....

Obviously, for this test case, the `diff` format looks better.

==== Extra Delay Before Log Checks

By default, `Test::Nginx::Socket` performs the NGINX error log checks not long after it receives the complete HTTP response for the test request. Sometimes, when the log messages are generated by the server after sending out the response, the error log checks may be carried out too early, before the messages are written into the log file. In this case, we can specify an extra delay via the `wait` data section for the test scaffold to wait for the error log messages. Here is an example:

[source,test-base]
----
=== TEST 1: wait for the timer
--- config
location = /t {
    content_by_lua_block {
        local function f(premature)
            print("HERE!")
        end
        assert(ngx.timer.at(0.1, f))
    }
}
--- request
GET /t
--- error_log
HERE!
--- no_error_log
[error]
--- wait: 0.12
----

Here we create a timer via the `ngx.timer.at` Lua function, which expires after 0.1 seconds. Due to the asynchronous nature of timers, the request handler does not wait for the timer to expire; it immediately finishes processing the current request and sends out a response with an empty body. To check for the log message `HERE!` generated by the timer handler `f`, we have to specify an extra delay for the test scaffold to wait. A delay of 0.12 seconds is specified in this example, but any value larger than 0.1 would suffice.
Without the `wait` section, this test case would fail with the following output:

....
# Failed test 'TEST 1: wait for the timer - pattern "HERE!" matches a line in error.log (req 0)'
....

Obviously the test scaffold checks the error log too soon, even before the timer handler runs.

=== Section Review

`Test::Nginx::Socket` offers a rich set of data sections for specifying various different input data and expected output data, ranging from NGINX configuration file snippets and test requests to expected responses and error log messages. We have already demonstrated the power of data-driven testing and declarative test case crafting. We want to achieve multiple goals at the same time, that is, not only to make the tests self-contained and highly readable, but also to make the test report easy to interpret and analyze when some of the tests fail. Raw files automatically generated by the test scaffold, like `t/servroot/conf/nginx.conf` and `t/servroot/logs/error.log`, should be checked frequently when manually debugging the test cases. The next section extends the discussion of this section with a focus on testing erroneous cases.

--------------------------------------------------------------------------------
/testing/running-tests.adoc:
--------------------------------------------------------------------------------
== Running Tests

Like most Perl-based testing frameworks, `Test::Nginx` relies on Perl's `prove` command-line utility to run the test files. The `prove` utility is usually shipped with the standard perl distribution so we should already have it when we have `perl` installed.

`Test::Nginx` always invokes a real NGINX server and a real socket client to run the tests. It automatically uses the `nginx` program found in the system environment `PATH`. It is your responsibility to specify the right `nginx` in your `PATH` environment for the test suite. Usually we just specify the path of the `nginx` program inside the `OpenResty` installation tree. For example,

[source,bash]
----
export PATH=/usr/local/openresty/nginx/sbin:$PATH
----

Here we assume that OpenResty is installed to the default prefix, i.e., `/usr/local/openresty/`.

You can always use the `which` command to verify if the `PATH` environment is indeed set properly:

[source,console]
----
$ which nginx
/usr/local/openresty/nginx/sbin/nginx
----

For convenience, we usually wrap such environment settings in a custom shell script so that we do not risk polluting the system-wide or account-wide environment settings nor take on the burden of setting up the environment manually for every shell session. For example, I usually have a local bash script named `go` in each project I work on. A typical `go` script might look like below:

[source,bash]
----
#!/usr/bin/env bash

export PATH=/usr/local/openresty/nginx/sbin:$PATH

exec prove "$@"
----

Then we can use this `./go` script to substitute the `prove` utility in any of the subsequent commands involving `prove`.
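
For example, the two commands below run a single test file and the whole test suite, respectively, through the wrapper (both `prove` forms are introduced later in this section):

[source,bash]
----
./go t/foo.t    # run one test file
./go -r t/      # run the whole test suite recursively
----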

Because `Test::Nginx` makes heavy use of environment variables for the callers to fine-tune the testing behaviors (as we shall see in later sections), such shell wrapper scripts also make it easy to manage all these environment variable settings and hard to get things wrong.

NOTE: Please do not confuse the name of this bash script with Google's Go programming language. It has nothing to do with the Go language in any way.

=== Running A Single File

If you want to run a single test file, say, `t/foo.t`, then all you need to do is just type the following command in your terminal.

[source,bash]
----
prove t/foo.t
----

Here inside `t/foo.t` we employ the simple test file example presented in the previous section. We repeat the content below for the reader's convenience.

[source,test-base]
.t/foo.t
----
use Test::Nginx::Socket 'no_plan';

run_tests();

__DATA__

=== TEST 1: hello, world
This is just a simple demonstration of the
echo directive provided by ngx_http_echo_module.
--- config
location = /t {
    echo "hello, world!";
}
--- request
GET /t
--- response_body
hello, world!
--- error_code: 200
----

It is worth mentioning that we could run the following command instead if we have a custom wrapper script called `./go` for `prove` (as mentioned earlier in this section):

[source,bash]
----
./go t/foo.t
----

When everything goes well, it generates an output like this:

....
t/foo.t .. ok
All tests successful.
Files=1, Tests=2, 0 wallclock secs (0.02 usr 0.01 sys + 0.08 cusr 0.03 csys = 0.14 CPU)
Result: PASS
....

This is a very concise summary. The first line tells you that all tests passed while the second line gives you a summary of the number of test files (1 in this case), the number of tests (2 in this case), and the wallclock and CPU times used to run all the tests.

It is interesting to see that we have only one test block in the sample test file but in the test summary output by `prove` we see that the number of tests is 2. Why the difference? We can easily find it out by asking `prove` to generate a detailed test report for all the individual tests. This is achieved by passing the `-v` option (meaning "verbose") to the `prove` command we used earlier:

[source,bash,linenums]
----
prove -v t/foo.t
----

Now the output shows all the individual tests performed in that test file:

....
t/foo.t ..
ok 1 - TEST 1: hello, world - status code ok
ok 2 - TEST 1: hello, world - response_body - response is expected (req 0)
1..2
ok
All tests successful.
Files=1, Tests=2, 0 wallclock secs (0.01 usr 0.01 sys + 0.07 cusr 0.03 csys = 0.12 CPU)
Result: PASS
....

Obviously, the first test is doing the status code check, which is dictated by the `error_code` data section in the test block, and the second test is doing the response body check, required by the `response_body` section. Now the mystery is solved.

It is worth mentioning that the `--- error_code: 200` section is automatically assumed when no `error_code` section is explicitly provided in the test block. So our test block above can be simplified by removing the `--- error_code: 200` line without affecting the number of tests. This is because checking for the 200 response status code is so common that `Test::Nginx` makes it the default. If you expect a different status code, like 500, then just add an explicit `error_code` section.

From this example, we can see that one test block can contain multiple tests and the number of tests for any given test block can be determined or predicted by looking at the data sections performing output checks. This is important when we provide a "test plan" ourselves to the test file where a "test plan" is the exact number of tests we _expect_ the current test file to run. If a different number of tests than the plan were actually run, then the test result would be considered a failure even when all the tests themselves passed successfully. Thus, a test plan adds a strong constraint on the total number of tests expected to be run. For our `t/foo.t` file here, however, we intentionally avoid providing any test plans by passing the `'no_plan'` argument to the `use` statement that loads the `Test::Nginx::Socket` module. We will revisit the "test plan" feature and explain how to provide one in a later section.

=== Running Multiple Files

Running multiple test files is straightforward; just specify the file names on the `prove` command line, as in

[source,bash]
----
prove -v t/foo.t t/bar.t t/baz.t
----

If you want to run all the test files directly under the `t/` directory, then using a shell wildcard can be handy:

[source,bash]
----
prove -v t/*.t
----

In the case that you have sub-directories under `t/`, you can specify the `-r` option to ask `prove` to recursively traverse the whole directory tree rooted at `t/` to find test files:

[source,bash]
----
prove -r t/
----

This command is also the standard way to run the whole test suite of a project.

=== Running Individual Test Blocks

`Test::Nginx` makes it easy to run an individual test block in a given file. Just add the special data section `ONLY` to the test block you want to run individually and `prove` will skip all the other test blocks while running that test file. For example,

[source,test-base]
----
=== TEST 1: hello, world
This is just a simple demonstration of the
echo directive provided by ngx_http_echo_module.
--- config
location = /t {
    echo "hello, world!";
}
--- request
GET /t
--- response_body
hello, world!
--- ONLY
----

Now `prove` won't run any other test blocks (if any) in the same test file.

This is very handy while debugging a particular test block. You can focus on one test case at a time without worrying about other unrelated test cases getting in your way.
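
For example, with the `--- ONLY` marker in place, re-running the file in verbose mode shows only the checks from that single block:

[source,bash]
----
prove -v t/foo.t
----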

When using the link:http://www.vim.org/[Vim] editor, we can quickly insert a `--- ONLY` line into the test block we are viewing in the vim file buffer, and then type `:!prove %` in the command mode of vim without leaving the editor window. This works because vim automatically expands the special `%` placeholder with the path of the current active file being edited. This workflow is great since you never leave your editor window and you never have to type the title (or other IDs) of your test block nor the path of the containing test file. You can quickly jump between test blocks even across different files. Test-driven development usually demands very frequent interactions and iterations, and `Test::Nginx` is particularly optimized to speed up this process.

Sometimes you may forget to remove the `--- ONLY` line from some test files even after debugging, which will incorrectly skip all the other tests in those files. To catch such mistakes, `Test::Nginx` always reports a warning for files using the `ONLY` special section, as in

[source,console]
----
$ prove t/foo.t
t/foo.t .. # I found ONLY: maybe you're debugging?
t/foo.t .. ok
All tests successful.
Files=1, Tests=2, 0 wallclock secs (0.01 usr 0.00 sys + 0.09 cusr 0.03 csys = 0.13 CPU)
Result: PASS
----

This way it is much easier to identify any leftover `--- ONLY` lines.

Similar to `ONLY`, `Test::Nginx` also provides the `LAST` data section to make the containing test block become the last test block being run in that test file.

NOTE: The special data sections `ONLY` and `LAST` are actually features inherited from the `Test::Base` module.

=== Skipping Tests

We can specify the special `SKIP` data section to skip running the containing test block unconditionally. This is handy when we write a test case that is for a future feature or a test case for a known bug that we haven't had the time to fix right now. For example,

[source,test-base]
----
=== TEST 1: test for the future
--- config
location /t {
    some_fancy_directive;
}
--- request
GET /t
--- response_body
blah blah blah
--- SKIP
----

It is also possible to skip a whole test file in the prologue part. Just replace the `use` statement with the following form.

[source,Perl]
----
use Test::Nginx::Socket skip_all => "some reasons";
----

Then running the test file gives something like the following.

....
t/foo.t .. skipped: some reasons
....

NOTE: It is also possible to conditionally skip a whole test file but it requires a little bit of Perl programming. Interested readers can try using a `BEGIN {}` block before the `use` statement to calculate the value of the `skip_all` option on the fly.

=== Test Running Order

==== Test File Running Order

Test files are usually run in the alphabetical order of their file names. Some people prefer explicitly controlling the running order of their test files by prefixing the test file names with number sequences like `001-`, `002-`, and so on.

The test suite of the link:https://github.com/openresty/lua-nginx-module#readme[ngx_http_lua] module, for example, follows this practice and has test file names like below:

....
t/000-sanity.t
t/001-set.t
t/002-content.t
t/003-errors.t
...
t/139-ssl-cert-by.t
....

Although the `prove` utility supports running test files in multiple parallel jobs via the `-jN` option, `Test::Nginx` does not really support this mode since all the test cases share exactly the same test server directory, `t/servroot/`, and the same listening ports, as we have already seen, while parallel running requires strictly isolated running environments for each individual thread of execution. One can still manually split the test files into different groups and run each group on a different (virtual) machine or an isolated environment like a Linux container.

==== Test Block Running Order

By default, the `Test::Nginx` scaffold _shuffles_ the test blocks in each file and runs them in a _random_ order. This behavior encourages writing self-contained and independent test cases and also increases the chance of hitting a bug by actively mutating the relative running order of the test cases. This may, indeed, confuse newcomers coming from more traditional testing platforms.

We can always disable this test block shuffling behavior by calling the Perl function, `no_shuffle()`, imported by the `Test::Nginx::Socket` module, before the `run_tests()` call in the test file prologue. For example,

[source,Perl]
----
use Test::Nginx::Socket 'no_plan';

no_shuffle();
run_tests();

__DATA__
...
----

With the `no_shuffle()` call in place, the test blocks are run in the exact same order as their appearance in the test file.

--------------------------------------------------------------------------------
/testing/test-file-layout.adoc:
--------------------------------------------------------------------------------
== Test File Layout
:special_data_line: __DATA__

Test files usually have a common file extension, `.t`, to distinguish themselves from other types of files in the source tree. Each test file is a Perl script per se. `Test::Nginx` follows a special design that decomposes each test file into two main parts: the first part is a very short prologue that consists of a few lines of Perl code while the second part is a listing of the test cases in a special data format. These two parts are separated by the following special line

....
__DATA__
....

The `perl` interpreter or the `prove` utility stops interpreting the file content as Perl source code once it sees this special line. Everything after this line is treated as *data* in plain text that is reachable by the Perl code above this line. The most interesting part of each `.t` test file is the stuff after this line, i.e., the data part.

NOTE: The special `{special_data_line}` notation is a powerful feature of the Perl programming language that allows embedding arbitrary free-text data in any Perl script files that can be manipulated by the containing Perl scripts themselves.
`Test::Nginx` takes advantage of this feature to allow data-driven test case specifications in a simple format or language that is easily understandable by everyone, even those without any prior experience in Perl programming.

=== The Prologue Part

The first part, i.e., the "prologue" above the `{special_data_line}` line, is usually just a few lines of Perl code. You do not have to know Perl programming to write them down because they are so simple and seldom or never change. The simplest Perl code prologue is as follows:

[source,perl,linenums]
----
use Test::Nginx::Socket 'no_plan';
run_tests();
----

The first line just loads the Perl module (or class), `Test::Nginx::Socket`, and passes the option `'no_plan'` to it to disable test plans (we will talk more about test plans in later chapters and we do not bother worrying about them here). `Test::Nginx::Socket` is one of the most popular classes in the `Test::Nginx` test framework. The second line just calls the `run_tests` Perl function imported automatically from the `Test::Nginx::Socket` module to run all the test cases defined in the data part of the test file (i.e., the things coming after the `{special_data_line}` line).

There are, however, more complicated prologue parts in many real-world test suites. Such prologues usually define some special environment variables or Perl variables that can be shared and referenced in the test cases defined in the "data part", or just call some other Perl functions imported by the `Test::Nginx::Socket` module to customize the testing configurations and behaviors for the current test file. We will return to such fancier prologues in later sections. They can be very helpful in some cases.

NOTE: Perl allows function calls to omit the parentheses if the context is unambiguous. So we may see Perl function calls without parentheses in real-world test files' prologue part, like `run_tests;`. We may use such forms in examples presented in later sections because they are more compact.

=== The Data Part

The data part is the most important part of any test file powered by `Test::Nginx`. This is where test cases reside. It uses a simple specification format to express test cases so that the user does not need to use Perl or any other general-purpose language to present the tests themselves. This special specification format is an instance of a Domain-Specific Language (DSL) where the "domain" is defined as testing code running upon or inside NGINX. Use of a DSL to present test cases opens the door for presenting the test cases as _data_ instead of code. This is also why `Test::Nginx` is a data-driven testing framework.

The test case specification in the data part is composed of a series of _test blocks_. Each test block usually corresponds to a single test case, which has a _title_, an optional _description_, and a series of _data sections_. The structure of a test block is described by the following template.

[source,test-base]
----
=== title
optional description
goes here...
--- section1
value1 goes
here
--- section2
value2 is
here
--- section3
value3
----

==== Block Titles

As we can see, each test block starts with a title line prefixed by three equal signs (`===`). It is important to _avoid_ any leading spaces at the beginning of the line. The title is mandatory and is important to describe the intention of the current test case in the most concise form, and also to identify the test block in the test report when test failures happen. By convention we put a `TEST N:` prefix in this title, for instance, `TEST 3: test the simplest form`. Don't worry about maintaining the test ordinal numbers in these titles yourself; we will introduce a command-line utility called link:https://raw.githubusercontent.com/openresty/openresty-devel-utils/master/reindex[reindex] in a later section that can automatically update the ordinal numbers in the block titles for you.

==== Block Descriptions

Each test block can carry an optional description right after the block title line. This description can span multiple lines if needed. It is a more detailed description of the intention of the test block than the block title and may also give some background information about the current test. Many test cases just omit this part for convenience.

==== Data Sections

Every test block carries one or more _data sections_ right after the block description (if any). Data sections always have a name and a value, which specify the input data fields and the _expected_ output data fields.

The name of a data section is the word after the line prefix `---`. Spaces are allowed though not syntactically required after `---`. We usually use a single space between the prefix and the section name for aesthetic considerations and we hope that you follow this convention as well. The section names usually contain just alphanumeric letters and underscore characters.

Section values are specified in two forms. One is all the lines after the section name line, before the next section or the next block. The other form is more concise and specifies the value directly on the same line as the section name, but right after the first colon character (`:`). The latter form requires that the value contain no line-breaks. Any spaces around the colon are always discarded and never count as a part of the section value; furthermore, the trailing line-break character in the one-line form does not count either.

If no visible values come after the section name in either form, then the section takes an empty string value, which is still a _defined_ value, however. On the other hand, omitting the section name (and value) altogether makes that section _undefined_.

`Test::Nginx` offers various pre-defined data section names that can be used in the test blocks for different purposes. Some data sections are for specifying input data, some are for expected output, and some for controlling whether the current test block should be run at all.

It is best to explain data sections in a concrete test block example.

[source,test-base]
----
=== TEST 1: hello, world
This is just a simple demonstration of the
echo directive provided by ngx_http_echo_module.
--- config
location = /t {
    echo "hello, world!";
}
--- request
GET /t
--- response_body
hello, world!
--- error_code: 200
----

Here we have two input data sections, `config` and `request`, for specifying a custom NGINX configuration snippet in the default `server {}` and the HTTP request sent by the test scaffold to the test NGINX server, respectively. In addition, we have one output data section, `response_body`, for specifying the expected response body output by the test NGINX server. If the actual response body data is different from what we specify under the `response_body` section, this test case fails. We have another output data section, `error_code`, which specifies its value on the same line as the section name. We see that a colon character is used to separate the section name and value. Obviously, the `error_code` section specifies the expected HTTP response status code, which is 200.

Empty lines around data sections are always discarded by `Test::Nginx::Socket`. Thus the test block above can be rewritten as below without changing its meaning.

[source,test-base]
----
=== TEST 1: hello, world
This is just a simple demonstration of the
echo directive provided by ngx_http_echo_module.

--- config
location = /t {
    echo "hello, world!";
}

--- request
GET /t

--- response_body
hello, world!

--- error_code: 200
----

Some users prefer this style for aesthetic reasons. You are free to choose whichever form you like.

There are also some special data sections that specify neither input nor output. They are just used to _control_ how test blocks are run. For example, the `ONLY` section makes _only_ the current test block in the current test file run, while all the other test blocks are skipped. This is extremely useful for running an individual test block in any given file, which is a common requirement while debugging a particular test failure. Also, the special `SKIP` section can skip running the containing test block unconditionally, handy for preparing test cases for future features without introducing any expected test failures. We will visit more such "control sections" in later sections.

We shall see, in a later section, that the user can define their own data sections or extend existing ones by writing a little bit of custom Perl code to satisfy more complicated testing requirements.

==== Section Filters

Data sections can take one or more _filters_. Filters are handy when you want to adjust or convert the section values in certain ways.

Syntactically, filters are specified right after the section name with at least one space character as the separator. Multiple filters are also separated by spaces and are applied in the order they are written.

`Test::Nginx::Socket` provides many filters for your convenience. Consider the following data section from the aforementioned test block.

[source,test-base]
----
--- error_code: 200
----

If we want to place the section value, 200, on a separate line, like below,

[source,test-base]
----
--- error_code
200
----

then the section value would contain a trailing newline, which leads to a test failure. This is because the one-line form always excludes the trailing new-line character while the multi-line form always includes one. To explicitly exclude the trailing new-line in the multi-line form, we can employ the `chomp` filter, as in

[source,test-base]
----
--- error_code chomp
200
----

Now it has exactly the same semantics as the previous one-line form.

Some filters have a more dramatic effect on the section values. For instance, the `eval` filter evaluates the section value as arbitrary Perl code, and the Perl value resulting from the execution will be used as the final section value. The following section demonstrates using the `eval` filter to produce 4096 a's:

[source,test-base]
----
--- response_body eval
"a" x 4096
----

The original value of the `response_body` section above is a Perl expression where the `x` symbol is a Perl operator that constructs a string by repeating the string specified as its left-hand-side N times, with N given by its right-hand-side. The resulting 4096-byte Perl string after evaluating this expression dictated by the `eval` filter will be used as the final section value for comparison with the actual response body data. It is obvious that use of the `eval` filter and a Perl expression here is much more readable and manageable than directly pasting that 4096-byte string in the test block.

As with data sections, the user can also define their own filters, as we shall see in a later section.

=== A Complete Example

We can conclude this section with a complete test file example given below, with both the prologue part and the data part.

[source,test-base]
----
use Test::Nginx::Socket 'no_plan';

run_tests();

__DATA__

=== TEST 1: hello, world
This is just a simple demonstration of the
echo directive provided by ngx_http_echo_module.
--- config
location = /t {
    echo "hello, world!";
}
--- request
GET /t
--- response_body
hello, world!
--- error_code: 200
----

We will see how to actually run such test files in the next section.

NOTE: The test file layout described in this section is exactly the same as in the test files based on other test frameworks derived from `Test::Base`, the superclass of `Test::Nginx::Socket`, except those specialized test sections and specialized Perl functions defined only in `Test::Nginx::Socket`. All the `Test::Base` derivatives share the same basic layout and syntax. They proudly inherit the same veins of blood.
--------------------------------------------------------------------------------
/testing/test-modes.adoc:
--------------------------------------------------------------------------------
== Test Modes

One unique feature of `Test::Nginx` is that it allows running the same test suite in wildly different ways, or test modes, by just configuring some system environment variables. Different test modes have different focuses and may find different categories of bugs or performance issues in the applications being tested. The data-driven nature of the test framework makes it easy to add new test modes without changing the user test files at all. And it is also possible to combine different test modes to form new (hybrid) test modes. The capability of running the same test suite in many different ways helps squeeze more value out of the tests we already have.

This section will iterate through the various test modes supported by `Test::Nginx::Socket` and the corresponding system environment variables used to enable or control them.

=== Benchmark Mode

`Test::Nginx` has built-in support for performance testing or benchmarking. It can invoke external load testing tools like `ab` and `weighttp` to stress each test case as hard as possible.

To enable this benchmark testing mode, you can specify the `TEST_NGINX_BENCHMARK` system environment variable before running the `prove` command. For example,

[source,bash]
----
export TEST_NGINX_BENCHMARK='2000 2'
prove t/foo.t
----

This will run all the test cases in `t/foo.t` in benchmark mode. In particular, the first number, `2000`, in the environment variable value indicates the total number of requests used to flood the server, while the second number, `2`, is the number of concurrent connections the client will use.

If the test case uses an HTTP 1.1 request (which is the default), then the test scaffold will invoke the `weighttp` tool. If it is an HTTP 1.0 request, then the test scaffold invokes the `ab` tool.

This test mode requires the `unbuffer` command-line utility from the `expect` package, as well as the `ab` and `weighttp` load testing tools. On Ubuntu/Debian systems, we can install most of the dependencies with the command

[source,bash]
----
sudo apt-get install expect apache2-utils
----

You may need to build and install `weighttp` from source on Ubuntu/Debian yourself due to the lack of a Debian package for it.

On Mac OS X, on the other hand, we can use `homebrew` to install the dependencies like this:

[source,bash]
----
brew install expect weighttp
----

Now let's consider the following example.

.t/hello.t
[source,test-base]
----
use Test::Nginx::Socket 'no_plan';

run_tests();

__DATA__

=== TEST 1: hello world
--- config
location = /hello {
    return 200 "hello world\n";
}
--- request
GET /hello
--- response_body
hello world
----

Then we run this test file in the benchmark mode, like this:

[source,bash]
----
export TEST_NGINX_BENCHMARK='200000 2'
prove t/hello.t
----

The output should look like this:

....
t/hello.t .. TEST 1: hello world
weighttp -c2 -k -n200000 http://127.0.0.1:1984/hello
weighttp - a lightweight and simple webserver benchmarking tool

starting benchmark...
spawning thread #1: 2 concurrent requests, 200000 total requests
progress: 10% done
progress: 20% done
progress: 30% done
progress: 40% done
progress: 50% done
progress: 60% done
progress: 70% done
progress: 80% done
progress: 90% done
progress: 100% done

finished in 2 sec, 652 millisec and 752 microsec, 75393 req/s, 12218 kbyte/s
requests: 200000 total, 200000 started, 200000 done, 200000 succeeded, 0 failed, 0 errored
status codes: 200000 2xx, 0 3xx, 0 4xx, 0 5xx
traffic: 33190005 bytes total, 30790005 bytes http, 2400000 bytes data
t/hello.t .. ok
All tests successful.
Files=1, Tests=2,  3 wallclock secs ( 0.01 usr  0.00 sys +  0.33 cusr  1.47 csys =  1.81 CPU)
Result: PASS
....

The most important line in this output is the following:

....
finished in 2 sec, 652 millisec and 752 microsec, 75393 req/s, 12218 kbyte/s
....

We can see that this test case can achieve 75393 requests per second and 12218 KB per second. Not bad for a single NGINX worker process!

It is also important to keep an eye on failed requests. We surely do not care about the performance of error pages. We can get the number of error responses by checking the following output lines:

....
requests: 200000 total, 200000 started, 200000 done, 200000 succeeded, 0 failed, 0 errored
status codes: 200000 2xx, 0 3xx, 0 4xx, 0 5xx
....

We are glad to see that all our requests succeeded in this run.

If we want to benchmark the performance of multiple NGINX worker processes so as to utilize multiple CPU cores, then we can add the following lines to the test file prologue, _before_ the line `run_tests()`:

[source,perl]
----
master_on();
workers(4);
----

This way we can have 4 NGINX worker processes sharing the load.

Behind the scenes, the test scaffold assembles the command line involving `weighttp` from the test block specification. In this case, the command line looks like this:

[source,bash]
----
weighttp -c2 -k -n200000 http://127.0.0.1:1984/hello
----

There exist complicated cases, however, where the test scaffold fails to derive the exact command line equivalent.

We can also enforce HTTP 1.0 requests in our test block by appending the "HTTP/1.0" string to the value of the `--- request` section:

....
--- request
GET /hello HTTP/1.0
....

In this case, the test scaffold will invoke the `ab` tool to flood the server with the matching HTTP 1.0 request. The output might look like this:

....
t/hello.t .. TEST 1: hello world
ab -r -d -S -c2 -k -n200000 http://127.0.0.1:1984/hello
This is ApacheBench, Version 2.3 <$Revision: 1706008 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 20000 requests
Completed 40000 requests
Completed 60000 requests
Completed 80000 requests
Completed 100000 requests
Completed 120000 requests
Completed 140000 requests
Completed 160000 requests
Completed 180000 requests
Completed 200000 requests
Finished 200000 requests


Server Software:        openresty/1.9.15.1
Server Hostname:        127.0.0.1
Server Port:            1984

Document Path:          /hello
Document Length:        12 bytes

Concurrency Level:      2
Time taken for tests:   3.001 seconds
Complete requests:      200000
Failed requests:        0
Keep-Alive requests:    198000
Total transferred:      33190000 bytes
HTML transferred:       2400000 bytes
Requests per second:    66633.75 [#/sec] (mean)
Time per request:       0.030 [ms] (mean)
Time per request:       0.015 [ms] (mean, across all concurrent requests)
Transfer rate:          10798.70 [Kbytes/sec] received

Connection Times (ms)
              min  avg  max
Connect:        0    0    1
Processing:     0    0  132
Waiting:        0    0  132
Total:          0    0  132
t/hello.t .. ok
All tests successful.
Files=1, Tests=2,  4 wallclock secs ( 0.02 usr  0.00 sys +  0.51 cusr  1.39 csys =  1.92 CPU)
Result: PASS
....

The most important output lines, in this case, are

....
Failed requests:        0
Requests per second:    66633.75 [#/sec] (mean)
Transfer rate:          10798.70 [Kbytes/sec] received
....

Different hardware and operating systems may lead to very different results. Therefore, it generally does not make sense at all to directly compare numbers obtained from different machines and systems.

Clever users can write some external scripts to record and compare these numbers across different runs, so as to keep track of performance changes in the web server or application. Such comparison scripts must take into account any measurement errors and any disturbances from other processes running on the same system.

Performance benchmarking is a large topic, and we give it a more detailed treatment in a dedicated chapter.

=== HUP Reload Mode

By default, the test scaffold always starts a fresh instance of the NGINX server right before running each individual test block and stops the server right after the checks of the current test block are all done. This ensures that there are no side effects among test blocks, especially those running successively. But it is also sometimes desirable to ensure that everything works fine when the NGINX server is just reloading its configuration, without a full server restart. Such configuration reloading is usually done by sending the `HUP` signal to the master process of NGINX. So we usually call it "HUP reload".

NOTE: On some non-UNIX-style operating systems like Microsoft Windows, there is no such thing as signals. On such platforms, NGINX users usually use the `-s reload` command-line option of the `nginx` executable to do the same thing.
It should be noted, however, that the use of the `-s reload` option has one side effect that can be annoying: it loads the NGINX configuration _twice_ instead of just once, which may incur unnecessary initialization overhead. Therefore, we should always use the `HUP` signal instead of `-s reload` whenever possible.

One example of an OpenResty feature that behaves differently upon HUP reload than upon a server restart is the shared dictionary mechanism (https://github.com/openresty/lua-nginx-module/#lua_shared_dict[lua_shared_dict]), which does not wipe out any existing data in the shared memory storage during a HUP reload. When testing this feature, or application code relying on this feature, it is wise to test how it behaves upon HUP reload. We saw in the past that some third-party NGINX C modules dealing with shared memory, for example, had bugs across HUP reloads, like nasty memory leaks.

`Test::Nginx` has built-in support for the HUP reload test mode, which can be enabled by setting the `TEST_NGINX_USE_HUP=1` environment variable:

[source,bash]
----
export TEST_NGINX_USE_HUP=1
----

Then we can run our existing test suite as usual, but now the `HUP` signal is used by the test scaffold to reload the NGINX configuration specified by different test blocks. The NGINX server will only be automatically shut down when the test harness finishes running each test file.

NOTE: We can even avoid the automatic server shutdown upon test file completion by setting the `TEST_NGINX_NO_CLEAN=1` environment variable. See the later section <<no-clean>> for more details.

UNIX signals like `HUP` usually work asynchronously. Thus, there is a delay between the moment the test scaffold finishes sending the `HUP` signal to the NGINX server and the moment the NGINX server forks off a new worker process using the newly loaded configuration and starts accepting new connections with that new worker. For this reason, there is a (small) chance that the request of a test block is served by an NGINX worker process still using the configuration specified by the previous test block. Although `Test::Nginx` tries hard to wait as long as it can with some simple heuristics, some test blocks may still experience intermittent test failures due to such NGINX configuration mismatches. Be prepared for such false positives when using the HUP reload testing mode. This is also one of the reasons why the HUP reload mode is not the default. We hope this issue can be further improved in the future.

Another limitation of the HUP reload mode is that HUP reloads only happen at test block boundaries. There are cases where it is desirable to issue a HUP reload in the middle of a test block. We can achieve that with some custom Lua code in the test block that sends the `HUP` signal itself, as in

[source,lua]
----
local f = assert(io.open("t/servroot/logs/nginx.pid", "r"))
local master_pid = assert(f:read())
assert(f:close())
assert(os.execute("kill -HUP " .. master_pid) == 0)
----

=== Valgrind Mode

One of the biggest enemies of web servers or web applications that are supposed to run in a 24x7 manner is memory issues.
Memory issues include memory leaks, invalid memory reads (like reading beyond a buffer boundary), and invalid memory writes (like buffer overflows). In the case of memory leaks, the processes can take up more and more memory in the system and eventually exhaust all the physical memory available, leading to unresponsive systems or triggering the system to start killing processes by force. Invalid memory accesses, on the other hand, can lead to process crashes (like segmentation faults), or worse, to nondeterminism in the process's behavior (like giving out wrong results).

http://valgrind.org/[Valgrind] is a powerful tool for programmers to detect a wide range of memory issues, including many memory leaks and many invalid memory accesses. It is usually used for debugging lower-level code like the OpenResty core (including the NGINX core), the Lua or LuaJIT VM, as well as Lua libraries involving C and/or FFI. Plain Lua code without FFI is considered "safe" and is not subject to most of these memory issues.

NOTE: Plain Lua code without FFI can still contain bugs that result in memory leaks, like inserting new keys into a globally shared Lua table without control, or appending data to a global Lua string indefinitely. Such memory leaks, however, cannot be detected by Valgrind, since the leaked memory is managed by Lua or LuaJIT's garbage collector.

`Test::Nginx` provides a testing mode that automatically uses Valgrind to run the existing tests and checks whether there are any memory issues that can be caught by Valgrind. This test mode is called the "Valgrind mode". To enable this mode, just set the `TEST_NGINX_USE_VALGRIND` environment variable, as in

[source,bash]
----
export TEST_NGINX_USE_VALGRIND=1
----

Then just run the test files as usual.

Let's consider the following example.

[source,test-base]
----
=== TEST 1: C strlen()
--- config
location = /t {
    content_by_lua_block {
        local ffi = require "ffi"
        local C = ffi.C

        if not pcall(function () return C.strlen end) then
            ffi.cdef[[
                size_t strlen(const char *s);
            ]]
        end

        local buf = ffi.new("char[3]", {48, 49, 0})
        local len = tonumber(C.strlen(buf))
        ngx.say("strlen: ", len)
    }
}
--- request
GET /t
--- response_body
strlen: 2
--- no_error_log
[error]
----

Here we use the `ffi.new` API to allocate a C string buffer 3 bytes long and initialize the buffer with the bytes 48, 49, and 0 (in decimal ASCII code). Then we call the standard C function `strlen` via the `ffi.C` API on our C string buffer.

It is worth noting that we need to first declare the `strlen` function prototype via the `ffi.cdef` API. Since we declare the C function in the request handler (`content_by_lua_block`), we should only declare it once instead of upon every request. To achieve that, we use a Lua `if` statement to check whether the symbol `strlen` is already declared (when `strlen` is not yet declared or defined, the Lua expression `C.strlen` throws a Lua exception, which makes the `pcall` call fail).
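A cleaner way to achieve the same run-once effect is to put the `ffi.cdef` call at the top level of a Lua module, which the `require` builtin loads only once per Lua VM. Below is a minimal sketch of this approach; the module name `my.strlen` and its file location are just our assumptions for illustration.

[source,lua]
----
-- file my/strlen.lua (a hypothetical module on the Lua search path):
-- the cdef runs exactly once, when the module is first require'd
local ffi = require "ffi"

ffi.cdef[[
size_t strlen(const char *s);
]]

local _M = {}

function _M.strlen(buf)
    -- buf must be a null-terminated C string or char array
    return tonumber(ffi.C.strlen(buf))
end

return _M
----

In the request handler we would then simply write `local my_strlen = require "my.strlen"` and call `my_strlen.strlen(buf)`.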
This example contains no memory issues, since we properly initialize our C string buffer by setting the null terminator character (`\0`) at the end of our C string. The C function `strlen` should correctly report back the length of the string, which is `2`, without reading beyond our buffer boundary. Now we run this test file with the Valgrind mode enabled, using the default OpenResty installation's `nginx`:

[source,bash]
----
export TEST_NGINX_USE_VALGRIND=1
export PATH=/usr/local/openresty/nginx/sbin:$PATH

prove t/a.t
----

There should be a lot of output. The first few lines should look like this:

....
t/a.t .. TEST 1: C strlen()
==7366== Invalid read of size 4
==7366==    at 0x546AE31: str_fastcmp (lj_str.c:57)
==7366==    by 0x546AE31: lj_str_new (lj_str.c:166)
==7366==    by 0x547903C: lua_setfield (lj_api.c:903)
==7366==    by 0x4CAD18: ngx_http_lua_cache_store_code (ngx_http_lua_cache.c:119)
==7366==    by 0x4CAB25: ngx_http_lua_cache_loadbuffer (ngx_http_lua_cache.c:187)
==7366==    by 0x4CB61A: ngx_http_lua_content_handler_inline (ngx_http_lua_contentby.c:300)
....

Ouch! Valgrind reports an invalid memory read. Fortunately, it is just a false positive caused by an optimization inside the LuaJIT VM when it creates a new Lua string. The LuaJIT code repository maintains a file named https://github.com/LuaJIT/LuaJIT/blob/master/src/lj.supp[lj.supp] that lists all the known Valgrind false positives and can be used to suppress these messages. We can simply copy that file over and rename it to `valgrind.suppress` in the current working directory. `Test::Nginx` will then automatically feed this `valgrind.suppress` file into Valgrind while running the tests in Valgrind mode. Let's try that:

[source,bash]
----
cp -i /path/to/luajit-2.0/src/lj.supp ./valgrind.suppress
prove t/a.t
----

This time, the test scaffold is calm:

....
t/a.t .. TEST 1: C strlen()
t/a.t .. ok
All tests successful.
Files=1, Tests=3,  2 wallclock secs ( 0.01 usr  0.00 sys +  1.51 cusr  0.06 csys =  1.58 CPU)
Result: PASS
....

We might encounter other Valgrind false positives, like some of those in the NGINX core or in the OpenSSL library. We can add those to the `valgrind.suppress` file as needed. The `Test::Nginx` test scaffold always outputs suppression rules that can be added directly to the suppression file. For the example above, the last few lines of the output look like below.

....
{

   Memcheck:Addr4
   fun:str_fastcmp
   fun:lj_str_new
   fun:lua_setfield
   fun:ngx_http_lua_cache_store_code
   fun:ngx_http_lua_cache_loadbuffer
   fun:ngx_http_lua_content_handler_inline
   fun:ngx_http_core_content_phase
   fun:ngx_http_core_run_phases
   fun:ngx_http_process_request
   fun:ngx_http_process_request_line
   fun:ngx_epoll_process_events
   fun:ngx_process_events_and_timers
   fun:ngx_single_process_cycle
   fun:main
}
t/a.t .. ok
All tests successful.
Files=1, Tests=3,  2 wallclock secs ( 0.01 usr  0.00 sys +  1.47 cusr  0.07 csys =  1.55 CPU)
Result: PASS
....

The suppression rule generated is everything between the curly braces, including the braces themselves. We could simply copy and paste this rule into the `valgrind.suppress` file. It is worth mentioning, however, that we can make this rule more general by excluding the C function frames belonging to the NGINX core and the ngx_lua module (near the bottom of the rule), since this false positive is related to LuaJIT only.
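For example, a generalized variant might keep only the LuaJIT frames and let the rest match anything. In the sketch below, the rule name and the trailing `...` wildcard (which, in Valgrind suppression files, matches any number of additional calling frames) are our own additions rather than tool output:

....
{
   lj_str_new_false_positive
   Memcheck:Addr4
   fun:str_fastcmp
   fun:lj_str_new
   ...
}
....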
Let's continue our experiment with our current example. Now we edit our test case and change the following line

[source,lua]
----
local buf = ffi.new("char[3]", {48, 49, 0})
----

to

[source,lua]
----
local buf = ffi.new("char[3]", {48, 49, 50})
----

That is, we replace the null character (with ASCII code 0) with a non-null character whose ASCII code is 50. This change leaves our C string buffer without any null terminator, so calling `strlen` on it will result in memory reads beyond our buffer boundary.

Unfortunately, running this edited test file fails to yield any Valgrind error report about this memory issue:

....
t/a.t .. TEST 1: C strlen()
t/a.t .. 1/?
#   Failed test 'TEST 1: C strlen() - response_body - response is expected (repeated req 0, req 0)'
#   at /home/agentzh/git/lua-nginx-module/../test-nginx/lib/Test/Nginx/Socket.pm line 1346.
#          got: "strlen: 4\x{0a}"
#       length: 10
#     expected: "strlen: 2\x{0a}"
#       length: 10
#     strings begin to differ at char 9 (line 1 column 9)
# Looks like you failed 1 test of 3.
....

The response body check fails as expected. This time `strlen` returns 4, which is larger than our buffer size, 3. This is a clear indication of a memory buffer over-read. So why does Valgrind fail to catch this?

To answer this question, we need some knowledge about how LuaJIT allocates memory. By default, LuaJIT uses its own memory allocator atop the system allocator (usually provided by the standard C library). For performance reasons, LuaJIT pre-allocates much larger memory blocks than are actually requested. Because Valgrind has no knowledge about LuaJIT's own allocator, nor about Lua user-level buffer boundary definitions, it can be cheated and can get confused.

To remove this limitation, we can force LuaJIT to use the system allocator instead of its own. To achieve this, we need to build LuaJIT with special compilation options like below.

[source,bash]
----
make CCDEBUG=-g XCFLAGS='-DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC'
----

The most important option is `-DLUAJIT_USE_SYSMALLOC`, which forces LuaJIT to use the system allocator.
The other options are important for our debugging as well: the `CCDEBUG=-g` option enables debug symbols in the LuaJIT binary, while `-DLUAJIT_USE_VALGRIND` enables some other special collaboration with Valgrind inside the LuaJIT VM.

If we are using the OpenResty bundle, we can simply build a special version of OpenResty like below:

....
./configure \
    --prefix=/opt/openresty-valgrind \
    --with-luajit-xcflags='-DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC' \
    --with-debug \
    -j4
make -j4
sudo make install
....

This will build and install a special debug version of OpenResty for Valgrind checks under the file system location `/opt/openresty-valgrind`.

NOTE: There are some other special LuaJIT build options that can further help us, like `-DLUA_USE_APICHECK` and `-DLUA_USE_ASSERT`, but they are beyond the scope of our current example.

Now let's try running our previous buggy example with this special OpenResty and Valgrind:

[source,bash]
----
export TEST_NGINX_USE_VALGRIND=1
export PATH=/opt/openresty-valgrind/nginx/sbin:$PATH

prove t/a.t
----

This time Valgrind succeeds in catching the memory bug!

....
t/a.t .. TEST 1: C strlen()
==8128== Invalid read of size 1
==8128==    at 0x4C2BC34: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==8128==    by 0x5467217: lj_vm_ffi_call (in /opt/luajit21sysm/lib/libluajit-5.1.so.2.1.0)
==8128==    by 0x54B5DE7: lj_ccall_func (lj_ccall.c:1136)
==8128==    by 0x54CAD45: lj_cf_ffi_meta___call (lib_ffi.c:230)
==8128==    by 0x5465147: lj_BC_FUNCC (in /opt/luajit21sysm/lib/libluajit-5.1.so.2.1.0)
==8128==    by 0x4C72BC: ngx_http_lua_run_thread (ngx_http_lua_util.c:1015)
==8128==    by 0x4CB039: ngx_http_lua_content_by_chunk (ngx_http_lua_contentby.c:120)
...
....

We omit the rest of the output for brevity. Here Valgrind reports an invalid read of one byte of memory in the C function `strlen`, which is exactly what we'd expect. Mission accomplished!

NOTE: LuaJIT built with the system allocator should be used with Valgrind only. On computer architectures like x86_64, such a LuaJIT build may not even start up.

From this example, we can see how application-level memory allocation optimizations and management can compromise the effectiveness of Valgrind's memory issue detection. Similarly, the NGINX core also comes with its own memory allocator, via "memory pools". Such memory pools tend to allocate page-sized memory blocks even for small allocations and thus can also adversely affect Valgrind's detection. OpenResty provides a https://github.com/openresty/no-pool-nginx[patch] for the NGINX core that disables the memory pool optimizations altogether. The easiest way to use the patch is to specify the `--with-no-pool-patch` option when running the `./configure` script while building OpenResty.

NOTE: Since NGINX 1.9.13, NGINX provides a C macro, `NGX_DEBUG_PALLOC`, which, when set, can be used to achieve a similar effect to OpenResty's "no-pool patch". Still, the "no-pool patch" is much more aggressive and thorough, and can help find more potential memory problems in NGINX-related C code.
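For reference, one way to set that macro when building from source might look like the following; the exact `--with-cc-opt` value is our assumption rather than an officially documented recipe:

[source,bash]
----
# build with NGINX's own NGX_DEBUG_PALLOC macro instead of the no-pool patch
./configure --with-cc-opt='-DNGX_DEBUG_PALLOC=1' --with-debug
----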
This Valgrind mode is used by OpenResty developers on a daily basis and has helped locate countless memory management bugs in the OpenResty C and Lua/FFI code base. Interestingly, this test mode has also located memory issues in the official NGINX core and the official LuaJIT core. Unlike when analyzing core dumps, Valgrind can almost always find the first scene of the memory offense, and studying the memory error reports usually gives rise to immediate code fixes.

As with all other tools, Valgrind has its own limitations and cannot find all memory issues, even when we carefully disable application-level memory allocators as demonstrated above. For example,

1. memory issues on the C runtime stack cannot be caught by Valgrind (at least not by Valgrind's default memcheck tool);
2. memory leaks in application-level resource managers cannot be detected either. For example, memory leaks in NGINX's global memory pool won't get detected, since NGINX always destroys all the memory pools upon process termination. Similarly, an ever-growing Lua object managed by the Lua garbage collector (GC) won't get caught either, since the Lua VM always frees all its GC-managed objects in the end.

Understanding the weaknesses of a tool is as important as understanding its strengths. We shall see an alternative approach in the next section for detecting leaks in application-level memory managers.

NOTE: Google's https://github.com/google/sanitizers/wiki/AddressSanitizer[AddressSanitizer] tool can also be used to detect memory issues. Compared to Valgrind, it has the advantages of running much faster and of detecting memory issues on the C runtime stack as well. Unfortunately, it has its own limitations too. For example, it requires special C/C++ compiler options to rebuild all the related C code and C libraries for the best results. Also, it cannot find problems in dynamically generated machine code (like that emitted by a Just-in-Time compiler) or in hand-written assembly code (like LuaJIT's Lua interpreter). Therefore, OpenResty developers use Valgrind much more often.

=== Naive Memory Leak Check Mode

As we have seen in the previous section, Valgrind is great at detecting a wide range of memory leaks and invalid memory accesses. But Valgrind also suffers from limitations in detecting leaks in application-level memory managers such as garbage collectors (GC) and memory pools, which are quite common in reality. To see this, let's consider the following simple example, which leaks in LuaJIT's GC-managed memory.

[source,test-base]
----
=== TEST 1:
--- config
location = /t {
    content_by_lua_block {
        package.path = "/path/to/some/lib/?.lua;" .. package.path
        ngx.say("ok")
    }
}
--- request
GET /t
--- response_body
ok
--- no_error_log
[error]
----

This example demonstrates a common mistake made by many OpenResty beginners. The `package.path` field specifies the search paths used by the `require` builtin function for loading pure Lua modules. This string value is hooked up in the global Lua table `package`, which has the same lifetime as the current Lua virtual machine (VM) instance. Since Lua VM instances usually have the same lifetime as NGINX worker processes (unless the `lua_code_cache` directive is turned off in `nginx.conf`), prepending a new string to the value of `package.path` upon every request in a request handler like `content_by_lua_block` clearly results in a memory leak.
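A minimal sketch of the non-leaking alternatives is given below; the path shown is just a placeholder. Either set the search path with the dedicated `lua_package_path` directive, or prepend to `package.path` exactly once, in `init_by_lua_block`, which runs only at configuration load time:

[source,nginx]
----
# in nginx.conf: use the dedicated directive (the trailing ";;" keeps
# the default search paths) ...
lua_package_path "/path/to/some/lib/?.lua;;";

# ... or mutate package.path just once, at server start
init_by_lua_block {
    package.path = "/path/to/some/lib/?.lua;" .. package.path
}
----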
Unfortunately, Valgrind cannot find this leak at all, since the leak happens in the GC-managed memory inside the Lua VM: all such leaked memory always gets released upon GC destruction (or VM destruction) before the current process exits, which fools Valgrind into thinking that there are no leaks at all. Interested readers can try running this example in the "Valgrind test mode" explained in the previous section.

To address this limitation of Valgrind, `Test::Nginx::Socket` introduces a new test mode called the "naive memory leak check mode", or just "check leak mode" for short. In this mode, the test scaffold does the following things:

1. it loads the NGINX server with many copies of the test request specified in the test block, in a way similar to the "benchmark test mode" we discussed earlier,
2. at the same time, it periodically polls and records the memory footprint of the NGINX worker process with the system command `ps`,
3. and finally it analyzes the memory usage data points collected in step 2 by finding the slope (`k`) of a line that best fits those data points.

To make use of this mode, just set the `TEST_NGINX_CHECK_LEAK=1` environment variable before running existing test files, as in

[source,bash]
----
export TEST_NGINX_CHECK_LEAK=1
prove t/a.t
----

Assuming the `t/a.t` test file contains the test block example given above, we should get output similar to the following.

....
t/a.t .. TEST 1:
LeakTest: [3740 3756 3620 3624 4180 3808 4044 4240 4272 4888 3876 3520 4516
4368 5216 4796 4420 4508 4068 5336 5220 3888 4196 4544 4100 3696 5028 5080
4580 3936 5236 4308 5320 4748 5464 4032 5492 4996 4588 4932 4632 6388 5228
5516 4680 5348 5420 5964 5436 5128 5720 6324 5700 4948 4312 6208 5192 5268
5600 4144 6556 4248 5648 6612 4044 5408 5120 5120 5740 6048 6412 5636 6488
5184 6036 5436 5808 4904 4980 6772 5148 7160 6576 6724 5024 6768 7264 5540
5700 5284 5244 4512 5752 6752 6868 6064 4940 5636 6388 7468]
LeakTest: k=22.6
t/a.t .. ok
All tests successful.
Files=1, Tests=3,  6 wallclock secs ( 0.01 usr  0.01 sys +  0.61 cusr  1.68 csys =  2.31 CPU)
Result: PASS
....

The special output lines from this test mode carry the prefix `LeakTest:`. The first such line lists all the data points for the memory footprint size, in kilobytes (KB), collected every 0.02 seconds. The second line gives the slope (`k`) of the line that best fits these data points. In this case, `k` equals 22.6.
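The "best fit" here is just an ordinary least-squares line fit over the samples. A toy Lua illustration of the idea (our own sketch, not the scaffold's actual implementation, which is in Perl; we fit against the sample index, so `k` comes out in KB per sampling interval):

[source,lua]
----
-- least-squares slope over memory footprint samples (in KB)
local function slope(samples)
    local n = #samples
    local sx, sy, sxx, sxy = 0, 0, 0, 0
    for i, y in ipairs(samples) do
        sx, sy = sx + i, sy + y
        sxx, sxy = sxx + i * i, sxy + i * y
    end
    return (n * sxy - sx * sy) / (n * sxx - sx * sx)
end
----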
The slope of the line can usually serve as an indicator of the speed of memory leaking: the larger the slope, the faster the leak. A two-digit slope here is very likely an indication of a memory leak. To be sure, we can plot these data points in a graph using the `gnuplot` tool.

image::../images/check-leak-pkg-path.png[]

There are quite a few fluctuations in the graph. This is due to how the garbage collector normally behaves. It usually allocates page-sized or even larger memory blocks than are actually requested, for performance reasons, and it delays the release of unused memory blocks because of its sweep phase, among other things. Still, it is clear that the memory usage is going up overall.

We can try forcing a full garbage collection cycle upon the entry of our request handler, like this:

[source,nginx]
----
content_by_lua_block {
    collectgarbage()
    package.path = "/path/to/some/lib/?.lua;" .. package.path
    ngx.say("ok")
}
----

This way we can ensure that there is _no_ memory garbage hanging around after the point where we call the Lua builtin function `collectgarbage()`.

Now the output looks like this:

....
t/a.t .. TEST 1:
LeakTest: [2464 2464 2360 2464 2232 2520 2380 2536 2440 2320 2300 2464
2576 2584 2540 2408 2608 2420 2596 2596 2332 2648 2660 2460 2680 2320
2688 2616 2332 2628 2408 2728 2716 2380 2752 2360 2768 2376 2372 2376
2732 2800 2808 2816 2464 2396 2668 2688 2848 2672 2412 2416 2536 2420
2424 2632 2904 2668 2912 2564 2724 2448 2932 2944 2856 2960 2616 2672
2976 2620 2984 2600 2808 2980 3004 2996 3236 3012 2724 3168 3072 3536
3260 3412 3028 2700 2480 3188 2808 3536 2640 3056 2764 3052 3440 3308
3064 2680 2828 3372]
LeakTest: k=7.4
t/a.t .. ok
All tests successful.
Files=1, Tests=3,  6 wallclock secs ( 0.02 usr  0.00 sys +  0.62 cusr  1.75 csys =  2.39 CPU)
Result: PASS
....

We can see that this time the slope of the best-fitting line is much smaller, but still well above 0.

The line graph is now much smoother, as expected:

image::../images/check-leak-pkg-path-gc.png[]

And we can see that the line still goes upward fairly steadily over time.

Large fluctuations and variations in the memory footprint may create noise in our data samples and even result in false positives. We already saw how big fluctuations can lead to large slopes in the fitted line. It is usually a good idea to force full garbage collection cycles frequently to reduce such noise, at least in GC-managed memory. The `collectgarbage()` function, however, is quite expensive in terms of CPU resources and may hurt overall performance very badly. Make sure you do not call it often (like on every request) in the "benchmark test mode" introduced above, let alone in production applications.

In reality, this brute-force "check leak" test mode has helped catch quite a lot of real memory leaks in OpenResty's test suites over the years. Most of those leaks slipped past the Valgrind test mode, since they happened in GC-managed memory or in NGINX's memory pools.

NOTE: The NGINX "no-pool patch" mentioned in the previous section does not help here, since all the leaked memory blocks in the pools still get released before the process exits.

Nevertheless, this test mode has one big drawback. Unlike Valgrind, it cannot give any detailed information about the locations where the leaks (may) happen. All it reports are data samples and other metrics that verify just the _existence_ of a leak (at least to some extent).
We shall see in a later chapter how we can use "memory leak flame graphs" to overcome this limitation, even for leaks and big swings in GC-managed or pool-managed memory.

=== Mockeagain Mode

[[no-clean]]
=== Manual Debugging Mode

=== SystemTap Mode

--------------------------------------------------------------------------------
/testing/test-nginx.adoc:
--------------------------------------------------------------------------------
== Test::Nginx

link:https://metacpan.org/pod/Test::Nginx[Test::Nginx] is a test framework that drives test cases written for any code running atop NGINX, and also, naturally, for the NGINX core itself. It is written in Perl because of the rich testing facilities and toolchain accumulated in the Perl world over the years. Fortunately, the user does not really need to know Perl to write test cases atop this scaffold, since `Test::Nginx` provides a very simple notation for presenting the test cases in a specification-like format.

The simple test specification format, or language, used in `Test::Nginx` is just a dialect of the more general testing language provided by the link:https://metacpan.org/pod/distribution/Test-Base/lib/Test/Base.pod[Test::Base] testing module in the Perl world. In fact, `Test::Nginx` is just a subclass of `Test::Base` in the sense of object-oriented programming. This means that all the features offered by `Test::Base` are available in `Test::Nginx`, and `Test::Nginx` just provides handy primitives and notations that simplify testing in the NGINX and OpenResty context. The core idea of `Test::Base` is so useful that we have been using testing scaffolds based on `Test::Base` in many different projects, even including Haskell programs and Linux kernel modules. `Test::Nginx` is such an example we created for the NGINX and OpenResty world. A detailed discussion of the `Test::Base` framework itself is beyond the scope of this book, but we will introduce the important features of `Test::Base` that are inherited by `Test::Nginx` in the later sections.

`Test::Nginx` is distributed via link:http://www.cpan.org/[CPAN], the Comprehensive Perl Archive Network, just like most other Perl libraries. If you already have `perl` installed on your system (many Linux distributions ship with `perl` by default), then you can install `Test::Nginx` with the following simple command:

[source,bash]
----
cpan Test::Nginx
----

The first time the `cpan` utility is run, you may be prompted to configure it to fit your requirements. If you are unsure about those options, just choose the automatic configuration option (if available) or accept all the default settings.

`Test::Nginx` provides several different testing classes for different user requirements. The most frequently used one is `Test::Nginx::Socket`. The rest of this chapter will focus on this testing class and its subclasses. We will use the names `Test::Nginx` and `Test::Nginx::Socket` interchangeably from now on to mean the `Test::Nginx::Socket` test module and its subclasses, unless otherwise specified.

// Alas. GitBook does not support sidebar blocks in its AsciiDoc render.
// .Another Test::Nginx

NOTE: There is actually another, different testing scaffold called `Test::Nginx`, created by Maxim Dounin and maintained by the official NGINX team. That testing module is shipped with the link:http://hg.nginx.org/nginx-tests/file/tip[official NGINX test suite] and has no relationship with our `Test::Nginx`, except that both are meant to test NGINX-related code. The NGINX team's `Test::Nginx` requires the user to code directly in Perl to convey all the test cases, which means that tests written for their `Test::Nginx` are not data-driven and require decent knowledge of Perl programming.

--------------------------------------------------------------------------------
/testing/test-suite-layout.adoc:
--------------------------------------------------------------------------------
== Test Suite Layout

Projects using `Test::Nginx` to drive their test suites usually have a common directory layout and common test file name patterns to organize their tests. This makes it easy for the user to reason about the location of the test suite in a project source tree and the usage of the tests. It is not really required, however, to use this common convention; it is just highly recommended.

By convention, such projects have a `t/` directory at the root of their source tree where the test files reside. Each test file contains test cases that are closely related in some way and has the file extension `.t` to make it easily identifiable as a "test file". Below is the directory tree structure of a real-world test suite inside the link:https://github.com/openresty/headers-more-nginx-module[headers-more-nginx-module] project:

....
└── t
    ├── bug.t
    ├── builtin.t
    ├── eval.t
    ├── input-conn.t
    ├── input-cookie.t
    ├── input-ua.t
    ├── input.t
    ├── phase.t
    ├── sanity.t
    ├── subrequest.t
    ├── unused.t
    └── vars.t
....

When you have many test files, you can also group them further with sub-directories under `t/`. For example, in the link:https://github.com/openresty/lua-nginx-module[lua-nginx-module] project, we have sub-directories like `023-rewrite/` and `024-access/` under its `t/` directory.

In essence, each `.t` file is a Perl script file, runnable by either `perl` or Perl's universal test harness tool named link:http://perldoc.perl.org/prove.html[prove]. We usually use the `prove` command-line utility to run such `.t` files to obtain test results.
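For instance, a typical invocation (our illustration, using the tree above) would be one of

[source,bash]
----
prove t/sanity.t    # run a single test file
prove -r t          # run all test files under t/, recursively
----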
Although `.t` files are Perl scripts per se, they usually do not contain much Perl code at all. Instead, all of the test cases are declared as cleanly formatted "data" in these `.t` files.

NOTE: The test suite layout convention we use here has also been used by the Perl community for many years. Because `Test::Nginx` is written in Perl and reuses Perl's testing toolchain, it makes sense for us to simply follow that convention in the NGINX and OpenResty world as well.

--------------------------------------------------------------------------------
/testing/testing-erroneous-cases.adoc:
--------------------------------------------------------------------------------
== Testing Erroneous Cases

Most robust software invests heavily in error handling, and naturally test designers focus on corner cases and erroneous scenarios to maximize the code coverage of the tests.

The previous section introduced the data sections provided by `Test::Nginx::Socket` for examining messages in the NGINX error log file, which is a powerful tool for checking for errors in the tests. Sometimes we want to test more extreme cases, like server startup failures, malformed responses, bad requests, and various kinds of timeout errors.

=== Expected Server Startup Failures

Sometimes the NGINX server is expected to fail to start, for example when an NGINX configuration directive is used in the wrong way or when some hard prerequisites are not met in early initialization. If we want to test such cases, especially the error log messages generated for such failures, we can use the `must_die` data section in our test block to signal the test scaffold that the NGINX server is _expected_ to die upon startup in this very block.

The following example tests the case of throwing a Lua exception in the context of `init_by_lua_block` of the `ngx_http_lua` module.

[source,test-base]
----
=== TEST 1: dying in init_by_lua_block
--- http_config
init_by_lua_block {
    error("I am dying!")
}
--- config
--- must_die
--- error_log
I am dying!
----

The Lua code in `init_by_lua_block` runs in the NGINX master process during the NGINX configuration file loading process. Throwing a Lua exception there aborts the NGINX startup process immediately. The presence of the `must_die` section tells the test scaffold to treat an NGINX server startup failure as a test pass and a successful startup as a test failure. The `error_log` section there ensures that the server fails in the expected way, that is, due to the "I am dying!" exception.

If we removed the `--- must_die` line from the test block above, the test file would not even run to completion:

....
t/a.t .. nginx: [error] init_by_lua error: init_by_lua:2: I am dying!
stack traceback:
	[C]: in function 'error'
	init_by_lua:2: in main chunk
Bailout called.  Further testing stopped:  TEST 1: dying in init_by_lua_block
- Cannot start nginx using command
"nginx -p .../t/servroot/ -c .../t/servroot/conf/nginx.conf > /dev/null".
....

By default, the test scaffold treats NGINX server startup failures as fatal errors while running the tests. The `must_die` section, however, turns such a failure into a normal test checkup.

=== Expected Malformed Responses

HTTP responses should always be well-formed, but unfortunately the real world is complicated and there do exist cases where responses can be malformed, like being truncated due to some unexpected cause. As test designers, we always want to test such strange, abnormal cases, among other things.

Naturally, `Test::Nginx::Socket` treats malformed responses from the NGINX server as an error, since by default it always performs sanity checks on the responses it receives from the test server.
But for test cases where we expect a malformed or truncated response from the server, we should explicitly tell the test scaffold to disable these response sanity checks, via the `ignore_response` data section.

Consider the following example, which closes the downstream connection immediately after sending out the first part of the response body.

[source,test-base]
----
=== TEST 1: aborting response body stream
--- config
location = /t {
    content_by_lua_block {
        ngx.print("hello")
        ngx.flush(true)
        ngx.exit(444)
    }
}
--- request
GET /t
--- ignore_response
--- no_error_log
[error]
----

The `ngx.flush(true)` call in the `content_by_lua_block` handler ensures that any response body data buffered by NGINX is indeed flushed out to the system socket send buffers, which for local sockets also usually means flushing the output data to the client side. The `ngx.exit(444)` call is then used to immediately close the current downstream connection, so it simply interrupts the response body stream in the HTTP 1.1 chunked encoding. The important part is the `--- ignore_response` line, which tells the test scaffold not to complain about the interrupted response data stream. If the test block above went without this line, we would see the following test failure while running `prove`:

....
# Failed test 'TEST 1: aborting response body stream - no last chunk found - 5
# hello
# '
....

Obviously, the test scaffold complains about the lack of the "last chunk", which is used to indicate the end of a chunked-encoded data stream. Because the server aborts the connection in the middle of sending the response body, there is no chance for the server to properly send a well-formed response body in the chunked encoding.

=== Testing Timeout Errors

Timeout errors are among the most common network issues in the real world. Timeouts might happen for many reasons, like packets being dropped on the wire or at the other end, connectivity problems, or expensive operations blocking the event loop. Most applications want timeout protection that prevents them from waiting for too long.

Testing and emulating timeout errors is often tricky in a self-contained unit test framework, since most of the network traffic initiated by the test cases is local only, that is, going through the local "loopback" device, which has perfect latency and throughput. We will examine some tricks that can be used to reliably emulate various kinds of timeout errors in the test suite.

==== Connecting Timeouts

Connecting timeouts in the context of the TCP protocol are the easiest to emulate. Just point the connecting target to a remote address that always drops any incoming (`SYN`) packets, via a firewall rule or something similar. We provide such a "black-hole service" at port 12345 of the `agentzh.org` host. You can make use of it if your test running environment allows public network access.
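If public network access is not available, a similar local black hole can be set up with a firewall rule of your own. For example, an `iptables` rule like the following (our illustration, requiring root privileges) makes connections to local port 12345 time out by silently dropping the packets:

[source,bash]
----
# drop all TCP packets addressed to port 12345, including loopback traffic
sudo iptables -A INPUT -p tcp --dport 12345 -j DROP
----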
Consider the following test case.

[source,test-base]
----
=== TEST 1: connect timeout
--- config
resolver 8.8.8.8;
resolver_timeout 1s;

location = /t {
    content_by_lua_block {
        local sock = ngx.socket.tcp()
        sock:settimeout(100)  -- ms
        local ok, err = sock:connect("agentzh.org", 12345)
        if not ok then
            ngx.log(ngx.ERR, "failed to connect: ", err)
            return ngx.exit(500)
        end
        ngx.say("ok")
    }
}
--- request
GET /t
--- response_body_like: 500 Internal Server Error
--- error_code: 500
--- error_log
failed to connect: timeout
----

We have to configure the `resolver` directive here because we need to resolve the domain name `agentzh.org` at request time (in Lua). We check the NGINX error log via the `error_log` section for the error string returned by the cosocket object's `connect()` method.

It is important to use a relatively small timeout threshold in the test cases so that we do not have to wait too long for the test run to complete. Tests are meant to be run very often: the more frequently we run the tests, the more value we may gain from automating them.

It is worth mentioning that the test scaffold's HTTP client has a timeout threshold as well, which is 3 seconds by default. If your test request takes more than 3 seconds, you get an error message in the test report:

....
ERROR: client socket timed out - TEST 1: connect timeout
....

This message is what we would get if we commented out the `settimeout` call and relied on the default 60-second timeout threshold of cosockets.

We can change the default timeout threshold used by the test scaffold client by giving a value in the `timeout` data section, as in

[source,test-base]
----
--- timeout: 10
----

Now we have 10 seconds of timeout protection instead of 3.

==== Reading Timeouts

Emulating reading timeouts is also easy. Just try reading from a wire where the other end never writes anything but still keeps the connection alive. Consider the following example:

[source,test-base]
----
=== TEST 1: read timeout
--- main_config
stream {
    server {
        listen 5678;
        content_by_lua_block {
            ngx.sleep(10)  -- 10 sec
        }
    }
}
--- config
lua_socket_log_errors off;
location = /t {
    content_by_lua_block {
        local sock = ngx.socket.tcp()
        sock:settimeout(100)  -- ms
        assert(sock:connect("127.0.0.1", 5678))
        ngx.say("connected.")
        local data, err = sock:receive()  -- try to read a line
        if not data then
            ngx.say("failed to receive: ", err)
        else
            ngx.say("received: ", data)
        end
    }
}
--- request
GET /t
--- response_body
connected.
failed to receive: timeout
--- no_error_log
[error]
----

Here we use the `main_config` data section to define a TCP server of our own, listening on port 5678 of the local host. This is a mocked-up server that can establish new TCP connections but never writes anything back; it just sleeps for 10 seconds before closing the session.
Note that we are using the link:https://github.com/openresty/stream-lua-nginx-module#readme[ngx_stream_lua] module in the `stream {}` configuration block. Our `location = /t`, which is the main target of this test case, connects to the mock server and tries to read a line from the wire. Apparently, the 100ms timeout threshold on the client side is reached first, and we can successfully exercise the error handling code for the reading timeout error.

==== Sending Timeouts

Triggering sending timeouts is much harder than triggering connecting or reading timeouts. This is due to the asynchronous nature of writing.

For performance reasons, there exist at least two layers of buffers for writes:

. the userland send buffers inside the NGINX core, and
. the socket send buffers in the operating system kernel's TCP/IP stack implementation.

To make the situation even worse, there also exists at least a system-level receive buffer layer on the other end of the connection.

The most naive way to make a send timeout error happen is to fill up all these buffers along the data sending chain while ensuring that the other end never actually reads anything on the application level. Thus, buffering makes a sending timeout particularly hard to reproduce and emulate in a typical testing and development environment with a small amount of (test) payload.

Fortunately, there is a userland trick that can intercept the libc wrappers for the actual system calls for socket I/O and do funny things that could otherwise be very difficult to achieve. Our link:https://github.com/openresty/mockeagain[mockeagain] library implements such a trick and supports emulating timeout errors at user-specified precise positions in the output data stream.

The following example triggers a sending timeout right after sending out the "hello, world" string as part of the response body.

[source,test-base]
----
=== TEST 1: send timeout
--- config
send_timeout 100ms;
postpone_output 1;

location = /t {
    content_by_lua_block {
        ngx.say("hi bob!")
        local ok, err = ngx.flush(true)
        if not ok then
            ngx.log(ngx.ERR, "flush #1 failed: ", err)
            return
        end

        ngx.say("hello, world!")
        local ok, err = ngx.flush(true)
        if not ok then
            ngx.log(ngx.ERR, "flush #2 failed: ", err)
            return
        end
    }
}
--- request
GET /t
--- ignore_response
--- error_log
flush #2 failed: timeout
--- no_error_log
flush #1 failed
----

Note the `send_timeout` directive, which configures the sending timeout for NGINX downstream writing operations. Here we use a small threshold, `100ms`, to ensure that our test case runs fast and never hits the default 3-second timeout threshold of the test scaffold client. The `postpone_output 1` directive effectively turns off the "postpone output buffer" of NGINX, which might otherwise hold our output data before it even reaches the libc system call wrappers. Finally, the `ngx.flush()` calls in Lua ensure that _no_ buffers along the NGINX output filter chain hold our data without sending it downstream.

The most naive way to make a send timeout error happen is therefore to
fill up all these buffers along the data sending chain while ensuring
that the other end never actually reads anything on the application
level. Thus, buffering makes a sending timeout particularly hard to
reproduce and emulate in a typical testing and development environment
with a small amount of (test) payload.

Fortunately, there is a userland trick that can intercept the libc
wrappers for the actual system calls for socket I/O and do funny things
that could otherwise be very difficult to achieve. Our
link:https://github.com/openresty/mockeagain[mockeagain]
library implements such a trick and supports emulating timeout errors
at precise, user-specified positions in the output data stream.

The following example triggers a sending timeout right after the
"hello, world" string in the response body.

[source,test-base]
----
=== TEST 1: send timeout
--- config
    send_timeout 100ms;
    postpone_output 1;

    location = /t {
        content_by_lua_block {
            ngx.say("hi bob!")
            local ok, err = ngx.flush(true)
            if not ok then
                ngx.log(ngx.ERR, "flush #1 failed: ", err)
                return
            end

            ngx.say("hello, world!")
            local ok, err = ngx.flush(true)
            if not ok then
                ngx.log(ngx.ERR, "flush #2 failed: ", err)
                return
            end
        }
    }
--- request
GET /t
--- ignore_response
--- error_log
flush #2 failed: timeout
--- no_error_log
flush #1 failed
----

Note the `send_timeout` directive, which configures the sending timeout
for NGINX downstream writing operations. Here we use a small threshold,
`100ms`, to ensure that our test case runs fast and never hits the
default 3-second timeout threshold of the test scaffold client. The
`postpone_output 1` directive effectively turns off the "postpone output
buffer" of NGINX, which may hold our output data before it even reaches
the libc system call wrappers. Finally, the `ngx.flush(true)` calls in
Lua ensure that _no_ buffer along the NGINX output filter chain holds
our data without sending it downstream.

Before running this test case, we have to set the following system
environment variables (in the bash syntax):

[source,bash]
----
export LD_PRELOAD="mockeagain.so"
export MOCKEAGAIN="w"
export MOCKEAGAIN_WRITE_TIMEOUT_PATTERN='hello, world'
export TEST_NGINX_EVENT_TYPE='poll'
----

Let's go through them one by one:

. The `LD_PRELOAD="mockeagain.so"` assignment pre-loads the `mockeagain`
library into the running processes, including, of course, the NGINX
server process started by the test scaffold. You may also need to set
the `LD_LIBRARY_PATH` environment variable to include the directory path
of the `mockeagain.so` file if the file is not in the default system
library search paths.
. The `MOCKEAGAIN="w"` assignment enables the `mockeagain` library to
intercept and manipulate the write operations on nonblocking sockets.
. The `MOCKEAGAIN_WRITE_TIMEOUT_PATTERN='hello, world'` assignment makes
`mockeagain` refuse to send any more data after seeing the specified
string pattern, `hello, world`, in the output data stream.
. The `TEST_NGINX_EVENT_TYPE='poll'` setting makes the NGINX server use
the `poll` event API instead of the system default (`epoll` on Linux,
for example). This is because `mockeagain` only supports `poll` events
for now. Behind the scenes, this environment variable just makes the
test scaffold generate the following `nginx.conf` snippet.
+
[source,nginx]
----
events {
    use poll;
}
----
+
You need to ensure, however, that your NGINX or OpenResty build has
`poll` support compiled in. Basically, the build should use the
`./configure` option `--with-poll_module`.
+
We have plans to add epoll edge-triggering support to `mockeagain` in
the future. Hopefully, by that time, we will no longer have to use
`poll`, at least on Linux.

Now you should see the test block above pass!

Ideally, we could set these environment variables directly inside the
test file, because this test case will never pass without them anyway.
We can add the following Perl code snippet to the very beginning of the
test file prologue (yes, even before the `use` statement):

[source,Perl]
----
BEGIN {
    $ENV{LD_PRELOAD} = "mockeagain.so";
    $ENV{MOCKEAGAIN} = "w";
    $ENV{MOCKEAGAIN_WRITE_TIMEOUT_PATTERN} = 'hello, world';
    $ENV{TEST_NGINX_EVENT_TYPE} = 'poll';
}
----

The `BEGIN {}` block is required here because it runs before Perl loads
any modules, especially `Test::Nginx::Socket`, in which we want these
environment variables to take effect.

It is a bad idea, however, to hard-code the path of the `mockeagain.so`
file in the test file itself, since different test runners might put
`mockeagain` in different places in the file system. It is better to let
the test runner configure the `LD_LIBRARY_PATH` environment variable
with the actual library path from the outside.
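
For example, a test runner might use a small wrapper script along the
following lines. This is only a sketch; the `mockeagain` path here is
purely illustrative and must match wherever the library actually lives
on the build machine:

[source,bash]
----
#!/usr/bin/env bash
# hypothetical wrapper used by the test runner to locate mockeagain.so
export LD_LIBRARY_PATH="$HOME/git/mockeagain:$LD_LIBRARY_PATH"
exec prove -r t/
----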

===== Mockeagain Troubleshooting

If you are seeing the following error while running the test case above,

....
ERROR: ld.so: object 'mockeagain.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
....

then you should check whether you have added the directory path of your
`mockeagain.so` library to the `LD_LIBRARY_PATH` environment variable.
On my system, for example, I have

....
export LD_LIBRARY_PATH=$HOME/git/mockeagain:$LD_LIBRARY_PATH
....

If you are seeing an error similar to the following,

....
nginx: [emerg] invalid event type "poll" in .../t/servroot/conf/nginx.conf:76
....

then your NGINX or OpenResty build does not have the poll module
compiled in, and you should rebuild your NGINX or OpenResty by passing
the `--with-poll_module` option to the `./configure` command line.

We will revisit the `mockeagain` library in the `Test Modes` section soon.

=== Mocking Bad Backend Responses

Earlier in this section, we have already seen examples that use the
link:https://github.com/openresty/stream-lua-nginx-module#readme[ngx_stream_lua]
module to mock a backend TCP server that accepts new incoming connections
but never writes anything back. We can, of course, do fancier things in
such a mocked server, like emulating a buggy or malicious backend server
that returns bad response data.

For example, while testing a Memcached client, it would be pretty hard
to trigger error responses or ill-formed responses with a real Memcached
server. Now it is trivial with mocking:

[source,test-base]
----
=== TEST 1: get() results in an error response
--- main_config
    stream {
        server {
            listen 1921;
            content_by_lua_block {
                ngx.print("SERVER_ERROR\r\n")
            }
        }
    }
--- config
    location /t {
        content_by_lua_block {
            local memcached = require "resty.memcached"
            local memc = memcached:new()

            assert(memc:connect("127.0.0.1", 1921))

            local res, flags, err = memc:get("dog")
            if not res then
                ngx.say("failed to get: ", err)
                return
            end

            ngx.say("get: ", res)
            memc:close()
        }
    }
--- request
GET /t
--- response_body
failed to get: SERVER_ERROR
--- no_error_log
[error]
----

Our mocked-up Memcached server can behave in any way that we like. Hooray!

NOTE: `Test::Nginx::Socket` provides the data sections `tcp_listen`,
`tcp_query`, `tcp_reply`, etc., to enable the builtin mock TCP server of
the test scaffold. You can use this facility when you do not want to
depend on the `ngx_stream_lua` module or the NGINX stream subsystem for
your test suite. Indeed, we relied solely on the builtin TCP server of
`Test::Nginx::Socket` before the `ngx_stream_lua` module was born.
Similarly, `Test::Nginx::Socket` offers a builtin UDP server via the
data sections `udp_listen`, `udp_query`, `udp_reply`, etc. You can refer
to the link:https://metacpan.org/pod/Test::Nginx::Socket[official
documentation] of `Test::Nginx::Socket` for more details.
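
For instance, the Memcached test above might be rewritten with the
builtin mock server along the following lines. This is only a sketch;
consult the official documentation for the exact semantics of the
`tcp_listen` and `tcp_reply` sections before relying on it.

[source,test-base]
----
=== TEST 2: get() results in an error response (builtin mock server)
--- config
    location /t {
        content_by_lua_block {
            local memcached = require "resty.memcached"
            local memc = memcached:new()

            assert(memc:connect("127.0.0.1", 1921))

            local res, flags, err = memc:get("dog")
            if not res then
                ngx.say("failed to get: ", err)
                return
            end

            ngx.say("get: ", res)
        }
    }
--- tcp_listen: 1921
--- tcp_reply eval
"SERVER_ERROR\r\n"
--- request
GET /t
--- response_body
failed to get: SERVER_ERROR
----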

=== Emulating Bad Clients

The `Test::Nginx::Socket` test framework provides special data sections
to help emulate ill-behaved HTTP clients.

==== Crafting Bad Requests

The `raw_request` data section can be used to specify arbitrary data for
the test request. It is often used with the `eval` section filter so
that we can easily encode special characters like `\r`. Let's look at
the following example.

[source,test-nginx]
----
=== TEST 1: missing the Host request header
--- config
    location = /t {
        return 200;
    }
--- raw_request eval
"GET /t HTTP/1.1\r
Connection: close\r
\r
"
--- response_body_like: 400 Bad Request
--- error_code: 400
----

So we easily construct a malformed request that does not have a `Host`
header, which results in a 400 response from the NGINX server, as
expected.

The `request` data section we have been using so far, on the other hand,
always ensures that a well-formed HTTP request is sent to the test
server.

==== Emulating Client Aborts

Client aborts are a very intriguing phenomenon in the web world.
Sometimes we want the server to continue processing even after the
client aborts the connection; on other occasions we just want to abort
the whole request handler immediately in such cases. Either way, we need
a robust way to emulate client aborts in our unit test cases.

We have already discussed the `timeout` data section, which can be used
to adjust the default timeout protection threshold used by the test
scaffold client. We can also use it to abort the connection prematurely;
a small timeout threshold is often desired for this purpose. To keep the
test scaffold from printing out an error on client timeout, we can
specify the `abort` data section to signal the test scaffold. Let's put
these together in a simple test case.

[source,test-nginx]
----
=== TEST 1: abort processing in the Lua callback on client aborts
--- config
    location = /t {
        lua_check_client_abort on;

        content_by_lua_block {
            local ok, err = ngx.on_abort(function ()
                ngx.log(ngx.NOTICE, "on abort handler called!")
                ngx.exit(444)
            end)

            if not ok then
                error("cannot set on_abort: " .. err)
            end

            ngx.sleep(0.7) -- sec
            ngx.log(ngx.NOTICE, "main handler done")
        }
    }
--- request
GET /t
--- timeout: 0.2
--- abort
--- ignore_response
--- no_error_log
[error]
main handler done
--- error_log
client prematurely closed connection
on abort handler called!
----

In this example, we make the test scaffold client abort the connection
after 0.2 seconds via the `timeout` section, and we prevent the test
scaffold from printing out the client timeout error by specifying the
`abort` section. Finally, in the Lua application code, we check for
client abort events by turning on the `lua_check_client_abort` directive
and abort the server-side processing by calling `ngx.exit(444)` in the
Lua callback function registered via the `ngx.on_abort` API.
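
Conversely, when the `lua_check_client_abort` directive is left at its
default value (`off`), the server simply runs the request handler to
completion after the client goes away. A sketch of a test for this
default behavior is shown below; it assumes the `wait` data section,
which delays the error log checks so that the detached handler has time
to finish:

[source,test-nginx]
----
=== TEST 2: processing continues on client aborts by default
--- config
    location = /t {
        content_by_lua_block {
            ngx.sleep(0.7) -- sec
            ngx.log(ngx.NOTICE, "main handler done")
        }
    }
--- request
GET /t
--- timeout: 0.2
--- abort
--- ignore_response
--- wait: 1
--- error_log
main handler done
----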

==== Clients Never Closing Connections

Unlike most well-formed HTTP clients on the market, the HTTP client used
by `Test::Nginx::Socket` _never_ actively closes the connection unless a
timeout error happens (that is, unless the timeout threshold specified
by the `--- timeout` section is exceeded). This ensures that it is
always the NGINX server that actually closes the connection when the
request specifies the "Connection: close" request header.

When the server fails to close the connection, there is a "connection
leak" bug on the server side. For example, NGINX uses reference counting
(in `r->main->count`) in its HTTP subsystem to determine whether a
request can be closed and freed. When there is an error in this
reference counting, NGINX may never close the request, leading to
resource leaks. In such cases, the corresponding test cases always fail
with a client-side timeout error, for instance,

[source]
----
# Failed test 'ERROR: client socket timed out - TEST 1: foo
# '
----

In this respect, `Test::Nginx::Socket` is deliberately a malicious HTTP
client by default. This is also why our test scaffold avoids using a
well-formed HTTP client library itself. Most test suites focus on
extreme and erroneous cases anyway, and well-formed HTTP clients help
hide problems instead of exposing them.
--------------------------------------------------------------------------------
/util/fmt.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Reflow overlong prose lines in the given AsciiDoc source file in
# place, skipping code/literal blocks and verbatim lines.

use strict;
use warnings;

my $infile = shift
    or die "No input file specified.\n";

open my $in, "<:encoding(UTF-8)", $infile
    or die "cannot open $infile for reading: $!\n";

my $changes = 0;

my $skipping;
my $res;
while (<$in>) {
    # toggle skipping on AsciiDoc block delimiter lines (.... or ----)
    if (/^(?:\.{4,}|-{4,})\s*$/) {
        $skipping = !$skipping;
        next;
    }

    # toggle skipping on Markdown-style code fences
    if (/^\s*```+/) {
        $skipping = !$skipping;
        next;
    }

    if ($skipping) {
        next;
    }

    if (/^\s|^\s*$|^\/\//) {  # verbatim: indented, blank, or comment lines
        next;
    }

    next if /^\[\^\w+\]:/;  # footnote definitions

    my $orig = $_;

    # break long lines at natural boundaries (CJK text wraps earlier)
    if (/\p{Han}/) {
        s/.{39}.*?(?:[ \t”“,:。!?、]|\p{Han})/$&\n/gso;
    } else {
        s/.{70}.*?(?:[ \t'"?”“,:。!?、]|[,.;!]\s)/$&\n/gso;
    }

    s/\s*\n\s*/\n/gms;
    $changes++ if $orig ne $_;

} continue {
    # `next` still runs this block, so skipped lines are kept verbatim
    $res .= $_;
}

close $in;

warn "$changes changes\n";

if ($changes && $res) {
    open my $out, ">:encoding(UTF-8)", $infile
        or die "Cannot open $infile for writing: $!\n";
    print $out $res;
    close $out;
    warn "$infile written.\n";
}
--------------------------------------------------------------------------------
/util/word-count.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Count the prose words in the given AsciiDoc files, skipping code and
# literal blocks, and print per-file counts plus a grand total.

use strict;
use warnings;

my $nfiles = 0;
my $total = 0;
for my $file (@ARGV) {
    $total += process_file($file);
    $nfiles++;
}

print "\nIn total, $total words found in $nfiles files.\n";

sub process_file {
    my $file = shift;
    my $in_code = 0;
    my $count = 0;
    open my $in, $file
        or die "cannot open $file for reading: $!\n";
    while (<$in>) {
        # toggle the in-code state on ---- and .... delimiter lines
        if (/^-----*$/) {
            $in_code = !$in_code;
            next;
        }
        if (/^\.\.\.\.\.*$/) {
            $in_code = !$in_code;
            next;
        }

        next if $in_code;

        # count word-like tokens, ignoring the AsciiDoc "link" macro name
        while (/\b([A-Za-z][-A-Za-z_]+)\b/g) {
            if ($1 ne 'link') {
                $count++;
            }
        }
    }
    close $in;
    print "$file: $count words found.\n";
    return $count;
}
--------------------------------------------------------------------------------