├── .luacheckrc ├── covgen.sh ├── .lua-format ├── Makefile ├── rockspecs ├── url-1.1.0-1.rockspec ├── url-1.2.2-1.rockspec ├── url-1.2.0-1.rockspec ├── url-1.2.1-1.rockspec ├── url-1.3.0-1.rockspec ├── url-1.3.1-1.rockspec ├── url-1.0-0.rockspec ├── url-1.0-1.rockspec ├── url-1.0.1-1.rockspec ├── url-1.0.2-1.rockspec ├── url-scm-1.rockspec ├── url-2.0.0-1.rockspec ├── url-2.1.0-1.rockspec └── url-1.4.0-1.rockspec ├── LICENSE ├── url.lua ├── .github └── workflows │ └── test.yml ├── .clang-format ├── test ├── encode_decode_test.lua └── parse_test.lua ├── README.md ├── tests └── perfcheck.lua └── src ├── codec.c └── parse.c /.luacheckrc: -------------------------------------------------------------------------------- 1 | std = 'max' 2 | include_files = { 3 | 'url.lua', 4 | 'test/*_test.lua', 5 | } 6 | ignore = { 7 | 'assert', 8 | } 9 | -------------------------------------------------------------------------------- /covgen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -ex 4 | 5 | mkdir -p ./coverage 6 | lcov -c -d ./src -o coverage/lcov.info.all 7 | lcov -r coverage/lcov.info.all '*/include/*' -o coverage/lcov.info.all 8 | lcov -r coverage/lcov.info.all '*/deps/*' -o coverage/lcov.info 9 | genhtml -o coverage/html coverage/lcov.info 10 | -------------------------------------------------------------------------------- /.lua-format: -------------------------------------------------------------------------------- 1 | break_after_table_lb: true 2 | break_before_table_rb: false 3 | break_before_functioncall_rp: true 4 | break_before_functiondef_rp: true 5 | chop_down_table: true 6 | extra_sep_at_table_end: true 7 | keep_simple_control_block_one_line: false 8 | keep_simple_function_one_line: false 9 | column_table_limit: 1 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 
SRCS=$(wildcard src/*.c) 2 | SOBJ=$(SRCS:.c=.$(LIB_EXTENSION)) 3 | INSTALL?=install 4 | ifdef URL_COVERAGE 5 | COVFLAGS=--coverage 6 | endif 7 | 8 | 9 | .PHONY: all install clean 10 | 11 | all: $(SOBJ) 12 | 13 | %.o: %.c 14 | $(CC) $(CFLAGS) $(WARNINGS) $(COVFLAGS) $(CPPFLAGS) -o $@ -c $< 15 | 16 | %.$(LIB_EXTENSION): %.o 17 | $(CC) -o $@ $^ $(LDFLAGS) $(LIBS) $(PLATFORM_LDFLAGS) $(COVFLAGS) 18 | 19 | install: $(SOBJ) 20 | $(INSTALL) -d $(INST_LIBDIR) 21 | $(INSTALL) $(SOBJ) $(INST_LIBDIR) 22 | $(INSTALL) url.lua $(INST_LUADIR) 23 | rm -f ./src/*.o 24 | rm -f ./src/*.so 25 | -------------------------------------------------------------------------------- /rockspecs/url-1.1.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.1.0-1" 3 | source = { 4 | url = "git://github.com/mah0x211/lua-url.git", 5 | tag = "v1.1.0" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "uriparser >= 0.8.4" 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ['url.codec'] = "codec.c" 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /rockspecs/url-1.2.2-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "url" 3 | version = "1.2.2-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-url.git", 6 | tag = "v1.2.2" 7 | } 8 | description = { 9 | summary = "url functions", 10 | homepage = "https://github.com/mah0x211/lua-url", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Teruya" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ['url.codec'] = "src/codec.c", 22 | 
["url.parse"] = "src/parse.c", 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /rockspecs/url-1.2.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.2.0-1" 3 | source = { 4 | url = "gitrec://github.com/mah0x211/lua-url.git", 5 | tag = "v1.2.0" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "luarocks-fetch-gitrec >= 0.2", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ['url.codec'] = "src/codec.c", 22 | ["url.parse"] = "src/parse.c", 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /rockspecs/url-1.2.1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.2.1-1" 3 | source = { 4 | url = "gitrec://github.com/mah0x211/lua-url.git", 5 | tag = "v1.2.1" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "luarocks-fetch-gitrec >= 0.2", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ['url.codec'] = "src/codec.c", 22 | ["url.parse"] = "src/parse.c", 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /rockspecs/url-1.3.0-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "url" 3 | version = "1.3.0-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-url.git", 6 | tag = "v1.3.0", 7 | } 8 | description = { 9 | summary = "url functions", 
10 | homepage = "https://github.com/mah0x211/lua-url", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Fukunaga" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ["url.codec"] = { 22 | sources = { "src/codec.c" }, 23 | }, 24 | ["url.parse"] = { 25 | incdirs = { "deps/lauxhlib" }, 26 | sources = { "src/parse.c" }, 27 | }, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /rockspecs/url-1.3.1-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "url" 3 | version = "1.3.1-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-url.git", 6 | tag = "v1.3.1", 7 | } 8 | description = { 9 | summary = "url functions", 10 | homepage = "https://github.com/mah0x211/lua-url", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Fukunaga" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | url = "url.lua", 21 | ["url.codec"] = { 22 | sources = { "src/codec.c" }, 23 | }, 24 | ["url.parse"] = { 25 | incdirs = { "deps/lauxhlib" }, 26 | sources = { "src/parse.c" }, 27 | }, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /rockspecs/url-1.0-0.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.0-0" 3 | source = { 4 | url = "git://github.com/mah0x211/lua-url.git", 5 | tag = "v1.0.0" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1" 15 | } 16 | external_dependencies = { 17 | URIPARSER = { 18 | header = "uriparser/Uri.h", 19 | library = "uriparser" 20 | } 21 | } 22 | build = { 23 | type = "builtin", 
24 | modules = { 25 | url = "url.lua", 26 | ['url.codec'] = "codec.c", 27 | ['url.parser'] = { 28 | sources = { "parser.c" }, 29 | libraries = { "uriparser" }, 30 | incdirs = { 31 | "$(URIPARSER_INCDIR)" 32 | }, 33 | libdirs = { 34 | "$(URIPARSER_LIBDIR)" 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /rockspecs/url-1.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.0-1" 3 | source = { 4 | url = "git://github.com/mah0x211/lua-url.git", 5 | tag = "v1.0.1" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1" 15 | } 16 | external_dependencies = { 17 | URIPARSER = { 18 | header = "uriparser/Uri.h", 19 | library = "uriparser" 20 | } 21 | } 22 | build = { 23 | type = "builtin", 24 | modules = { 25 | url = "url.lua", 26 | ['url.codec'] = "codec.c", 27 | ['url.parser'] = { 28 | sources = { "parser.c" }, 29 | libraries = { "uriparser" }, 30 | incdirs = { 31 | "$(URIPARSER_INCDIR)" 32 | }, 33 | libdirs = { 34 | "$(URIPARSER_LIBDIR)" 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /rockspecs/url-1.0.1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.0.1-1" 3 | source = { 4 | url = "git://github.com/mah0x211/lua-url.git", 5 | tag = "v1.0.1" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1" 15 | } 16 | external_dependencies = { 17 | URIPARSER = { 18 | header = "uriparser/Uri.h", 19 | library = "uriparser" 20 | } 21 | } 22 | build = { 23 | type 
= "builtin", 24 | modules = { 25 | url = "url.lua", 26 | ['url.codec'] = "codec.c", 27 | ['url.parser'] = { 28 | sources = { "parser.c" }, 29 | libraries = { "uriparser" }, 30 | incdirs = { 31 | "$(URIPARSER_INCDIR)" 32 | }, 33 | libdirs = { 34 | "$(URIPARSER_LIBDIR)" 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /rockspecs/url-1.0.2-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "1.0.2-1" 3 | source = { 4 | url = "git://github.com/mah0x211/lua-url.git", 5 | tag = "v1.0.2" 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1" 15 | } 16 | external_dependencies = { 17 | URIPARSER = { 18 | header = "uriparser/Uri.h", 19 | library = "uriparser" 20 | } 21 | } 22 | build = { 23 | type = "builtin", 24 | modules = { 25 | url = "url.lua", 26 | ['url.codec'] = "codec.c", 27 | ['url.parser'] = { 28 | sources = { "parser.c" }, 29 | libraries = { "uriparser" }, 30 | incdirs = { 31 | "$(URIPARSER_INCDIR)" 32 | }, 33 | libdirs = { 34 | "$(URIPARSER_LIBDIR)" 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Masatoshi Fukunaga 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rockspecs/url-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "scm-1" 3 | source = { 4 | url = "git+https://github.com/mah0x211/lua-url.git", 5 | } 6 | description = { 7 | summary = "url functions", 8 | homepage = "https://github.com/mah0x211/lua-url", 9 | license = "MIT/X11", 10 | maintainer = "Masatoshi Fukunaga", 11 | } 12 | dependencies = { 13 | "lua >= 5.1", 14 | "lauxhlib >= 0.3.1", 15 | } 16 | build = { 17 | type = "make", 18 | build_variables = { 19 | LIB_EXTENSION = "$(LIB_EXTENSION)", 20 | CFLAGS = "$(CFLAGS)", 21 | WARNINGS = "-Wall -Wno-trigraphs -Wmissing-field-initializers -Wreturn-type -Wmissing-braces -Wparentheses -Wno-switch -Wunused-function -Wunused-label -Wunused-parameter -Wunused-variable -Wunused-value -Wuninitialized -Wunknown-pragmas -Wshadow -Wsign-compare", 22 | CPPFLAGS = "-I$(LUA_INCDIR)", 23 | LDFLAGS = "$(LIBFLAG)", 24 | URL_COVERAGE = "$(URL_COVERAGE)", 25 | }, 26 | install_variables = { 27 | LIB_EXTENSION = "$(LIB_EXTENSION)", 28 | INST_LIBDIR = "$(LIBDIR)/url/", 29 | INST_LUADIR = "$(LUADIR)", 30 | }, 31 | } 32 | -------------------------------------------------------------------------------- 
/rockspecs/url-2.0.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "2.0.0-1" 3 | source = { 4 | url = "git+https://github.com/mah0x211/lua-url.git", 5 | tag = "v2.0.0", 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Fukunaga", 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "lauxhlib >= 0.3.1", 16 | } 17 | build = { 18 | type = 'make', 19 | build_variables = { 20 | LIB_EXTENSION = "$(LIB_EXTENSION)", 21 | CFLAGS = "$(CFLAGS)", 22 | WARNINGS = "-Wall -Wno-trigraphs -Wmissing-field-initializers -Wreturn-type -Wmissing-braces -Wparentheses -Wno-switch -Wunused-function -Wunused-label -Wunused-parameter -Wunused-variable -Wunused-value -Wuninitialized -Wunknown-pragmas -Wshadow -Wsign-compare", 23 | CPPFLAGS = "-I$(LUA_INCDIR)", 24 | LDFLAGS = "$(LIBFLAG)", 25 | URL_COVERAGE = "$(URL_COVERAGE)", 26 | }, 27 | install_variables = { 28 | LIB_EXTENSION = "$(LIB_EXTENSION)", 29 | INST_LIBDIR = "$(LIBDIR)/url/", 30 | INST_LUADIR = "$(LUADIR)", 31 | }, 32 | } 33 | -------------------------------------------------------------------------------- /rockspecs/url-2.1.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "url" 2 | version = "2.1.0-1" 3 | source = { 4 | url = "git+https://github.com/mah0x211/lua-url.git", 5 | tag = "v2.1.0", 6 | } 7 | description = { 8 | summary = "url functions", 9 | homepage = "https://github.com/mah0x211/lua-url", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Fukunaga", 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "lauxhlib >= 0.3.1", 16 | } 17 | build = { 18 | type = "make", 19 | build_variables = { 20 | LIB_EXTENSION = "$(LIB_EXTENSION)", 21 | CFLAGS = "$(CFLAGS)", 22 | WARNINGS = "-Wall -Wno-trigraphs -Wmissing-field-initializers -Wreturn-type -Wmissing-braces 
-Wparentheses -Wno-switch -Wunused-function -Wunused-label -Wunused-parameter -Wunused-variable -Wunused-value -Wuninitialized -Wunknown-pragmas -Wshadow -Wsign-compare", 23 | CPPFLAGS = "-I$(LUA_INCDIR)", 24 | LDFLAGS = "$(LIBFLAG)", 25 | URL_COVERAGE = "$(URL_COVERAGE)", 26 | }, 27 | install_variables = { 28 | LIB_EXTENSION = "$(LIB_EXTENSION)", 29 | INST_LIBDIR = "$(LIBDIR)/url/", 30 | INST_LUADIR = "$(LUADIR)", 31 | }, 32 | } 33 | -------------------------------------------------------------------------------- /rockspecs/url-1.4.0-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "url" 3 | version = "1.4.0-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-url.git", 6 | tag = "v1.4.0", 7 | } 8 | description = { 9 | summary = "url functions", 10 | homepage = "https://github.com/mah0x211/lua-url", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Fukunaga" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | "lauxhlib >= 0.3.1", 17 | } 18 | build = { 19 | type = 'make', 20 | build_variables = { 21 | LIB_EXTENSION = "$(LIB_EXTENSION)", 22 | CFLAGS = "$(CFLAGS)", 23 | WARNINGS = "-Wall -Wno-trigraphs -Wmissing-field-initializers -Wreturn-type -Wmissing-braces -Wparentheses -Wno-switch -Wunused-function -Wunused-label -Wunused-parameter -Wunused-variable -Wunused-value -Wuninitialized -Wunknown-pragmas -Wshadow -Wsign-compare", 24 | CPPFLAGS = "-I$(LUA_INCDIR)", 25 | LDFLAGS = "$(LIBFLAG)", 26 | URL_COVERAGE = "$(URL_COVERAGE)", 27 | }, 28 | install_variables = { 29 | LIB_EXTENSION = "$(LIB_EXTENSION)", 30 | INST_LIBDIR = "$(LIBDIR)/url/", 31 | INST_LUADIR = "$(LUADIR)", 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /url.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (C) 2013 Masatoshi Teruya 3 | -- 4 | -- Permission is hereby granted, free of 
charge, to any person obtaining a copy 5 | -- of this software and associated documentation files (the "Software"), to deal 6 | -- in the Software without restriction, including without limitation the rights 7 | -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | -- copies of the Software, and to permit persons to whom the Software is 9 | -- furnished to do so, subject to the following conditions: 10 | -- 11 | -- The above copyright notice and this permission notice shall be included in 12 | -- all copies or substantial portions of the Software. 13 | -- 14 | -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | -- THE SOFTWARE. 
21 | -- 22 | local codec = require('url.codec') 23 | 24 | return { 25 | encode_uri = codec.encode_uri, 26 | encode_form = codec.encode_form, 27 | encode2396 = codec.encode2396, 28 | encode3986 = codec.encode3986, 29 | decode_uri = codec.decode_uri, 30 | decode_form = codec.decode_form, 31 | decode = codec.decode, 32 | parse = require('url.parse'), 33 | } 34 | 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | luacheck: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - 10 | name: Checkout 11 | uses: actions/checkout@v2 12 | - 13 | name: Setup Lua 14 | uses: leafo/gh-actions-lua@v8.0.0 15 | - 16 | name: Setup Luarocks 17 | uses: leafo/gh-actions-luarocks@v4 18 | - 19 | name: Install Tools 20 | run: luarocks install luacheck 21 | - 22 | name: Run luacheck 23 | run: | 24 | luacheck . 25 | 26 | test: 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | lua-version: 31 | - "5.1" 32 | - "5.2" 33 | - "5.3" 34 | - "5.4" 35 | - "luajit-2.0.5" 36 | - "luajit-openresty" 37 | steps: 38 | - 39 | name: Checkout 40 | uses: actions/checkout@v2 41 | with: 42 | submodules: 'true' 43 | - 44 | name: Setup Lua ${{ matrix.lua-version }} 45 | uses: leafo/gh-actions-lua@v8.0.0 46 | with: 47 | luaVersion: ${{ matrix.lua-version }} 48 | - 49 | name: Setup Luarocks 50 | uses: leafo/gh-actions-luarocks@v4 51 | with: 52 | luaRocksVersion: 3.8.0 53 | - 54 | name: Install 55 | run: | 56 | luarocks make URL_COVERAGE=1 57 | - 58 | name: Install Tools 59 | run: | 60 | sudo apt install lcov -y 61 | luarocks install testcase 62 | - 63 | name: Run Test 64 | run: | 65 | testcase ./test 66 | - 67 | name: Generate coverage reports 68 | run: | 69 | sh ./covgen.sh 70 | - 71 | name: Upload c coverage to Codecov 72 | uses: codecov/codecov-action@v2 73 | with: 74 | files: ./coverage/lcov.info 75 | flags: unittests 
76 | 77 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: LLVM 4 | AlignAfterOpenBracket: Align 5 | AlignArrayOfStructures: Left 6 | AlignConsecutiveAssignments: AcrossComments 7 | AlignConsecutiveBitFields: AcrossComments 8 | AlignConsecutiveDeclarations: None 9 | AlignConsecutiveMacros: AcrossComments 10 | AlignEscapedNewlines: Right 11 | AlignOperands: Align 12 | AlignTrailingComments: true 13 | AllowAllArgumentsOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: false 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortEnumsOnASingleLine: false 18 | AllowShortFunctionsOnASingleLine: Empty 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterReturnType: None 22 | AlwaysBreakBeforeMultilineStrings: false 23 | AlwaysBreakTemplateDeclarations: No 24 | BinPackArguments: true 25 | BinPackParameters: true 26 | BitFieldColonSpacing: None 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterControlStatement: Never 30 | AfterEnum: false 31 | AfterFunction: false 32 | AfterStruct: false 33 | AfterUnion: false 34 | AfterExternBlock: false 35 | BeforeCatch: false 36 | BeforeElse: false 37 | BeforeWhile: false 38 | IndentBraces: false 39 | SplitEmptyFunction: true 40 | SplitEmptyRecord: true 41 | BreakBeforeBinaryOperators: None 42 | BreakBeforeBraces: Linux 43 | BreakBeforeTernaryOperators: false 44 | BreakStringLiterals: true 45 | ColumnLimit: 80 46 | CommentPragmas: '^ IWYU pragma:' 47 | ContinuationIndentWidth: 4 48 | DeriveLineEnding: true 49 | DerivePointerAlignment: true 50 | DisableFormat: false 51 | ExperimentalAutoDetectBinPacking: false 52 | FixNamespaceComments: true 53 | ForEachMacros: 54 | - foreach 55 | - Q_FOREACH 56 | - BOOST_FOREACH 57 | IncludeBlocks: Preserve 58 
| IncludeCategories: 59 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 60 | Priority: 2 61 | SortPriority: 0 62 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 63 | Priority: 3 64 | SortPriority: 0 65 | - Regex: '.*' 66 | Priority: 1 67 | SortPriority: 0 68 | IncludeIsMainRegex: '(Test)?$' 69 | IncludeIsMainSourceRegex: '' 70 | IndentCaseBlocks: false 71 | IndentCaseLabels: false 72 | IndentExternBlock: NoIndent 73 | IndentGotoLabels: false 74 | IndentPPDirectives: AfterHash 75 | IndentWidth: 4 76 | IndentWrappedFunctionNames: false 77 | InsertTrailingCommas: None 78 | KeepEmptyLinesAtTheStartOfBlocks: false 79 | MacroBlockBegin: '' 80 | MacroBlockEnd: '' 81 | MaxEmptyLinesToKeep: 1 82 | NamespaceIndentation: None 83 | PPIndentWidth: 1 84 | PenaltyBreakAssignment: 2 85 | PenaltyBreakBeforeFirstCallParameter: 19 86 | PenaltyBreakComment: 300 87 | PenaltyBreakFirstLessLess: 120 88 | PenaltyBreakString: 1000 89 | PenaltyBreakTemplateDeclaration: 10 90 | PenaltyExcessCharacter: 1000000 91 | PenaltyReturnTypeOnItsOwnLine: 60 92 | PointerAlignment: Right 93 | # QualifierAlignment Leave 94 | # QualifierOrder [] 95 | ReflowComments: true 96 | SortIncludes: CaseSensitive 97 | SpaceAfterCStyleCast: false 98 | SpaceAfterLogicalNot: false 99 | SpaceBeforeAssignmentOperators: true 100 | SpaceBeforeCaseColon: false 101 | SpaceBeforeParens: ControlStatements 102 | SpaceBeforeRangeBasedForLoopColon: false 103 | SpaceBeforeSquareBrackets: false 104 | SpaceInEmptyBlock: false 105 | SpaceInEmptyParentheses: false 106 | SpacesBeforeTrailingComments: 1 107 | SpacesInCStyleCastParentheses: false 108 | SpacesInConditionalStatement: false 109 | SpacesInContainerLiterals: false 110 | SpacesInParentheses: false 111 | SpacesInSquareBrackets: false 112 | Standard: Latest 113 | StatementMacros: 114 | - Q_UNUSED 115 | - QT_REQUIRE_VERSION 116 | TabWidth: 8 117 | UseCRLF: false 118 | UseTab: Never 119 | WhitespaceSensitiveMacros: 120 | - STRINGIZE 121 | - PP_STRINGIZE 122 | - BOOST_PP_STRINGIZE 123 | 
... 124 | -------------------------------------------------------------------------------- /test/encode_decode_test.lua: -------------------------------------------------------------------------------- 1 | local testcase = require('testcase') 2 | local url = require('url') 3 | 4 | local ALPHA_LO = 'abcdefghijklmnopqrstuvwxyz' 5 | local ALPHA_UP = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 6 | local DIGIT = '0123456789' 7 | local ALPHADIGIT = ALPHA_LO .. ALPHA_UP .. DIGIT 8 | local TESTSTR = [[ !"#$%&\'()*+,-./ 9 | 0123456789 10 | :;<=>?@ 11 | ABCDEFGHIJKLMNOPQRSTUVWXYZ 12 | [\]^_` 13 | abcdefghijklmnopqrstuvwxyz 14 | {|}~]] 15 | 16 | function testcase.encode_uri() 17 | -- test that encodeURL 18 | local s = url.encode_uri(TESTSTR) 19 | s = string.gsub(s, '%%[a-fA-F0-9][a-fA-F0-9]', '') 20 | 21 | local mark = "!#$&'()*+,./:;=?@_~-" 22 | local unescaped = ALPHADIGIT .. mark 23 | assert.equal(#s, #unescaped) 24 | assert.re_match(s, '[' .. unescaped .. ']') 25 | assert.not_re_match(s, '[^' .. unescaped .. ']') 26 | end 27 | 28 | function testcase.encode_form() 29 | -- test that encodeURL 30 | local s = url.encode_form(TESTSTR) 31 | s = string.gsub(s, '%%[a-fA-F0-9][a-fA-F0-9]', '') 32 | 33 | local mark = "+*-._~" 34 | local unescaped = ALPHADIGIT .. mark 35 | assert.equal(#s, #unescaped) 36 | assert.re_match(s, '[' .. unescaped .. ']') 37 | assert.not_re_match(s, '[^' .. unescaped .. ']') 38 | end 39 | 40 | function testcase.encode2396() 41 | -- test that encode2396 42 | local s = url.encode2396(TESTSTR) 43 | s = string.gsub(s, '%%[a-fA-F0-9][a-fA-F0-9]', '') 44 | 45 | local mark = "!'()*._~-" 46 | local unescaped = ALPHADIGIT .. mark 47 | assert.equal(#s, #unescaped) 48 | assert.re_match(s, '[' .. unescaped .. ']') 49 | assert.not_re_match(s, '[^' .. unescaped .. 
']') 50 | end 51 | 52 | function testcase.encode3986() 53 | -- test that encode3986 54 | local s = url.encode3986(TESTSTR) 55 | s = string.gsub(s, '%%[a-fA-F0-9][a-fA-F0-9]', '') 56 | 57 | local mark = '._~-' 58 | local unescaped = ALPHADIGIT .. mark 59 | assert.equal(#s, #unescaped) 60 | assert.re_match(s, '[' .. unescaped .. ']') 61 | assert.not_re_match(s, '[^' .. unescaped .. ']') 62 | end 63 | 64 | function testcase.decode_uri() 65 | local escaped = '' 66 | for i = 1, 0x7E do 67 | escaped = escaped .. string.format('%%%02X', i) 68 | end 69 | 70 | -- test that decodeURL did not decode '#$&+,/:;=?@' characters 71 | local decoded = assert(url.decode_uri(escaped)) 72 | local s = '' 73 | for c in string.gmatch(decoded, '%%[a-fA-F0-9][a-fA-F0-9]') do 74 | local n = tonumber(string.sub(c, 2), 16) 75 | s = s .. string.char(n) 76 | end 77 | local mark = '#$&+,/:;=?@' 78 | local undecoded = mark 79 | assert.equal(#s, #undecoded) 80 | assert.re_match(s, '[' .. undecoded .. ']') 81 | assert.not_re_match(s, '[^' .. undecoded .. ']') 82 | end 83 | 84 | function testcase.decode_form() 85 | local escaped = '' 86 | for i = 1, 0x7E do 87 | escaped = escaped .. string.format('%%%02X', i) 88 | end 89 | 90 | -- test that decode all escaped characters 91 | local decoded = assert(url.decode_form(escaped)) 92 | local s = '' 93 | for c in string.gmatch(decoded, '%%[a-fA-F0-9][a-fA-F0-9]') do 94 | local n = tonumber(string.sub(c, 2), 16) 95 | s = s .. string.char(n) 96 | end 97 | assert.equal(#s, 0) 98 | 99 | -- test that decode_form is decode '+' to ' ' 100 | assert.equal(url.decode_form('h+ello+++world!'), 'h ello world!') 101 | end 102 | 103 | function testcase.decode() 104 | local escaped = '' 105 | for i = 1, 0x7E do 106 | escaped = escaped .. 
string.format('%%%02X', i) 107 | end 108 | 109 | -- test that decode all escaped characters 110 | local decoded = assert(url.decode(escaped)) 111 | local s = '' 112 | for c in string.gmatch(decoded, '%%[a-fA-F0-9][a-fA-F0-9]') do 113 | local n = tonumber(string.sub(c, 2), 16) 114 | s = s .. string.char(n) 115 | end 116 | assert.equal(#s, 0) 117 | end 118 | 119 | function testcase.decode_unicode_point() 120 | -- test that decode unicode point 121 | assert.equal(url.decode_uri( 122 | '%u0041 %u00E8 %u3042 %uD869%uDEB2 %u0041 %u00E8 %u3042 %uD869%uDEB2'), 123 | 'A è あ 𪚲 A è あ 𪚲') 124 | 125 | -- test that returns err if invalid code point 126 | local cp = '%20%u4' 127 | local s, err = url.decode_uri(cp) 128 | assert.is_nil(s) 129 | assert.equal(string.sub(cp, 1, err), '%20%') 130 | end 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | lua-url 2 | ==== 3 | 4 | [![test](https://github.com/mah0x211/lua-url/actions/workflows/test.yml/badge.svg)](https://github.com/mah0x211/lua-url/actions/workflows/test.yml) 5 | [![codecov](https://codecov.io/gh/mah0x211/lua-url/branch/master/graph/badge.svg)](https://codecov.io/gh/mah0x211/lua-url) 6 | 7 | 8 | url string utility. 9 | 10 | ## Installation 11 | 12 | ```sh 13 | luarocks install url 14 | ``` 15 | 16 | 17 | ## Encoding 18 | 19 | ``` 20 | str = encode_uri( str ) 21 | str = encode_form( str ) 22 | str = encode2396( str ) 23 | str = encode3986( str ) 24 | ``` 25 | 26 | encode a string to a percent-encoded string. 27 | 28 | - `encode_uri` encodes characters except `ALPHA_DIGIT (a-zA-Z0-9)` and `!#$&'()*+,./:;=?@_~-`. 29 | - based on ECMAScript. please see [developer.mozilla.org](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI) for more details. 30 | - `encode_form` encodes characters except `ALPHA_DIGIT` and `*-._~`. 
31 | - based on https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set 32 | - `encode2396` encodes characters except `ALPHA_DIGIT` and `!'()*._~-`. 33 | - based on RFC 2396. 34 | - `encode3986` encodes characters except `ALPHA_DIGIT` and `._~-`. 35 | - based on RFC 3986. 36 | 37 | 38 | **Parameters** 39 | 40 | - `str:string`: a string. 41 | 42 | **Returns** 43 | 44 | - `str:string`: an encoded string. 45 | 46 | 47 | ## Decoding 48 | 49 | ``` 50 | str, err = decode_uri( str ) 51 | str, err = decode_form( str ) 52 | str, err = decode( str ) 53 | ``` 54 | 55 | decode a percent-encoded string. 56 | 57 | - `decode_uri` decodes percent-encoded characters except `#$&+,/:;=?@`. 58 | - based on ECMAScript. please see [developer.mozilla.org](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURI) for more details. 59 | - `decode_form` decodes all percent-encoded characters, and replaces any `+` characters with spaces ` `. 60 | - `decode` decodes all percent-encoded characters. 61 | 62 | **Parameters** 63 | 64 | - `str:string`: encoded uri string. 65 | 66 | **Returns** 67 | 68 | - `str:string`: decoded string on success, or `nil` on failure. 69 | - `err:integer`: position at which the illegal character was found. 70 | 71 | 72 | ## Parser 73 | 74 | ### res, cur, err = parse( url [, parse_query [, init [, is_querystring]]] ) 75 | 76 | returns a table of the parsed url components. 77 | 78 | **Parameters** 79 | 80 | - `url:string`: url string. 81 | - `parse_query:boolean`: parse query-string if `true`. 82 | - `init:integer`: cursor start position. (default `0`) 83 | - `is_querystring:boolean`: treat `url` as a query string. (default `false`) 84 | 85 | **Returns** 86 | 87 | - `res:table`: url info table. 88 | - `cur:number`: cursor stop position. 89 | - `err:string`: error character. 
90 | 91 | 92 | **Example** 93 | 94 | ```lua 95 | local dump = require('dump') 96 | local url = require('url') 97 | 98 | local res, cur, err = url.parse( 99 | 'head http://user:pass@host.com:8080/p/a/t/h/?query=string&query=value#hash tail', 100 | true, 5) 101 | print(dump({ 102 | res = res, 103 | cur = cur, 104 | err = err, 105 | })) 106 | --[[ 107 | { 108 | cur = 74, 109 | err = " ", 110 | res = { 111 | fragment = "hash", 112 | host = "host.com:8080", 113 | hostname = "host.com", 114 | password = "pass", 115 | path = "/p/a/t/h/", 116 | port = "8080", 117 | query = "?query=string&query=value", 118 | query_params = { 119 | query = { 120 | [1] = "string", 121 | [2] = "value" 122 | } 123 | }, 124 | scheme = "http", 125 | user = "user", 126 | userinfo = "user:pass" 127 | } 128 | } 129 | --]] 130 | 131 | res, cur, err = url.parse( 132 | 'head http://user:pass@host.com:8080/p/a/t/h/?query=string&query=value#hash tail', 133 | false, 5) 134 | print(dump({ 135 | res = res, 136 | cur = cur, 137 | err = err, 138 | })) 139 | --[[ 140 | { 141 | cur = 74, 142 | err = " ", 143 | res = { 144 | fragment = "hash", 145 | host = "host.com:8080", 146 | hostname = "host.com", 147 | password = "pass", 148 | path = "/p/a/t/h/", 149 | port = "8080", 150 | query = "?query=string&query=value", 151 | scheme = "http", 152 | user = "user", 153 | userinfo = "user:pass" 154 | } 155 | } 156 | --]] 157 | 158 | -- parse query 159 | res, cur, err = url.parse('head query=string&query=value#hash tail', true, 5, 160 | true) 161 | print(dump({ 162 | res = res, 163 | cur = cur, 164 | err = err, 165 | })) 166 | --[[ 167 | { 168 | cur = 34, 169 | err = " ", 170 | res = { 171 | fragment = "hash", 172 | query = "query=string&query=value", 173 | query_params = { 174 | query = { 175 | [1] = "string", 176 | [2] = "value" 177 | } 178 | } 179 | } 180 | } 181 | --]] 182 | ``` 183 | -------------------------------------------------------------------------------- /tests/perfcheck.lua: 
--------------------------------------------------------------------------------
--
-- perfcheck: crude benchmark that repeatedly encodes and decodes a sample of
-- printable ASCII, using either the pure-Lua reference functions defined below
-- or the functions exported by the url module.
--
--   usage: lua perfcheck.lua <loop count> <1:use url module>
--     <loop count>  number of benchmark iterations (required)
--     1             benchmark the url module instead of the pure-Lua code
--
-- conversion tables
--
-- Each table is indexed by byte value. A non-nil entry means the byte is
-- emitted unchanged by encode(); nil means it is percent-encoded. Entries are
-- positional, so every nil below is significant.
--

--[[
    0-9
    a-zA-Z
    !#$&'()*+,-./:;=?@_~
]]
local TBL_ENCURI = {
    -- bytes 1-31 (control codes); index 0 is never set, so NUL is escaped too
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil,
    -- bytes 32-47: SP, '"' and '%' are escaped
    nil, '!', nil, '#', '$', nil, '&', '\'', '(', ')', '*', '+', ',', '-', '.',
    '/',
    -- bytes 48-77: '<' and '>' are escaped
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', nil, '=', nil,
    '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    -- bytes 78-92: '[' and '\' are escaped
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', nil, nil,
    -- bytes 93-96: ']', '^' and '`' are escaped
    nil, nil, '_', nil,
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    -- bytes 112-126: '{', '|' and '}' are escaped
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', nil, nil, nil, '~'
};

--[[
    0-9
    a-zA-Z
    !'()*-._~
]]
local TBL_ENC2396 = {
    -- bytes 1-31 (control codes)
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil,
    -- bytes 32-46: SP " # $ % & + , are escaped
    nil, '!', nil, nil, nil, nil, nil, '\'', '(', ')', '*', nil, nil, '-', '.',
    -- bytes 47-61: / : ; < = are escaped
    nil, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', nil, nil, nil, nil,
    -- bytes 62-64: > ? @ are escaped
    nil, nil, nil,
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    -- bytes 80-94: [ \ ] ^ are escaped
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', nil, nil, nil, nil,
    -- bytes 95-96: ` is escaped
    '_', nil,
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    -- bytes 112-126: { | } are escaped
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', nil, nil, nil, '~'
};

--[[
    0-9
    a-zA-Z
    -._~
]]
local TBL_ENC3986 = {
    -- bytes 1-31 (control codes)
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
    nil,
    -- bytes 32-46: SP ! " # $ % & ' ( ) * + , are escaped
    nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, '-', '.',
    -- bytes 47-61: / : ; < = are escaped
    nil, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', nil, nil, nil, nil,
    -- bytes 62-64: > ? @ are escaped
    nil, nil, nil,
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    -- bytes 80-94: [ \ ] ^ are escaped
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', nil, nil, nil, nil,
    -- bytes 95-96: ` is escaped
    '_', nil,
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    -- bytes 112-126: { | } are escaped
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', nil, nil, nil, '~'
};


--- Percent-encode every byte of src that has no entry in tbl.
-- String concatenation in the loop is kept on purpose: this function is the
-- baseline being measured, not something to tune.
-- @param src string to encode; any non-string yields ''
-- @param tbl one of the TBL_ENC* tables
-- @return the encoded string
local function encode( src, tbl )
    local dest = '';

    if type(src) == 'string' then
        local len = #src;
        local code;

        for i = 1, len, 1 do
            code = string.byte( src, i );
            if tbl[code] then
                dest = dest .. string.char(code);
            else
                dest = dest .. '%' .. string.format( '%02X', code );
            end
        end
    end

    return dest;
end

local function encodeURI( src )
    return encode( src, TBL_ENCURI );
end

local function encode2396( src )
    return encode( src, TBL_ENC2396 );
end

local function encode3986( src )
    return encode( src, TBL_ENC3986 );
end


--- Decode %XX escapes in src.
-- An escape whose byte has an entry in tbl is left as '%XX'; all other escapes
-- are replaced by the byte they denote.
-- @param src string to decode; any non-string yields ''
-- @param tbl one of the TBL_ENC* tables
-- @return the decoded string
local function decode( src, tbl )
    if type(src) == 'string' then
        -- parentheses drop gsub's second result (the substitution count) so
        -- callers only ever see the decoded string
        return ( string.gsub( src, '%%([%x][%x])', function( hex )
            local dec = tonumber( hex, 16 );
            if tbl[dec] then
                return '%' .. hex;
            end
            return string.char( dec );
        end) );
    end

    return '';
end

local function decodeURI( src )
    return decode( src, TBL_ENCURI );
end

local function decode2396( src )
    return decode( src, TBL_ENC2396 );
end

local function decode3986( src )
    return decode( src, TBL_ENC3986 );
end


local url = require('../url');


-- sample input: a long-bracket string, so the \' and \n below are literal
-- backslash sequences, not escapes
local str =[[ !"#$%&\'()*+,-./\n
0123456789
:;<=>?@
ABCDEFGHIJKLMNOPQRSTUVWXYZ
[\]^_`
abcdefghijklmnopqrstuvwxyz
{|}~]];

local num = tonumber( _G.arg[1] )
local ec,dc;

print(
    'usage: lua ' .. _G.arg[0] ..
    ' <loop count> <1:use url module>'
);
-- the loop below needs a numeric bound; stop here instead of failing with
-- "'for' limit must be a number" when the argument is missing or malformed
if not num or num < 0 then
    io.stderr:write( 'error: <loop count> must be a non-negative number\n' );
    os.exit( 1 );
end

if _G.arg[2] == '1' then
    print( 'use: url module' );
    -- NOTE(review): these field names are assumed to exist on the url module;
    -- src/codec.c registers encode_uri/decode_uri/decode rather than
    -- encodeURI/decodeURI/decode2396/decode3986 -- confirm against url.lua.
    encodeURI = url.encodeURI;
    encode2396 = url.encode2396;
    encode3986 = url.encode3986;
    decodeURI = url.decodeURI;
    decode2396 = url.decode2396;
    decode3986 = url.decode3986;
else
    print( 'use: pure lua implementation' );
end
print( 'loop: ', num );

-- run exactly `num` iterations, matching the count printed above
for _ = 1, num do
    ec = encodeURI( str );
    dc = decodeURI( ec );
    --[[
    print( 'encodeURI: ', ec, '\n',
        '-> decodeURI: ', assert( decodeURI( ec ) == str ), '\n',
        '-> decode2396: ', assert( decode2396( ec ) == str ), '\n',
        '-> decode3986: ', assert( decode3986( ec ) == str ), '\n',
        ''
    );
    --]]

    ec = encode2396( str );
    dc = decode2396( ec );
    --[[
    print( 'encode2396', ec, '\n',
        '-> decodeURI: ', assert( decodeURI( ec ) ~= str ), '\n',
        '-> decode2396: ', assert( decode2396( ec ) == str ), '\n',
        '-> decode3986: ', assert( decode3986( ec ) == str ), '\n',
        ''
    );
    --]]

    ec = encode3986( str );
    dc = decode3986( ec );
    --[[
    print( 'encode3986:', ec, '\n',
        '-> decodeURI: ', assert( decodeURI( ec ) ~= str ), '\n',
        '-> decode2396: ', assert( decode2396( ec ) ~= str ), '\n',
        '-> decode3986: ', assert( decode3986( ec ) == str ), '\n',
        ''
    );
    --]]
end

--------------------------------------------------------------------------------
/src/codec.c:
--------------------------------------------------------------------------------
/**
 * Copyright (C) 2014 Masatoshi Teruya
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish,
distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 *
 * src/codec.c
 *
 * Created by Masatoshi Teruya on 14/04/11.
 *
 */

// NOTE(review): the header name after #include is missing in this copy of the
// file. The lauxh_* helpers used below suggest lauxhlib.h -- confirm against
// the repository before building.
#include

/*
    encodeURI : 0-9 a-zA-Z !#$&'()*+,-./:;=?@_~

    uric          = reserved | unreserved | escaped
    reserved      = ";" | "," | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$"
    unreserved    = alphanum | mark
    mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

    escaped       = "%" hex hex

    hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
                            "a" | "b" | "c" | "d" | "e" | "f"

    digit         = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

    alphanum      = alpha | digit
    alpha         = lowalpha | upalpha

    lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
               "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
               "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
    upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
               "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
               "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"

    Table layout (shared by all UNRESERVED_* tables): the index is the input
    byte; a non-zero entry is the byte encode_lua() emits for it, and 0 means
    the byte is written as %XX. Entries are positional, and initializers that
    are not listed (127-255) default to 0.
*/
static const unsigned char UNRESERVED_URI[256] = {
    // indexes 0-30 (control codes); index 31 is the first entry of the next
    // row
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0,

    // indexes 31-47: US, SP, '"' and '%' are escaped
    0, 0, '!', 0, '#', '$', 0, '&', '\'', '(', ')', '*', '+', ',', '-', '.',
    '/',

    // digit
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

    // indexes 58-64: '<' and '>' are escaped
    ':', ';', 0, '=', 0, '?', '@',

    // alpha-upper
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
    // indexes 90-96: [ \ ] ^ ` are escaped
    'Z', 0, 0, 0, 0, '_', 0,

    // alpha-lower
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
    // indexes 122-126: { | } are escaped
    'z', 0, 0, 0, '~'};

/*
    encode_form : 0-9 a-zA-Z *-._~
    https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set

    application/x-www-form-urlencoded = percent-encode
                                        except: alpha | digit | "*" | "-" |
                                        "." | "_"

    percent-encode            = component-percent-encode |
                                "!" | "'" | "(" | ")" | "~"

    component-percent-encode  = userinfo-percent-encode |
                                "$" | "%" | "&" | "+" | ","

    userinfo-percent-encode   = path-percent-encode |
                                "/" | ":" | ";" | "=" | "@" | "[" | "\" |
                                "]" | "^" | "|"

    path-percent-encode       = query-percent-encode |
                                "?" | "`" | "{" | "}"

    query-percent-encode      = c0-control-percent-encode |
                                code-points-gt-7e-encode |
                                " " | '"' | "#" | "<" | ">"

    c0-control-percent-encode = 0x0 to 0x1F

    code-points-gt-7e-encode  = greater than "~"


    unreserved    = alpha | digit | mark

    alpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
            "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"

    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

    mark  = *-._~

    NOTE(review): the grammar above lists "~" in the percent-encode set, yet
    the summary line, the mark line and the table below all pass '~' through
    unescaped. Confirm which of the two is intended.
*/
static const unsigned char UNRESERVED_FORM[256] = {
    // indexes 0-30 (control codes)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0,

    // indexes 31-47: SP (32) is emitted as '+'; of the rest only '*', '-' and
    // '.' pass through
    0, '+', 0, 0, 0, 0, 0, 0, 0, 0, 0, '*', 0, 0, '-', '.', 0,

    // digit, then indexes 58-64 (: ; < = > ? @) escaped
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 0,

    // alpha-upper
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
    // indexes 90-96: [ \ ] ^ ` are escaped
    'Z', 0, 0, 0, 0, '_', 0,

    // alpha-lower
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
    // indexes 122-126: { | } are escaped
    'z', 0, 0, 0, '~'};

/*
    RFC 2396 : 0-9 a-zA-Z !'()*-._~

    unreserved    = alphanum | mark

    mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

    alphanum      = alpha | digit

    alpha         = lowalpha | upalpha

    lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
               "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
               "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"

    upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
               "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
               "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"

    digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
*/
static const unsigned char UNRESERVED_2396[256] = {
    // indexes 0-30 (control codes)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0,

    // indexes 31-47: US, SP " # $ % & + , / are escaped
    0, 0, '!', 0, 0, 0, 0, 0, '\'', '(', ')', '*', 0, 0, '-', '.', 0,

    // digit, then indexes 58-64 (: ; < = > ? @) escaped
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 0,

    // alpha-upper
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
    // indexes 90-96: [ \ ] ^ ` are escaped
    'Z', 0, 0, 0, 0, '_', 0,

    // alpha-lower
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
    // indexes 122-126: { | } are escaped
    'z', 0, 0, 0, '~'};

/*
    RFC 3986 : 0-9 a-zA-Z -._~

    unreserved    = alpha | digit | "-" | "." | "_" | "~"

    alpha         = lowalpha | upalpha

    lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
               "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
               "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"

    upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
               "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
               "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"

    digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
*/
static const unsigned char UNRESERVED_3986[256] = {
    // indexes 0-30 (control codes)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0,

    // indexes 31-47: US, SP ! " # $ % & ' ( ) * + , / are escaped
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', '.', 0,

    // digit, then indexes 58-64 (: ; < = > ? @) escaped
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 0,

    // alpha-upper
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
    // indexes 90-96: [ \ ] ^ ` are escaped
    'Z', 0, 0, 0, 0, '_', 0,

    // alpha-lower
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
    // indexes 122-126: { | } are escaped
    'z', 0, 0, 0, '~'};

/*
    hex = 0-16
    '0' = 48
    '7' = 55
    'A' = 65
    'W' = 87
    'a' = 97
    uppercase:
        '7' = 'A' - 10
    lowercase:
        'W' = 'a' - 10

    DEC2HEX maps a nibble (0-15) to its uppercase hex digit.
    HEX2DEC maps a byte to (hex value + 1) for 0-9, A-F and a-f, and to 0 for
    every other byte, so a single lookup both validates and converts; callers
    subtract 1 to obtain the value.
*/
static const unsigned char DEC2HEX[16] = "0123456789ABCDEF";

static const char HEX2DEC[256] = {
    // indexes 0-47: not hex digits
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // '0'-'9' (48-57), then indexes 58-64 not hex
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0,
    // 'A'-'F' (65-70), then indexes 71-96 not hex
    11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 'a'-'f' (97-102); the remaining entries default to 0
    11, 12, 13, 14, 15, 16};

// %[hex][hex]*4
// (neither macro is referenced elsewhere in this file)
#define CODEC_UTF8ENC_LEN 12
#define CODEC_UTF8DEC_LEN 4

/*
 * Shared body of the encode_* Lua functions.
 * Takes the string at stack index 1 and pushes a copy in which every byte
 * with a zero entry in tbl is replaced by "%XX" (uppercase hex) and every
 * other byte by its tbl entry. Returns 1 (one Lua result).
 */
static int encode_lua(lua_State *L, const unsigned char *tbl)
{
    size_t len         = 0;
    unsigned char *src = (unsigned char *)lauxh_checklstring(L, 1, &len);
    // dest[0] stays '%'; dest[1] and dest[2] are rewritten per escaped byte
    unsigned char dest[3] = {'%', 0};
    luaL_Buffer b         = {0};

    lua_settop(L, 1);
    luaL_buffinit(L, &b);
    for (size_t i = 0; i < len; i++) {
        unsigned char c          = *src;
        unsigned char unreserved = tbl[c];
        if (unreserved) {
            // emit the table entry, not c: UNRESERVED_FORM maps SP to '+'
            luaL_addchar(&b, unreserved);
        } else {
            // *src >> 4 = *src / 16
            dest[1] = DEC2HEX[c >> 4];
            // *src & 0xf = *src % 16
            dest[2] = DEC2HEX[c & 0xf];
            luaL_addlstring(&b, (char *)dest, 3);
        }
        src++;
    }

    luaL_pushresult(&b);
    return 1;
}

// Lua-callable encoders: each binds encode_lua() to one unreserved table.
static int encode_uri_lua(lua_State *L)
{
    return encode_lua(L, UNRESERVED_URI);
}
static int encode_form_lua(lua_State *L)
{
    return encode_lua(L, UNRESERVED_FORM);
}
static int encode2396_lua(lua_State *L)
{
    return encode_lua(L, UNRESERVED_2396);
}
static int encode3986_lua(lua_State *L)
{
    return encode_lua(L, UNRESERVED_3986);
}

/*
    hex: 0xf = 0-15 = 4bit
    utf8 code-point: u+0000 ... u+10ffff = 0-1114111 = 21bit
    ascii: u+0000 ... u+007f = 0-127 = 7bit

    unicode_pt2utf8 appends the UTF-8 encoding of code point cp to b.
    Returns  0 when bytes were appended,
            -2 when cp is a surrogate (u+d800 ... u+dfff); nothing is appended,
            -1 with errno = ERANGE when cp is above u+10ffff; nothing is
               appended.
*/
static int unicode_pt2utf8(luaL_Buffer *b, uint32_t cp)
{
    unsigned char dest[4] = {0};

    // range: u+0000 ... u+007f
    // bit: 0xxx xxxx
    if (cp < 0x80) {
        luaL_addchar(b, cp);
    }
    // range: u+0080 ... u+07ff
    // bit: [110y yyyx]:0xc0
    //      [10xx xxxx]:0x80
    else if (cp < 0x800) {
        dest[0] = 0xc0 | (cp >> 6);
        dest[1] = 0x80 | (cp & 0x3f);
        luaL_addlstring(b, (char *)dest, 2);
    }
    // range: u+d800 ... u+dfff use for surrogate pairs
    // (reported to the caller, which may combine two units into one point)
    else if (cp > 0xD7FF && cp < 0xE000) {
        return -2;
    }
    // range: u+0800 ... u+ffff
    // bit: [1110 yyyy]:0xe0
    //      [10yx xxxx]:0x80
    //      [10xx xxxx]:0x80
    else if (cp < 0x10000) {
        dest[0] = 0xe0 | (cp >> 12);
        dest[1] = 0x80 | ((cp >> 6) & 0x3f);
        dest[2] = 0x80 | (cp & 0x3f);
        luaL_addlstring(b, (char *)dest, 3);
    }
    // range: u+10000 ... u+10FFFF
    // bit: [1111 0yyy]:0xf0
    //      [10yy xxxx]:0x80
    //      [10xx xxxx]:0x80
    //      [10xx xxxx]:0x80
    //
    // RFC 3629: UTF-8, characters from the U+0000..U+10FFFF
    // The 4-byte form could carry values up to u+1FFFFF, but the bound is
    // deliberately 0x110000; the historical 5- and 6-byte forms are not
    // produced at all.
    else if (cp < 0x110000) {
        dest[0] = 0xf0 | (cp >> 18);
        dest[1] = 0x80 | ((cp >> 12) & 0x3f);
        dest[2] = 0x80 | ((cp >> 6) & 0x3f);
        dest[3] = 0x80 | (cp & 0x3f);
        luaL_addlstring(b, (char *)dest, 4);
    }
    // invalid: code-point > 0x10FFFF
    else {
        errno = ERANGE;
        return -1;
    }

    return 0;
}

// Selects how decode() treats input:
//   DECODE_ALL  - every %XX escape is decoded
//   DECODE_URI  - escapes of # $ & + , / : ; = ? @ are left encoded
//   DECODE_FORM - like DECODE_ALL, and a literal '+' becomes a space
typedef enum {
    DECODE_ALL  = 0,
    DECODE_URI  = 1,
    DECODE_FORM = 2
} decode_type_e;

/*
    hex: 0xf = 0-15 = 4bit
    unicode code-point: u+0000 ... u+10ffff = 21bit
    ascii: u+0000 ...
u+007f = 0-127 = 7bit 410 | */ 411 | static int decode(lua_State *L, char *str, size_t slen, decode_type_e dectype) 412 | { 413 | luaL_Buffer b = {0}; 414 | 415 | luaL_buffinit(L, &b); 416 | 417 | for (size_t i = 0; i < slen; i++) { 418 | unsigned char *src = (unsigned char *)str + i; 419 | if (*src != '%') { 420 | if (dectype == DECODE_FORM && *src == '+') { 421 | luaL_addchar(&b, ' '); 422 | } else { 423 | luaL_addchar(&b, *src); 424 | } 425 | continue; 426 | } 427 | // percent-encoding(%hex) must have more than 2 byte strings after '%'. 428 | else if (slen < i + 2) { 429 | lua_pushnil(L); 430 | lua_pushinteger(L, i + 1); 431 | return 2; 432 | } 433 | /* 434 | hex(8bit) to decimal 435 | e.g. 436 | hex:'%41' 437 | '4' to hex:0x04[0000 0100] 438 | '1' to hex:0x01[0000 0001] 439 | 0x04[0000 0100] << 4bit 440 | 0x40[0100 0000] | 0x01[0000 0001] 441 | 0x41[0100 0001] 442 | 0x41 = 65 = 'A' 443 | 444 | hex:'%7a' 445 | '7' to hex:0x07[0000 0111] 446 | 'a' to hex:0x0a[0000 1010] 447 | 0x07[0000 0111] << 4bit 448 | 0x70[0111 0000] | 0x0a[0000 1010] 449 | 0x7a[0111 1010] 450 | 0x7a:122 = 'z' 451 | 452 | hex:'%7a4' 453 | '7' to hex:0x07[0000 0111] 454 | 'a' to hex:0x0a[0000 1010] 455 | '4' to hex:0x04[0000 0100] 456 | 0x007[0000 0000 0111] << 8bit 457 | 0x00a[0000 0000 1010] << 4bit 458 | 0x70[0111 0000 0000] | [0000 1010 0000] | 0x04[0000 0100] 459 | 0x7a4[0111 1010 0100] = 1956 460 | */ 461 | // %[hex]*2 462 | else if (HEX2DEC[src[1]] && HEX2DEC[src[2]]) { 463 | /* 464 | hi = HEX2DEC( src[1] )-1 << 4; 465 | lo = HEX2DEC( src[2] )-1; 466 | hl = hi | lo; 467 | */ 468 | uint32_t hl = ((HEX2DEC[src[1]] - 1) << 4) | (HEX2DEC[src[2]] - 1); 469 | 470 | // decodeURI did not decode the following characters: '#$&+,/:;=?@' 471 | if (dectype == DECODE_URI) { 472 | switch (hl) { 473 | case '#': 474 | case '$': 475 | case '&': 476 | case '+': 477 | case ',': 478 | case '/': 479 | case ':': 480 | case ';': 481 | case '=': 482 | case '?': 483 | case '@': 484 | luaL_addlstring(&b, (char 
*)src, 3); 485 | i += 2; 486 | continue; 487 | } 488 | } 489 | luaL_addchar(&b, hl); 490 | i += 2; 491 | continue; 492 | } 493 | // %u[hex]*4 494 | else if (src[1] == 'u' && HEX2DEC[src[2]] && HEX2DEC[src[3]] && 495 | HEX2DEC[src[4]] && HEX2DEC[src[5]]) { 496 | uint32_t hi = (HEX2DEC[src[2]] - 1) << 4 | (HEX2DEC[src[3]] - 1); 497 | uint32_t lo = (HEX2DEC[src[4]] - 1) << 4 | (HEX2DEC[src[5]] - 1); 498 | uint32_t hl = (hi << 8) | lo; 499 | 500 | switch (unicode_pt2utf8(&b, hl)) { 501 | case 0: 502 | i += 5; 503 | continue; 504 | case -2: 505 | // surrogate pairs 506 | if (src[6] == '%' && src[7] == 'u' && HEX2DEC[src[8]] && 507 | HEX2DEC[src[9]] && HEX2DEC[src[10]] && HEX2DEC[src[11]]) { 508 | size_t surp = 0x10000 + (hl - 0xD800) * 0x400; 509 | 510 | hi = (HEX2DEC[src[8]] - 1) << 4 | (HEX2DEC[src[9]] - 1); 511 | lo = (HEX2DEC[src[10]] - 1) << 4 | (HEX2DEC[src[11]] - 1); 512 | surp += ((hi << 8) | lo) - 0xDC00; 513 | if (unicode_pt2utf8(&b, surp) == 0) { 514 | i += 11; 515 | continue; 516 | } 517 | } 518 | } 519 | } 520 | lua_pushnil(L); 521 | lua_pushinteger(L, i + 1); 522 | return 2; 523 | } 524 | 525 | luaL_pushresult(&b); 526 | return 1; 527 | } 528 | 529 | static int decode_uri_lua(lua_State *L) 530 | { 531 | size_t len = 0; 532 | const char *src = lauxh_checklstring(L, 1, &len); 533 | 534 | lua_settop(L, 1); 535 | return decode(L, (char *)src, len, DECODE_URI); 536 | } 537 | 538 | static int decode_form_lua(lua_State *L) 539 | { 540 | size_t len = 0; 541 | const char *src = lauxh_checklstring(L, 1, &len); 542 | 543 | lua_settop(L, 1); 544 | return decode(L, (char *)src, len, DECODE_FORM); 545 | } 546 | 547 | static int decode_lua(lua_State *L) 548 | { 549 | size_t len = 0; 550 | const char *src = lauxh_checklstring(L, 1, &len); 551 | 552 | lua_settop(L, 1); 553 | return decode(L, (char *)src, len, DECODE_ALL); 554 | } 555 | 556 | LUALIB_API int luaopen_url_codec(lua_State *L) 557 | { 558 | struct luaL_Reg method[] = { 559 | {"encode_uri", encode_uri_lua }, 560 
| {"encode_form", encode_form_lua}, 561 | {"encode2396", encode2396_lua }, 562 | {"encode3986", encode3986_lua }, 563 | {"decode_uri", decode_uri_lua }, 564 | {"decode_form", decode_form_lua}, 565 | {"decode", decode_lua }, 566 | {NULL, NULL } 567 | }; 568 | int i; 569 | 570 | // method 571 | lua_newtable(L); 572 | i = 0; 573 | while (method[i].name) { 574 | lauxh_pushfn2tbl(L, method[i].name, method[i].func); 575 | i++; 576 | } 577 | 578 | return 1; 579 | } 580 | -------------------------------------------------------------------------------- /test/parse_test.lua: -------------------------------------------------------------------------------- 1 | local concat = table.concat 2 | local testcase = require('testcase') 3 | local parse = require('url.parse') 4 | 5 | function testcase.parse_empty_url() 6 | -- test that parse empty-url 7 | local u, cur, err = parse('') 8 | assert.equal(cur, 0) 9 | assert.is_nil(err) 10 | assert.equal(u, {}) 11 | end 12 | 13 | function testcase.parse_illegal_url() 14 | -- test that parse empty-url 15 | local u, cur, err = parse(string.char(0)) 16 | assert.equal(cur, 0) 17 | assert.is_nil(err) 18 | assert.equal(u, {}) 19 | end 20 | 21 | function testcase.parse_full_url() 22 | -- test that parse full url 23 | local s = concat({ 24 | 'http://', 25 | 'user:pswd@', 26 | 'host.com', 27 | ':8080', 28 | '/p/a/t/h/', 29 | '?q1=v1-1&q1=v1-1&q2=v2', 30 | '#hash', 31 | }) 32 | local u, cur, err = parse(s) 33 | assert.equal(cur, #s) 34 | assert.is_nil(err) 35 | assert.equal(u, { 36 | scheme = 'http', 37 | userinfo = 'user:pswd', 38 | user = 'user', 39 | password = 'pswd', 40 | host = 'host.com:8080', 41 | hostname = 'host.com', 42 | port = '8080', 43 | path = '/p/a/t/h/', 44 | query = '?q1=v1-1&q1=v1-1&q2=v2', 45 | fragment = 'hash', 46 | }) 47 | 48 | -- test that parse full ipv4 url 49 | s = concat({ 50 | 'http://', 51 | 'user:pswd@', 52 | '127.0.0.1', 53 | ':8080', 54 | '/p/a/t/h/', 55 | '?q1=v1-1&q1=v1-1&q2=v2', 56 | '#hash', 57 | }) 58 | u, cur, 
err = parse(s) 59 | assert.equal(cur, #s) 60 | assert.is_nil(err) 61 | assert.equal(u, { 62 | scheme = 'http', 63 | userinfo = 'user:pswd', 64 | user = 'user', 65 | password = 'pswd', 66 | host = '127.0.0.1:8080', 67 | hostname = '127.0.0.1', 68 | port = '8080', 69 | path = '/p/a/t/h/', 70 | query = '?q1=v1-1&q1=v1-1&q2=v2', 71 | fragment = 'hash', 72 | }) 73 | end 74 | 75 | function testcase.parse_without_password() 76 | -- test that parse url without userinfo 77 | local segments = { 78 | 'http://', 79 | 'user@', 80 | 'host.com', 81 | ':8080', 82 | '/p/a/t/h/', 83 | '?q1=v1-1&q1=v1-1&q2=v2', 84 | '#hash', 85 | } 86 | local s = concat(segments) 87 | local u, cur, err = parse(s) 88 | assert.equal(cur, #s) 89 | assert.is_nil(err) 90 | assert.equal(u, { 91 | scheme = 'http', 92 | userinfo = 'user', 93 | user = 'user', 94 | host = 'host.com:8080', 95 | hostname = 'host.com', 96 | port = '8080', 97 | path = '/p/a/t/h/', 98 | query = '?q1=v1-1&q1=v1-1&q2=v2', 99 | fragment = 'hash', 100 | }) 101 | end 102 | 103 | function testcase.parse_without_userinfo() 104 | -- test that parse url without userinfo 105 | local segments = { 106 | 'http://', 107 | 'host.com', 108 | ':8080', 109 | '/p/a/t/h/', 110 | '?q1=v1-1&q1=v1-1&q2=v2', 111 | '#hash', 112 | } 113 | local s = concat(segments) 114 | local u, cur, err = parse(s) 115 | assert.equal(cur, #s) 116 | assert.is_nil(err) 117 | assert.equal(u, { 118 | scheme = 'http', 119 | host = 'host.com:8080', 120 | hostname = 'host.com', 121 | port = '8080', 122 | path = '/p/a/t/h/', 123 | query = '?q1=v1-1&q1=v1-1&q2=v2', 124 | fragment = 'hash', 125 | }) 126 | end 127 | 128 | function testcase.parse_without_userinfo_hostname() 129 | -- test that parse url without userinfo 130 | local segments = { 131 | 'http://', 132 | ':8080', 133 | '/p/a/t/h/', 134 | '?q1=v1-1&q1=v1-1&q2=v2', 135 | '#hash', 136 | } 137 | local s = concat(segments) 138 | local u, cur, err = parse(s) 139 | assert.equal(cur, #s) 140 | assert.is_nil(err) 141 | 
assert.equal(u, { 142 | scheme = 'http', 143 | host = ':8080', 144 | hostname = '', 145 | port = '8080', 146 | path = '/p/a/t/h/', 147 | query = '?q1=v1-1&q1=v1-1&q2=v2', 148 | fragment = 'hash', 149 | }) 150 | end 151 | 152 | function testcase.parse_without_userinfo_port() 153 | -- test that parse url without userinfo and port 154 | local segments = { 155 | 'http://', 156 | 'host.com', 157 | '/p/a/t/h/', 158 | '?q1=v1-1&q1=v1-1&q2=v2', 159 | '#hash', 160 | } 161 | local s = concat(segments) 162 | local u, cur, err = parse(s) 163 | assert.equal(cur, #s) 164 | assert.is_nil(err) 165 | assert.equal(u, { 166 | scheme = 'http', 167 | host = 'host.com', 168 | hostname = 'host.com', 169 | path = '/p/a/t/h/', 170 | query = '?q1=v1-1&q1=v1-1&q2=v2', 171 | fragment = 'hash', 172 | }) 173 | end 174 | 175 | function testcase.parse_without_userinfo_port_pathname() 176 | -- test that parse url without userinfo, port and pathname 177 | local segments = { 178 | 'http://', 179 | 'host.com', 180 | '?q1=v1-1&q1=v1-1&q2=v2', 181 | '#hash', 182 | } 183 | local s = concat(segments) 184 | local u, cur, err = parse(s) 185 | assert.equal(cur, #s) 186 | assert.is_nil(err) 187 | assert.equal(u, { 188 | scheme = 'http', 189 | host = 'host.com', 190 | hostname = 'host.com', 191 | query = '?q1=v1-1&q1=v1-1&q2=v2', 192 | fragment = 'hash', 193 | }) 194 | end 195 | 196 | function testcase.parse_without_userinfo_port_pathname_query() 197 | -- test that parse url without userinfo, port, pathname and query 198 | local segments = { 199 | 'http://', 200 | 'host.com', 201 | '#hash', 202 | } 203 | local s = concat(segments) 204 | local u, cur, err = parse(s) 205 | assert.equal(cur, #s) 206 | assert.is_nil(err) 207 | assert.equal(u, { 208 | scheme = 'http', 209 | host = 'host.com', 210 | hostname = 'host.com', 211 | fragment = 'hash', 212 | }) 213 | end 214 | 215 | function testcase.parse_without_userinfo_port_pathname_query_fragment() 216 | -- test that parse url without userinfo, port, pathname, query 
and fragment 217 | local segments = { 218 | 'http://', 219 | 'host.com', 220 | } 221 | local s = concat(segments) 222 | local u, cur, err = parse(s) 223 | assert.equal(cur, #s) 224 | assert.is_nil(err) 225 | assert.equal(u, { 226 | scheme = 'http', 227 | host = 'host.com', 228 | hostname = 'host.com', 229 | }) 230 | end 231 | 232 | function testcase.parse_without_scheme() 233 | -- test that parse url without scheme, userinfo, port, pathname, query and fragment 234 | local segments = { 235 | 'host.com:8080', 236 | } 237 | local s = concat(segments) 238 | local u, cur, err = parse(s) 239 | assert.equal(cur, #s) 240 | assert.is_nil(err) 241 | assert.equal(u, { 242 | path = 'host.com:8080', 243 | }) 244 | end 245 | 246 | function testcase.parse_without_authority() 247 | -- test that parse file scheme 248 | local segments = { 249 | 'file://', 250 | '/p/a/t/h/', 251 | '?q1=v1-1&q1=v1-1&q2=v2', 252 | '#hash', 253 | } 254 | local s = concat(segments) 255 | local u, cur, err = parse(s) 256 | assert.equal(cur, #s) 257 | assert.is_nil(err) 258 | assert.equal(u, { 259 | scheme = 'file', 260 | path = '/p/a/t/h/', 261 | query = '?q1=v1-1&q1=v1-1&q2=v2', 262 | fragment = 'hash', 263 | }) 264 | end 265 | 266 | function testcase.parse_scheme() 267 | -- test that parse scheme with host 268 | local s = 'http://localhost' 269 | local u, cur, err = parse(s) 270 | assert.equal(cur, #s) 271 | assert.is_nil(err) 272 | assert.equal(u, { 273 | scheme = 'http', 274 | host = 'localhost', 275 | hostname = 'localhost', 276 | }) 277 | 278 | -- test that parse scheme with ipv6 host 279 | s = 'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:80' 280 | u, cur, err = parse(s) 281 | assert.equal(cur, #s) 282 | assert.is_nil(err) 283 | assert.equal(u, { 284 | scheme = 'https', 285 | host = '[2001:db8:85a3:8d3:1319:8a2e:370:7348]:80', 286 | hostname = '[2001:db8:85a3:8d3:1319:8a2e:370:7348]', 287 | port = '80', 288 | }) 289 | 290 | -- test that parse scheme with port 291 | s = 'https://:80' 292 | u, cur, 
err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'https',
        host = ':80',
        hostname = '',
        port = '80',
    })

    -- test that a scheme followed by a relative pathname is parsed
    s = 'https://./foo/bar'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'https',
        path = './foo/bar',
    })

    -- test that an error is returned if the scheme format is invalid
    s = 'http:/localhost'
    u, cur, err = parse(s)
    assert.equal(cur, 4)
    assert.equal(err, ':')
    assert.equal(u, {})
end

-- Host component: plain names, percent-encoded names, IPv4 literals, empty
-- userinfo, and the error cases. On error, `cur` is the number of bytes
-- consumed before the offending byte and `err` is that byte.
function testcase.parse_host()
    -- test that a host is parsed
    local s = 'http://example.com'
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = 'example.com',
        hostname = 'example.com',
    })

    -- test that a host containing a percent-encoded string is parsed
    -- (the encoded form is kept as-is)
    s = 'http://ex%45mple.com'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = 'ex%45mple.com',
        hostname = 'ex%45mple.com',
    })

    -- test that an ipv4 host is parsed
    s = 'http://127.0.0.1'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = '127.0.0.1',
        hostname = '127.0.0.1',
    })

    -- test that a host with an empty userinfo is parsed
    s = 'http://@example.com'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = 'example.com',
        hostname = 'example.com',
    })

    -- test that an error is returned if the userinfo delimiter appears more
    -- than once
    s = 'http://@@example.com'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://@')
    assert.equal(err, '@')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if an invalid character is found
    s = 'http://example.com|'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://example.com')
    assert.equal(err, '|')
    assert.equal(u, {
        scheme = 'http',
        host = 'example.com',
        hostname = 'example.com',
    })

    -- test that an error is returned if the host format is invalid
    s = 'http://$localhost'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://')
    assert.equal(err, '$')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if the host contains an invalid
    -- percent-encoded string
    s = 'http://exa%2mple.com/foo/bar'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://exa')
    assert.equal(err, '%')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if there is no host after the userinfo
    s = 'http://user:pswd@/foo/bar'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://user:pswd@')
    assert.equal(err, '/')
    assert.equal(u, {
        scheme = 'http',
        password = 'pswd',
        user = 'user',
        userinfo = 'user:pswd',
    })

    -- test that an error is returned if the userinfo is declared more than
    -- once
    s = 'http://user:pswd@example.com@example.com/foo/bar'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://user:pswd@example.com')
    assert.equal(err, '@')
    assert.equal(u, {
        scheme = 'http',
        password = 'pswd',
        user = 'user',
        userinfo = 'user:pswd',
    })
end

-- Port component: with and without a hostname, followed by a query or a
-- fragment, an omitted port after ':', and out-of-range / non-digit ports.
function testcase.parse_port()
    -- test that a port is parsed
    local s = 'http://example.com:80'
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = 'example.com:80',
        hostname = 'example.com',
        port = '80',
    })

    -- test that a port without a host is parsed
    s = 'http://:80'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = ':80',
        hostname = '',
        port = '80',
    })

    -- test that a port followed by a query is parsed
    s = 'http://:80?foo=bar'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = ':80',
        hostname = '',
        port = '80',
        query = '?foo=bar',
    })

    -- test that a port followed by a fragment is parsed
    s = 'http://:65535#foo=bar'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        scheme = 'http',
        host = ':65535',
        hostname = '',
        port = '65535',
        fragment = 'foo=bar',
    })

    -- test that the port can be omitted after ':'
    for _, v in ipairs({
        {
            s = 'http://example.com:?foo=bar',
            exp = {
                scheme = 'http',
                host = 'example.com',
                hostname = 'example.com',
                query = '?foo=bar',
            },
        },
        {
            s = 'http://:?foo=bar',
            exp = {
                scheme = 'http',
                query = '?foo=bar',
            },
        },
    }) do
        u, cur, err = parse(v.s)
        assert.equal(cur, #v.s)
        assert.is_nil(err)
        assert.equal(u, v.exp)
    end

    -- test that an error is returned if the port is greater than 65535
    s = 'http://example.com:65536/foo/bar'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://example.com:6553')
    assert.equal(err, '6')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if the port contains a non-digit
    -- character
    s = 'http://user:pswd@example:80pswd:?foo'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://user:pswd@example:80')
    assert.equal(err, 'p')
    assert.equal(u, {
        scheme = 'http',
        userinfo = 'user:pswd',
        user = 'user',
        password = 'pswd',
    })
end

-- Userinfo component: "user:password@" in front of the host, and the error
-- cases where it is malformed or never terminated by '@'.
function testcase.parse_userinfo()
    -- test that the userinfo is parsed
    local s = 'http://user:ps%77d@example.com/foo/bar'
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)

    assert.equal(u, {
        scheme = 'http',
        host = 'example.com',
        hostname = 'example.com',
        userinfo = 'user:ps%77d',
        user = 'user',
        password = 'ps%77d',
        path = '/foo/bar',
    })

    -- test that an error is returned if the userinfo contains an invalid
    -- percent-encoded string
    s = 'http://user:pswd%2@example.com'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://user:pswd')
    assert.equal(err, '%')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if a delimiter appears before the
    -- userinfo is terminated by '@'
    s = 'http://user:psw?d@example.com'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), 'http://user:psw')
    assert.equal(err, '?')
    assert.equal(u, {
        scheme = 'http',
    })

    -- test that an error is returned if the input ends before the userinfo
    -- is terminated by '@' (the reported byte is the string terminator)
    s = 'http://user:pswd'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.equal(err, '\0')
    assert.equal(u, {
        scheme = 'http',
    })
end

-- Path component of a URL that has neither scheme nor authority.
function testcase.parse_pathname()
    -- test that a path is parsed
    local s = '/foo/bar/baz%20qux'
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        path = '/foo/bar/baz%20qux',
    })

    -- test that a path followed by a fragment is parsed
    s = '/foo/bar/baz%20qux#fragment-value'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        path = '/foo/bar/baz%20qux',
        fragment = 'fragment-value',
    })

    -- test that an error is returned if the path contains an invalid
    -- character (the part before it is still reported as the path)
    s = '/foo/bar|baz%20qux'
    u, cur, err = parse(s, true)
    assert.equal(string.sub(s, 1, cur), '/foo/bar')
    assert.equal(err, '|')
    assert.equal(u, {
        path = '/foo/bar',
    })

    -- test that an error is returned if the path contains an invalid
    -- percent-encoded string (no path is reported in that case)
    s = '/foo/bar/baz%2qux'
    u, cur, err = parse(s, true)
    assert.equal(string.sub(s, 1, cur), '/foo/bar/baz')
    assert.equal(err, '%')
    assert.equal(u, {})
end

-- Query component returned as a raw string (query-params parsing disabled).
function testcase.parse_query_string()
    -- test that a query is parsed
    local s = '?q1=v1-1%20&q1=v1-1&q2=v2'
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        query = s,
    })

    -- test that a query string followed by a fragment is parsed
    s = '?q1=v1-1#&q2=v2'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        query = '?q1=v1-1',
        fragment = '&q2=v2',
    })

    -- test that an error is returned if the query contains an invalid
    -- character
    s = '?q1=v1-1|#&q2=v2'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), '?q1=v1-1')
    assert.equal(err, '|')
    assert.equal(u, {
        query = '?q1=v1-1',
    })

    -- test that an error is returned if the query contains an invalid
    -- percent-encoded string
    s = '?q1=v1-1&%2q2=v2'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), '?q1=v1-1&')
    assert.equal(err, '%')
    assert.equal(u, {
        query = '?q1=v1-1&',
    })
end

-- Query component with the second argument set to true: in addition to the
-- raw `query` string, a decoded `query_params` table (key -> list of values)
-- is expected.
function testcase.parse_query_params()
    -- test that the query parameters are parsed; empty pairs are skipped,
    -- '+' and percent-encoded bytes are decoded in keys and values
    local s = '?&key&q1=v1-1&&q1=v1-1%20&=&q2=v2&q+%233=v3+val%23%5a%7A'
    local u, cur, err = parse(s, true)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        query = s,
        query_params = {
            [''] = {
                '',
            },
            key = {
                '',
            },
            q1 = {
                'v1-1',
                'v1-1 ',
            },
            q2 = {
                'v2',
            },
            ['q #3'] = {
                'v3 val#Zz',
            },
        },
    })

    -- test that a lone query delimiter yields no fields
    s = '?'
    u, cur, err = parse(s, true)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {})

    -- test that an error is returned if the query contains an invalid
    -- character
    s = '?q1=v1-1&q2=v2|'
    u, cur, err = parse(s, true)
    assert.equal(string.sub(s, 1, cur), '?q1=v1-1&q2=v2')
    assert.equal(err, '|')
    assert.equal(u, {
        query = '?q1=v1-1&q2=v2',
        query_params = {
            q1 = {
                'v1-1',
            },
            q2 = {
                'v2',
            },
        },
    })

    -- test that an error is returned if the query contains an invalid
    -- percent-encoded string
    s = '?q1=v1-1&q2=%2v2'
    u, cur, err = parse(s, true)
    assert.equal(string.sub(s, 1, cur), '?q1=v1-1&q2=')
    assert.equal(err, '%')
    assert.equal(u, {
        query = '?q1=v1-1&q2=',
        query_params = {
            q1 = {
                'v1-1',
            },
            q2 = {
                '',
            },
        },
    })
end

-- Fourth argument set to true: the whole input is treated as a query string
-- even though it has no leading '?'.
function testcase.parse_as_query()
    -- test that the input is parsed as a query
    local s = 'q1=v1-1&q1=v1-1%20&q2=v2'
    local u, cur, err = parse(s, true, nil, true, nil)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        query = s,
        query_params = {
            q1 = {
                'v1-1',
                'v1-1 ',
            },
            q2 = {
                'v2',
            },
        },
    })

    -- test that an error is returned if the input contains an invalid
    -- character
    s = 'q1=v1-1&q2=v2|'
    u, cur, err = parse(s, true, nil, true)
    assert.equal(string.sub(s, 1, cur), 'q1=v1-1&q2=v2')
    assert.equal(err, '|')
    assert.equal(u, {
        query = 'q1=v1-1&q2=v2',
        query_params = {
            q1 = {
                'v1-1',
            },
            q2 = {
                'v2',
            },
        },
    })

    -- test that an error is returned if the input contains an invalid
    -- percent-encoded string
    s = 'q1=v1-1&q2=v2-1&q2=%2v2-2'
    u, cur, err = parse(s, true, nil, true)
    assert.equal(string.sub(s, 1, cur), 'q1=v1-1&q2=v2-1&q2=')
    assert.equal(cur, 19)
    assert.equal(err, '%')
    assert.equal(u, {
        query = 'q1=v1-1&q2=v2-1&q2=',
        query_params = {
            q1 = {
                'v1-1',
            },
            q2 = {
                'v2-1',
                '',
            },
        },
    })
end

-- Fragment component: everything after the first '#', which may itself
-- contain '?', '#' and '/'.
function testcase.parse_fragment()
    -- test that a fragment is parsed
    local segments = {
        '#foo?bar#baz%20/qux',
    }
    local s = concat(segments)
    local u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        fragment = 'foo?bar#baz%20/qux',
    })

    -- test that an empty fragment is parsed
    s = '#'
    u, cur, err = parse(s)
    assert.equal(cur, #s)
    assert.is_nil(err)
    assert.equal(u, {
        fragment = '',
    })

    -- test that an error is returned if the fragment contains an invalid
    -- percent-encoded string (no fragment is reported in that case)
    s = '#foo%1'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), '#foo')
    assert.equal(err, '%')
    assert.equal(u, {})

    -- test that an error is returned if the fragment contains an invalid
    -- character (the part before it is still reported as the fragment)
    s = '#fo|o'
    u, cur, err = parse(s)
    assert.equal(string.sub(s, 1, cur), '#fo')
    assert.equal(err, '|')
    assert.equal(u, {
        fragment = 'fo',
    })
end


--------------------------------------------------------------------------------
/src/parse.c:
--------------------------------------------------------------------------------
/**
 *  Copyright (C) 2017 Masatoshi Teruya
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a
 *  copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * src/parse.c 23 | * lua-url 24 | * Created by Masatoshi Teruya on 17/10/19. 25 | * 26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | /** 37 | * RFC 3986 38 | * 39 | * pct-encoded = "%" HEXDIG HEXDIG 40 | * pchar = "!" / "$" / "%" / "&" / "'" / "(" / ")" / "*" / "+" / "," 41 | * / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~" 42 | * / ALPHA / DIGIT / 43 | * 44 | * userinfo = *( pchar except "@" ) 45 | * hostname = *( pchar except ":" / "@" ) 46 | * query = *( pchar / "/" / "?" ) 47 | * fragment = *( pchar / "/" / "?" ) 48 | * 49 | * 50 | * WHATWG-URL 51 | * 52 | * URL-units = pct-encoded / URL-code-points 53 | * URL-code-points = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," 54 | * / "-" / "." / "/" / ":" / ";" / "=" / "?" / "@" / "_" 55 | * / "~" / ALPHA / DIGIT 56 | * 57 | * followings are jump-symbols; 58 | * path-delimiter = "/" 59 | * port = ":" 60 | * userinfo = "@" 61 | * query = "?" 
62 | * fragment = "#" 63 | */ 64 | static const unsigned char URIC[256] = { 65 | // ctrl-code: 0-32 66 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67 | 0, 0, 0, 0, 0, 0, 68 | // SP " 69 | 0, 0, '!', 0, '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', 70 | // use query and fragment 71 | '/', 72 | // DIGIT 73 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 74 | // use hostname 75 | ':', 76 | // < > 77 | ';', 0, '=', 0, 78 | // use query and fragment 79 | '?', '@', 80 | // ALPHA 81 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 82 | 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 83 | // [ \ ] ^ ` 84 | 'Z', 0, 0, 0, 0, '_', 0, 85 | // ALPHA 86 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 87 | 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 88 | // { | } 89 | 'z', 0, 0, 0, '~'}; 90 | 91 | /** 92 | * pct-encoded = "%" HEXDIG HEXDIG 93 | * HEXDIG = "A" / "B" / "C" / "D" / "E" / "F" 94 | * / "a" / "b" / "c" / "d" / "e" / "f" 95 | * / DIGIT 96 | */ 97 | static inline int is_percentencoded(const unsigned char *str) 98 | { 99 | return isxdigit(str[0]) && isxdigit(str[1]); 100 | } 101 | 102 | static inline int unhex(unsigned char c) 103 | { 104 | if ('0' <= c && c <= '9') { 105 | return c - '0'; 106 | } else if ('a' <= c && c <= 'f') { 107 | return c - 'a' + 10; 108 | } else if ('A' <= c && c <= 'F') { 109 | return c - 'A' + 10; 110 | } 111 | return 0; 112 | } 113 | 114 | static inline void unescape(lua_State *L, const char *str, size_t len) 115 | { 116 | luaL_Buffer b = {0}; 117 | 118 | luaL_buffinit(L, &b); 119 | for (size_t i = 0; i < len; i++) { 120 | switch (str[i]) { 121 | case '+': 122 | luaL_addchar(&b, ' '); 123 | continue; 124 | 125 | case '%': 126 | luaL_addchar(&b, (unhex(str[i + 1]) << 4) | unhex(str[i + 2])); 127 | i += 2; 128 | continue; 129 | 130 | default: 131 | luaL_addchar(&b, str[i]); 132 | } 133 | } 134 | luaL_pushresult(&b); 135 | } 136 | 
137 | /** 138 | * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 139 | * dec-octet = DIGIT ; 0-9 140 | * / %x31-39 DIGIT ; 10-99 141 | * / "1" 2DIGIT ; 100-199 142 | * / "2" %x30-34 DIGIT ; 200-249 143 | * / "25" %x30-35 ; 250-255 144 | */ 145 | static int parse_ipv4(unsigned char *url, size_t urllen, size_t *cur) 146 | { 147 | size_t pos = *cur; 148 | size_t head = pos; 149 | int nseg = 0; 150 | int dec = -1; 151 | 152 | for (; pos < urllen; pos++) { 153 | switch (url[pos]) { 154 | case '0' ... '9': 155 | if (pos - head < 4) { 156 | // convert to integer 157 | if (dec == -1) { 158 | dec = url[pos] - '0'; 159 | } else { 160 | dec = (dec << 3) + (dec << 1) + (url[pos] - '0'); 161 | } 162 | 163 | if (dec <= 0xFF) { 164 | continue; 165 | } 166 | } 167 | break; 168 | 169 | case '.': 170 | if (pos - head && nseg < 3) { 171 | dec = -1; 172 | head = pos + 1; 173 | nseg++; 174 | continue; 175 | } 176 | break; 177 | 178 | default: 179 | // done 180 | if (nseg == 3 && dec != -1) { 181 | *cur = pos; 182 | return url[pos]; 183 | } 184 | } 185 | break; 186 | } 187 | 188 | // illegal byte sequence 189 | *cur = pos; 190 | return url[head]; 191 | } 192 | 193 | /** 194 | * IPv6address = 6( h16 ":" ) ls32 195 | * / "::" 5( h16 ":" ) ls32 196 | * / [ h16 ] "::" 4( h16 ":" ) ls32 197 | * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 198 | * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 199 | * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 200 | * / [ *4( h16 ":" ) h16 ] "::" ls32 201 | * / [ *5( h16 ":" ) h16 ] "::" h16 202 | * / [ *6( h16 ":" ) h16 ] "::" 203 | * 204 | * ls32 = ( h16 ":" h16 ) / IPv4address 205 | * h16 = 1*4HEXDIG 206 | */ 207 | static int parse_ipv6(unsigned char *url, size_t urllen, size_t *cur) 208 | { 209 | size_t pos = *cur; 210 | size_t head = 0; 211 | int zerogrp = 0; 212 | int nbit = 0; 213 | 214 | if (url[pos] == ':') { 215 | zerogrp = url[pos + 1] == ':'; 216 | // not zero group 217 | if (!zerogrp) { 218 | return url[pos + 1]; 219 | } 220 | 
pos += 2; 221 | nbit += 16; 222 | } 223 | 224 | for (; pos < urllen; pos++) { 225 | switch (url[pos]) { 226 | // found finish 227 | case ']': 228 | *cur = pos; 229 | return ']'; 230 | 231 | // zero-group 232 | case ':': 233 | // illegal byte sequence 234 | // zero group already defined 235 | if (zerogrp) { 236 | *cur = pos; 237 | return url[pos]; 238 | } 239 | nbit += 16; 240 | zerogrp = 1; 241 | continue; 242 | 243 | // h16 244 | case '0' ... '9': 245 | case 'A' ... 'F': 246 | case 'a' ... 'f': 247 | if (nbit < 128) { 248 | nbit += 16; 249 | head = pos; 250 | if (isxdigit(url[++pos]) && isxdigit(url[++pos]) && 251 | isxdigit(url[++pos])) { 252 | pos++; 253 | } 254 | switch (url[pos]) { 255 | case ':': 256 | if (url[pos + 1] != ']') { 257 | continue; 258 | } 259 | break; 260 | 261 | // found finish 262 | case ']': 263 | if (nbit == 128) { 264 | *cur = pos; 265 | return ']'; 266 | } 267 | break; 268 | 269 | // embed ipv4 270 | case '.': 271 | // if nbit is less than or equal to 112, can be 272 | // embedded ipv4 address (32 bit). 
273 | // 274 | // max nbit = 128 bit(IPv6) - 32 bit(IPv4) 275 | // = 96 276 | if (nbit <= 112) { 277 | *cur = head; 278 | return parse_ipv4(url, urllen, cur); 279 | } 280 | break; 281 | } 282 | } 283 | } 284 | 285 | break; 286 | } 287 | 288 | // illegal byte sequence 289 | *cur = pos; 290 | return url[pos]; 291 | } 292 | 293 | typedef struct { 294 | lua_State *L; 295 | const char *s; 296 | size_t head; 297 | const char *key; 298 | size_t klen; 299 | int is_key_encoded; 300 | } query_param_t; 301 | 302 | static inline void query_param_init(query_param_t *p, lua_State *L, 303 | const char *s, size_t head) 304 | { 305 | *p = (query_param_t){L, s, head, NULL, 0, 0}; 306 | } 307 | 308 | static inline void query_param_set_keytail(query_param_t *p, size_t pos, 309 | int is_encoded) 310 | { 311 | p->key = p->s + p->head; 312 | p->klen = pos - p->head; 313 | p->head = pos + 1; 314 | p->is_key_encoded = is_encoded; 315 | } 316 | 317 | static int query_param_set_tail(query_param_t *p, size_t pos, int is_encoded) 318 | { 319 | lua_State *L = p->L; 320 | const char *key = p->key; 321 | size_t klen = p->klen; 322 | const char *val = ""; 323 | size_t vlen = 0; 324 | size_t len = pos - p->head; 325 | int is_key_encoded = p->is_key_encoded; 326 | int is_val_encoded = 0; 327 | 328 | if (key) { 329 | if (len) { 330 | // key=val 331 | val = p->s + p->head; 332 | vlen = len; 333 | is_val_encoded = is_encoded; 334 | } 335 | } else if (len) { 336 | // key="" 337 | key = p->s + p->head; 338 | klen = len; 339 | is_key_encoded = is_encoded; 340 | } else { 341 | return 0; 342 | } 343 | 344 | // get value table 345 | if (is_key_encoded) { 346 | unescape(L, key, klen); 347 | } else { 348 | lua_pushlstring(L, key, klen); 349 | } 350 | lua_pushvalue(L, -1); 351 | lua_rawget(L, -3); 352 | if (lua_istable(L, -1)) { 353 | // value table exists 354 | lua_replace(L, -2); 355 | } else { 356 | // create value table 357 | int ref = LUA_NOREF; 358 | lua_pop(L, 1); 359 | lua_createtable(L, 1, 0); 360 | 
ref = lauxh_ref(L); 361 | lauxh_pushref(L, ref); 362 | lua_rawset(L, -3); 363 | lauxh_pushref(L, ref); 364 | lauxh_unref(L, ref); 365 | } 366 | 367 | // push value to value table 368 | if (is_val_encoded) { 369 | unescape(L, val, vlen); 370 | } else { 371 | lua_pushlstring(L, val, vlen); 372 | } 373 | lua_rawseti(L, -2, lauxh_rawlen(L, -2) + 1); 374 | lua_pop(L, 1); 375 | 376 | p->head = pos + 1; 377 | p->key = NULL; 378 | p->klen = 0; 379 | p->is_key_encoded = 0; 380 | 381 | return 1; 382 | } 383 | 384 | static inline int parse_query(lua_State *L, unsigned char *url, size_t urllen, 385 | size_t *cur, int parse_params) 386 | { 387 | size_t head = *cur; 388 | size_t pos = 0; 389 | query_param_t p = {0}; 390 | int nparam = 0; 391 | int is_encoded = 0; 392 | 393 | // skip query delimiter 394 | if (url[head] == '?') { 395 | head++; 396 | } 397 | // skip param separator 398 | while (url[head] == '&') { 399 | head++; 400 | } 401 | pos = head; 402 | 403 | if (parse_params) { 404 | query_param_init(&p, L, (const char *)url, pos); 405 | lua_pushstring(L, "query_params"); 406 | lua_newtable(L); 407 | } 408 | 409 | for (; pos < urllen; pos++) { 410 | switch (URIC[url[pos]]) { 411 | // illegal byte sequence 412 | case 0: 413 | // fallthrough 414 | 415 | // fragment 416 | case '#': 417 | goto PARSE_DONE; 418 | 419 | // percent-encoded 420 | case '%': 421 | // invalid percent-encoded format 422 | if (!is_percentencoded(url + pos + 1)) { 423 | goto PARSE_DONE; 424 | } 425 | // skip "%" 426 | pos += 2; 427 | // fallthrough 428 | 429 | case '+': 430 | is_encoded = 1; 431 | break; 432 | 433 | // key-value separator 434 | case '=': 435 | if (parse_params) { 436 | query_param_set_keytail(&p, pos, is_encoded); 437 | is_encoded = 0; 438 | } 439 | break; 440 | 441 | // next key-value pair 442 | case '&': 443 | if (parse_params) { 444 | nparam += query_param_set_tail(&p, pos, is_encoded); 445 | // skip param separator 446 | while (url[pos + 1] == '&') { 447 | pos++; 448 | } 449 | if 
(url[pos] == '&') { 450 | p.head = pos + 1; 451 | } 452 | } 453 | 454 | break; 455 | } 456 | } 457 | 458 | PARSE_DONE: 459 | // add query_params and query field 460 | if (parse_params) { 461 | nparam += query_param_set_tail(&p, pos, is_encoded); 462 | if (nparam) { 463 | lua_rawset(L, -3); 464 | } else { 465 | lua_pop(L, 2); 466 | } 467 | } 468 | if (pos > head) { 469 | lauxh_pushlstr2tbl(L, "query", (const char *)url + *cur, pos - *cur); 470 | } 471 | *cur = pos; 472 | return url[pos]; 473 | } 474 | 475 | /** 476 | * fragment = *( pchar / "/" / "?" ) 477 | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 478 | * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 479 | * pct-encoded = "%" HEXDIG HEXDIG 480 | * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 481 | * / "*" / "+" / "," / ";" / "=" 482 | */ 483 | static int parse_fragment(lua_State *L, unsigned char *url, size_t urllen, 484 | size_t *cur) 485 | { 486 | size_t pos = *cur; 487 | size_t head = pos; 488 | 489 | // parse fragment 490 | for (; pos < urllen; pos++) { 491 | switch (URIC[url[pos]]) { 492 | // illegal byte sequence 493 | case 0: 494 | lauxh_pushlstr2tbl(L, "fragment", (const char *)url + head, 495 | pos - head); 496 | *cur = pos; 497 | return url[pos]; 498 | 499 | // percent-encoded 500 | case '%': 501 | // invalid percent-encoded format 502 | if (!is_percentencoded(url + pos + 1)) { 503 | *cur = pos; 504 | return '%'; 505 | } 506 | // skip "%" 507 | pos += 2; 508 | } 509 | } 510 | 511 | lauxh_pushlstr2tbl(L, "fragment", (const char *)url + head, pos - head); 512 | *cur = pos; 513 | 514 | return 0; 515 | } 516 | 517 | /* 518 | URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 519 | 520 | host = IP-literal / IPv4address / reg-name 521 | 522 | IP-literal = "[" ( IPv6address / IPvFuture ) "]" 523 | 524 | IPvFuture = "v" 1*HEXDIG "." 
1*( unreserved / sub-delims / ":" ) 525 | 526 | IPv6address = 6( h16 ":" ) ls32 527 | / "::" 5( h16 ":" ) ls32 528 | / [ h16 ] "::" 4( h16 ":" ) ls32 529 | / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 530 | / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 531 | / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 532 | / [ *4( h16 ":" ) h16 ] "::" ls32 533 | / [ *5( h16 ":" ) h16 ] "::" h16 534 | / [ *6( h16 ":" ) h16 ] "::" 535 | 536 | h16 = 1*4HEXDIG 537 | ls32 = ( h16 ":" h16 ) / IPv4address 538 | 539 | IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 540 | dec-octet = DIGIT ; 0-9 541 | / %x31-39 DIGIT ; 10-99 542 | / "1" 2DIGIT ; 100-199 543 | / "2" %x30-34 DIGIT ; 200-249 544 | / "25" %x30-35 ; 250-255 545 | 546 | */ 547 | static int parse_lua(lua_State *L) 548 | { 549 | int argc = lua_gettop(L); 550 | size_t urllen = 0; 551 | const char *src = luaL_checklstring(L, 1, &urllen); 552 | unsigned char *url = (unsigned char *)src; 553 | unsigned char c = 0; 554 | size_t head = 0; 555 | size_t tail = 0; 556 | size_t phead = 0; 557 | size_t cur = 0; 558 | size_t userinfo = 0; 559 | size_t portnum = 0; 560 | int chk_scheme = 1; 561 | int omit_hostname = 0; 562 | int parse_params = 0; 563 | int is_querystring = 0; 564 | 565 | // check arguments 566 | if (argc > 4) { 567 | argc = 4; 568 | lua_settop(L, 4); 569 | } 570 | switch (argc) { 571 | case 4: 572 | // url is query-string 573 | is_querystring = lauxh_optboolean(L, 4, 0); 574 | case 3: 575 | // initial cursor option 576 | cur = lauxh_optuint64(L, 3, cur); 577 | case 2: 578 | // parse query-params option 579 | parse_params = lauxh_optboolean(L, 2, 0); 580 | } 581 | 582 | lua_settop(L, 1); 583 | lua_newtable(L); 584 | if (!urllen) { 585 | lua_pushinteger(L, 0); 586 | return 2; 587 | } else if (is_querystring) { 588 | goto PARSE_QUERY; 589 | } 590 | 591 | // check first byte 592 | switch (url[cur]) { 593 | // illegal byte sequence 594 | case 0: 595 | lua_pushinteger(L, cur); 596 | return 2; 597 | 598 | // 
query-string 599 | case '?': 600 | goto PARSE_QUERY; 601 | 602 | // fragment 603 | case '#': 604 | cur++; 605 | goto PARSE_FRAGMENT; 606 | } 607 | 608 | PARSE_PATHNAME: 609 | // pathname 610 | head = cur; 611 | for (; cur < urllen; cur++) { 612 | switch (URIC[url[cur]]) { 613 | // illegal byte sequence 614 | case 0: 615 | lauxh_pushlstr2tbl(L, "path", src + head, cur - head); 616 | lua_pushinteger(L, cur); 617 | lua_pushlstring(L, src + cur, 1); 618 | return 3; 619 | 620 | // query-string 621 | case '?': 622 | lauxh_pushlstr2tbl(L, "path", src + head, cur - head); 623 | goto PARSE_QUERY; 624 | 625 | // fragment 626 | case '#': 627 | lauxh_pushlstr2tbl(L, "path", src + head, cur - head); 628 | cur++; 629 | goto PARSE_FRAGMENT; 630 | 631 | // percent-encoded 632 | case '%': 633 | // invalid percent-encoded format 634 | if (!is_percentencoded(url + cur + 1)) { 635 | lua_pushinteger(L, cur); 636 | lua_pushlstring(L, src + cur, 1); 637 | return 3; 638 | } 639 | // skip "%" 640 | cur += 2; 641 | // fallthrough to disable chk_scheme 642 | 643 | // set chk_scheme to 0 if not scheme characters 644 | case '!': 645 | case '$': 646 | // 0x24-2A = & ' ( ) * 647 | case '&' ... 
'*': 648 | case ',': 649 | case '.': 650 | case '/': 651 | case ';': 652 | case '=': 653 | case '@': 654 | case '_': 655 | case '~': 656 | chk_scheme = 0; 657 | break; 658 | 659 | case ':': 660 | // use as scheme separator 661 | if (chk_scheme) { 662 | chk_scheme = 0; 663 | goto PARSE_SCHEME; 664 | } 665 | break; 666 | } 667 | } 668 | 669 | // set path 670 | lauxh_pushlstr2tbl(L, "path", src + head, cur - head); 671 | lua_pushinteger(L, cur); 672 | return 2; 673 | 674 | PARSE_QUERY: 675 | switch (parse_query(L, url, urllen, &cur, parse_params)) { 676 | // done 677 | case 0: 678 | lua_pushinteger(L, cur); 679 | return 2; 680 | 681 | // fragment 682 | case '#': 683 | cur++; 684 | goto PARSE_FRAGMENT; 685 | 686 | // illegal byte sequence 687 | default: 688 | lua_pushinteger(L, cur); 689 | lua_pushlstring(L, src + cur, 1); 690 | return 3; 691 | } 692 | 693 | PARSE_FRAGMENT: 694 | // parse fragment 695 | head = cur; 696 | switch (parse_fragment(L, url, urllen, &cur)) { 697 | // done 698 | case 0: 699 | lua_pushinteger(L, cur); 700 | return 2; 701 | 702 | // illegal byte sequence 703 | default: 704 | lua_pushinteger(L, cur); 705 | lua_pushlstring(L, src + cur, 1); 706 | return 3; 707 | } 708 | 709 | PARSE_SCHEME: 710 | // must be double-slash 711 | if ((cur + 2) >= urllen || url[cur + 1] != '/' || url[cur + 2] != '/') { 712 | lua_pushinteger(L, cur); 713 | lua_pushlstring(L, src + cur, 1); 714 | return 3; 715 | } 716 | // set "scheme" to scheme field 717 | lauxh_pushlstr2tbl(L, "scheme", src + head, cur - head); 718 | // skip "://" 719 | cur += 3; 720 | 721 | PARSE_HOST: 722 | // parse host 723 | head = cur; 724 | // check first byte 725 | switch (url[cur]) { 726 | // parse ipv6 727 | case '[': 728 | goto PARSE_IPV6; 729 | 730 | case '/': 731 | case '.': 732 | // host required if userinfo is defined 733 | if (!userinfo) { 734 | // some scheme (e.g. 
file) can be omit parsing the authority 735 | goto PARSE_PATHNAME; 736 | } 737 | // illegal byte sequence 738 | lua_pushinteger(L, cur); 739 | lua_pushlstring(L, src + cur, 1); 740 | return 3; 741 | 742 | case ':': 743 | omit_hostname = 1; 744 | tail = cur; 745 | cur++; 746 | goto PARSE_PORT; 747 | 748 | case '@': 749 | CHECK_USERINFO: 750 | // userinfo already parsed 751 | if (userinfo) { 752 | // illegal byte sequence 753 | lua_pushinteger(L, cur); 754 | lua_pushlstring(L, src + cur, 1); 755 | return 3; 756 | } 757 | // previous string is treated as userinfo 758 | if (cur - head) { 759 | lauxh_pushlstr2tbl(L, "userinfo", src + head, cur - head); 760 | lauxh_pushlstr2tbl(L, "user", src + head, cur - head); 761 | } 762 | userinfo = cur; 763 | cur++; 764 | goto PARSE_HOST; 765 | 766 | default: 767 | // host must be started with ALPHA / DIGIT / '%' (percent-encoded) 768 | if (url[cur] != '%' && !isalnum(url[cur])) { 769 | // illegal byte sequence 770 | lua_pushinteger(L, cur); 771 | lua_pushlstring(L, src + cur, 1); 772 | return 3; 773 | } 774 | } 775 | 776 | #define push_host() \ 777 | do { \ 778 | lauxh_pushlstr2tbl(L, "host", src + head, cur - head); \ 779 | lauxh_pushlstr2tbl(L, "hostname", src + head, cur - head); \ 780 | } while (0) 781 | 782 | for (; cur < urllen; cur++) { 783 | switch (URIC[url[cur]]) { 784 | // illegal byte sequence 785 | case 0: 786 | push_host(); 787 | lua_pushinteger(L, cur); 788 | lua_pushlstring(L, src + cur, 1); 789 | return 3; 790 | 791 | case '.': 792 | continue; 793 | 794 | case '@': 795 | goto CHECK_USERINFO; 796 | 797 | case ':': 798 | tail = cur; 799 | cur++; 800 | goto PARSE_PORT; 801 | 802 | case '/': 803 | push_host(); 804 | goto PARSE_PATHNAME; 805 | 806 | case '?': 807 | push_host(); 808 | goto PARSE_QUERY; 809 | 810 | case '#': 811 | push_host(); 812 | cur++; 813 | goto PARSE_FRAGMENT; 814 | 815 | // percent-encoded 816 | case '%': 817 | // invalid percent-encoded format 818 | if (!is_percentencoded(url + cur + 1)) { 819 | 
lua_pushinteger(L, cur); 820 | lua_pushlstring(L, src + cur, 1); 821 | return 3; 822 | } 823 | // skip "%" 824 | cur += 2; 825 | } 826 | } 827 | 828 | push_host(); 829 | lua_pushinteger(L, cur); 830 | return 2; 831 | 832 | PARSE_IPV6: 833 | // parse ipv6 834 | head = cur; 835 | cur++; 836 | switch (parse_ipv6(url, urllen, &cur)) { 837 | // found delemiter 838 | case ']': 839 | cur++; 840 | switch (url[cur]) { 841 | case ':': 842 | tail = cur; 843 | cur++; 844 | goto PARSE_PORT; 845 | 846 | case '/': 847 | push_host(); 848 | goto PARSE_PATHNAME; 849 | 850 | case '?': 851 | push_host(); 852 | goto PARSE_QUERY; 853 | 854 | // illegal byte sequence 855 | default: 856 | push_host(); 857 | lua_pushinteger(L, cur); 858 | lua_pushlstring(L, src + cur, 1); 859 | return 3; 860 | } 861 | break; 862 | 863 | // illegal byte sequence 864 | default: 865 | lua_pushinteger(L, cur); 866 | lua_pushlstring(L, src + cur, 1); 867 | return 3; 868 | } 869 | 870 | #undef push_host 871 | 872 | PARSE_PORT: 873 | // parse port 874 | phead = cur; 875 | portnum = 0; 876 | 877 | #define push_hostport() \ 878 | do { \ 879 | if (cur - head > 2) { \ 880 | lauxh_pushlstr2tbl(L, "hostname", src + head, tail - head); \ 881 | if (cur - phead) { \ 882 | lauxh_pushlstr2tbl(L, "host", src + head, cur - head); \ 883 | lauxh_pushlstr2tbl(L, "port", src + phead, cur - phead); \ 884 | } else { \ 885 | lauxh_pushlstr2tbl(L, "host", src + head, tail - head); \ 886 | } \ 887 | } \ 888 | } while (0) 889 | 890 | for (; cur < urllen; cur++) { 891 | c = url[cur]; 892 | switch (c) { 893 | // convert to integer 894 | case '0' ... 
'9': 895 | portnum = (portnum << 3) + (portnum << 1) + (c - '0'); 896 | // invalid port range 897 | if (portnum > 0xFFFF) { 898 | // illegal byte sequence 899 | lua_pushinteger(L, cur); 900 | lua_pushlstring(L, src + cur, 1); 901 | return 3; 902 | } 903 | continue; 904 | 905 | case '/': 906 | // set "hostname", "host" and "port" fields 907 | push_hostport(); 908 | goto PARSE_PATHNAME; 909 | 910 | case '?': 911 | push_hostport(); 912 | goto PARSE_QUERY; 913 | 914 | case '#': 915 | push_hostport(); 916 | cur++; 917 | goto PARSE_FRAGMENT; 918 | 919 | default: 920 | // userinfo already parsed or hostname ommited 921 | if (userinfo || omit_hostname) { 922 | // illegal byte sequence 923 | lua_pushinteger(L, cur); 924 | lua_pushlstring(L, src + cur, 1); 925 | return 3; 926 | } 927 | // previsous string is treated as username 928 | goto PARSE_PASSWORD; 929 | } 930 | } 931 | 932 | push_hostport(); 933 | lua_pushinteger(L, cur); 934 | return 2; 935 | 936 | #undef push_hostport 937 | 938 | PARSE_PASSWORD: 939 | for (; cur < urllen; cur++) { 940 | switch (URIC[url[cur]]) { 941 | // illegal byte sequence 942 | case 0: 943 | case ':': 944 | case '/': 945 | case '?': 946 | case '#': 947 | lua_pushinteger(L, cur); 948 | lua_pushlstring(L, src + cur, 1); 949 | return 3; 950 | 951 | case '@': 952 | lauxh_pushlstr2tbl(L, "userinfo", src + head, cur - head); 953 | lauxh_pushlstr2tbl(L, "user", src + head, tail - head); 954 | lauxh_pushlstr2tbl(L, "password", src + phead, cur - phead); 955 | userinfo = c; 956 | cur++; 957 | goto PARSE_HOST; 958 | 959 | // percent-encoded 960 | case '%': 961 | // invalid percent-encoded format 962 | if (!is_percentencoded(url + cur + 1)) { 963 | lua_pushinteger(L, cur); 964 | lua_pushlstring(L, src + cur, 1); 965 | return 3; 966 | } 967 | // skip "%" 968 | cur += 2; 969 | // fallthrough 970 | } 971 | } 972 | 973 | // invalid userinfo format 974 | lua_pushinteger(L, cur); 975 | lua_pushlstring(L, src + cur, 1); 976 | return 3; 977 | } 978 | 979 | 
/**
 * Module entry point, named after the Lua C-module convention
 * luaopen_<module path with '.' replaced by '_'>.
 *
 * Pushes the parse function itself (not a table) as the module value and
 * returns 1, the number of values left on the Lua stack.
 */
LUALIB_API int luaopen_url_parse(lua_State *L)
{
    lua_pushcfunction(L, parse_lua);
    return 1;
}

--------------------------------------------------------------------------------