├── .checksrc ├── .github └── workflows │ ├── codeql.yml │ ├── codespell.yml │ ├── curl-for-win.yml │ ├── makefile.yml │ └── reuse.yml ├── .gitignore ├── .reuse └── dep5 ├── CONTRIBUTING.md ├── COPYING ├── LICENSES └── curl.txt ├── Makefile ├── README.md ├── RELEASE-NOTES ├── RELEASE-PROCEDURE.md ├── THANKS ├── URL-QUIRKS.md ├── completions └── _trurl.zsh.in ├── scripts ├── cd2nroff ├── checksrc.pl ├── generate_completions.sh └── mkrelease ├── test.py ├── testfiles ├── test0000.txt ├── test0001.txt └── test0002.txt ├── tests.json ├── trurl.c ├── trurl.md ├── version.h └── winbuild ├── .vcpkg ├── README.md ├── trurl.sln ├── trurl.vcxproj ├── vcpkg-configuration.json └── vcpkg.json /.checksrc: -------------------------------------------------------------------------------- 1 | disable FOPENMODE 2 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: ["master"] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: ["master"] 20 | 21 | jobs: 22 | analyze: 23 | name: Analyze 24 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 25 | permissions: 26 | actions: read 27 | contents: read 28 | security-events: write 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | language: ['cpp', 'python'] 34 | 35 | steps: 36 | - name: Checkout repository 37 | uses: actions/checkout@v4 38 | 39 | # Initializes the CodeQL tools for scanning. 40 | - name: Initialize CodeQL 41 | uses: github/codeql-action/init@v3 42 | with: 43 | languages: ${{ matrix.language }} 44 | 45 | - name: install libcurl 46 | run: | 47 | sudo apt-get update 48 | sudo apt-get install libcurl4-openssl-dev 49 | 50 | - name: build 51 | run: make 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v3 55 | with: 56 | category: "/language:${{matrix.language}}" 57 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | name: "Codespell" 2 | on: [push, pull_request] 3 | jobs: 4 | codespell: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout repository 8 | uses: actions/checkout@v4 9 | 10 | - name: install codespell 11 | run: | 12 | sudo apt-get update 13 | sudo apt-get install codespell 14 | 15 | - name: Perform spelling checks 16 | run: codespell README.md RELEASE-NOTES CONTRIBUTING.md trurl.1 trurl.c 17 | -------------------------------------------------------------------------------- /.github/workflows/curl-for-win.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) Viktor Szakats. 
See LICENSE.md 2 | # SPDX-License-Identifier: curl 3 | --- 4 | name: curl-for-win 5 | 6 | on: 7 | push: 8 | branches: ["master"] 9 | pull_request: 10 | branches: ["master"] 11 | 12 | permissions: {} 13 | 14 | env: 15 | CW_GET: 'curl' 16 | CW_MAP: '0' 17 | CW_JOBS: '3' 18 | CW_PKG_NODELETE: '1' 19 | CW_PKG_FLATTEN: '1' 20 | DOCKER_CONTENT_TRUST: '1' 21 | 22 | jobs: 23 | win-llvm: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | path: 'trurl' 29 | fetch-depth: 8 30 | - name: 'build' 31 | env: 32 | CW_LLVM_MINGW_DL: '1' 33 | CW_LLVM_MINGW_ONLY: '0' 34 | CW_TURL_TEST: '1' 35 | run: | 36 | git clone --depth 1 https://github.com/curl/curl-for-win 37 | mv curl-for-win/* . 38 | export CW_CONFIG='-dev-zero-imap-osnotls-osnoidn-nohttp-nocurltool-win' 39 | export CW_REVISION='${{ github.sha }}' 40 | . ./_versions.sh 41 | docker trust inspect --pretty "${DOCKER_IMAGE}" 42 | time docker pull "${DOCKER_IMAGE}" 43 | docker images --digests 44 | time docker run --volume "$(pwd):$(pwd)" --workdir "$(pwd)" \ 45 | --env-file <(env | grep -a -E \ 46 | '^(CW_|GITHUB_)') \ 47 | "${DOCKER_IMAGE}" \ 48 | sh -c ./_ci-linux-debian.sh 49 | 50 | - name: 'list dependencies' 51 | run: cat urls.txt 52 | - uses: actions/upload-artifact@v4 53 | with: 54 | name: 'trurl-windows' 55 | retention-days: 5 56 | path: curl-*-*-*/trurl* 57 | -------------------------------------------------------------------------------- /.github/workflows/makefile.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | branches: ["master"] 8 | 9 | jobs: 10 | ubuntu: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | build: 16 | - name: default 17 | install_packages: valgrind 18 | test: test-memory 19 | - name: clang sanitizers 20 | install_packages: clang 21 | test: test 22 | make_opts: > 23 | CC=clang 24 | 
CFLAGS="-fsanitize=address,undefined,signed-integer-overflow -Wformat -Werror=format-security -Werror=array-bounds -g" 25 | LDFLAGS="-fsanitize=address,undefined,signed-integer-overflow -g" 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: install libcurl 31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install libcurl4-openssl-dev ${{ matrix.build.install_packages }} 34 | 35 | - name: code style check 36 | run: make checksrc 37 | 38 | - name: make 39 | run: make ${{ matrix.build.make_opts }} 40 | 41 | - name: sanity test 42 | run: ./trurl -v 43 | 44 | - name: test 45 | run: make ${{matrix.build.test}} 46 | 47 | cygwin: 48 | runs-on: windows-latest 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: install cygwin 54 | uses: cygwin/cygwin-install-action@master 55 | with: 56 | packages: curl, libcurl-devel, libcurl4, make, gcc-core, python39 57 | 58 | - name: make 59 | run: make 60 | 61 | - name: sanity test 62 | run: ./trurl -v 63 | 64 | - name: test 65 | run: make test 66 | 67 | macos: 68 | runs-on: macos-latest 69 | 70 | steps: 71 | - uses: actions/checkout@v4 72 | 73 | - name: make 74 | run: make 75 | 76 | - name: sanity test 77 | run: ./trurl -v 78 | 79 | - name: test 80 | run: make test 81 | -------------------------------------------------------------------------------- /.github/workflows/reuse.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) Daniel Stenberg, , et al. 2 | # SPDX-FileCopyrightText: 2022 Free Software Foundation Europe e.V. 
3 | # 4 | # SPDX-License-Identifier: curl 5 | 6 | name: REUSE compliance 7 | 8 | on: 9 | push: 10 | branches: 11 | - master 12 | - '*/ci' 13 | pull_request: 14 | branches: 15 | - master 16 | 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} 19 | cancel-in-progress: true 20 | 21 | permissions: {} 22 | 23 | jobs: 24 | check: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: REUSE Compliance Check 29 | uses: fsfe/reuse-action@v1 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled program 2 | /trurl 3 | /trurl.1 4 | _trurl.zsh 5 | 6 | # Prerequisites 7 | *.d 8 | 9 | # Object files 10 | *.o 11 | *.ko 12 | *.obj 13 | *.elf 14 | 15 | # Linker output 16 | *.ilk 17 | *.map 18 | *.exp 19 | 20 | # Precompiled Headers 21 | *.gch 22 | *.pch 23 | 24 | # Libraries 25 | *.lib 26 | *.a 27 | *.la 28 | *.lo 29 | 30 | # Shared objects (inc. 
Windows DLLs) 31 | *.dll 32 | *.so 33 | *.so.* 34 | *.dylib 35 | 36 | # Executables 37 | *.exe 38 | *.out 39 | *.app 40 | *.i*86 41 | *.x86_64 42 | *.hex 43 | 44 | # Debug files 45 | *.dSYM/ 46 | *.su 47 | *.idb 48 | *.pdb 49 | 50 | # Kernel Module Compile Results 51 | *.mod* 52 | *.cmd 53 | .tmp_versions/ 54 | modules.order 55 | Module.symvers 56 | Mkfile.old 57 | dkms.conf 58 | 59 | # Output files from msvc 60 | winbuild/bin/ 61 | winbuild/obj/ 62 | 63 | # Dependencies for msvc from vcpkg 64 | winbuild/vcpkg_installed/ 65 | 66 | # vim 67 | *.sw* 68 | -------------------------------------------------------------------------------- /.reuse/dep5: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: trurl 3 | Upstream-Contact: Daniel Stenberg 4 | Source: https://curl.se 5 | 6 | # Tests 7 | Files: tests.json testfiles/* 8 | Copyright: Daniel Stenberg, , et al. 9 | License: curl 10 | 11 | # Docs 12 | Files: CONTRIBUTING.md README.md RELEASE-NOTES THANKS URL-QUIRKS.md RELEASE-PROCEDURE.md 13 | Copyright: Daniel Stenberg, , et al. 14 | License: curl 15 | 16 | # Meta files 17 | Files: .checksrc .github/workflows/codeql.yml .github/workflows/codespell.yml .github/workflows/makefile.yml .gitignore 18 | Copyright: Daniel Stenberg, , et al. 19 | License: curl 20 | 21 | # winbuild files 22 | Files: winbuild/.vcpkg winbuild/trurl.sln winbuild/trurl.vcxproj winbuild/vcpkg-configuration.json winbuild/vcpkg.json 23 | Copyright: Daniel Stenberg, , et al. 24 | License: curl 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to trurl 2 | This document is intended to provide a framework for contributing to trurl. 
This document will go over requesting new features, fixing existing bugs and effectively 3 | using the internal tooling to help PRs merge quickly. 4 | 5 | ## Opening an issue 6 | trurl uses GitHub's issue tracking to track upcoming work. If you have a feature you want to add or find a bug simply open an issue in the 7 | [issues tab](https://github.com/curl/trurl/issues). Briefly describe the feature you are requesting and why you think it may be valuable for trurl. If you are 8 | reporting a bug be prepared for questions as we will want to reproduce it locally. In general providing the output of `trurl --version` along with the operating 9 | system / Distro you are running is a good starting point. 10 | 11 | ## Writing a good PR 12 | trurl is a relatively straightforward code base, so it is best to keep your PRs straightforward as well. Avoid trying to fix many bugs in one PR, and instead 13 | use many smaller PRs as this avoids potential conflicts when merging. trurl is written in C and uses the [curl code style](https://curl.se/dev/code-style.html). 14 | PRs that do not follow the code style will not be merged in. 15 | 16 | trurl is in its early stages, so it's important to open a PR against a recent version of the source code, as a lot can change over a few days. 17 | Preferably you would open a PR against the most recent commit in master. 18 | 19 | If you are implementing a new feature, it must be submitted with tests and documentation. The process for writing tests is explained below in the tooling section. Documentation exists 20 | in two locations, the man page (generated from [trurl.md](https://github.com/curl/trurl/blob/master/trurl.md)) and the help prompt when running `trurl -h`. Most documentation changes 21 | will go in the man page, but if you add a new command line argument then it must be documented in the help page. 22 | 23 | It is also important to be prepared for feedback on your PR and adjust it promptly. 
24 | 25 | 26 | ## Tooling 27 | The trurl repository has a few small helper tools to make development easier. 28 | 29 | **checksrc.pl** is used to ensure the code style is correct. It accepts C files as command line arguments, and returns nothing if the code style is valid. If the 30 | code style is incorrect, checksrc.pl will provide the line the error is on and a brief description of what is wrong. You may run `make checksrc` to scan the entire 31 | repository for style compliance. 32 | 33 | **test.py** is used to run automated tests for trurl. It loads in tests from `tests.json` (described below) and reports the number of tests passed. You may specify 34 | the tests to run by passing a list of comma-separated numbers as command line arguments, such as `4,8,15,16,23,42`. Note there is no space between the numbers. `test.py` 35 | may also use valgrind to test for memory errors by passing `--with-valgrind` as a command line argument; it should be noted that this may take a while to run all the tests. 36 | `test.py` will also skip tests that require a specific curl runtime or buildtime. 37 | 38 | ### Adding tests 39 | Tests are located in [tests.json](https://github.com/curl/trurl/blob/master/tests.json). This file is an array of json objects which outline an input and what the expected 40 | output should be. 
Below is a simple example of a single test: 41 | ```json 42 | { 43 | "input": { 44 | "arguments": [ 45 | "https://example.com" 46 | ] 47 | }, 48 | "expected": { 49 | "stdout": "https://example.com/\n", 50 | "stderr": "", 51 | "returncode": 0 52 | } 53 | } 54 | ``` 55 | `"arguments"` is an array of the arguments to run in the test, so if you wanted to pass multiple arguments it would look something like: 56 | ```json 57 | { 58 | "input": { 59 | "arguments": [ 60 | "https://curl.se:22/", 61 | "-s", 62 | "port=443", 63 | "--get", 64 | "{url}" 65 | ] 66 | }, 67 | "expected": { 68 | "stdout": "https://curl.se/\n", 69 | "stderr": "", 70 | "returncode": 0 71 | } 72 | } 73 | ``` 74 | trurl may also return json. If you are adding a test that returns json to stdout, write the json directly instead of a string in the examples above. Below is an example 75 | of what stdout should be if it is a json test, where `"input"` is what trurl accepts from the command line and `"expected"` is what trurl should return. 76 | ```json 77 | "expected": { 78 | "stdout": [ 79 | { 80 | "url": "https://curl.se/", 81 | "scheme": "https", 82 | "host": "curl.se", 83 | "port": "443", 84 | "raw_port": "", 85 | "path": "/", 86 | "query": "", 87 | "params": [] 88 | } 89 | ], 90 | "returncode": 0, 91 | "stderr": "" 92 | } 93 | ``` 94 | 95 | # Tips to make opening a PR easier 96 | - Run `make checksrc` and `make test-memory` locally before opening a PR. These run automatically when a PR is opened so you might as well make sure they pass beforehand. 97 | - Update the man page and the help prompt accordingly. Documentation is annoying but if everyone writes a little it's not bad. 98 | - Add tests to cover new features or the bug you fixed. 
99 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | COPYRIGHT AND PERMISSION NOTICE 2 | 3 | Copyright (c) 2023 - 2024, Daniel Stenberg, , and many 4 | contributors, see the THANKS file. 5 | 6 | All rights reserved. 7 | 8 | Permission to use, copy, modify, and distribute this software for any purpose 9 | with or without fee is hereby granted, provided that the above copyright 10 | notice and this permission notice appear in all copies. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN 15 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 18 | OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | Except as contained in this notice, the name of a copyright holder shall not 21 | be used in advertising or otherwise to promote the sale, use or other dealings 22 | in this Software without prior written authorization of the copyright holder. 23 | -------------------------------------------------------------------------------- /LICENSES/curl.txt: -------------------------------------------------------------------------------- 1 | COPYRIGHT AND PERMISSION NOTICE 2 | 3 | Copyright (C) Daniel Stenberg, , and many 4 | contributors, see the THANKS file. 5 | 6 | All rights reserved. 7 | 8 | Permission to use, copy, modify, and distribute this software for any purpose 9 | with or without fee is hereby granted, provided that the above copyright 10 | notice and this permission notice appear in all copies. 
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN 15 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 18 | OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | Except as contained in this notice, the name of a copyright holder shall not 21 | be used in advertising or otherwise to promote the sale, use or other dealings 22 | in this Software without prior written authorization of the copyright holder. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # _ _ ____ _ 3 | # Project ___| | | | _ \| | 4 | # / __| | | | |_) | | 5 | # | (__| |_| | _ <| |___ 6 | # \___|\___/|_| \_\_____| 7 | # 8 | # Copyright (C) Daniel Stenberg, , et al. 9 | # 10 | # This software is licensed as described in the file COPYING, which 11 | # you should have received as part of this distribution. The terms 12 | # are also available at https://curl.se/docs/copyright.html. 13 | # 14 | # You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 | # copies of the Software, and permit persons to whom the Software is 16 | # furnished to do so, under the terms of the COPYING file. 17 | # 18 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 | # KIND, either express or implied. 
20 | # 21 | # SPDX-License-Identifier: curl 22 | # 23 | ########################################################################## 24 | 25 | TARGET = trurl 26 | OBJS = trurl.o 27 | ifndef TRURL_IGNORE_CURL_CONFIG 28 | LDLIBS += $$(curl-config --libs) 29 | CFLAGS += $$(curl-config --cflags) 30 | endif 31 | CFLAGS += -W -Wall -Wshadow -pedantic 32 | CFLAGS += -Wconversion -Wmissing-prototypes -Wwrite-strings -Wsign-compare -Wno-sign-conversion 33 | ifndef NDEBUG 34 | CFLAGS += -Werror -g 35 | endif 36 | MANUAL = trurl.1 37 | 38 | PREFIX ?= /usr/local 39 | BINDIR ?= $(PREFIX)/bin 40 | MANDIR ?= $(PREFIX)/share/man/man1 41 | ZSH_COMPLETIONSDIR ?= $(PREFIX)/share/zsh/site-functions 42 | COMPLETION_FILES=completions/_trurl.zsh 43 | 44 | INSTALL ?= install 45 | PYTHON3 ?= python3 46 | 47 | all: $(TARGET) $(MANUAL) 48 | 49 | $(TARGET): $(OBJS) 50 | $(CC) $(LDFLAGS) $(OBJS) -o $(TARGET) $(LDLIBS) 51 | 52 | trurl.o: trurl.c version.h 53 | 54 | $(MANUAL): trurl.md 55 | ./scripts/cd2nroff trurl.md > $(MANUAL) 56 | 57 | .PHONY: install 58 | install: 59 | $(INSTALL) -d $(DESTDIR)$(BINDIR) 60 | $(INSTALL) -m 0755 $(TARGET) $(DESTDIR)$(BINDIR) 61 | (if test -f $(MANUAL); then \ 62 | $(INSTALL) -d $(DESTDIR)$(MANDIR); \ 63 | $(INSTALL) -m 0644 $(MANUAL) $(DESTDIR)$(MANDIR); \ 64 | fi) 65 | (if test -f $(COMPLETION_FILES); then \ 66 | $(INSTALL) -d $(DESTDIR)$(ZSH_COMPLETIONSDIR); \ 67 | $(INSTALL) -m 0755 $(COMPLETION_FILES) $(DESTDIR)$(ZSH_COMPLETIONSDIR)/_trurl; \ 68 | fi) 69 | 70 | .PHONY: clean 71 | clean: 72 | rm -f $(OBJS) $(TARGET) $(COMPLETION_FILES) $(MANUAL) 73 | 74 | .PHONY: test 75 | test: $(TARGET) 76 | @$(PYTHON3) test.py 77 | 78 | .PHONY: test-memory 79 | test-memory: $(TARGET) 80 | @$(PYTHON3) test.py --with-valgrind 81 | 82 | .PHONY: checksrc 83 | checksrc: 84 | ./scripts/checksrc.pl trurl.c version.h 85 | 86 | .PHONY: completions 87 | completions: trurl.md 88 | ./scripts/generate_completions.sh $^ 89 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # [![trurl logo](https://curl.se/logo/trurl-logo.svg)](https://curl.se/trurl) 3 | 4 | # trurl 5 | 6 | Command line tool for URL parsing and manipulation 7 | 8 | [Video presentation](https://youtu.be/oDL7DVszr2w) 9 | 10 | ## Examples 11 | 12 | **Replace the hostname of a URL:** 13 | 14 | ```text 15 | $ trurl --url https://curl.se --set host=example.com 16 | https://example.com/ 17 | ``` 18 | 19 | **Create a URL by setting components:** 20 | 21 | ```text 22 | $ trurl --set host=example.com --set scheme=ftp 23 | ftp://example.com/ 24 | ``` 25 | 26 | **Redirect a URL:** 27 | 28 | ```text 29 | $ trurl --url https://curl.se/we/are.html --redirect here.html 30 | https://curl.se/we/here.html 31 | ``` 32 | 33 | **Change port number:** 34 | 35 | ```text 36 | $ trurl --url https://curl.se/we/../are.html --set port=8080 37 | https://curl.se:8080/are.html 38 | ``` 39 | 40 | **Extract the path from a URL:** 41 | 42 | ```text 43 | $ trurl --url https://curl.se/we/are.html --get '{path}' 44 | /we/are.html 45 | ``` 46 | 47 | **Extract the port from a URL:** 48 | 49 | ```text 50 | $ trurl --url https://curl.se/we/are.html --get '{port}' 51 | 443 52 | ``` 53 | 54 | **Append a path segment to a URL:** 55 | 56 | ```text 57 | $ trurl --url https://curl.se/hello --append path=you 58 | https://curl.se/hello/you 59 | ``` 60 | 61 | **Append a query segment to a URL:** 62 | 63 | ```text 64 | $ trurl --url "https://curl.se?name=hello" --append query=search=string 65 | https://curl.se/?name=hello&search=string 66 | ``` 67 | 68 | **Read URLs from stdin:** 69 | 70 | ```text 71 | $ cat urllist.txt | trurl --url-file - 72 | ... 
73 | ``` 74 | 75 | **Output JSON:** 76 | 77 | ```text 78 | $ trurl "https://fake.host/hello#frag" --set user=::moo:: --json 79 | [ 80 | { 81 | "url": "https://%3a%3amoo%3a%3a@fake.host/hello#frag", 82 | "parts": { 83 | "scheme": "https", 84 | "user": "::moo::", 85 | "host": "fake.host", 86 | "path": "/hello", 87 | "fragment": "frag" 88 | } 89 | } 90 | ] 91 | ``` 92 | 93 | **Remove tracking tuples from query:** 94 | 95 | ```text 96 | $ trurl "https://curl.se?search=hey&utm_source=tracker" --qtrim "utm_*" 97 | https://curl.se/?search=hey 98 | ``` 99 | 100 | **Show a specific query key value:** 101 | 102 | ```text 103 | $ trurl "https://example.com?a=home&here=now&thisthen" -g '{query:a}' 104 | home 105 | ``` 106 | 107 | **Sort the key/value pairs in the query component:** 108 | 109 | ```text 110 | $ trurl "https://example.com?b=a&c=b&a=c" --sort-query 111 | https://example.com?a=c&b=a&c=b 112 | ``` 113 | 114 | **Work with a query that uses a semicolon separator:** 115 | 116 | ```text 117 | $ trurl "https://curl.se?search=fool;page=5" --qtrim "search" --query-separator ";" 118 | https://curl.se?page=5 119 | ``` 120 | 121 | **Accept spaces in the URL path:** 122 | 123 | ```text 124 | $ trurl "https://curl.se/this has space/index.html" --accept-space 125 | https://curl.se/this%20has%20space/index.html 126 | ``` 127 | 128 | ## Install 129 | 130 | ### Linux 131 | 132 | It is quite easy to compile the C source with GCC: 133 | 134 | ```text 135 | $ make 136 | cc -W -Wall -pedantic -g -c -o trurl.o trurl.c 137 | cc trurl.o -lcurl -o trurl 138 | ``` 139 | 140 | trurl is also available in [some package managers](https://github.com/curl/trurl/wiki/Get-trurl-for-your-OS). If it is not listed you can try searching for it using the package manager of your preferred distribution. 141 | 142 | ### Windows 143 | 144 | 1. Download and run [Cygwin installer.](https://www.cygwin.com/install.html) 145 | 2. Follow the instructions provided by the installer. 
When prompted to select packages, make sure to choose the following: curl, libcurl-devel, libcurl4, make and gcc-core. 146 | 3. (optional) Add the Cygwin bin directory to your system PATH variable. 147 | 4. Use `make`, just like on Linux. 148 | 149 | ## Prerequisites 150 | 151 | Development files of libcurl (e.g. `libcurl4-openssl-dev` or 152 | `libcurl4-gnutls-dev`) are needed for compilation. Requires libcurl version 153 | 7.62.0 or newer (the first libcurl to ship the URL parsing API). 154 | 155 | trurl also uses `CURLUPART_ZONEID` added in libcurl 7.81.0 and 156 | `curl_url_strerror()` added in libcurl 7.80.0. 157 | 158 | It would certainly be possible to make trurl work with older libcurl versions 159 | if someone wanted to. 160 | 161 | ### Older libcurls 162 | 163 | trurl builds with libcurl older than 7.81.0 but will then not work as 164 | well. For all the documented goodness, use a more modern libcurl. 165 | 166 | ### trurl / libcurl Compatibility 167 | 168 | | trurl Feature | Minimum libcurl Version | 169 | |-----------------|--------------------------| 170 | | imap-options | 7.30.0 | 171 | | normalize-ipv | 7.77.0 | 172 | | white-space | 7.78.0 | 173 | | url-strerror | 7.80.0 | 174 | | zone-id | 7.81.0 | 175 | | punycode | 7.88.0 | 176 | | punycode2idn | 8.3.0 | 177 | | no-guess-scheme | 8.9.0 | 178 | 179 | For more details on how trurl will behave if these features are missing see [URL Quirks](https://github.com/curl/trurl/blob/master/URL-QUIRKS.md). 
180 | To see the features your version of trurl supports as well as the version of libcurl it is built with, run the following command: `trurl --version` 181 | 182 | -------------------------------------------------------------------------------- /RELEASE-NOTES: -------------------------------------------------------------------------------- 1 | # trurl 0.16.1 2 | 3 | ## Bugfixes 4 | 5 | - COPYING: add the "and many contributors" text from the curl license 6 | - scripts: import cd2nroff from curl 7 | - trurl: handle zero length query pairs 8 | - trurl.md: fix typo in --replace-append 9 | - Update README.md to link to the getting trurl wiki page 10 | - Autogenerate ZSH completions based on trurl.md 11 | - Makefile: only create MANDIR when manpage is installed 12 | 13 | Contributors to this release: 14 | 15 | Daniel Stenberg, Jacob Mealey, Sertonix 16 | -------------------------------------------------------------------------------- /RELEASE-PROCEDURE.md: -------------------------------------------------------------------------------- 1 | trurl release procedure - how to do a release 2 | ============================================== 3 | 4 | in the source code repo 5 | ----------------------- 6 | 7 | - edit `RELEASE-NOTES` to be accurate 8 | 9 | - run `./scripts/mkrelease [version]` 10 | 11 | - make sure all relevant changes are committed on the master branch 12 | 13 | - tag the git repo in this style: `git tag -a trurl-[version]` -a annotates 14 | the tag 15 | 16 | - push the git commits and the new tag 17 | 18 | - Go to https://github.com/curl/trurl/tags and edit the tag as a release 19 | Consider allowing it to make a discussion post about it. 
20 | 21 | celebrate 22 | --------- 23 | 24 | - suitable beverage intake is encouraged for the festivities 25 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | This project exists only thanks to the awesome people who make it happen. The 2 | following friends have contributed: 3 | 4 | Dan Fandrich 5 | Daniel Gustafsson 6 | Daniel Stenberg 7 | Ehsan 8 | Emanuele Torre 9 | Enno Tensing 10 | Gustavo Costa 11 | Håvard Bønes 12 | Jacob Mealey 13 | Jay Satiro 14 | Jeremy Lecour 15 | Krishean Draconis 16 | Luca Barbato 17 | ma 18 | Marian Posaceanu 19 | Martin Hauke 20 | Michael Ablassmeier 21 | Michael Lass 22 | Nekobit 23 | Nicolas CARPi 24 | Olaf Alders 25 | Pascal Knecht 26 | Paul Roub 27 | Paul Wise 28 | Renato Botelho 29 | Ruoyu Zhong 30 | Sajad F. Maghrebi 31 | Sevan Janiyan 32 | Viktor Szakats 33 | 積丹尼 Dan Jacobson 34 | -------------------------------------------------------------------------------- /URL-QUIRKS.md: -------------------------------------------------------------------------------- 1 | # URL Quirks 2 | 3 | This is a collection of peculiarities you may find in trurl due to bugs or 4 | changes/improvements in libcurl's URL handling. 5 | 6 | ## The URL API 7 | 8 | Was introduced in libcurl 7.62.0. No older libcurl versions can be used. 9 | 10 | Build-time requirement. 11 | 12 | ## Extracting zone id 13 | 14 | Added in libcurl 7.65.0. The `CURLUE_NO_ZONEID` error code was added in 15 | 7.81.0. 16 | 17 | Build-time requirement. 18 | 19 | ## Normalizing IPv4 addresses 20 | 21 | Added in libcurl 7.77.0. Before that, the source formatting was kept. 22 | 23 | Run-time requirement. 24 | 25 | ## Allow space 26 | 27 | The libcurl URL parser was given the ability to allow spaces in libcurl 28 | 7.78.0. trurl therefore cannot offer this feature with older libcurl versions. 29 | 30 | Build-time requirement. 
31 | 32 | ## `curl_url_strerror()` 33 | 34 | This API call was added in 7.80.0, using a libcurl version older than this 35 | will make trurl output less good error messages. 36 | 37 | Build-time requirement. 38 | 39 | ## Normalizing IPv6 addresses 40 | 41 | Implemented in libcurl 7.81.0. Before this, the source formatting was kept. 42 | 43 | Run-time requirement. 44 | 45 | ## `CURLU_PUNYCODE` 46 | 47 | Added in libcurl 7.88.0. 48 | 49 | Build-time requirement. 50 | 51 | ## Accepting % in host names 52 | 53 | The host name parser has been made stricter over time, with the most recent 54 | enhancement merged for libcurl 8.0.0. 55 | 56 | Run-time requirement. 57 | 58 | ## Parsing IPv6 literals when libcurl does not support IPv6 59 | 60 | Before libcurl 8.0.0 the URL parser was not able to parse IPv6 addresses if 61 | libcurl itself was built without IPv6 capabilities. 62 | 63 | Run-time requirement. 64 | 65 | ## URL encoding of fragments 66 | 67 | This was a libcurl bug, fixed in libcurl 8.1.0 68 | 69 | Run-time requirement. 70 | 71 | ## Bad IPv4 numerical address 72 | 73 | The normalization of IPv4 addresses would just ignore bad addresses, while 74 | newer libcurl versions will reject host names using invalid IPv4 addresses. 75 | Fixed in 8.1.0 76 | 77 | Run-time requirement. 78 | 79 | ## Set illegal scheme 80 | 81 | Permitted before libcurl 8.1.0 82 | 83 | Run-time requirement. 84 | -------------------------------------------------------------------------------- /completions/_trurl.zsh.in: -------------------------------------------------------------------------------- 1 | #compdef trurl 2 | ########################################################################## 3 | # _ _ 4 | # Project | |_ _ __ _ _ _ __| | 5 | # | __| '__| | | | '__| | 6 | # | |_| | | |_| | | | | 7 | # \__|_| \__,_|_| |_| 8 | # 9 | # Copyright (C) Daniel Stenberg, , et al. 
10 | # 11 | # This software is licensed as described in the file COPYING, which 12 | # you should have received as part of this distribution. The terms 13 | # are also available at https://curl.se/docs/copyright.html. 14 | # 15 | # You may opt to use, copy, modify, merge, publish, distribute and/or sell 16 | # copies of the Software, and permit persons to whom the Software is 17 | # furnished to do so, under the terms of the COPYING file. 18 | # 19 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 20 | # KIND, either express or implied. 21 | # 22 | # SPDX-License-Identifier: curl 23 | # 24 | ########################################################################## 25 | 26 | 27 | # This file is generated from trurls generate_completions.sh 28 | 29 | # standalone flags - things that have now follow on 30 | standalone_flags=(@TRURL_STANDALONE_FLAGS@) 31 | 32 | # component options - flags that expected to come after them 33 | component_options=(@TRURL_COMPONENT_OPTIONS@) 34 | 35 | # components that take *something* as a param but we can't 36 | # be sure what 37 | random_options=(@TRURL_RANDOM_OPTIONS@) 38 | 39 | # Components are specific URL parts that are only completed 40 | # after a component_options appears 41 | component_list=( @TRURL_COMPONENT_LIST@) 42 | 43 | if (( "${component_options[(Ie)${words[$CURRENT-1]}]}" )); then 44 | compadd -S "=" "${component_list[@]}" 45 | return 0 46 | fi 47 | 48 | # if we expect another parameter that trurl doesn't define then 49 | # we should (i.e. a component) then fall back on ZSH _path_file 50 | if (( "${random_options[(Ie)${words[$CURRENT-1]}]}" )); then 51 | _path_files 52 | return 0 53 | fi 54 | 55 | # calling compadd directly allows us the let the flags be 56 | # repeatable so we can recall --set, --get etc. 57 | repeatable=( "${component_options[@]}" "${random_options[@]}" ) 58 | args=( "${repeatable[@]}" ) 59 | # only apply single completions which haven't been used. 
60 | for sf in "${standalone_flags[@]}"; do 61 | if ! (( "${words[(Ie)$sf]}" )); then 62 | args+=("$sf") 63 | fi 64 | done 65 | 66 | compadd "${args[@]}" 67 | -------------------------------------------------------------------------------- /scripts/cd2nroff: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | #*************************************************************************** 3 | # _ _ ____ _ 4 | # Project ___| | | | _ \| | 5 | # / __| | | | |_) | | 6 | # | (__| |_| | _ <| |___ 7 | # \___|\___/|_| \_\_____| 8 | # 9 | # Copyright (C) Daniel Stenberg, , et al. 10 | # 11 | # This software is licensed as described in the file COPYING, which 12 | # you should have received as part of this distribution. The terms 13 | # are also available at https://curl.se/docs/copyright.html. 14 | # 15 | # You may opt to use, copy, modify, merge, publish, distribute and/or sell 16 | # copies of the Software, and permit persons to whom the Software is 17 | # furnished to do so, under the terms of the COPYING file. 18 | # 19 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 20 | # KIND, either express or implied. 21 | # 22 | # SPDX-License-Identifier: curl 23 | # 24 | ########################################################################### 25 | 26 | =begin comment 27 | 28 | Converts a curldown file to nroff (manpage). 
29 | 30 | =end comment 31 | =cut 32 | 33 | use strict; 34 | use warnings; 35 | 36 | my $cd2nroff = "0.1"; # to keep check 37 | my $dir; 38 | my $extension; 39 | my $keepfilename; 40 | 41 | while(@ARGV) { 42 | if($ARGV[0] eq "-d") { 43 | shift @ARGV; 44 | $dir = shift @ARGV; 45 | } 46 | elsif($ARGV[0] eq "-e") { 47 | shift @ARGV; 48 | $extension = shift @ARGV; 49 | } 50 | elsif($ARGV[0] eq "-k") { 51 | shift @ARGV; 52 | $keepfilename = 1; 53 | } 54 | elsif($ARGV[0] eq "-h") { 55 | print < Write the output to the file name from the meta-data in the 59 | specified directory, instead of writing to stdout 60 | -e If -d is used, this option can provide an added "extension", arbitrary 61 | text really, to append to the file name. 62 | -h This help text, 63 | -v Show version then exit 64 | HELP 65 | ; 66 | exit 0; 67 | } 68 | elsif($ARGV[0] eq "-v") { 69 | print "cd2nroff version $cd2nroff\n"; 70 | exit 0; 71 | } 72 | else { 73 | last; 74 | } 75 | } 76 | 77 | use POSIX qw(strftime); 78 | my @ts; 79 | if (defined($ENV{SOURCE_DATE_EPOCH})) { 80 | @ts = gmtime($ENV{SOURCE_DATE_EPOCH}); 81 | } else { 82 | @ts = localtime; 83 | } 84 | my $date = strftime "%Y-%m-%d", @ts; 85 | 86 | sub outseealso { 87 | my (@sa) = @_; 88 | my $comma = 0; 89 | my @o; 90 | push @o, ".SH SEE ALSO\n"; 91 | for my $s (sort @sa) { 92 | push @o, sprintf "%s.BR $s", $comma ? ",\n": ""; 93 | $comma = 1; 94 | } 95 | push @o, "\n"; 96 | return @o; 97 | } 98 | 99 | sub outprotocols { 100 | my (@p) = @_; 101 | my $comma = 0; 102 | my @o; 103 | push @o, ".SH PROTOCOLS\n"; 104 | 105 | if($p[0] eq "TLS") { 106 | push @o, "This functionality affects all TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc."; 107 | } 108 | else { 109 | my @s = sort @p; 110 | push @o, "This functionality affects "; 111 | for my $e (sort @s) { 112 | push @o, sprintf "%s%s", 113 | $comma ? (($e eq $s[-1]) ? 
" and " : ", "): "", 114 | lc($e); 115 | $comma = 1; 116 | } 117 | if($#s == 0) { 118 | if($s[0] eq "All") { 119 | push @o, " supported protocols"; 120 | } 121 | else { 122 | push @o, " only"; 123 | } 124 | } 125 | } 126 | push @o, "\n"; 127 | return @o; 128 | } 129 | 130 | sub outtls { 131 | my (@t) = @_; 132 | my $comma = 0; 133 | my @o; 134 | if($t[0] eq "All") { 135 | push @o, "\nAll TLS backends support this option."; 136 | } 137 | elsif($t[0] eq "none") { 138 | push @o, "\nNo TLS backend supports this option."; 139 | } 140 | else { 141 | push @o, "\nThis option works only with the following TLS backends:\n"; 142 | my @s = sort @t; 143 | for my $e (@s) { 144 | push @o, sprintf "%s$e", 145 | $comma ? (($e eq $s[-1]) ? " and " : ", "): ""; 146 | $comma = 1; 147 | } 148 | } 149 | push @o, "\n"; 150 | return @o; 151 | } 152 | 153 | my %knownprotos = ( 154 | 'DICT' => 1, 155 | 'FILE' => 1, 156 | 'FTP' => 1, 157 | 'FTPS' => 1, 158 | 'GOPHER' => 1, 159 | 'GOPHERS' => 1, 160 | 'HTTP' => 1, 161 | 'HTTPS' => 1, 162 | 'IMAP' => 1, 163 | 'IMAPS' => 1, 164 | 'LDAP' => 1, 165 | 'LDAPS' => 1, 166 | 'MQTT' => 1, 167 | 'POP3' => 1, 168 | 'POP3S' => 1, 169 | 'RTMP' => 1, 170 | 'RTMPS' => 1, 171 | 'RTSP' => 1, 172 | 'SCP' => 1, 173 | 'SFTP' => 1, 174 | 'SMB' => 1, 175 | 'SMBS' => 1, 176 | 'SMTP' => 1, 177 | 'SMTPS' => 1, 178 | 'TELNET' => 1, 179 | 'TFTP' => 1, 180 | 'WS' => 1, 181 | 'WSS' => 1, 182 | 'TLS' => 1, 183 | 'TCP' => 1, 184 | 'QUIC' => 1, 185 | 'All' => 1 186 | ); 187 | 188 | my %knowntls = ( 189 | 'BearSSL' => 1, 190 | 'GnuTLS' => 1, 191 | 'mbedTLS' => 1, 192 | 'OpenSSL' => 1, 193 | 'rustls' => 1, 194 | 'Schannel' => 1, 195 | 'Secure Transport' => 1, 196 | 'wolfSSL' => 1, 197 | 'All' => 1, 198 | 'none' => 1, 199 | ); 200 | 201 | sub single { 202 | my @seealso; 203 | my @proto; 204 | my @tls; 205 | my $d; 206 | my ($f)=@_; 207 | my $copyright; 208 | my $errors = 0; 209 | my $fh; 210 | my $line; 211 | my $list; 212 | my $tlslist; 213 | my $section; 214 | my $source; 215 
| my $addedin; 216 | my $spdx; 217 | my $start = 0; 218 | my $title; 219 | 220 | if(defined($f)) { 221 | if(!open($fh, "<:crlf", "$f")) { 222 | print STDERR "cd2nroff failed to open '$f' for reading: $!\n"; 223 | return 1; 224 | } 225 | } 226 | else { 227 | $f = "STDIN"; 228 | $fh = \*STDIN; 229 | binmode($fh, ":crlf"); 230 | } 231 | while(<$fh>) { 232 | $line++; 233 | if(!$start) { 234 | if(/^---/) { 235 | # header starts here 236 | $start = 1; 237 | } 238 | next; 239 | } 240 | if(/^Title: *(.*)/i) { 241 | $title=$1; 242 | } 243 | elsif(/^Section: *(.*)/i) { 244 | $section=$1; 245 | } 246 | elsif(/^Source: *(.*)/i) { 247 | $source=$1; 248 | } 249 | elsif(/^See-also: +(.*)/i) { 250 | $list = 1; # 1 for see-also 251 | push @seealso, $1; 252 | } 253 | elsif(/^See-also: */i) { 254 | if($seealso[0]) { 255 | print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n"; 256 | return 2; 257 | } 258 | $list = 1; # 1 for see-also 259 | } 260 | elsif(/^Protocol:/i) { 261 | $list = 2; # 2 for protocol 262 | } 263 | elsif(/^TLS-backend:/i) { 264 | $list = 3; # 3 for TLS backend 265 | } 266 | elsif(/^Added-in: *(.*)/i) { 267 | $addedin=$1; 268 | if(($addedin !~ /^[0-9.]+[0-9]\z/) && 269 | ($addedin ne "n/a")) { 270 | print STDERR "$f:$line:1:ERROR: invalid version number in Added-in line: $addedin\n"; 271 | return 2; 272 | } 273 | } 274 | elsif(/^ +- (.*)/i) { 275 | # the only lists we support are see-also and protocol 276 | if($list == 1) { 277 | push @seealso, $1; 278 | } 279 | elsif($list == 2) { 280 | push @proto, $1; 281 | } 282 | elsif($list == 3) { 283 | push @tls, $1; 284 | } 285 | else { 286 | print STDERR "$f:$line:1:ERROR: list item without owner?\n"; 287 | return 2; 288 | } 289 | } 290 | # REUSE-IgnoreStart 291 | elsif(/^C: (.*)/i) { 292 | $copyright=$1; 293 | } 294 | elsif(/^SPDX-License-Identifier: (.*)/i) { 295 | $spdx=$1; 296 | } 297 | # REUSE-IgnoreEnd 298 | elsif(/^---/) { 299 | # end of the header section 300 | if(!$title) { 301 | print STDERR 
"$f:$line:1:ERROR: no 'Title:' in $f\n"; 302 | return 1; 303 | } 304 | if(!$section) { 305 | print STDERR "$f:$line:1:ERROR: no 'Section:' in $f\n"; 306 | return 2; 307 | } 308 | if(!$source) { 309 | print STDERR "$f:$line:1:ERROR: no 'Source:' in $f\n"; 310 | return 2; 311 | } 312 | if(($source eq "libcurl") && !$addedin) { 313 | print STDERR "$f:$line:1:ERROR: no 'Added-in:' in $f\n"; 314 | return 2; 315 | } 316 | if(!$seealso[0]) { 317 | print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n"; 318 | return 2; 319 | } 320 | if(!$copyright) { 321 | print STDERR "$f:$line:1:ERROR: no 'C:' field present\n"; 322 | return 2; 323 | } 324 | if(!$spdx) { 325 | print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n"; 326 | return 2; 327 | } 328 | if($section == 3) { 329 | if(!$proto[0]) { 330 | printf STDERR "$f:$line:1:ERROR: missing Protocol:\n"; 331 | exit 2; 332 | } 333 | my $tls = 0; 334 | for my $p (@proto) { 335 | if($p eq "TLS") { 336 | $tls = 1; 337 | } 338 | if(!$knownprotos{$p}) { 339 | printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n"; 340 | exit 2; 341 | } 342 | } 343 | # This is for TLS, require TLS-backend: 344 | if($tls) { 345 | if(!$tls[0]) { 346 | printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n"; 347 | exit 2; 348 | } 349 | for my $t (@tls) { 350 | if(!$knowntls{$t}) { 351 | printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n"; 352 | exit 2; 353 | } 354 | } 355 | } 356 | } 357 | last; 358 | } 359 | else { 360 | chomp; 361 | print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n"; 362 | $errors++; 363 | } 364 | } 365 | 366 | if(!$start) { 367 | print STDERR "$f:$line:1:ERROR: no header present\n"; 368 | return 2; 369 | } 370 | 371 | my @desc; 372 | my $quote = 0; 373 | my $blankline = 0; 374 | my $header = 0; 375 | 376 | # cut off the leading path from the file name, if any 377 | $f =~ s/^(.*[\\\/])//; 378 | 379 | push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n"; 380 | push 
@desc, ".TH $title $section \"$date\" $source\n"; 381 | while(<$fh>) { 382 | $line++; 383 | 384 | $d = $_; 385 | 386 | if($quote) { 387 | if($quote == 4) { 388 | # remove the indentation 389 | if($d =~ /^ (.*)/) { 390 | push @desc, "$1\n"; 391 | next; 392 | } 393 | else { 394 | # end of quote 395 | $quote = 0; 396 | push @desc, ".fi\n"; 397 | next; 398 | } 399 | } 400 | if(/^~~~/) { 401 | # end of quote 402 | $quote = 0; 403 | push @desc, ".fi\n"; 404 | next; 405 | } 406 | # convert single backslashes to doubles 407 | $d =~ s/\\/\\\\/g; 408 | # lines starting with a period need it escaped 409 | $d =~ s/^\./\\&./; 410 | push @desc, $d; 411 | next; 412 | } 413 | 414 | # remove single line HTML comments 415 | $d =~ s/<!--.*?-->//g; 416 | 417 | # **bold** 418 | $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g; 419 | # *italics* 420 | $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g; 421 | 422 | my $back = $d; 423 | 424 | # remove all backticked pieces 425 | $back =~ s/\`(.*?)\`//g; 426 | 427 | if($back =~ /[^\\][\<\>]/) { 428 | print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n"; 429 | $errors++; 430 | } 431 | # convert backslash-'<' or backslash-'>' to just the second character 432 | $d =~ s/\\([<>])/$1/g; 433 | 434 | # mentions of curl symbols with manpages use italics by default 435 | $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi; 436 | 437 | # backticked becomes italics 438 | $d =~ s/\`(.*?)\`/\\fI$1\\fP/g; 439 | 440 | if(/^## (.*)/) { 441 | my $word = $1; 442 | # if there are enclosing quotes, remove them first 443 | $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/; 444 | 445 | # enclose in double quotes if there is a space present 446 | if($word =~ / /) { 447 | push @desc, ".IP \"$word\"\n"; 448 | } 449 | else { 450 | push @desc, ".IP $word\n"; 451 | } 452 | $header = 1; 453 | } 454 | elsif(/^##/) { 455 | # end of IP sequence 456 | push @desc, ".PP\n"; 457 | $header = 1; 458 | } 459 | elsif(/^# (.*)/) { 460 | my $word = $1; 461 | # if there are enclosing quotes, remove them first 462 | $word =~ 
s/[\"\'](.*)[\"\']\z/$1/; 463 | 464 | if($word eq "PROTOCOLS") { 465 | print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n"; 466 | } 467 | elsif($word eq "AVAILABILITY") { 468 | print STDERR "$f:$line:1:WARN: AVAILABILITY section in source file\n"; 469 | } 470 | elsif($word eq "%PROTOCOLS%") { 471 | # insert the generated PROTOCOLS section 472 | push @desc, outprotocols(@proto); 473 | 474 | if($proto[0] eq "TLS") { 475 | push @desc, outtls(@tls); 476 | } 477 | $header = 1; 478 | next; 479 | } 480 | elsif($word eq "%AVAILABILITY%") { 481 | if($addedin ne "n/a") { 482 | # insert the generated AVAILABILITY section 483 | push @desc, ".SH AVAILABILITY\n"; 484 | push @desc, "Added in curl $addedin\n"; 485 | } 486 | $header = 1; 487 | next; 488 | } 489 | push @desc, ".SH $word\n"; 490 | $header = 1; 491 | } 492 | elsif(/^~~~c/) { 493 | # start of a code section, not indented 494 | $quote = 1; 495 | push @desc, "\n" if($blankline && !$header); 496 | $header = 0; 497 | push @desc, ".nf\n"; 498 | } 499 | elsif(/^~~~/) { 500 | # start of a quote section; not code, not indented 501 | $quote = 1; 502 | push @desc, "\n" if($blankline && !$header); 503 | $header = 0; 504 | push @desc, ".nf\n"; 505 | } 506 | elsif(/^ (.*)/) { 507 | # quoted, indented by 4 space 508 | $quote = 4; 509 | push @desc, "\n" if($blankline && !$header); 510 | $header = 0; 511 | push @desc, ".nf\n$1\n"; 512 | } 513 | elsif(/^[ \t]*\n/) { 514 | # count and ignore blank lines 515 | $blankline++; 516 | } 517 | else { 518 | # don't output newlines if this is the first content after a 519 | # header 520 | push @desc, "\n" if($blankline && !$header); 521 | $blankline = 0; 522 | $header = 0; 523 | 524 | # quote minuses in the output 525 | $d =~ s/([^\\])-/$1\\-/g; 526 | # replace single quotes 527 | $d =~ s/\'/\\(aq/g; 528 | # handle double quotes first on the line 529 | $d =~ s/^(\s*)\"/$1\\&\"/; 530 | 531 | # lines starting with a period needs it escaped 532 | $d =~ s/^\./\\&./; 533 | 534 | if($d 
=~ /^(.*) /) { 535 | printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n", 536 | length($1); 537 | $errors++; 538 | } 539 | if($d =~ /^[ \t]*\n/) { 540 | # replaced away all contents 541 | $blankline= 1; 542 | } 543 | else { 544 | push @desc, $d; 545 | } 546 | } 547 | } 548 | if($fh != \*STDIN) { 549 | close($fh); 550 | } 551 | push @desc, outseealso(@seealso); 552 | if($dir) { 553 | if($keepfilename) { 554 | $title = $f; 555 | $title =~ s/\.[^.]*$//; 556 | } 557 | my $outfile = "$dir/$title.$section"; 558 | if(defined($extension)) { 559 | $outfile .= $extension; 560 | } 561 | if(!open(O, ">", $outfile)) { 562 | print STDERR "Failed to open $outfile : $!\n"; 563 | return 1; 564 | } 565 | print O @desc; 566 | close(O); 567 | } 568 | else { 569 | print @desc; 570 | } 571 | return $errors; 572 | } 573 | 574 | if(@ARGV) { 575 | for my $f (@ARGV) { 576 | my $r = single($f); 577 | if($r) { 578 | exit $r; 579 | } 580 | } 581 | } 582 | else { 583 | exit single(); 584 | } 585 | -------------------------------------------------------------------------------- /scripts/checksrc.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | #*************************************************************************** 3 | # _ _ ____ _ 4 | # Project ___| | | | _ \| | 5 | # / __| | | | |_) | | 6 | # | (__| |_| | _ <| |___ 7 | # \___|\___/|_| \_\_____| 8 | # 9 | # Copyright (C) Daniel Stenberg, , et al. 10 | # 11 | # This software is licensed as described in the file COPYING, which 12 | # you should have received as part of this distribution. The terms 13 | # are also available at https://curl.se/docs/copyright.html. 14 | # 15 | # You may opt to use, copy, modify, merge, publish, distribute and/or sell 16 | # copies of the Software, and permit persons to whom the Software is 17 | # furnished to do so, under the terms of the COPYING file. 
18 | # 19 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 20 | # KIND, either express or implied. 21 | # 22 | # SPDX-License-Identifier: curl 23 | # 24 | ########################################################################### 25 | 26 | use strict; 27 | use warnings; 28 | 29 | my $max_column = 79; 30 | my $indent = 2; 31 | 32 | my $warnings = 0; 33 | my $swarnings = 0; 34 | my $errors = 0; 35 | my $serrors = 0; 36 | my $suppressed; # skipped problems 37 | my $file; 38 | my $dir="."; 39 | my $wlist=""; 40 | my @alist; 41 | my $windows_os = $^O eq 'MSWin32' || $^O eq 'cygwin' || $^O eq 'msys'; 42 | my $verbose; 43 | my %skiplist; 44 | 45 | my %ignore; 46 | my %ignore_set; 47 | my %ignore_used; 48 | my @ignore_line; 49 | 50 | my %warnings_extended = ( 51 | 'COPYRIGHTYEAR' => 'copyright year incorrect', 52 | 'STRERROR', => 'strerror() detected', 53 | ); 54 | 55 | my %warnings = ( 56 | 'ASSIGNWITHINCONDITION' => 'assignment within conditional expression', 57 | 'ASTERISKNOSPACE' => 'pointer declared without space before asterisk', 58 | 'ASTERISKSPACE' => 'pointer declared with space after asterisk', 59 | 'BADCOMMAND' => 'bad !checksrc! 
instruction', 60 | 'BANNEDFUNC' => 'a banned function was used', 61 | 'BRACEELSE' => '} else on the same line', 62 | 'BRACEPOS' => 'wrong position for an open brace', 63 | 'BRACEWHILE' => 'A single space between open brace and while', 64 | 'COMMANOSPACE' => 'comma without following space', 65 | 'COMMENTNOSPACEEND' => 'no space before */', 66 | 'COMMENTNOSPACESTART' => 'no space following /*', 67 | 'COPYRIGHT' => 'file missing a copyright statement', 68 | 'CPPCOMMENTS' => '// comment detected', 69 | 'DOBRACE' => 'A single space between do and open brace', 70 | 'EMPTYLINEBRACE' => 'Empty line before the open brace', 71 | 'EQUALSNOSPACE' => 'equals sign without following space', 72 | 'EQUALSNULL' => 'if/while comparison with == NULL', 73 | 'EXCLAMATIONSPACE' => 'Whitespace after exclamation mark in expression', 74 | 'FOPENMODE' => 'fopen needs a macro for the mode string', 75 | 'INCLUDEDUP', => 'same file is included again', 76 | 'INDENTATION' => 'wrong start column for code', 77 | 'LONGLINE' => "Line longer than $max_column", 78 | 'MULTISPACE' => 'multiple spaces used when not suitable', 79 | 'NOSPACEEQUALS' => 'equals sign without preceding space', 80 | 'NOTEQUALSZERO', => 'if/while comparison with != 0', 81 | 'ONELINECONDITION' => 'conditional block on the same line as the if()', 82 | 'OPENCOMMENT' => 'file ended with a /* comment still "open"', 83 | 'PARENBRACE' => '){ without sufficient space', 84 | 'RETURNNOSPACE' => 'return without space', 85 | 'SEMINOSPACE' => 'semicolon without following space', 86 | 'SIZEOFNOPAREN' => 'use of sizeof without parentheses', 87 | 'SNPRINTF' => 'use of snprintf', 88 | 'SPACEAFTERPAREN' => 'space after open parenthesis', 89 | 'SPACEBEFORECLOSE' => 'space before a close parenthesis', 90 | 'SPACEBEFORECOMMA' => 'space before a comma', 91 | 'SPACEBEFOREPAREN' => 'space before an open parenthesis', 92 | 'SPACESEMICOLON' => 'space before semicolon', 93 | 'SPACESWITCHCOLON' => 'space before colon of switch label', 94 | 'TABS' => 'TAB 
characters not allowed', 95 | 'TRAILINGSPACE' => 'Trailing whitespace on the line', 96 | 'TYPEDEFSTRUCT' => 'typedefed struct', 97 | 'UNUSEDIGNORE' => 'a warning ignore was not used', 98 | ); 99 | 100 | sub readskiplist { 101 | open(my $W, '<', "$dir/checksrc.skip") or return; 102 | my @all=<$W>; 103 | for(@all) { 104 | $windows_os ? $_ =~ s/\r?\n$// : chomp; 105 | $skiplist{$_}=1; 106 | } 107 | close($W); 108 | } 109 | 110 | # Reads the .checksrc in $dir for any extended warnings to enable locally. 111 | # Currently there is no support for disabling warnings from the standard set, 112 | # and since that's already handled via !checksrc! commands there is probably 113 | # little use to add it. 114 | sub readlocalfile { 115 | my $i = 0; 116 | 117 | open(my $rcfile, "<", "$dir/.checksrc") or return; 118 | 119 | while(<$rcfile>) { 120 | $i++; 121 | 122 | # Lines starting with '#' are considered comments 123 | if (/^\s*(#.*)/) { 124 | next; 125 | } 126 | elsif (/^\s*enable ([A-Z]+)$/) { 127 | if(!defined($warnings_extended{$1})) { 128 | print STDERR "invalid warning specified in .checksrc: \"$1\"\n"; 129 | next; 130 | } 131 | $warnings{$1} = $warnings_extended{$1}; 132 | } 133 | elsif (/^\s*disable ([A-Z]+)$/) { 134 | if(!defined($warnings{$1})) { 135 | print STDERR "invalid warning specified in .checksrc: \"$1\"\n"; 136 | next; 137 | } 138 | # Accept-list 139 | push @alist, $1; 140 | } 141 | else { 142 | die "Invalid format in $dir/.checksrc on line $i\n"; 143 | } 144 | } 145 | close($rcfile); 146 | } 147 | 148 | sub checkwarn { 149 | my ($name, $num, $col, $file, $line, $msg, $error) = @_; 150 | 151 | my $w=$error?"error":"warning"; 152 | my $nowarn=0; 153 | 154 | #if(!$warnings{$name}) { 155 | # print STDERR "Dev! there's no description for $name!\n"; 156 | #} 157 | 158 | # checksrc.skip 159 | if($skiplist{$line}) { 160 | $nowarn = 1; 161 | } 162 | # !checksrc! 
controlled 163 | elsif($ignore{$name}) { 164 | $ignore{$name}--; 165 | $ignore_used{$name}++; 166 | $nowarn = 1; 167 | if(!$ignore{$name}) { 168 | # reached zero, enable again 169 | enable_warn($name, $num, $file, $line); 170 | } 171 | } 172 | 173 | if($nowarn) { 174 | $suppressed++; 175 | if($w) { 176 | $swarnings++; 177 | } 178 | else { 179 | $serrors++; 180 | } 181 | return; 182 | } 183 | 184 | if($w) { 185 | $warnings++; 186 | } 187 | else { 188 | $errors++; 189 | } 190 | 191 | $col++; 192 | print "$file:$num:$col: $w: $msg ($name)\n"; 193 | print " $line\n"; 194 | 195 | if($col < 80) { 196 | my $pref = (' ' x $col); 197 | print "${pref}^\n"; 198 | } 199 | } 200 | 201 | $file = shift @ARGV; 202 | 203 | while(defined $file) { 204 | 205 | if($file =~ /-D(.*)/) { 206 | $dir = $1; 207 | $file = shift @ARGV; 208 | next; 209 | } 210 | elsif($file =~ /-W(.*)/) { 211 | $wlist .= " $1 "; 212 | $file = shift @ARGV; 213 | next; 214 | } 215 | elsif($file =~ /-A(.+)/) { 216 | push @alist, $1; 217 | $file = shift @ARGV; 218 | next; 219 | } 220 | elsif($file =~ /-i([1-9])/) { 221 | $indent = $1 + 0; 222 | $file = shift @ARGV; 223 | next; 224 | } 225 | elsif($file =~ /-m([0-9]+)/) { 226 | $max_column = $1 + 0; 227 | $file = shift @ARGV; 228 | next; 229 | } 230 | elsif($file =~ /^(-h|--help)/) { 231 | undef $file; 232 | last; 233 | } 234 | 235 | last; 236 | } 237 | 238 | if(!$file) { 239 | print "checksrc.pl [option] [file2] ...\n"; 240 | print " Options:\n"; 241 | print " -A[rule] Accept this violation, can be used multiple times\n"; 242 | print " -D[DIR] Directory to prepend file names\n"; 243 | print " -h Show help output\n"; 244 | print " -W[file] Skip the given file - ignore all its flaws\n"; 245 | print " -i Indent spaces. Default: 2\n"; 246 | print " -m Maximum line length. 
Default: 79\n"; 247 | print "\nDetects and warns for these problems:\n"; 248 | my @allw = keys %warnings; 249 | push @allw, keys %warnings_extended; 250 | for my $w (sort @allw) { 251 | if($warnings{$w}) { 252 | printf (" %-18s: %s\n", $w, $warnings{$w}); 253 | } 254 | else { 255 | printf (" %-18s: %s[*]\n", $w, $warnings_extended{$w}); 256 | } 257 | } 258 | print " [*] = disabled by default\n"; 259 | exit; 260 | } 261 | 262 | readskiplist(); 263 | readlocalfile(); 264 | 265 | do { 266 | if("$wlist" !~ / $file /) { 267 | my $fullname = $file; 268 | $fullname = "$dir/$file" if ($fullname !~ '^\.?\.?/'); 269 | scanfile($fullname); 270 | } 271 | $file = shift @ARGV; 272 | 273 | } while($file); 274 | 275 | sub accept_violations { 276 | for my $r (@alist) { 277 | if(!$warnings{$r}) { 278 | print "'$r' is not a warning to accept!\n"; 279 | exit; 280 | } 281 | $ignore{$r}=999999; 282 | $ignore_used{$r}=0; 283 | } 284 | } 285 | 286 | sub checksrc_clear { 287 | undef %ignore; 288 | undef %ignore_set; 289 | undef @ignore_line; 290 | } 291 | 292 | sub checksrc_endoffile { 293 | my ($file) = @_; 294 | for(keys %ignore_set) { 295 | if($ignore_set{$_} && !$ignore_used{$_}) { 296 | checkwarn("UNUSEDIGNORE", $ignore_set{$_}, 297 | length($_)+11, $file, 298 | $ignore_line[$ignore_set{$_}], 299 | "Unused ignore: $_"); 300 | } 301 | } 302 | } 303 | 304 | sub enable_warn { 305 | my ($what, $line, $file, $l) = @_; 306 | 307 | # switch it back on, but warn if not triggered! 
308 | if(!$ignore_used{$what}) { 309 | checkwarn("UNUSEDIGNORE", 310 | $line, length($what) + 11, $file, $l, 311 | "No warning was inhibited!"); 312 | } 313 | $ignore_set{$what}=0; 314 | $ignore_used{$what}=0; 315 | $ignore{$what}=0; 316 | } 317 | sub checksrc { 318 | my ($cmd, $line, $file, $l) = @_; 319 | if($cmd =~ / *([^ ]*) *(.*)/) { 320 | my ($enable, $what) = ($1, $2); 321 | $what =~ s: *\*/$::; # cut off end of C comment 322 | # print "ENABLE $enable WHAT $what\n"; 323 | if($enable eq "disable") { 324 | my ($warn, $scope)=($1, $2); 325 | if($what =~ /([^ ]*) +(.*)/) { 326 | ($warn, $scope)=($1, $2); 327 | } 328 | else { 329 | $warn = $what; 330 | $scope = 1; 331 | } 332 | # print "IGNORE $warn for SCOPE $scope\n"; 333 | if($scope eq "all") { 334 | $scope=999999; 335 | } 336 | 337 | # Comparing for a literal zero rather than the scalar value zero 338 | # covers the case where $scope contains the ending '*' from the 339 | # comment. If we use a scalar comparison (==) we induce warnings 340 | # on non-scalar contents. 341 | if($scope eq "0") { 342 | checkwarn("BADCOMMAND", 343 | $line, 0, $file, $l, 344 | "Disable zero not supported, did you mean to enable?"); 345 | } 346 | elsif($ignore_set{$warn}) { 347 | checkwarn("BADCOMMAND", 348 | $line, 0, $file, $l, 349 | "$warn already disabled from line $ignore_set{$warn}"); 350 | } 351 | else { 352 | $ignore{$warn}=$scope; 353 | $ignore_set{$warn}=$line; 354 | $ignore_line[$line]=$l; 355 | } 356 | } 357 | elsif($enable eq "enable") { 358 | enable_warn($what, $line, $file, $l); 359 | } 360 | else { 361 | checkwarn("BADCOMMAND", 362 | $line, 0, $file, $l, 363 | "Illegal !checksrc! 
command"); 364 | } 365 | } 366 | } 367 | 368 | sub nostrings { 369 | my ($str) = @_; 370 | $str =~ s/\".*\"//g; 371 | return $str; 372 | } 373 | 374 | sub scanfile { 375 | my ($file) = @_; 376 | 377 | my $line = 1; 378 | my $prevl=""; 379 | my $prevpl=""; 380 | my $l = ""; 381 | my $prep = 0; 382 | my $prevp = 0; 383 | open(my $R, '<', $file) || die "failed to open $file"; 384 | 385 | my $incomment=0; 386 | my @copyright=(); 387 | my %includes; 388 | checksrc_clear(); # for file based ignores 389 | accept_violations(); 390 | 391 | while(<$R>) { 392 | $windows_os ? $_ =~ s/\r?\n$// : chomp; 393 | my $l = $_; 394 | my $ol = $l; # keep the unmodified line for error reporting 395 | my $column = 0; 396 | 397 | # check for !checksrc! commands 398 | if($l =~ /\!checksrc\! (.*)/) { 399 | my $cmd = $1; 400 | checksrc($cmd, $line, $file, $l) 401 | } 402 | 403 | # check for a copyright statement and save the years 404 | if($l =~ /\* +copyright .* (\d\d\d\d|)/i) { 405 | my $count = 0; 406 | while($l =~ /([\d]{4})/g) { 407 | push @copyright, { 408 | year => $1, 409 | line => $line, 410 | col => index($l, $1), 411 | code => $l 412 | }; 413 | $count++; 414 | } 415 | if(!$count) { 416 | # year-less 417 | push @copyright, { 418 | year => -1, 419 | line => $line, 420 | col => index($l, $1), 421 | code => $l 422 | }; 423 | } 424 | } 425 | 426 | # detect long lines 427 | if(length($l) > $max_column) { 428 | checkwarn("LONGLINE", $line, length($l), $file, $l, 429 | "Longer than $max_column columns"); 430 | } 431 | # detect TAB characters 432 | if($l =~ /^(.*)\t/) { 433 | checkwarn("TABS", 434 | $line, length($1), $file, $l, "Contains TAB character", 1); 435 | } 436 | # detect trailing whitespace 437 | if($l =~ /^(.*)[ \t]+\z/) { 438 | checkwarn("TRAILINGSPACE", 439 | $line, length($1), $file, $l, "Trailing whitespace"); 440 | } 441 | 442 | # no space after comment start 443 | if($l =~ /^(.*)\/\*\w/) { 444 | checkwarn("COMMENTNOSPACESTART", 445 | $line, length($1) + 2, $file, $l, 446 | 
"Missing space after comment start"); 447 | } 448 | # no space at comment end 449 | if($l =~ /^(.*)\w\*\//) { 450 | checkwarn("COMMENTNOSPACEEND", 451 | $line, length($1) + 1, $file, $l, 452 | "Missing space end comment end"); 453 | } 454 | # ------------------------------------------------------------ 455 | # Above this marker, the checks were done on lines *including* 456 | # comments 457 | # ------------------------------------------------------------ 458 | 459 | # strip off C89 comments 460 | 461 | comment: 462 | if(!$incomment) { 463 | if($l =~ s/\/\*.*\*\// /g) { 464 | # full /* comments */ were removed! 465 | } 466 | if($l =~ s/\/\*.*//) { 467 | # start of /* comment was removed 468 | $incomment = 1; 469 | } 470 | } 471 | else { 472 | if($l =~ s/.*\*\///) { 473 | # end of comment */ was removed 474 | $incomment = 0; 475 | goto comment; 476 | } 477 | else { 478 | # still within a comment 479 | $l=""; 480 | } 481 | } 482 | 483 | # ------------------------------------------------------------ 484 | # Below this marker, the checks were done on lines *without* 485 | # comments 486 | # ------------------------------------------------------------ 487 | 488 | # prev line was a preprocessor **and** ended with a backslash 489 | if($prep && ($prevpl =~ /\\ *\z/)) { 490 | # this is still a preprocessor line 491 | $prep = 1; 492 | goto preproc; 493 | } 494 | $prep = 0; 495 | 496 | # crude attempt to detect // comments without too many false 497 | # positives 498 | if($l =~ /^(([^"\*]*)[^:"]|)\/\//) { 499 | checkwarn("CPPCOMMENTS", 500 | $line, length($1), $file, $l, "\/\/ comment"); 501 | } 502 | 503 | if($l =~ /^(\#\s*include\s+)([\">].*[>}"])/) { 504 | my ($pre, $path) = ($1, $2); 505 | if($includes{$path}) { 506 | checkwarn("INCLUDEDUP", 507 | $line, length($1), $file, $l, "duplicated include"); 508 | } 509 | $includes{$path} = $l; 510 | } 511 | 512 | # detect and strip preprocessor directives 513 | if($l =~ /^[ \t]*\#/) { 514 | # preprocessor line 515 | $prep = 1; 516 
| goto preproc; 517 | } 518 | 519 | my $nostr = nostrings($l); 520 | # check spaces after for/if/while/function call 521 | if($nostr =~ /^(.*)(for|if|while|switch| ([a-zA-Z0-9_]+)) \((.)/) { 522 | my ($leading, $word, $extra, $first)=($1,$2,$3,$4); 523 | if($1 =~ / *\#/) { 524 | # this is a #if, treat it differently 525 | } 526 | elsif(defined $3 && $3 eq "return") { 527 | # return must have a space 528 | } 529 | elsif(defined $3 && $3 eq "case") { 530 | # case must have a space 531 | } 532 | elsif(($first eq "*") && ($word !~ /(for|if|while|switch)/)) { 533 | # A "(*" beginning makes the space OK because it wants to 534 | # allow function pointer declared 535 | } 536 | elsif($1 =~ / *typedef/) { 537 | # typedefs can use space-paren 538 | } 539 | else { 540 | checkwarn("SPACEBEFOREPAREN", $line, length($leading)+length($word), $file, $l, 541 | "$word with space"); 542 | } 543 | } 544 | # check for '== NULL' in if/while conditions but not if the thing on 545 | # the left of it is a function call 546 | if($nostr =~ /^(.*)(if|while)(\(.*?)([!=]= NULL|NULL [!=]=)/) { 547 | checkwarn("EQUALSNULL", $line, 548 | length($1) + length($2) + length($3), 549 | $file, $l, "we prefer !variable instead of \"== NULL\" comparisons"); 550 | } 551 | 552 | # check for '!= 0' in if/while conditions but not if the thing on 553 | # the left of it is a function call 554 | if($nostr =~ /^(.*)(if|while)(\(.*[^)]) != 0[^x]/) { 555 | checkwarn("NOTEQUALSZERO", $line, 556 | length($1) + length($2) + length($3), 557 | $file, $l, "we prefer if(rc) instead of \"rc != 0\" comparisons"); 558 | } 559 | 560 | # check spaces in 'do {' 561 | if($nostr =~ /^( *)do( *)\{/ && length($2) != 1) { 562 | checkwarn("DOBRACE", $line, length($1) + 2, $file, $l, "one space after do before brace"); 563 | } 564 | # check spaces in 'do {' 565 | elsif($nostr =~ /^( *)\}( *)while/ && length($2) != 1) { 566 | checkwarn("BRACEWHILE", $line, length($1) + 2, $file, $l, "one space between brace and while"); 567 | } 568 | 
if($nostr =~ /^((.*\s)(if) *\()(.*)\)(.*)/) { 569 | my $pos = length($1); 570 | my $postparen = $5; 571 | my $cond = $4; 572 | if($cond =~ / = /) { 573 | checkwarn("ASSIGNWITHINCONDITION", 574 | $line, $pos+1, $file, $l, 575 | "assignment within conditional expression"); 576 | } 577 | my $temp = $cond; 578 | $temp =~ s/\(//g; # remove open parens 579 | my $openc = length($cond) - length($temp); 580 | 581 | $temp = $cond; 582 | $temp =~ s/\)//g; # remove close parens 583 | my $closec = length($cond) - length($temp); 584 | my $even = $openc == $closec; 585 | 586 | if($l =~ / *\#/) { 587 | # this is a #if, treat it differently 588 | } 589 | elsif($even && $postparen && 590 | ($postparen !~ /^ *$/) && ($postparen !~ /^ *[,{&|\\]+/)) { 591 | checkwarn("ONELINECONDITION", 592 | $line, length($l)-length($postparen), $file, $l, 593 | "conditional block on the same line"); 594 | } 595 | } 596 | # check spaces after open parentheses 597 | if($l =~ /^(.*[a-z])\( /i) { 598 | checkwarn("SPACEAFTERPAREN", 599 | $line, length($1)+1, $file, $l, 600 | "space after open parenthesis"); 601 | } 602 | 603 | # check spaces before close parentheses, unless it was a space or a 604 | # close parenthesis! 605 | if($l =~ /(.*[^\) ]) \)/) { 606 | checkwarn("SPACEBEFORECLOSE", 607 | $line, length($1)+1, $file, $l, 608 | "space before close parenthesis"); 609 | } 610 | 611 | # check spaces before comma! 
612 | if($l =~ /(.*[^ ]) ,/) { 613 | checkwarn("SPACEBEFORECOMMA", 614 | $line, length($1)+1, $file, $l, 615 | "space before comma"); 616 | } 617 | 618 | # check for "return(" without space 619 | if($l =~ /^(.*)return\(/) { 620 | if($1 =~ / *\#/) { 621 | # this is a #if, treat it differently 622 | } 623 | else { 624 | checkwarn("RETURNNOSPACE", $line, length($1)+6, $file, $l, 625 | "return without space before paren"); 626 | } 627 | } 628 | 629 | # check for "sizeof" without parenthesis 630 | if(($l =~ /^(.*)sizeof *([ (])/) && ($2 ne "(")) { 631 | if($1 =~ / *\#/) { 632 | # this is a #if, treat it differently 633 | } 634 | else { 635 | checkwarn("SIZEOFNOPAREN", $line, length($1)+6, $file, $l, 636 | "sizeof without parenthesis"); 637 | } 638 | } 639 | 640 | # check for comma without space 641 | if($l =~ /^(.*),[^ \n]/) { 642 | my $pref=$1; 643 | my $ign=0; 644 | if($pref =~ / *\#/) { 645 | # this is a #if, treat it differently 646 | $ign=1; 647 | } 648 | elsif($pref =~ /\/\*/) { 649 | # this is a comment 650 | $ign=1; 651 | } 652 | elsif($pref =~ /[\"\']/) { 653 | $ign = 1; 654 | # There is a quote here, figure out whether the comma is 655 | # within a string or '' or not. 
656 | if($pref =~ /\"/) { 657 | # within a string 658 | } 659 | elsif($pref =~ /\'$/) { 660 | # a single letter 661 | } 662 | else { 663 | $ign = 0; 664 | } 665 | } 666 | if(!$ign) { 667 | checkwarn("COMMANOSPACE", $line, length($pref)+1, $file, $l, 668 | "comma without following space"); 669 | } 670 | } 671 | 672 | # check for "} else" 673 | if($l =~ /^(.*)\} *else/) { 674 | checkwarn("BRACEELSE", 675 | $line, length($1), $file, $l, "else after closing brace on same line"); 676 | } 677 | # check for "){" 678 | if($l =~ /^(.*)\)\{/) { 679 | checkwarn("PARENBRACE", 680 | $line, length($1)+1, $file, $l, "missing space after close paren"); 681 | } 682 | # check for "^{" with an empty line before it 683 | if(($l =~ /^\{/) && ($prevl =~ /^[ \t]*\z/)) { 684 | checkwarn("EMPTYLINEBRACE", 685 | $line, 0, $file, $l, "empty line before open brace"); 686 | } 687 | 688 | # check for space before the semicolon last in a line 689 | if($l =~ /^(.*[^ ].*) ;$/) { 690 | checkwarn("SPACESEMICOLON", 691 | $line, length($1), $file, $ol, "no space before semicolon"); 692 | } 693 | 694 | # check for space before the colon in a switch label 695 | if($l =~ /^( *(case .+|default)) :/) { 696 | checkwarn("SPACESWITCHCOLON", 697 | $line, length($1), $file, $ol, "no space before colon of switch label"); 698 | } 699 | 700 | # scan for use of banned functions 701 | if($l =~ /^(.*\W) 702 | (gmtime|localtime| 703 | gets| 704 | strtok| 705 | v?sprintf| 706 | (str|_mbs|_tcs|_wcs)n?cat| 707 | LoadLibrary(Ex)?(A|W)?) 708 | \s*\( 709 | /x) { 710 | checkwarn("BANNEDFUNC", 711 | $line, length($1), $file, $ol, 712 | "use of $2 is banned"); 713 | } 714 | if($warnings{"STRERROR"}) { 715 | # scan for use of banned strerror. This is not a BANNEDFUNC to 716 | # allow for individual enable/disable of this warning. 
717 | if($l =~ /^(.*\W)(strerror)\s*\(/x) { 718 | if($1 !~ /^ *\#/) { 719 | # skip preprocessor lines 720 | checkwarn("STRERROR", 721 | $line, length($1), $file, $ol, 722 | "use of $2 is banned"); 723 | } 724 | } 725 | } 726 | # scan for use of snprintf for curl-internals reasons 727 | if($l =~ /^(.*\W)(v?snprintf)\s*\(/x) { 728 | checkwarn("SNPRINTF", 729 | $line, length($1), $file, $ol, 730 | "use of $2 is banned"); 731 | } 732 | 733 | # scan for use of non-binary fopen without the macro 734 | if($l =~ /^(.*\W)fopen\s*\([^,]*, *\"([^"]*)/) { 735 | my $mode = $2; 736 | if($mode !~ /b/) { 737 | checkwarn("FOPENMODE", 738 | $line, length($1), $file, $ol, 739 | "use of non-binary fopen without FOPEN_* macro: $mode"); 740 | } 741 | } 742 | 743 | # check for open brace first on line but not first column only alert 744 | # if previous line ended with a close paren and it wasn't a cpp line 745 | if(($prevl =~ /\)\z/) && ($l =~ /^( +)\{/) && !$prevp) { 746 | checkwarn("BRACEPOS", 747 | $line, length($1), $file, $ol, "badly placed open brace"); 748 | } 749 | 750 | # if the previous line starts with if/while/for AND ends with an open 751 | # brace, or an else statement, check that this line is indented $indent 752 | # more steps, if not a cpp line 753 | if(!$prevp && ($prevl =~ /^( *)((if|while|for)\(.*\{|else)\z/)) { 754 | my $first = length($1); 755 | # this line has some character besides spaces 756 | if($l =~ /^( *)[^ ]/) { 757 | my $second = length($1); 758 | my $expect = $first+$indent; 759 | if($expect != $second) { 760 | my $diff = $second - $first; 761 | checkwarn("INDENTATION", $line, length($1), $file, $ol, 762 | "not indented $indent steps (uses $diff)"); 763 | 764 | } 765 | } 766 | } 767 | 768 | # if the previous line starts with if/while/for AND ends with a closed 769 | # parenthesis and there's an equal number of open and closed 770 | # parentheses, check that this line is indented $indent more steps, if 771 | # not a cpp line 772 | elsif(!$prevp && ($prevl 
=~ /^( *)(if|while|for)(\(.*\))\z/)) { 773 | my $first = length($1); 774 | my $op = $3; 775 | my $cl = $3; 776 | 777 | $op =~ s/[^(]//g; 778 | $cl =~ s/[^)]//g; 779 | 780 | if(length($op) == length($cl)) { 781 | # this line has some character besides spaces 782 | if($l =~ /^( *)[^ ]/) { 783 | my $second = length($1); 784 | my $expect = $first+$indent; 785 | if($expect != $second) { 786 | my $diff = $second - $first; 787 | checkwarn("INDENTATION", $line, length($1), $file, $ol, 788 | "not indented $indent steps (uses $diff)"); 789 | } 790 | } 791 | } 792 | } 793 | 794 | # check for 'char * name' 795 | if(($l =~ /(^.*(char|int|long|void|CURL|CURLM|CURLMsg|[cC]url_[A-Za-z_]+|struct [a-zA-Z_]+) *(\*+)) (\w+)/) && ($4 !~ /^(const|volatile)$/)) { 796 | checkwarn("ASTERISKSPACE", 797 | $line, length($1), $file, $ol, 798 | "space after declarative asterisk"); 799 | } 800 | # check for 'char*' 801 | if(($l =~ /(^.*(char|int|long|void|curl_slist|CURL|CURLM|CURLMsg|curl_httppost|sockaddr_in|FILE)\*)/)) { 802 | checkwarn("ASTERISKNOSPACE", 803 | $line, length($1)-1, $file, $ol, 804 | "no space before asterisk"); 805 | } 806 | 807 | # check for 'void func() {', but avoid false positives by requiring 808 | # both an open and closed parentheses before the open brace 809 | if($l =~ /^((\w).*)\{\z/) { 810 | my $k = $1; 811 | $k =~ s/const *//; 812 | $k =~ s/static *//; 813 | if($k =~ /\(.*\)/) { 814 | checkwarn("BRACEPOS", 815 | $line, length($l)-1, $file, $ol, 816 | "wrongly placed open brace"); 817 | } 818 | } 819 | 820 | # check for equals sign without spaces next to it 821 | if($nostr =~ /(.*)\=[a-z0-9]/i) { 822 | checkwarn("EQUALSNOSPACE", 823 | $line, length($1)+1, $file, $ol, 824 | "no space after equals sign"); 825 | } 826 | # check for equals sign without spaces before it 827 | elsif($nostr =~ /(.*)[a-z0-9]\=/i) { 828 | checkwarn("NOSPACEEQUALS", 829 | $line, length($1)+1, $file, $ol, 830 | "no space before equals sign"); 831 | } 832 | 833 | # check for plus signs without 
spaces next to it 834 | if($nostr =~ /(.*)[^+]\+[a-z0-9]/i) { 835 | checkwarn("PLUSNOSPACE", 836 | $line, length($1)+1, $file, $ol, 837 | "no space after plus sign"); 838 | } 839 | # check for plus sign without spaces before it 840 | elsif($nostr =~ /(.*)[a-z0-9]\+[^+]/i) { 841 | checkwarn("NOSPACEPLUS", 842 | $line, length($1)+1, $file, $ol, 843 | "no space before plus sign"); 844 | } 845 | 846 | # check for semicolons without space next to it 847 | if($nostr =~ /(.*)\;[a-z0-9]/i) { 848 | checkwarn("SEMINOSPACE", 849 | $line, length($1)+1, $file, $ol, 850 | "no space after semicolon"); 851 | } 852 | 853 | # typedef struct ... { 854 | if($nostr =~ /^(.*)typedef struct.*{/) { 855 | checkwarn("TYPEDEFSTRUCT", 856 | $line, length($1)+1, $file, $ol, 857 | "typedef'ed struct"); 858 | } 859 | 860 | if($nostr =~ /(.*)! +(\w|\()/) { 861 | checkwarn("EXCLAMATIONSPACE", 862 | $line, length($1)+1, $file, $ol, 863 | "space after exclamation mark"); 864 | } 865 | 866 | # check for more than one consecutive space before open brace or 867 | # question mark. Skip lines containing strings since they make it hard 868 | # due to artificially getting multiple spaces 869 | if(($l eq $nostr) && 870 | $nostr =~ /^(.*(\S)) + [{?]/i) { 871 | checkwarn("MULTISPACE", 872 | $line, length($1)+1, $file, $ol, 873 | "multiple spaces"); 874 | } 875 | preproc: 876 | $line++; 877 | $prevp = $prep; 878 | $prevl = $ol if(!$prep); 879 | $prevpl = $ol if($prep); 880 | } 881 | 882 | if(!scalar(@copyright)) { 883 | checkwarn("COPYRIGHT", 1, 0, $file, "", "Missing copyright statement", 1); 884 | } 885 | 886 | # COPYRIGHTYEAR is an extended warning so we must first see if it has been 887 | # enabled in .checksrc 888 | if(defined($warnings{"COPYRIGHTYEAR"})) { 889 | # The check for updated copyrightyear is overly complicated in order to 890 | # not punish current hacking for past sins. 
The copyright years are 891 | # right now a bit behind, so enforcing copyright year checking on all 892 | # files would cause hundreds of errors. Instead we only look at files 893 | # which are tracked in the Git repo and edited in the workdir, or 894 | # committed locally on the branch without being in upstream master. 895 | # 896 | # The simple and naive test is to simply check for the current year, 897 | # but updating the year even without an edit is against project policy 898 | # (and it would fail every file on January 1st). 899 | # 900 | # A rather more interesting, and correct, check would be to not test 901 | # only locally committed files but inspect all files wrt the year of 902 | # their last commit. Removing the `git rev-list origin/master..HEAD` 903 | # condition below will enforce copyright year checks against the year 904 | # the file was last committed (and thus edited to some degree). 905 | my $commityear = undef; 906 | @copyright = sort {$$b{year} cmp $$a{year}} @copyright; 907 | 908 | # if the file is modified, assume commit year this year 909 | if(`git status -s -- $file` =~ /^ [MARCU]/) { 910 | $commityear = (localtime(time))[5] + 1900; 911 | } 912 | else { 913 | # min-parents=1 to ignore wrong initial commit in truncated repos 914 | my $grl = `git rev-list --max-count=1 --min-parents=1 --timestamp HEAD -- $file`; 915 | if($grl) { 916 | chomp $grl; 917 | $commityear = (localtime((split(/ /, $grl))[0]))[5] + 1900; 918 | } 919 | } 920 | 921 | if(defined($commityear) && scalar(@copyright) && 922 | $copyright[0]{year} != $commityear) { 923 | checkwarn("COPYRIGHTYEAR", $copyright[0]{line}, $copyright[0]{col}, 924 | $file, $copyright[0]{code}, 925 | "Copyright year out of date, should be $commityear, " . 
926 | "is $copyright[0]{year}", 1); 927 | } 928 | } 929 | 930 | if($incomment) { 931 | checkwarn("OPENCOMMENT", 1, 0, $file, "", "Missing closing comment", 1); 932 | } 933 | 934 | checksrc_endoffile($file); 935 | 936 | close($R); 937 | 938 | } 939 | 940 | 941 | if($errors || $warnings || $verbose) { 942 | printf "checksrc: %d errors and %d warnings\n", $errors, $warnings; 943 | if($suppressed) { 944 | printf "checksrc: %d errors and %d warnings suppressed\n", 945 | $serrors, 946 | $swarnings; 947 | } 948 | exit 5; # return failure 949 | } 950 | -------------------------------------------------------------------------------- /scripts/generate_completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ########################################################################## 3 | # _ _ 4 | # Project | |_ _ __ _ _ _ __| | 5 | # | __| '__| | | | '__| | 6 | # | |_| | | |_| | | | | 7 | # \__|_| \__,_|_| |_| 8 | # 9 | # Copyright (C) Daniel Stenberg, , et al. 10 | # 11 | # This software is licensed as described in the file COPYING, which 12 | # you should have received as part of this distribution. The terms 13 | # are also available at https://curl.se/docs/copyright.html. 14 | # 15 | # You may opt to use, copy, modify, merge, publish, distribute and/or sell 16 | # copies of the Software, and permit persons to whom the Software is 17 | # furnished to do so, under the terms of the COPYING file. 18 | # 19 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 20 | # KIND, either express or implied. 21 | # 22 | # SPDX-License-Identifier: curl 23 | # 24 | ########################################################################## 25 | 26 | 27 | if [ -z "$1" ]; then 28 | echo "expected a trurl.md file to be passed in..." 
29 | exit 1 30 | fi 31 | 32 | TRURL_MD_FILE=$1 33 | 34 | 35 | 36 | ALL_FLAGS="$(sed -n \ 37 | -e 's/"//g' \ 38 | -e '/\# URL COMPONENTS/q;p' \ 39 | < "${TRURL_MD_FILE}" \ 40 | | grep "##" \ 41 | | awk '{printf "%s%s%s%s ", $2, $3, $4, $5}')" 42 | 43 | 44 | TRURL_COMPONENT_OPTIONS="" 45 | TRURL_STANDALONE_FLAGS="" 46 | TRURL_RANDOM_OPTIONS="" 47 | TRURL_COMPONENT_LIST="$(sed -n \ 48 | -e 's/"//g' \ 49 | -e '1,/\# URL COMPONENTS/ d' \ 50 | -e '/\# JSON output format/q;p' \ 51 | < "${TRURL_MD_FILE}" \ 52 | | grep "##" \ 53 | | awk '{printf "\"%s\" ", $2}')" 54 | 55 | for flag in $ALL_FLAGS; do 56 | # these are now TRURL_STANDALONE 57 | if echo "$flag" | grep -q "="; then 58 | TRURL_COMPONENT_OPTIONS+="$(echo "$flag" \ 59 | | awk '{split($0, a, ","); for(i in a) {printf "%s ", a[i]}}' \ 60 | | cut -f1 -d '[' \ 61 | | awk '{printf "\"%s\" ", $1}')" 62 | elif echo "$flag" | grep -q "\["; then 63 | TRURL_RANDOM_OPTIONS+="$(echo "$flag" \ 64 | | awk '{split($0, a, ","); for(i in a) {printf "%s ", a[i]}}' \ 65 | | cut -f1 -d '[' \ 66 | | awk '{printf "\"%s\" ", $1}')" 67 | else 68 | TRURL_STANDALONE_FLAGS+="$(echo "$flag" \ 69 | | awk '{split($0, a, ","); for(i in a) {printf "\"%s\" ", a[i]}}')" 70 | fi 71 | done 72 | 73 | function generate_zsh() { 74 | sed -e "s/@TRURL_RANDOM_OPTIONS@/${TRURL_RANDOM_OPTIONS}/g" \ 75 | -e "s/@TRURL_STANDALONE_FLAGS@/${TRURL_STANDALONE_FLAGS}/g" \ 76 | -e "s/@TRURL_COMPONENT_OPTIONS@/${TRURL_COMPONENT_OPTIONS}/g" \ 77 | -e "s/@TRURL_COMPONENT_LIST@/${TRURL_COMPONENT_LIST}/g" \ 78 | ./completions/_trurl.zsh.in > ./completions/_trurl.zsh 79 | } 80 | 81 | generate_zsh "$TRURL_RANDOM_OPTIONS" 82 | -------------------------------------------------------------------------------- /scripts/mkrelease: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ########################################################################## 3 | # _ _ ____ _ 4 | # Project ___| | | | _ \| | 5 | # / __| | | | |_) | | 6 | 
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, , et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
##########################################################################

# Build a reproducible release tarball "trurl-<version>.tar.gz".
#
# Usage: mkrelease <version>
#
# Steps: stamp the version into trurl.md and version.h, render the man page,
# copy the tracked files into a release tree, pack it, then repack it with
# normalized timestamps, ownership and ordering.

set -eu

# Fixed locale and time zone so that tool output and timestamps do not
# depend on the environment of the person cutting the release.
export LC_ALL=C
export TZ=UTC

version="${1:-}"

if [ -z "$version" ]; then
  echo "Specify a version number!" >&2
  # a bare "exit" would return the status of the echo above, i.e. success
  exit 1
fi

rel="trurl-$version"

mkdir "$rel"

# update title in markdown manpage
# ("-i -e", not "-ie": GNU sed reads "-ie" as "-i" with backup suffix "e"
# and would leave a stray trurl.mde behind)
sed -i -e "s/^Source: trurl \([0-9.]*\)/Source: trurl $version/" trurl.md

# update version number in header file
sed -i -e "s/\"[\.0-9]*\"/\"$version\"/" version.h

# render the manpage into nroff
./scripts/cd2nroff trurl.md > "$rel/trurl.1"

# create a release directory tree
# (the command substitution is intentionally unquoted: one word per file)
cp -p --parents $(git ls-files | grep -vE '^(.github/|.reuse/|.gitignore|LICENSES/)') "$rel"

# create tarball from the tree
targz="$rel.tar.gz"
tar cfz "$targz" "$rel"

# SOURCE_DATE_EPOCH, when set, overrides "now" as the timestamp source
timestamp=${SOURCE_DATE_EPOCH:-$(date +"%s")}
filestamp=$(date -d "@$timestamp" +"%Y%m%d%H%M.%S")

# retar <tempdir>
#
# Unpack $targz inside <tempdir>, give every entry the mtime $filestamp and
# repack it as ../out.tar.gz with ustar format, owner/group 0, name-sorted
# entries and no gzip name/timestamp. <tempdir> is removed afterwards.
retar() {
  tempdir=$1
  rm -rf "$tempdir"
  mkdir "$tempdir"
  cd "$tempdir"
  gzip -dc "../$targz" | tar -xf -
  find trurl-* -depth -exec touch -c -t "$filestamp" '{}' +
  tar --create --format=ustar --owner=0 --group=0 --numeric-owner --sort=name trurl-* | gzip --best --no-name > out.tar.gz
  mv out.tar.gz ../
  cd ..
  rm -rf "$tempdir"
}

# make it reproducible
retar ".tarbuild"
mv out.tar.gz "$targz"

# remove the temporary directory
rm -rf "$rel"

# Set deterministic timestamp
touch -c -t "$filestamp" "$targz"

echo "Now sign the release:"
echo "gpg -b -a '$targz'"
21 | # 22 | # SPDX-License-Identifier: curl 23 | # 24 | ########################################################################## 25 | 26 | import sys 27 | from os import getcwd, path 28 | import json 29 | import shlex 30 | from subprocess import PIPE, run, Popen 31 | from dataclasses import dataclass, asdict 32 | from typing import Any, Optional, TextIO 33 | import locale 34 | 35 | PROGNAME = "trurl" 36 | TESTFILE = "tests.json" 37 | VALGRINDTEST = "valgrind" 38 | VALGRINDARGS = ["--error-exitcode=1", "--leak-check=full", "-q"] 39 | 40 | RED = "\033[91m" # used to mark unsuccessful tests 41 | NOCOLOR = "\033[0m" 42 | 43 | EXIT_SUCCESS = 0 44 | EXIT_ERROR = 1 45 | 46 | @dataclass 47 | class CommandOutput: 48 | stdout: Any 49 | returncode: int 50 | stderr: str 51 | 52 | 53 | def testComponent(value, exp): 54 | if isinstance(exp, bool): 55 | result = value == 0 or value not in ("", []) 56 | if exp: 57 | return result 58 | else: 59 | return not result 60 | 61 | return value == exp 62 | 63 | # checks if valgrind is installed 64 | def check_valgrind(): 65 | process = Popen(VALGRINDTEST + " --version", 66 | shell=True, stdout=PIPE, stderr=PIPE, encoding="utf-8") 67 | output, error = process.communicate() 68 | if output.startswith(VALGRINDTEST) and not len(error): 69 | return True 70 | return False 71 | 72 | 73 | def getcharmap(): 74 | process = Popen("locale charmap", shell=True, stdout=PIPE, stderr=PIPE, encoding="utf-8"); 75 | output, error = process.communicate() 76 | return output.strip() 77 | 78 | 79 | class TestCase: 80 | def __init__(self, testIndex, runnerCmd, baseCmd, **testCase): 81 | self.testIndex = testIndex 82 | self.runnerCmd = runnerCmd 83 | self.baseCmd = baseCmd 84 | self.arguments = testCase["input"]["arguments"] 85 | self.expected = testCase["expected"] 86 | self.commandOutput: CommandOutput = None 87 | self.testPassed: bool = False 88 | 89 | def runCommand(self, cmdfilter: Optional[str], runWithValgrind: bool): 90 | # Skip test if none of the 
arguments contain the keyword 91 | if cmdfilter and all(cmdfilter not in arg for arg in self.arguments): 92 | return False 93 | 94 | cmd = [self.baseCmd] 95 | args = self.arguments 96 | if self.runnerCmd != "": 97 | cmd = [self.runnerCmd] 98 | args = [self.baseCmd] + self.arguments 99 | elif runWithValgrind: 100 | cmd = [VALGRINDTEST] 101 | args = VALGRINDARGS + [self.baseCmd] + self.arguments 102 | 103 | output = run( 104 | cmd + args, 105 | stdout=PIPE, stderr=PIPE, 106 | encoding="utf-8" 107 | ) 108 | 109 | if isinstance(self.expected["stdout"], list): 110 | # if we don't expect string, parse to json 111 | try: 112 | stdout = json.loads(output.stdout) 113 | except json.decoder.JSONDecodeError: 114 | stdout = None 115 | else: 116 | stdout = output.stdout 117 | 118 | # assume stderr is always going to be string 119 | stderr = output.stderr 120 | 121 | # runners (e.g. wine) spill their own output into stderr, 122 | # ignore stderr tests when using a runner. 123 | if self.runnerCmd != "" and "stderr" in self.expected: 124 | stderr = self.expected["stderr"] 125 | 126 | self.commandOutput = CommandOutput(stdout, output.returncode, stderr) 127 | return True 128 | 129 | def test(self): 130 | # return true only if stdout, stderr and errorcode 131 | # are equal to the ones found in the testfile 132 | self.testPassed = all( 133 | testComponent(asdict(self.commandOutput)[k], exp) 134 | for k, exp in self.expected.items()) 135 | return self.testPassed 136 | 137 | def _printVerbose(self, output: TextIO): 138 | self._printConcise(output) 139 | 140 | for component, exp in self.expected.items(): 141 | value = asdict(self.commandOutput)[component] 142 | itemFail = self.commandOutput.returncode == 1 or \ 143 | not testComponent(value, exp) 144 | 145 | print(f"--- {component} --- ", file=output) 146 | print("expected:", file=output) 147 | print("nothing" if exp is False else 148 | "something" if exp is True else 149 | f"{exp!r}",file=output) 150 | print("got:", file=output) 151 | 
152 | header = RED if itemFail else "" 153 | footer = NOCOLOR if itemFail else "" 154 | print(f"{header}{value!r}{footer}", file=output) 155 | 156 | print() 157 | 158 | def _printConcise(self, output: TextIO): 159 | if self.testPassed: 160 | header = "" 161 | result = "passed" 162 | footer = "" 163 | else: 164 | header = RED 165 | result = "failed" 166 | footer = NOCOLOR 167 | text = f"{self.testIndex}: {result}\t{shlex.join(self.arguments)}" 168 | print(f"{header}{text}{footer}", file=output) 169 | 170 | 171 | def printDetail(self, verbose: bool = False, failed: bool = False): 172 | output: TextIO = sys.stderr if failed else sys.stdout 173 | if verbose: 174 | self._printVerbose(output) 175 | else: 176 | self._printConcise(output) 177 | 178 | 179 | def main(argc, argv): 180 | ret = EXIT_SUCCESS 181 | baseDir = path.dirname(path.realpath(argv[0])) 182 | locale.setlocale(locale.LC_ALL, "") 183 | # python on windows does not always seem to find the 184 | # executable if it is in a different output directory than 185 | # the python script, even if it is in the current working 186 | # directory, using absolute paths to the executable and json 187 | # file makes it reliably find the executable 188 | baseCmd = path.join(getcwd(), PROGNAME) 189 | # the .exe on the end is necessary when using absolute paths 190 | if sys.platform == "win32" or sys.platform == "cygwin": 191 | baseCmd += ".exe" 192 | 193 | with open(path.join(baseDir, TESTFILE), "r", encoding="utf-8") as file: 194 | allTests = json.load(file) 195 | testIndexesToRun = [] 196 | 197 | # if argv[1] exists and starts with int 198 | cmdfilter = "" 199 | testIndexesToRun = list(range(len(allTests))) 200 | runWithValgrind = False 201 | verboseDetail = False 202 | runnerCmd = "" 203 | 204 | if argc > 1: 205 | for arg in argv[1:]: 206 | if arg[0].isnumeric(): 207 | # run only test cases separated by "," 208 | testIndexesToRun = [] 209 | 210 | for caseIndex in arg.split(","): 211 | testIndexesToRun.append(int(caseIndex)) 
212 | elif arg == "--with-valgrind": 213 | runWithValgrind = True 214 | elif arg == "--verbose": 215 | verboseDetail = True 216 | elif arg.startswith("--trurl="): 217 | baseCmd = arg[len("--trurl="):] 218 | elif arg.startswith("--runner="): 219 | runnerCmd = arg[len("--runner="):] 220 | else: 221 | cmdfilter = argv[1] 222 | 223 | if runWithValgrind and not check_valgrind(): 224 | print(f'Error: {VALGRINDTEST} is not installed!', file=sys.stderr) 225 | return EXIT_ERROR 226 | 227 | # check if the trurl executable exists 228 | if path.isfile(baseCmd): 229 | # get the version info for the feature list 230 | args = ["--version"] 231 | if runnerCmd != "": 232 | cmd = [runnerCmd] 233 | args = [baseCmd] + args 234 | else: 235 | cmd = [baseCmd] 236 | output = run( 237 | cmd + args, 238 | stdout=PIPE, stderr=PIPE, 239 | encoding="utf-8" 240 | ) 241 | features = output.stdout.split('\n')[1].split()[1:] 242 | 243 | numTestsFailed = 0 244 | numTestsPassed = 0 245 | numTestsSkipped = 0 246 | for testIndex in testIndexesToRun: 247 | # skip tests if required features are not met 248 | required = allTests[testIndex].get("required", None) 249 | if required and not set(required).issubset(set(features)): 250 | print(f"Missing feature, skipping test {testIndex + 1}.") 251 | numTestsSkipped += 1 252 | continue 253 | encoding = allTests[testIndex].get("encoding", None) 254 | if encoding and encoding != getcharmap(): 255 | print(f"Invalid locale, skipping test {testIndex + 1}.") 256 | numTestsSkipped += 1 257 | continue; 258 | 259 | test = TestCase(testIndex + 1, runnerCmd, baseCmd, **allTests[testIndex]) 260 | 261 | if test.runCommand(cmdfilter, runWithValgrind): 262 | if test.test(): # passed 263 | test.printDetail(verbose=verboseDetail) 264 | numTestsPassed += 1 265 | 266 | else: 267 | test.printDetail(verbose=True, failed=True) 268 | numTestsFailed += 1 269 | 270 | # finally print the results to terminal 271 | print("Finished:") 272 | result = ", ".join([ 273 | f"Failed: 
{numTestsFailed}", 274 | f"Passed: {numTestsPassed}", 275 | f"Skipped: {numTestsSkipped}", 276 | f"Total: {len(testIndexesToRun)}" 277 | ]) 278 | if (numTestsFailed == 0): 279 | print("Passed! - ", result) 280 | else: 281 | ret = f"Failed! - {result}" 282 | else: 283 | ret = f" error: File \"{baseCmd}\" not found!" 284 | return ret 285 | 286 | 287 | if __name__ == "__main__": 288 | sys.exit(main(len(sys.argv), sys.argv)) 289 | -------------------------------------------------------------------------------- /testfiles/test0000.txt: -------------------------------------------------------------------------------- 1 | https://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/ 2 | http://example.org 3 | -------------------------------------------------------------------------------- /testfiles/test0001.txt: -------------------------------------------------------------------------------- 1 | https://curl.se/ 2 | 3 | https://docs.python.org/ 4 | 5 | git://github.com/curl/curl.git 6 | 7 | http://example.org 8 | 9 | 10 | 11 | xyz://hello/?hi -------------------------------------------------------------------------------- /testfiles/test0002.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/curl/trurl/a4d8ba2aaa237e3db90d350b97209838ee240aa5/testfiles/test0002.txt -------------------------------------------------------------------------------- /trurl.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * _ _ 3 | * Project | |_ _ __ _ _ _ __| | 4 | * | __| '__| | | | '__| | 5 | * | |_| | | |_| | | | | 6 | * \__|_| \__,_|_| |_| 7 | * 8 | * Copyright (C) Daniel Stenberg, , et al. 9 | * 10 | * This software is licensed as described in the file COPYING, which 11 | * you should have received as part of this distribution. 
The terms 12 | * are also available at https://curl.se/docs/copyright.html. 13 | * 14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 | * copies of the Software, and permit persons to whom the Software is 16 | * furnished to do so, under the terms of the COPYING file. 17 | * 18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 | * KIND, either express or implied. 20 | * 21 | * SPDX-License-Identifier: curl 22 | * 23 | ***************************************************************************/ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #if defined(_MSC_VER) && (_MSC_VER < 1800) 34 | typedef enum { 35 | bool_false = 0, 36 | bool_true = 1 37 | } bool; 38 | #define false bool_false 39 | #define true bool_true 40 | #else 41 | #include 42 | #endif 43 | 44 | #include /* for setlocale() */ 45 | 46 | #include "version.h" 47 | 48 | #ifdef _MSC_VER 49 | #define strdup _strdup 50 | #endif 51 | 52 | #if CURL_AT_LEAST_VERSION(7,77,0) 53 | #define SUPPORTS_NORM_IPV4 54 | #endif 55 | #if CURL_AT_LEAST_VERSION(7,81,0) 56 | #define SUPPORTS_ZONEID 57 | #endif 58 | #if CURL_AT_LEAST_VERSION(7,80,0) 59 | #define SUPPORTS_URL_STRERROR 60 | #endif 61 | #if CURL_AT_LEAST_VERSION(7,78,0) 62 | #define SUPPORTS_ALLOW_SPACE 63 | #else 64 | #define CURLU_ALLOW_SPACE 0 65 | #endif 66 | #if CURL_AT_LEAST_VERSION(7,88,0) 67 | #define SUPPORTS_PUNYCODE 68 | #endif 69 | #if CURL_AT_LEAST_VERSION(8,3,0) 70 | #define SUPPORTS_PUNY2IDN 71 | #endif 72 | #if CURL_AT_LEAST_VERSION(7,30,0) 73 | #define SUPPORTS_IMAP_OPTIONS 74 | #endif 75 | #if CURL_AT_LEAST_VERSION(8,9,0) 76 | #define SUPPORTS_NO_GUESS_SCHEME 77 | #else 78 | #define CURLU_NO_GUESS_SCHEME 0 79 | #endif 80 | #if CURL_AT_LEAST_VERSION(8,8,0) 81 | #define SUPPORTS_GET_EMPTY 82 | #else 83 | #define CURLU_GET_EMPTY 0 84 | #endif 85 | 86 | #define OUTPUT_URL 0 /* default */ 87 | #define OUTPUT_SCHEME 1 88 | 
#define OUTPUT_USER 2 89 | #define OUTPUT_PASSWORD 3 90 | #define OUTPUT_OPTIONS 4 91 | #define OUTPUT_HOST 5 92 | #define OUTPUT_PORT 6 93 | #define OUTPUT_PATH 7 94 | #define OUTPUT_QUERY 8 95 | #define OUTPUT_FRAGMENT 9 96 | #define OUTPUT_ZONEID 10 97 | 98 | #define NUM_COMPONENTS 10 /* excluding "url" */ 99 | 100 | #define PROGNAME "trurl" 101 | 102 | #define REPLACE_NULL_BYTE '.' /* for query:key extractions */ 103 | 104 | enum { 105 | VARMODIFIER_URLENCODED = 1 << 1, 106 | VARMODIFIER_DEFAULT = 1 << 2, 107 | VARMODIFIER_PUNY = 1 << 3, 108 | VARMODIFIER_PUNY2IDN = 1 << 4, 109 | VARMODIFIER_EMPTY = 1 << 8, 110 | }; 111 | 112 | struct var { 113 | const char *name; 114 | CURLUPart part; 115 | }; 116 | 117 | struct string { 118 | char *str; 119 | size_t len; 120 | }; 121 | 122 | static const struct var variables[] = { 123 | {"scheme", CURLUPART_SCHEME}, 124 | {"user", CURLUPART_USER}, 125 | {"password", CURLUPART_PASSWORD}, 126 | {"options", CURLUPART_OPTIONS}, 127 | {"host", CURLUPART_HOST}, 128 | {"port", CURLUPART_PORT}, 129 | {"path", CURLUPART_PATH}, 130 | {"query", CURLUPART_QUERY}, 131 | {"fragment", CURLUPART_FRAGMENT}, 132 | {"zoneid", CURLUPART_ZONEID}, 133 | {NULL, 0} 134 | }; 135 | 136 | #define ERROR_PREFIX PROGNAME " error: " 137 | #define WARN_PREFIX PROGNAME " note: " 138 | 139 | /* error codes */ 140 | #define ERROR_FILE 1 141 | #define ERROR_APPEND 2 /* --append mistake */ 142 | #define ERROR_ARG 3 /* a command line option misses its argument */ 143 | #define ERROR_FLAG 4 /* a command line flag mistake */ 144 | #define ERROR_SET 5 /* a --set problem */ 145 | #define ERROR_MEM 6 /* out of memory */ 146 | #define ERROR_URL 7 /* could not get a URL out of the set components */ 147 | #define ERROR_TRIM 8 /* a --qtrim problem */ 148 | #define ERROR_BADURL 9 /* if --verify is set and the URL cannot parse */ 149 | #define ERROR_GET 10 /* bad --get syntax */ 150 | #define ERROR_ITER 11 /* bad --iterate syntax */ 151 | #define ERROR_REPL 12 /* a 
--replace problem */ 152 | 153 | #ifndef SUPPORTS_URL_STRERROR 154 | /* provide a fake local mockup */ 155 | static char *curl_url_strerror(CURLUcode error) 156 | { 157 | static char buffer[128]; 158 | curl_msnprintf(buffer, sizeof(buffer), "URL error %u", (int)error); 159 | return buffer; 160 | } 161 | #endif 162 | 163 | /* Mapping table to go from lowercase to uppercase for plain ASCII.*/ 164 | static const unsigned char touppermap[256] = { 165 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 166 | 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 167 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 168 | 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 169 | 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 65, 170 | 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 171 | 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 172 | 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 173 | 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 174 | 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 175 | 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 176 | 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 177 | 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 178 | 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 179 | 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 180 | }; 181 | 182 | /* Portable, ASCII-consistent toupper. Do not use toupper() because its 183 | behavior is altered by the current locale. 
*/ 184 | #define raw_toupper(in) touppermap[(unsigned char)(in)] /* index by unsigned char: a plain char may be signed, and a negative value converted to unsigned int would index far outside the 256-entry table */ 185 | 186 | /* 187 | * casecompare() does ASCII based case insensitive checks, as a strncasecmp 188 | * replacement. It compares at most 'max' bytes and returns zero when they match, otherwise the difference between the first two uppercased bytes that differ. 189 | */ 190 | 191 | static int casecompare(const char *first, const char *second, size_t max) 192 | { 193 | while(*first && *second && max) { 194 | int diff = raw_toupper(*first) - raw_toupper(*second); 195 | if(diff) 196 | /* get out of the loop as soon as they don't match */ 197 | return diff; 198 | max--; 199 | first++; 200 | second++; 201 | } 202 | if(!max) 203 | return 0; /* identical to this point */ 204 | 205 | return raw_toupper(*first) - raw_toupper(*second); 206 | } 207 | /* message_low() writes 'prefix', the printf-style formatted message and then 'suffix' to stderr */ 208 | static void message_low(const char *prefix, const char *suffix, 209 | const char *fmt, va_list ap) 210 | { 211 | fputs(prefix, stderr); 212 | vfprintf(stderr, fmt, ap); 213 | fputs(suffix, stderr); 214 | } 215 | /* warnf_low() outputs a note: the message wrapped in WARN_PREFIX and a newline */ 216 | static void warnf_low(const char *fmt, va_list ap) 217 | { 218 | message_low(WARN_PREFIX, "\n", fmt, ap); 219 | } 220 | 221 | static void warnf(const char *fmt, ...) 
222 | { 223 | va_list ap; 224 | va_start(ap, fmt); 225 | warnf_low(fmt, ap); 226 | va_end(ap); 227 | } 228 | 229 | static void help(void) 230 | { 231 | int i; 232 | fputs( 233 | "Usage: " PROGNAME " [options] [URL]\n" 234 | " -a, --append [component]=[data] - append data to component\n" 235 | " --accept-space - give in to this URL abuse\n" 236 | " --as-idn - encode hostnames in idn\n" 237 | " --curl - only schemes supported by libcurl\n" 238 | " --default-port - add known default ports\n" 239 | " -f, --url-file [file/-] - read URLs from file or stdin\n" 240 | " -g, --get [{component}s] - output component(s)\n" 241 | " -h, --help - this help\n" 242 | " --iterate [component]=[list] - create multiple URL outputs\n" 243 | " --json - output URL as JSON\n" 244 | " --keep-port - keep known default ports\n" 245 | " --no-guess-scheme - require scheme in URLs\n" 246 | " --punycode - encode hostnames in punycode\n" 247 | " --qtrim [what] - trim the query\n" 248 | " --query-separator [letter] - if something else than '&'\n" 249 | " --quiet - Suppress (some) notes and comments\n" 250 | " --redirect [URL] - redirect to this\n" 251 | " --replace [data] - replaces a query [data]\n" 252 | " --replace-append [data] - appends a new query if not found\n" 253 | " -s, --set [component]=[data] - set component content\n" 254 | " --sort-query - alpha-sort the query pairs\n" 255 | " --url [URL] - URL to work with\n" 256 | " --urlencode - show components URL encoded\n" 257 | " -v, --version - show version\n" 258 | " --verify - return error on (first) bad URL\n" 259 | " URL COMPONENTS:\n" 260 | " ", stdout); 261 | fputs("url, ", stdout); 262 | for(i = 0; i< NUM_COMPONENTS ; i++) { 263 | printf("%s%s", i?", ":"", variables[i].name); 264 | } 265 | fputs("\n", stdout); 266 | exit(0); 267 | } 268 | 269 | static void show_version(void) 270 | { 271 | curl_version_info_data *data = curl_version_info(CURLVERSION_NOW); 272 | /* puny code isn't guaranteed based on the version, so it must be polled 273 
| * from libcurl */ 274 | #if defined(SUPPORTS_PUNYCODE) || defined(SUPPORTS_PUNY2IDN) 275 | bool supports_puny = (data->features & CURL_VERSION_IDN) != 0; 276 | #endif 277 | #if defined(SUPPORTS_IMAP_OPTIONS) 278 | bool supports_imap = false; 279 | const char *const *protocol_name = data->protocols; 280 | while(*protocol_name && !supports_imap) { 281 | supports_imap = !strncmp(*protocol_name, "imap", 3); 282 | protocol_name++; 283 | } 284 | #endif 285 | 286 | fprintf(stdout, "%s version %s libcurl/%s [built-with %s]\n", 287 | PROGNAME, TRURL_VERSION_TXT, data->version, LIBCURL_VERSION); 288 | fprintf(stdout, "features:"); 289 | #ifdef SUPPORTS_GET_EMPTY 290 | fprintf(stdout, " get-empty"); 291 | #endif 292 | #ifdef SUPPORTS_IMAP_OPTIONS 293 | if(supports_imap) 294 | fprintf(stdout, " imap-options"); 295 | #endif 296 | #ifdef SUPPORTS_NO_GUESS_SCHEME 297 | fprintf(stdout, " no-guess-scheme"); 298 | #endif 299 | #ifdef SUPPORTS_NORM_IPV4 300 | fprintf(stdout, " normalize-ipv4"); 301 | #endif 302 | #ifdef SUPPORTS_PUNYCODE 303 | if(supports_puny) 304 | fprintf(stdout, " punycode"); 305 | #endif 306 | #ifdef SUPPORTS_PUNY2IDN 307 | if(supports_puny) 308 | fprintf(stdout, " punycode2idn"); 309 | #endif 310 | #ifdef SUPPORTS_URL_STRERROR 311 | fprintf(stdout, " url-strerror"); 312 | #endif 313 | #ifdef SUPPORTS_ALLOW_SPACE 314 | fprintf(stdout, " white-space"); 315 | #endif 316 | #ifdef SUPPORTS_ZONEID 317 | fprintf(stdout, " zone-id"); 318 | #endif 319 | 320 | fprintf(stdout, "\n"); 321 | exit(0); 322 | } 323 | 324 | struct iterinfo { 325 | CURLU *uh; 326 | const char *part; 327 | size_t plen; 328 | char *ptr; 329 | unsigned int varmask; /* sets 1 << [component] */ 330 | }; 331 | 332 | struct option { 333 | struct curl_slist *url_list; 334 | struct curl_slist *append_path; 335 | struct curl_slist *append_query; 336 | struct curl_slist *set_list; 337 | struct curl_slist *trim_list; 338 | struct curl_slist *iter_list; 339 | struct curl_slist *replace_list; 340 | const 
char *redirect; 341 | const char *qsep; 342 | const char *format; 343 | FILE *url; 344 | bool urlopen; 345 | bool jsonout; 346 | bool verify; 347 | bool accept_space; 348 | bool curl; 349 | bool default_port; 350 | bool keep_port; 351 | bool punycode; 352 | bool puny2idn; 353 | bool sort_query; 354 | bool no_guess_scheme; 355 | bool urlencode; 356 | bool end_of_options; 357 | bool quiet_warnings; 358 | bool force_replace; 359 | 360 | /* -- stats -- */ 361 | unsigned int urls; 362 | }; 363 | 364 | static void trurl_warnf(struct option *o, const char *fmt, ...) 365 | { 366 | if(!o->quiet_warnings) { 367 | va_list ap; 368 | va_start(ap, fmt); 369 | fputs(WARN_PREFIX, stderr); 370 | vfprintf(stderr, fmt, ap); 371 | fputs("\n", stderr); 372 | va_end(ap); 373 | } 374 | } 375 | 376 | #define MAX_QPAIRS 1000 377 | struct string qpairs[MAX_QPAIRS]; /* encoded */ 378 | struct string qpairsdec[MAX_QPAIRS]; /* decoded */ 379 | int nqpairs; /* how many is stored */ 380 | 381 | static void trurl_cleanup_options(struct option *o) 382 | { 383 | if(!o) 384 | return; 385 | curl_slist_free_all(o->url_list); 386 | curl_slist_free_all(o->set_list); 387 | curl_slist_free_all(o->iter_list); 388 | curl_slist_free_all(o->append_query); 389 | curl_slist_free_all(o->trim_list); 390 | curl_slist_free_all(o->replace_list); 391 | curl_slist_free_all(o->append_path); 392 | } 393 | 394 | static void errorf_low(const char *fmt, va_list ap) 395 | { 396 | message_low(ERROR_PREFIX, "\n" 397 | ERROR_PREFIX "Try " PROGNAME " -h for help\n", fmt, ap); 398 | } 399 | 400 | static void errorf(struct option *o, int exit_code, const char *fmt, ...) 
401 | { 402 | va_list ap; 403 | va_start(ap, fmt); 404 | errorf_low(fmt, ap); 405 | va_end(ap); 406 | trurl_cleanup_options(o); 407 | curl_global_cleanup(); 408 | exit(exit_code); 409 | } 410 | 411 | static char *xstrdup(struct option *o, const char *ptr) 412 | { 413 | char *temp = strdup(ptr); 414 | if(!temp) 415 | errorf(o, ERROR_MEM, "out of memory"); 416 | return temp; 417 | } 418 | 419 | static void verify(struct option *o, int exit_code, const char *fmt, ...) 420 | { 421 | va_list ap; 422 | va_start(ap, fmt); 423 | if(!o->verify) { 424 | warnf_low(fmt, ap); 425 | va_end(ap); 426 | } 427 | else { 428 | /* make sure to terminate the JSON array */ 429 | if(o->jsonout) 430 | printf("%s]\n", o->urls ? "\n" : ""); 431 | errorf_low(fmt, ap); 432 | va_end(ap); 433 | trurl_cleanup_options(o); 434 | curl_global_cleanup(); 435 | exit(exit_code); 436 | } 437 | } 438 | 439 | static char *strurldecode(const char *url, int inlength, int *outlength) 440 | { 441 | return curl_easy_unescape(NULL, inlength ? 
url : "", inlength, 442 | outlength); 443 | } 444 | /* urladd() appends 'url' to the list of URLs to work on; a failed append leaves the list unchanged */ 445 | static void urladd(struct option *o, const char *url) 446 | { 447 | struct curl_slist *n; 448 | n = curl_slist_append(o->url_list, url); 449 | if(n) 450 | o->url_list = n; 451 | } 452 | 453 | 454 | /* read URLs from this file/stdin */ 455 | static void urlfile(struct option *o, const char *file) 456 | { 457 | FILE *f; 458 | if(o->url) 459 | errorf(o, ERROR_FLAG, "only one --url-file is supported"); 460 | if(strcmp("-", file)) { 461 | f = fopen(file, "rt"); 462 | if(!f) 463 | errorf(o, ERROR_FILE, "--url-file %s not found", file); 464 | o->urlopen = true; 465 | } 466 | else 467 | f = stdin; 468 | o->url = f; 469 | } 470 | /* pathadd() stores the URL encoded version of 'path' in the list of path segments to append */ 471 | static void pathadd(struct option *o, const char *path) 472 | { 473 | struct curl_slist *n; 474 | char *urle = curl_easy_escape(NULL, path, 0); 475 | if(urle) { 476 | n = curl_slist_append(o->append_path, urle); 477 | if(n) { 478 | o->append_path = n; 479 | } 480 | curl_free(urle); 481 | } 482 | } 483 | /* encodeassign() returns a newly allocated URL encoded copy of 'query' (to be released with curl_free), or NULL on failure. When the input holds a '=', the name and the value are encoded separately so that the '=' itself stays as-is. */ 484 | static char *encodeassign(const char *query) 485 | { 486 | char *p = strchr(query, '='); 487 | char *urle; 488 | if(p) { 489 | /* URL encode the left and the right side of the '=' separately */ 490 | /* a zero length makes curl_easy_escape() run strlen() on its input, so an empty left side must never be handed to it or the whole string, '=' included, ends up encoded as the name */ bool emptykey = (p == query); char *f1 = emptykey ? NULL : curl_easy_escape(NULL, query, (int)(p - query)); 491 | char *f2 = curl_easy_escape(NULL, p + 1, 0); 492 | /* a failed escape returns NULL, which must not be passed to %s */ urle = ((emptykey || f1) && f2) ? curl_maprintf("%s=%s", f1 ? f1 : "", f2) : NULL; 493 | curl_free(f1); 494 | curl_free(f2); 495 | } 496 | else 497 | urle = curl_easy_escape(NULL, query, 0); 498 | return urle; 499 | } 500 | /* queryadd() stores the encoded 'query' in the list of query pairs to append */ 501 | static void queryadd(struct option *o, const char *query) 502 | { 503 | char *urle = encodeassign(query); 504 | if(urle) { 505 | struct curl_slist *n = curl_slist_append(o->append_query, urle); 506 | if(n) 507 | o->append_query = n; 508 | curl_free(urle); 509 | } 510 | } 511 | 512 | static void appendadd(struct option *o, 513 | const char *arg) 514 | { 515 | if(!strncmp("path=", arg, 5)) 516 | pathadd(o, arg + 5); 517 | else if(!strncmp("query=", arg, 6)) 518 | queryadd(o, arg + 6); 519 
| else 520 | errorf(o, ERROR_APPEND, "--append unsupported component: %s", arg); 521 | } 522 | 523 | static void setadd(struct option *o, 524 | const char *set) /* [component]=[data] */ 525 | { 526 | struct curl_slist *n; 527 | n = curl_slist_append(o->set_list, set); 528 | if(n) 529 | o->set_list = n; 530 | } 531 | 532 | static void iteradd(struct option *o, 533 | const char *iter) /* [component]=[data] */ 534 | { 535 | struct curl_slist *n; 536 | n = curl_slist_append(o->iter_list, iter); 537 | if(n) 538 | o->iter_list = n; 539 | } 540 | 541 | static void trimadd(struct option *o, 542 | const char *trim) /* [component]=[data] */ 543 | { 544 | struct curl_slist *n; 545 | n = curl_slist_append(o->trim_list, trim); 546 | if(n) 547 | o->trim_list = n; 548 | } 549 | 550 | static void replaceadd(struct option *o, 551 | const char *replace_list) /* [component]=[data] */ 552 | { 553 | if(replace_list) { 554 | char *urle = encodeassign(replace_list); 555 | if(urle) { 556 | struct curl_slist *n = curl_slist_append(o->replace_list, urle); 557 | if(n) 558 | o->replace_list = n; 559 | curl_free(urle); 560 | } 561 | } 562 | else 563 | errorf(o, ERROR_REPL, "No data passed to replace component"); 564 | } 565 | 566 | static bool longarg(const char *flag, const char *check) 567 | { 568 | /* the given flag might end with an equals sign */ 569 | size_t len = strlen(flag); 570 | return (!strcmp(flag, check) || 571 | (!strncmp(flag, check, len) && check[len] == '=')); 572 | } 573 | 574 | static bool checkoptarg(struct option *o, const char *flag, 575 | const char *given, 576 | const char *arg) 577 | { 578 | bool shortopt = false; 579 | if((flag[0] == '-') && (flag[1] != '-')) 580 | shortopt = true; 581 | if((!shortopt && longarg(flag, given)) || 582 | (!strncmp(flag, given, 2) && shortopt)) { 583 | if(!arg) 584 | errorf(o, ERROR_ARG, "Missing argument for %s", flag); 585 | return true; 586 | } 587 | return false; 588 | } 589 | 590 | static int getarg(struct option *o, 591 | const 
char *flag, 592 | const char *arg, 593 | bool *usedarg) 594 | { 595 | bool gap = true; 596 | *usedarg = false; 597 | 598 | if((flag[0] == '-') && (flag[1] != '-') && flag[2]) { 599 | arg = (char *)&flag[2]; 600 | gap = false; 601 | } 602 | else if((flag[0] == '-') && (flag[1] == '-')) { 603 | char *equals = strchr(&flag[2], '='); 604 | if(equals) { 605 | arg = (char *)&equals[1]; 606 | gap = false; 607 | } 608 | } 609 | 610 | if(!strcmp("--", flag)) 611 | o->end_of_options = true; 612 | else if(!strcmp("-v", flag) || !strcmp("--version", flag)) 613 | show_version(); 614 | else if(!strcmp("-h", flag) || !strcmp("--help", flag)) 615 | help(); 616 | else if(checkoptarg(o, "--url", flag, arg)) { 617 | urladd(o, arg); 618 | *usedarg = gap; 619 | } 620 | else if(checkoptarg(o, "-f", flag, arg) || 621 | checkoptarg(o, "--url-file", flag, arg)) { 622 | urlfile(o, arg); 623 | *usedarg = gap; 624 | } 625 | else if(checkoptarg(o, "-a", flag, arg) || 626 | checkoptarg(o, "--append", flag, arg)) { 627 | appendadd(o, arg); 628 | *usedarg = gap; 629 | } 630 | else if(checkoptarg(o, "-s", flag, arg) || 631 | checkoptarg(o, "--set", flag, arg)) { 632 | setadd(o, arg); 633 | *usedarg = gap; 634 | } 635 | else if(checkoptarg(o, "--iterate", flag, arg)) { 636 | iteradd(o, arg); 637 | *usedarg = gap; 638 | } 639 | else if(checkoptarg(o, "--redirect", flag, arg)) { 640 | if(o->redirect) 641 | errorf(o, ERROR_FLAG, "only one --redirect is supported"); 642 | o->redirect = arg; 643 | *usedarg = gap; 644 | } 645 | else if(checkoptarg(o, "--query-separator", flag, arg)) { 646 | if(o->qsep) 647 | errorf(o, ERROR_FLAG, "only one --query-separator is supported"); 648 | if(strlen(arg) != 1) 649 | errorf(o, ERROR_FLAG, 650 | "only single-letter query separators are supported"); 651 | o->qsep = arg; 652 | *usedarg = gap; 653 | } 654 | else if(checkoptarg(o, "--trim", flag, arg)) { 655 | if(strncmp(arg, "query=", 6)) 656 | errorf(o, ERROR_TRIM, "Unsupported trim component: %s", arg); 657 | 658 | 
trimadd(o, &arg[6]); 659 | *usedarg = gap; 660 | } 661 | else if(checkoptarg(o, "--qtrim", flag, arg)) { 662 | trimadd(o, arg); 663 | *usedarg = gap; 664 | } 665 | else if(checkoptarg(o, "-g", flag, arg) || 666 | checkoptarg(o, "--get", flag, arg)) { 667 | if(o->format) 668 | errorf(o, ERROR_FLAG, "only one --get is supported"); 669 | if(o->jsonout) 670 | errorf(o, ERROR_FLAG, 671 | "--get is mutually exclusive with --json"); 672 | o->format = arg; 673 | *usedarg = gap; 674 | } 675 | else if(!strcmp("--json", flag)) { 676 | if(o->format) 677 | errorf(o, ERROR_FLAG, "--json is mutually exclusive with --get"); 678 | o->jsonout = true; 679 | } 680 | else if(!strcmp("--verify", flag)) 681 | o->verify = true; 682 | else if(!strcmp("--accept-space", flag)) { 683 | #ifdef SUPPORTS_ALLOW_SPACE 684 | o->accept_space = true; 685 | #else 686 | trurl_warnf(o, 687 | "built with too old libcurl version, --accept-space does not work"); 688 | #endif 689 | } 690 | else if(!strcmp("--curl", flag)) 691 | o->curl = true; 692 | else if(!strcmp("--default-port", flag)) 693 | o->default_port = true; 694 | else if(!strcmp("--keep-port", flag)) 695 | o->keep_port = true; 696 | else if(!strcmp("--punycode", flag)) { 697 | if(o->puny2idn) 698 | errorf(o, ERROR_FLAG, "--punycode is mutually exclusive with --as-idn"); 699 | o->punycode = true; 700 | } 701 | else if(!strcmp("--as-idn", flag)) { 702 | if(o->punycode) 703 | errorf(o, ERROR_FLAG, "--as-idn is mutually exclusive with --punycode"); 704 | o->puny2idn = true; 705 | } 706 | else if(!strcmp("--no-guess-scheme", flag)) 707 | o->no_guess_scheme = true; 708 | else if(!strcmp("--sort-query", flag)) 709 | o->sort_query = true; 710 | else if(!strcmp("--urlencode", flag)) 711 | o->urlencode = true; 712 | else if(!strcmp("--quiet", flag)) 713 | o->quiet_warnings = true; 714 | else if(!strcmp("--replace", flag)) { 715 | replaceadd(o, arg); 716 | *usedarg = gap; 717 | } 718 | else if(!strcmp("--replace-append", flag) || 719 | 
!strcmp("--force-replace", flag)) { /* the initial name */ 720 | replaceadd(o, arg); 721 | o->force_replace = true; 722 | *usedarg = gap; 723 | } 724 | else 725 | return 1; /* unrecognized option */ 726 | return 0; 727 | } 728 | 729 | static void showqkey(FILE *stream, const char *key, size_t klen, 730 | bool urldecode, bool showall) 731 | { 732 | int i; 733 | bool shown = false; 734 | struct string *qp = urldecode ? qpairsdec : qpairs; 735 | 736 | for(i = 0; i< nqpairs; i++) { 737 | if(!strncmp(key, qp[i].str, klen) && (qp[i].str[klen] == '=')) { 738 | if(shown) 739 | fputc(' ', stream); 740 | fprintf(stream, "%.*s", (int) (qp[i].len - klen - 1), 741 | &qp[i].str[klen + 1]); 742 | if(!showall) 743 | break; 744 | shown = true; 745 | } 746 | } 747 | } 748 | 749 | /* component to variable pointer */ 750 | static const struct var *comp2var(const char *name, size_t vlen) 751 | { 752 | int i; 753 | for(i = 0; variables[i].name; i++) 754 | if((strlen(variables[i].name) == vlen) && 755 | !strncmp(name, variables[i].name, vlen)) 756 | return &variables[i]; 757 | return NULL; 758 | } 759 | 760 | static CURLUcode geturlpart(struct option *o, int modifiers, CURLU *uh, 761 | CURLUPart part, char **out) 762 | { 763 | CURLUcode rc = 764 | curl_url_get(uh, part, out, 765 | (((modifiers & VARMODIFIER_DEFAULT) || 766 | o->default_port) ? 767 | CURLU_DEFAULT_PORT : 768 | ((part != CURLUPART_URL || o->keep_port) ? 769 | 0 : CURLU_NO_DEFAULT_PORT))| 770 | #ifdef SUPPORTS_PUNYCODE 771 | (((modifiers & VARMODIFIER_PUNY) || o->punycode) ? 772 | CURLU_PUNYCODE : 0)| 773 | #endif 774 | #ifdef SUPPORTS_PUNY2IDN 775 | (((modifiers & VARMODIFIER_PUNY2IDN) || o->puny2idn) ? 776 | CURLU_PUNY2IDN : 0) | 777 | #endif 778 | #ifdef SUPPORTS_GET_EMPTY 779 | ((modifiers & VARMODIFIER_EMPTY) ? CURLU_GET_EMPTY : 0) | 780 | #endif 781 | (o->curl ? 0 : CURLU_NON_SUPPORT_SCHEME)| 782 | (((modifiers & VARMODIFIER_URLENCODED) || 783 | o->urlencode) ? 
784 | 0 :CURLU_URLDECODE)); 785 | 786 | #ifdef SUPPORTS_PUNY2IDN 787 | /* retry get w/ out puny2idn to handle invalid punycode conversions */ 788 | if(rc == CURLUE_BAD_HOSTNAME && 789 | (o->puny2idn || (modifiers & VARMODIFIER_PUNY2IDN))) { 790 | curl_free(*out); 791 | modifiers &= ~VARMODIFIER_PUNY2IDN; 792 | o->puny2idn = false; 793 | trurl_warnf(o, 794 | "Error converting url to IDN [%s]", 795 | curl_url_strerror(rc)); 796 | return geturlpart(o, modifiers, uh, part, out); 797 | } 798 | #endif 799 | return rc; 800 | } 801 | 802 | static bool is_valid_trurl_error(CURLUcode rc) 803 | { 804 | switch(rc) { 805 | case CURLUE_OK: 806 | case CURLUE_NO_SCHEME: 807 | case CURLUE_NO_USER: 808 | case CURLUE_NO_PASSWORD: 809 | case CURLUE_NO_OPTIONS: 810 | case CURLUE_NO_HOST: 811 | case CURLUE_NO_PORT: 812 | case CURLUE_NO_QUERY: 813 | case CURLUE_NO_FRAGMENT: 814 | #ifdef SUPPORTS_ZONEID 815 | case CURLUE_NO_ZONEID: 816 | #endif 817 | /* silently ignore */ 818 | return false; 819 | default: 820 | return true; 821 | } 822 | return true; 823 | } 824 | 825 | static void showurl(FILE *stream, struct option *o, int modifiers, 826 | CURLU *uh) 827 | { 828 | char *url; 829 | CURLUcode rc = geturlpart(o, modifiers, uh, CURLUPART_URL, &url); 830 | if(rc) { 831 | trurl_cleanup_options(o); 832 | verify(o, ERROR_BADURL, "invalid url [%s]", curl_url_strerror(rc)); 833 | return; 834 | } 835 | fputs(url, stream); 836 | curl_free(url); 837 | } 838 | 839 | static void get(struct option *o, CURLU *uh) 840 | { 841 | FILE *stream = stdout; 842 | const char *ptr = o->format; 843 | bool done = false; 844 | char startbyte = 0; 845 | char endbyte = 0; 846 | 847 | while(ptr && *ptr && !done) { 848 | if(!startbyte && (('{' == *ptr) || ('[' == *ptr))) { 849 | startbyte = *ptr; 850 | if('{' == *ptr) 851 | endbyte = '}'; 852 | else 853 | endbyte = ']'; 854 | } 855 | if(startbyte == *ptr) { 856 | if(startbyte == ptr[1]) { 857 | /* an escaped {-letter */ 858 | fputc(startbyte, stream); 859 | ptr += 2; 
860 | } 861 | else { 862 | /* this is meant as a variable to output */ 863 | const char *start = ptr; 864 | char *end; 865 | char *cl; 866 | size_t vlen; 867 | bool isquery = false; 868 | bool queryall = false; 869 | bool strict = false; /* strict mode, fail on URL decode problems */ 870 | bool must = false; /* must mode, fail on missing component */ 871 | int mods = 0; 872 | end = strchr(ptr, endbyte); 873 | ptr++; /* pass the { */ 874 | if(!end) { 875 | /* syntax error */ 876 | fputc(startbyte, stream); 877 | continue; 878 | } 879 | 880 | /* {path} {:path} {/path} */ 881 | if(*ptr == ':') { 882 | mods |= VARMODIFIER_URLENCODED; 883 | ptr++; 884 | } 885 | vlen = end - ptr; 886 | do { 887 | size_t wordlen; 888 | cl = memchr(ptr, ':', vlen); 889 | if(!cl) 890 | break; 891 | wordlen = cl - ptr + 1; 892 | 893 | /* modifiers! */ 894 | if(!strncmp(ptr, "default:", wordlen)) 895 | mods |= VARMODIFIER_DEFAULT; 896 | else if(!strncmp(ptr, "puny:", wordlen)) { 897 | if(mods & VARMODIFIER_PUNY2IDN) 898 | errorf(o, ERROR_GET, 899 | "puny modifier is mutually exclusive with idn"); 900 | mods |= VARMODIFIER_PUNY; 901 | } 902 | else if(!strncmp(ptr, "idn:", wordlen)) { 903 | if(mods & VARMODIFIER_PUNY) 904 | errorf(o, ERROR_GET, 905 | "idn modifier is mutually exclusive with puny"); 906 | mods |= VARMODIFIER_PUNY2IDN; 907 | } 908 | else if(!strncmp(ptr, "strict:", wordlen)) 909 | strict = true; 910 | else if(!strncmp(ptr, "must:", wordlen)) { 911 | must = true; 912 | mods |= VARMODIFIER_EMPTY; 913 | } 914 | else if(!strncmp(ptr, "url:", wordlen)) 915 | mods |= VARMODIFIER_URLENCODED; 916 | else { 917 | if(!strncmp(ptr, "query-all:", wordlen)) { 918 | isquery = true; 919 | queryall = true; 920 | } 921 | else if(!strncmp(ptr, "query:", wordlen)) 922 | isquery = true; 923 | else { 924 | /* syntax error */ 925 | vlen = 0; 926 | end[1] = '\0'; 927 | } 928 | break; 929 | } 930 | 931 | ptr = cl + 1; 932 | vlen = end - ptr; 933 | } while(true); 934 | 935 | if(isquery) { 936 | 
showqkey(stream, cl + 1, end - cl - 1, 937 | !o->urlencode && !(mods & VARMODIFIER_URLENCODED), 938 | queryall); 939 | } 940 | else if(!vlen) 941 | errorf(o, ERROR_GET, "Bad --get syntax: %s", start); 942 | else if(!strncmp(ptr, "url", vlen)) 943 | showurl(stream, o, mods, uh); 944 | else { 945 | const struct var *v = comp2var(ptr, vlen); 946 | if(v) { 947 | char *nurl; 948 | /* ask for it URL encode always, to avoid libcurl warning on 949 | content */ 950 | CURLUcode rc = geturlpart(o, mods | VARMODIFIER_URLENCODED, 951 | uh, v->part, &nurl); 952 | if(!rc && !(mods & VARMODIFIER_URLENCODED) && !o->urlencode) { 953 | /* it should not be encoded in the output */ 954 | int olen; 955 | char *dec = curl_easy_unescape(NULL, nurl, 0, &olen); 956 | curl_free(nurl); 957 | if(memchr(dec, '\0', (size_t)olen)) { 958 | /* a binary zero cannot be shown */ 959 | rc = CURLUE_URLDECODE; 960 | curl_free(dec); 961 | dec = NULL; 962 | } 963 | nurl = dec; 964 | } 965 | 966 | if(rc == CURLUE_OK) { 967 | fputs(nurl, stream); 968 | curl_free(nurl); 969 | } 970 | else if(!is_valid_trurl_error(rc) && must) 971 | errorf(o, ERROR_GET, "missing must:%s", v->name); 972 | else if(is_valid_trurl_error(rc) || strict) { 973 | if((rc == CURLUE_URLDECODE) && strict) 974 | errorf(o, ERROR_GET, "problems URL decoding %s", v->name); 975 | else 976 | trurl_warnf(o, "%s (%s)", curl_url_strerror(rc), v->name); 977 | } 978 | } 979 | else 980 | errorf(o, ERROR_GET, "\"%.*s\" is not a recognized URL component", 981 | (int)vlen, ptr); 982 | } 983 | ptr = end + 1; /* pass the end */ 984 | } 985 | } 986 | else if('\\' == *ptr && ptr[1]) { 987 | switch(ptr[1]) { 988 | case 'r': 989 | fputc('\r', stream); 990 | break; 991 | case 'n': 992 | fputc('\n', stream); 993 | break; 994 | case 't': 995 | fputc('\t', stream); 996 | break; 997 | case '\\': 998 | fputc('\\', stream); 999 | break; 1000 | case '{': 1001 | fputc('{', stream); 1002 | break; 1003 | case '[': 1004 | fputc('[', stream); 1005 | break; 1006 | default: 
1007 | /* unknown, just output this */ 1008 | fputc(*ptr, stream); 1009 | fputc(ptr[1], stream); 1010 | break; 1011 | } 1012 | ptr += 2; 1013 | } 1014 | else { 1015 | fputc(*ptr, stream); 1016 | ptr++; 1017 | } 1018 | } 1019 | fputc('\n', stream); 1020 | } 1021 | 1022 | static const struct var *setone(CURLU *uh, const char *setline, 1023 | struct option *o) 1024 | { 1025 | char *ptr = strchr(setline, '='); 1026 | const struct var *v = NULL; 1027 | if(ptr && (ptr > setline)) { 1028 | size_t vlen = ptr - setline; 1029 | bool urlencode = true; 1030 | bool conditional = false; 1031 | bool found = false; 1032 | if(vlen) { 1033 | int back = -1; 1034 | size_t reqlen = 1; 1035 | while(vlen > reqlen) { 1036 | if(ptr[back] == ':') { 1037 | urlencode = false; 1038 | vlen--; 1039 | } 1040 | else if(ptr[back] == '?') { 1041 | conditional = true; 1042 | vlen--; 1043 | } 1044 | else 1045 | break; 1046 | reqlen++; 1047 | back--; 1048 | } 1049 | } 1050 | v = comp2var(setline, vlen); 1051 | if(v) { 1052 | CURLUcode rc = CURLUE_OK; 1053 | bool skip = false; 1054 | if((v->part == CURLUPART_HOST) && ('[' == ptr[1])) 1055 | /* when setting an IPv6 numerical address, disable URL encoding */ 1056 | urlencode = false; 1057 | 1058 | if(conditional) { 1059 | char *piece; 1060 | rc = curl_url_get(uh, v->part, &piece, CURLU_NO_GUESS_SCHEME); 1061 | if(!rc) { 1062 | skip = true; 1063 | curl_free(piece); 1064 | } 1065 | } 1066 | 1067 | if(!skip) 1068 | rc = curl_url_set(uh, v->part, ptr[1] ? &ptr[1] : NULL, 1069 | (o->curl ? 0 : CURLU_NON_SUPPORT_SCHEME)| 1070 | (urlencode ? 
CURLU_URLENCODE : 0) ); 1071 | if(rc) 1072 | warnf("Error setting %s: %s", v->name, curl_url_strerror(rc)); 1073 | found = true; 1074 | } 1075 | if(!found) 1076 | errorf(o, ERROR_SET, 1077 | "unknown component: %.*s", (int)vlen, setline); 1078 | } 1079 | else 1080 | errorf(o, ERROR_SET, "invalid --set syntax: %s", setline); 1081 | return v; 1082 | } 1083 | 1084 | static unsigned int set(CURLU *uh, 1085 | struct option *o) 1086 | { 1087 | struct curl_slist *node; 1088 | unsigned int mask = 0; 1089 | for(node = o->set_list; node; node = node->next) { 1090 | const struct var *v; 1091 | char *setline = node->data; 1092 | v = setone(uh, setline, o); 1093 | if(v) { 1094 | if(mask & (1 << v->part)) 1095 | errorf(o, ERROR_SET, 1096 | "duplicate --set for component %s", v->name); 1097 | mask |= (1 << v->part); 1098 | } 1099 | } 1100 | return mask; /* the set components */ 1101 | } 1102 | 1103 | static void jsonString(FILE *stream, const char *in, size_t len, 1104 | bool lowercase) 1105 | { 1106 | const unsigned char *i = (unsigned char *)in; 1107 | const char *in_end = &in[len]; 1108 | fputc('\"', stream); 1109 | for(; i < (unsigned char *)in_end; i++) { 1110 | switch(*i) { 1111 | case '\\': 1112 | fputs("\\\\", stream); 1113 | break; 1114 | case '\"': 1115 | fputs("\\\"", stream); 1116 | break; 1117 | case '\b': 1118 | fputs("\\b", stream); 1119 | break; 1120 | case '\f': 1121 | fputs("\\f", stream); 1122 | break; 1123 | case '\n': 1124 | fputs("\\n", stream); 1125 | break; 1126 | case '\r': 1127 | fputs("\\r", stream); 1128 | break; 1129 | case '\t': 1130 | fputs("\\t", stream); 1131 | break; 1132 | default: 1133 | if(*i < 32) 1134 | fprintf(stream, "\\u%04x", *i); 1135 | else { 1136 | char out = *i; 1137 | if(lowercase && (out >= 'A' && out <= 'Z')) 1138 | /* do not use tolower() since that's locale specific */ 1139 | out |= ('a' - 'A'); 1140 | fputc(out, stream); 1141 | } 1142 | break; 1143 | } 1144 | } 1145 | fputc('\"', stream); 1146 | } 1147 | 1148 | static void 
json(struct option *o, CURLU *uh) 1149 | { 1150 | int i; 1151 | bool first = true; 1152 | char *url; 1153 | CURLUcode rc = geturlpart(o, 0, uh, CURLUPART_URL, &url); 1154 | if(rc) { 1155 | trurl_cleanup_options(o); 1156 | verify(o, ERROR_BADURL, "invalid url [%s]", curl_url_strerror(rc)); 1157 | return; 1158 | } 1159 | printf("%s\n {\n \"url\": ", o->urls ? "," : ""); 1160 | jsonString(stdout, url, strlen(url), false); 1161 | curl_free(url); 1162 | fputs(",\n \"parts\": {\n", stdout); 1163 | /* special error handling required to not print params array. */ 1164 | bool params_errors = false; 1165 | for(i = 0; variables[i].name; i++) { 1166 | char *part; 1167 | /* ask for the URL encoded version so that weird control characters do not 1168 | cause problems. URL decode it when push to json. */ 1169 | rc = geturlpart(o, VARMODIFIER_URLENCODED, uh, variables[i].part, &part); 1170 | if(!rc) { 1171 | int olen; 1172 | char *dec = NULL; 1173 | 1174 | if(!o->urlencode) { 1175 | if(variables[i].part == CURLUPART_QUERY) { 1176 | /* query parts have '+' for space */ 1177 | char *n; 1178 | char *p = part; 1179 | do { 1180 | n = strchr(p, '+'); 1181 | if(n) { 1182 | *n = ' '; 1183 | p = n + 1; 1184 | } 1185 | } while(n); 1186 | } 1187 | 1188 | dec = curl_easy_unescape(NULL, part, 0, &olen); 1189 | if(!dec) 1190 | errorf(o, ERROR_MEM, "out of memory"); 1191 | } 1192 | 1193 | if(!first) 1194 | fputs(",\n", stdout); 1195 | first = false; 1196 | printf(" \"%s\": ", variables[i].name); 1197 | if(dec) 1198 | jsonString(stdout, dec, (size_t)olen, false); 1199 | else 1200 | jsonString(stdout, part, strlen(part), false); 1201 | curl_free(part); 1202 | curl_free(dec); 1203 | } 1204 | else if(is_valid_trurl_error(rc)) { 1205 | trurl_warnf(o, "%s (%s)", curl_url_strerror(rc), variables[i].name); 1206 | params_errors = true; 1207 | } 1208 | } 1209 | fputs("\n }", stdout); 1210 | first = true; 1211 | if(nqpairs && !params_errors) { 1212 | int j; 1213 | fputs(",\n \"params\": [\n", stdout); 1214 
| for(j = 0 ; j < nqpairs; j++) { 1215 | const char *sep = memchr(qpairsdec[j].str, '=', qpairsdec[j].len); 1216 | const char *value = sep ? sep + 1 : ""; 1217 | int value_len = (int) qpairsdec[j].len - (int)(value - qpairsdec[j].str); 1218 | /* don't print out empty/trimmed values */ 1219 | if(!qpairsdec[j].len || !qpairsdec[j].str[0]) 1220 | continue; 1221 | if(!first) 1222 | fputs(",\n", stdout); 1223 | first = false; 1224 | fputs(" {\n \"key\": ", stdout); 1225 | jsonString(stdout, qpairsdec[j].str, 1226 | sep ? (size_t)(sep - qpairsdec[j].str) : 1227 | qpairsdec[j].len, 1228 | false); 1229 | fputs(",\n \"value\": ", stdout); 1230 | jsonString(stdout, sep?value:"", sep?value_len:0, false); 1231 | fputs("\n }", stdout); 1232 | } 1233 | fputs("\n ]", stdout); 1234 | } 1235 | fputs("\n }", stdout); 1236 | } 1237 | 1238 | /* --trim query="utm_*" */ 1239 | static bool trim(struct option *o) 1240 | { 1241 | bool query_is_modified = false; 1242 | struct curl_slist *node; 1243 | for(node = o->trim_list; node; node = node->next) { 1244 | char *ptr = node->data; 1245 | if(ptr) { 1246 | /* 'ptr' should be a fixed string or a pattern ending with an 1247 | asterisk */ 1248 | size_t inslen; 1249 | bool pattern = false; 1250 | int i; 1251 | char *temp = NULL; 1252 | 1253 | inslen = strlen(ptr); 1254 | if(inslen) { 1255 | pattern = ptr[inslen - 1] == '*'; 1256 | if(pattern && (inslen > 1)) { 1257 | pattern ^= ptr[inslen - 2] == '\\'; 1258 | if(!pattern) { 1259 | /* the two final letters are \*, but the backslash needs to be 1260 | removed. Get a copy and edit that accordingly. 
*/ 1261 | temp = xstrdup(o, ptr); 1262 | temp[inslen - 2] = '*'; 1263 | temp[inslen - 1] = '\0'; 1264 | ptr = temp; 1265 | inslen--; /* one byte shorter now */ 1266 | } 1267 | } 1268 | if(pattern) 1269 | inslen--; 1270 | } 1271 | 1272 | for(i = 0 ; i < nqpairs; i++) { 1273 | char *q = qpairs[i].str; 1274 | char *sep = strchr(q, '='); 1275 | size_t qlen; 1276 | if(sep) 1277 | qlen = sep - q; 1278 | else 1279 | qlen = strlen(q); 1280 | 1281 | if((pattern && (inslen <= qlen) && !casecompare(q, ptr, inslen)) || 1282 | (!pattern && (inslen == qlen) && !casecompare(q, ptr, inslen))) { 1283 | /* this qpair should be stripped out */ 1284 | free(qpairs[i].str); 1285 | free(qpairsdec[i].str); 1286 | qpairs[i].str = xstrdup(o, ""); /* marked as deleted */ 1287 | qpairs[i].len = 0; 1288 | qpairsdec[i].str = xstrdup(o, ""); /* marked as deleted */ 1289 | qpairsdec[i].len = 0; 1290 | query_is_modified = true; 1291 | } 1292 | } 1293 | free(temp); 1294 | } 1295 | } 1296 | return query_is_modified; 1297 | } 1298 | 1299 | static char *decodequery(char *str, size_t len, int *olen) 1300 | { 1301 | /* handle '+' to ' ' outside of the libcurl call */ 1302 | char *p = str; 1303 | size_t plen = len; 1304 | do { 1305 | char *n = memchr(p, '+', plen); 1306 | if(n) { 1307 | *n = ' '; 1308 | ++n; 1309 | plen -= (n - p); 1310 | } 1311 | p = n; 1312 | } while(p); 1313 | return curl_easy_unescape(NULL, str, (int)len, olen); 1314 | } 1315 | 1316 | /* the unusual thing here is that we let '*' remain as-is */ 1317 | #define ISURLPUNTCS(x) (((x) == '-') || ((x) == '.') || ((x) == '_') || \ 1318 | ((x) == '~') || ((x) == '*')) 1319 | #define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z')) 1320 | #define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z')) 1321 | #define ISDIGIT(x) (((x) >= '0') && ((x) <= '9')) 1322 | #define ISALNUM(x) (ISDIGIT(x) || ISLOWER(x) || ISUPPER(x)) 1323 | #define ISUNRESERVED(x) (ISALNUM(x) || ISURLPUNTCS(x)) 1324 | 1325 | static char *encodequery(char *str, size_t len) 1326 | { 1327 | /* 
to handle ' ' to '+' escaping we cannot use libcurl's URL encode 1328 | function */ 1329 | char *dupe = malloc(len * 3 + 1); /* worst case */ 1330 | char *p = dupe; 1331 | if(!p) 1332 | return NULL; 1333 | 1334 | while(len--) { 1335 | /* treat the characters unsigned */ 1336 | unsigned char in = (unsigned char)*str++; 1337 | 1338 | if(in == ' ') 1339 | *dupe++ = '+'; 1340 | else if(ISUNRESERVED(in)) 1341 | *dupe++ = in; 1342 | else { 1343 | /* encode it */ 1344 | const char hex[] = "0123456789abcdef"; 1345 | dupe[0]='%'; 1346 | dupe[1] = hex[in>>4]; 1347 | dupe[2] = hex[in & 0xf]; 1348 | dupe += 3; 1349 | } 1350 | } 1351 | *dupe = 0; 1352 | return p; 1353 | } 1354 | 1355 | /* URL decode, then URL encode it back to normalize. But don't touch 1356 | the first '=' if there is one */ 1357 | static struct string *memdupzero(char *source, size_t len, bool *modified) 1358 | { 1359 | struct string *ret = calloc(1, sizeof(struct string)); 1360 | char *left = NULL; 1361 | char *right = NULL; 1362 | char *el = NULL; 1363 | char *er = NULL; 1364 | char *encode = NULL; 1365 | if(!ret) 1366 | return NULL; 1367 | 1368 | if(len) { 1369 | char *sep = memchr(source, '=', len); 1370 | int olen; 1371 | if(!sep) { /* no '=' */ 1372 | char *decode = decodequery(source, (int)len, &olen); 1373 | if(decode) 1374 | encode = encodequery(decode, olen); 1375 | else 1376 | goto error; 1377 | curl_free(decode); 1378 | } 1379 | else { 1380 | int llen; 1381 | int rlen; 1382 | int leftside; 1383 | int rightside; 1384 | char *temp; 1385 | 1386 | /* decode both sides */ 1387 | leftside = (int)(sep - source); 1388 | if(leftside) { 1389 | left = decodequery(source, leftside, &llen); 1390 | if(!left) 1391 | goto error; 1392 | } 1393 | else { 1394 | left = NULL; 1395 | llen = 0; 1396 | } 1397 | 1398 | /* length on the right side of '=': */ 1399 | rightside = (int)len - (int)(sep - source) - 1; 1400 | 1401 | if(rightside) { 1402 | right = decodequery(sep + 1, 1403 | (int)len - (int)(sep - source) - 1, 
&rlen); 1404 | if(!right) 1405 | goto error; 1406 | } 1407 | else { 1408 | right = NULL; 1409 | rlen = 0; 1410 | } 1411 | 1412 | /* encode both sides again */ 1413 | if(left) { 1414 | el = encodequery(left, llen); 1415 | if(!el) 1416 | goto error; 1417 | } 1418 | if(right) { 1419 | er = encodequery(right, rlen); 1420 | if(!er) 1421 | goto error; 1422 | } 1423 | 1424 | temp = curl_maprintf("%s=%s", el ? el : "", er ? er : ""); 1425 | if(!temp) 1426 | goto error; 1427 | /* pointers from curl_maprintf() must be curl_free()d so make a copy */ 1428 | encode = strdup(temp); 1429 | curl_free(temp); 1430 | if(!encode) 1431 | goto error; 1432 | } 1433 | olen = (int)strlen(encode); 1434 | 1435 | if(((size_t)olen != len) || strcmp(encode, source)) 1436 | *modified |= true; 1437 | ret->str = encode; 1438 | ret->len = olen; 1439 | } 1440 | curl_free(left); 1441 | curl_free(right); 1442 | free(el); 1443 | free(er); 1444 | return ret; 1445 | error: 1446 | curl_free(left); 1447 | curl_free(right); 1448 | free(el); 1449 | free(er); 1450 | free(encode); 1451 | free(ret); 1452 | return NULL; 1453 | } 1454 | 1455 | /* URL decode the pair and return it in an allocated chunk */ 1456 | static struct string *memdupdec(char *source, size_t len, bool json) 1457 | { 1458 | char *sep = memchr(source, '=', len); 1459 | char *left = NULL; 1460 | char *right = NULL; 1461 | int right_len = 0; 1462 | int left_len = 0; 1463 | char *str; 1464 | struct string *ret; 1465 | left = strurldecode(source, (int)(sep ? 
(size_t)(sep - source) : len), 1466 | &left_len); 1467 | if(sep) { 1468 | char *p; 1469 | int plen; 1470 | right = strurldecode(sep + 1, (int)(len - (sep - source) - 1), 1471 | &right_len); 1472 | 1473 | /* convert null bytes to periods */ 1474 | for(plen = right_len, p = right; plen; plen--, p++) { 1475 | if(!*p && !json) { 1476 | *p = REPLACE_NULL_BYTE; 1477 | } 1478 | } 1479 | } 1480 | str = malloc(sizeof(char) * (left_len + (sep?(right_len + 1):0))); 1481 | if(!str) { 1482 | curl_free(right); 1483 | curl_free(left); 1484 | return NULL; 1485 | } 1486 | memcpy(str, left, left_len); 1487 | if(sep) { 1488 | str[left_len] = '='; 1489 | memcpy(str + 1 + left_len, right, right_len); 1490 | } 1491 | curl_free(right); 1492 | curl_free(left); 1493 | ret = malloc(sizeof(struct string)); 1494 | if(!ret) { 1495 | free(str); 1496 | return NULL; 1497 | } 1498 | ret->str = str; 1499 | ret->len = left_len + (sep?(right_len + 1):0); 1500 | return ret; 1501 | } 1502 | 1503 | 1504 | static void freeqpairs(void) 1505 | { 1506 | int i; 1507 | for(i = 0; istr; 1529 | qpairs[nqpairs].len = p->len; 1530 | qpairsdec[nqpairs].str = pdec->str; 1531 | qpairsdec[nqpairs].len = pdec->len; 1532 | nqpairs++; 1533 | } 1534 | } 1535 | else 1536 | warnf("too many query pairs"); 1537 | 1538 | if(pdec) 1539 | free(pdec); 1540 | if(p) 1541 | free(p); 1542 | return modified; 1543 | } 1544 | 1545 | /* convert the query string into an array of name=data pair */ 1546 | static bool extractqpairs(CURLU *uh, struct option *o) 1547 | { 1548 | char *q = NULL; 1549 | bool modified = false; 1550 | memset(qpairs, 0, sizeof(qpairs)); 1551 | nqpairs = 0; 1552 | /* extract the query */ 1553 | if(!curl_url_get(uh, CURLUPART_QUERY, &q, 0)) { 1554 | char *p = q; 1555 | char *amp; 1556 | while(*p) { 1557 | size_t len; 1558 | amp = strchr(p, o->qsep[0]); 1559 | if(!amp) 1560 | len = strlen(p); 1561 | else 1562 | len = amp - p; 1563 | modified |= addqpair(p, len, o->jsonout); 1564 | if(amp) 1565 | p = amp + 1; 1566 | 
else 1567 | break; 1568 | } 1569 | } 1570 | curl_free(q); 1571 | return modified; 1572 | } 1573 | 1574 | static void qpair2query(CURLU *uh, struct option *o) 1575 | { 1576 | int i; 1577 | char *nq = NULL; 1578 | for(i = 0; iqsep : "", 1582 | qpairs[i].len ? qpairs[i].str : ""); 1583 | curl_free(oldnq); 1584 | } 1585 | if(nq) { 1586 | int rc = curl_url_set(uh, CURLUPART_QUERY, nq, 0); 1587 | if(rc) 1588 | trurl_warnf(o, "internal problem: failed to store updated query in URL"); 1589 | } 1590 | curl_free(nq); 1591 | } 1592 | 1593 | /* sort case insensitively */ 1594 | static int cmpfunc(const void *p1, const void *p2) 1595 | { 1596 | int i; 1597 | int len = (int)((((struct string *)p1)->len) < (((struct string *)p2)->len)? 1598 | (((struct string *)p1)->len) : (((struct string *)p2)->len)); 1599 | 1600 | for(i = 0; i < len; i++) { 1601 | char c1 = ((struct string *)p1)->str[i] | ('a' - 'A'); 1602 | char c2 = ((struct string *)p2)->str[i] | ('a' - 'A'); 1603 | if(c1 != c2) 1604 | return c1 - c2; 1605 | } 1606 | 1607 | return 0; 1608 | } 1609 | 1610 | static bool sortquery(struct option *o) 1611 | { 1612 | if(o->sort_query) { 1613 | /* not these two lists may no longer be the same order after the sort */ 1614 | qsort(&qpairs[0], nqpairs, sizeof(struct string), cmpfunc); 1615 | qsort(&qpairsdec[0], nqpairs, sizeof(struct string), cmpfunc); 1616 | return true; 1617 | } 1618 | return false; 1619 | } 1620 | 1621 | static bool replace(struct option *o) 1622 | { 1623 | bool query_is_modified = false; 1624 | struct curl_slist *node; 1625 | for(node = o->replace_list; node; node = node->next) { 1626 | struct string key; 1627 | struct string value; 1628 | bool replaced = false; 1629 | int i; 1630 | key.str = node->data; 1631 | value.str = strchr(key.str, '='); 1632 | if(value.str) { 1633 | key.len = value.str++ - key.str; 1634 | value.len = strlen(value.str); 1635 | } 1636 | else { 1637 | key.len = strlen(key.str); 1638 | value.str = NULL; 1639 | value.len = 0; 1640 | } 1641 | 
for(i = 0; i < nqpairs; i++) { 1642 | char *q = qpairs[i].str; 1643 | /* not the correct query, move on */ 1644 | if(strncmp(q, key.str, key.len)) 1645 | continue; 1646 | free(qpairs[i].str); 1647 | free(qpairsdec[i].str); 1648 | /* this is a duplicate remove it. */ 1649 | if(replaced) { 1650 | qpairs[i].len = 0; 1651 | qpairs[i].str = xstrdup(o, ""); 1652 | qpairsdec[i].len = 0; 1653 | qpairsdec[i].str = xstrdup(o, ""); 1654 | continue; 1655 | } 1656 | struct string *pdec = 1657 | memdupdec(key.str, key.len + value.len + 1, o->jsonout); 1658 | struct string *p = memdupzero(key.str, key.len + value.len + 1659 | (value.str ? 1 : 0), 1660 | &query_is_modified); 1661 | qpairs[i].len = p->len; 1662 | qpairs[i].str = p->str; 1663 | qpairsdec[i].len = pdec->len; 1664 | qpairsdec[i].str = pdec->str; 1665 | free(pdec); 1666 | free(p); 1667 | query_is_modified = replaced = true; 1668 | } 1669 | 1670 | if(!replaced && o->force_replace) { 1671 | addqpair(key.str, strlen(key.str), o->jsonout); 1672 | query_is_modified = true; 1673 | } 1674 | } 1675 | return query_is_modified; 1676 | } 1677 | 1678 | static CURLUcode seturl(struct option *o, CURLU *uh, const char *url) 1679 | { 1680 | return curl_url_set(uh, CURLUPART_URL, url, 1681 | (o->no_guess_scheme ? 1682 | 0 : CURLU_GUESS_SCHEME)| 1683 | (o->curl ? 0 : CURLU_NON_SUPPORT_SCHEME)| 1684 | (o->accept_space ? 1685 | CURLU_ALLOW_SPACE : 0)| 1686 | CURLU_URLENCODE); 1687 | } 1688 | 1689 | static char *canonical_path(const char *path) 1690 | { 1691 | /* split the path per slash, URL decode + encode, then put together again */ 1692 | size_t len = strlen(path); 1693 | char *sl; 1694 | char *dupe = NULL; 1695 | 1696 | do { 1697 | char *opath; 1698 | char *npath; 1699 | char *ndupe; 1700 | int olen; 1701 | sl = memchr(path, '/', len); 1702 | size_t partlen = sl ? 
(size_t)(sl - path) : len; 1703 | 1704 | if(partlen) { 1705 | /* First URL decode the part */ 1706 | opath = curl_easy_unescape(NULL, path, (int)partlen, &olen); 1707 | if(!opath) 1708 | return NULL; 1709 | 1710 | /* Then URL encode it again */ 1711 | npath = curl_easy_escape(NULL, opath, olen); 1712 | curl_free(opath); 1713 | if(!npath) 1714 | return NULL; 1715 | 1716 | ndupe = curl_maprintf("%s%s%s", dupe ? dupe : "", npath, sl ? "/": ""); 1717 | curl_free(npath); 1718 | } 1719 | else if(sl) { 1720 | /* zero length part but a slash */ 1721 | ndupe = curl_maprintf("%s/", dupe ? dupe : ""); 1722 | } 1723 | else { 1724 | /* no part, no slash */ 1725 | break; 1726 | } 1727 | curl_free(dupe); 1728 | if(!ndupe) 1729 | return NULL; 1730 | 1731 | dupe = ndupe; 1732 | if(sl) { 1733 | path = sl + 1; 1734 | len -= partlen + 1; 1735 | } 1736 | 1737 | } while(sl); 1738 | 1739 | return dupe; 1740 | } 1741 | 1742 | static void normalize_part(struct option *o, CURLU *uh, CURLUPart part) 1743 | { 1744 | char *ptr; 1745 | size_t ptrlen = 0; 1746 | (void)curl_url_get(uh, part, &ptr, 0); 1747 | 1748 | if(ptr) 1749 | ptrlen = strlen(ptr); 1750 | 1751 | if(ptrlen) { 1752 | int olen; 1753 | char *uptr; 1754 | /* First URL decode the component */ 1755 | char *rawptr = curl_easy_unescape(NULL, ptr, (int)ptrlen, &olen); 1756 | if(!rawptr) 1757 | errorf(o, ERROR_MEM, "out of memory"); 1758 | 1759 | /* Then URL encode it again */ 1760 | uptr = curl_easy_escape(NULL, rawptr, olen); 1761 | curl_free(rawptr); 1762 | if(!uptr) 1763 | errorf(o, ERROR_MEM, "out of memory"); 1764 | 1765 | if(strcmp(ptr, uptr)) 1766 | /* changed, store the updated one */ 1767 | (void)curl_url_set(uh, part, uptr, 0); 1768 | curl_free(uptr); 1769 | } 1770 | curl_free(ptr); 1771 | } 1772 | 1773 | 1774 | static void singleurl(struct option *o, 1775 | const char *url, /* might be NULL */ 1776 | struct iterinfo *iinfo, 1777 | struct curl_slist *iter) 1778 | { 1779 | CURLU *uh = iinfo->uh; 1780 | bool first_lap = true; 
1781 | if(!uh) { 1782 | uh = curl_url(); 1783 | if(!uh) 1784 | errorf(o, ERROR_MEM, "out of memory"); 1785 | if(url) { 1786 | CURLUcode rc = seturl(o, uh, url); 1787 | if(rc) { 1788 | curl_url_cleanup(uh); 1789 | verify(o, ERROR_BADURL, "%s [%s]", curl_url_strerror(rc), url); 1790 | return; 1791 | } 1792 | if(o->redirect) { 1793 | rc = seturl(o, uh, o->redirect); 1794 | if(rc) { 1795 | curl_url_cleanup(uh); 1796 | verify(o, ERROR_BADURL, "invalid redirection: %s [%s]", 1797 | curl_url_strerror(rc), o->redirect); 1798 | return; 1799 | } 1800 | } 1801 | } 1802 | } 1803 | do { 1804 | struct curl_slist *p; 1805 | bool url_is_invalid = false; 1806 | bool query_is_modified = false; 1807 | unsigned setmask = 0; 1808 | 1809 | /* set everything */ 1810 | setmask = set(uh, o); 1811 | 1812 | if(iter) { 1813 | char iterbuf[1024]; 1814 | /* "part=item1 item2 item2" */ 1815 | const char *part; 1816 | size_t plen; 1817 | const char *w; 1818 | size_t wlen; 1819 | char *sep; 1820 | bool urlencode = true; 1821 | const struct var *v; 1822 | 1823 | if(!iinfo->ptr) { 1824 | part = iter->data; 1825 | sep = strchr(part, '='); 1826 | if(!sep) 1827 | errorf(o, ERROR_ITER, "wrong iterate syntax"); 1828 | plen = sep - part; 1829 | if(sep[-1] == ':') { 1830 | urlencode = false; 1831 | plen--; 1832 | } 1833 | w = sep + 1; 1834 | /* store for next lap */ 1835 | iinfo->part = part; 1836 | iinfo->plen = plen; 1837 | v = comp2var(part, plen); 1838 | if(!v) { 1839 | curl_url_cleanup(uh); 1840 | errorf(o, ERROR_ITER, "bad component for iterate"); 1841 | } 1842 | if(iinfo->varmask & (1<part)) { 1843 | curl_url_cleanup(uh); 1844 | errorf(o, ERROR_ITER, 1845 | "duplicate component for iterate: %s", v->name); 1846 | } 1847 | if(setmask & (1 << v->part)) { 1848 | curl_url_cleanup(uh); 1849 | errorf(o, ERROR_ITER, 1850 | "duplicate --iterate and --set for component %s", 1851 | v->name); 1852 | } 1853 | } 1854 | else { 1855 | part = iinfo->part; 1856 | plen = iinfo->plen; 1857 | v = comp2var(part, plen); 
1858 | w = iinfo->ptr; 1859 | } 1860 | 1861 | sep = strchr(w, ' '); 1862 | if(sep) { 1863 | wlen = sep - w; 1864 | iinfo->ptr = sep + 1; /* next word is here */ 1865 | } 1866 | else { 1867 | /* last word */ 1868 | wlen = strlen(w); 1869 | iinfo->ptr = NULL; 1870 | } 1871 | (void)curl_msnprintf(iterbuf, sizeof(iterbuf), 1872 | "%.*s%s=%.*s", (int)plen, part, 1873 | urlencode ? "" : ":", 1874 | (int)wlen, w); 1875 | setone(uh, iterbuf, o); 1876 | if(iter->next) { 1877 | struct iterinfo info; 1878 | memset(&info, 0, sizeof(info)); 1879 | info.uh = uh; 1880 | info.varmask = iinfo->varmask | (1 << v->part); 1881 | singleurl(o, url, &info, iter->next); 1882 | } 1883 | } 1884 | 1885 | if(first_lap) { 1886 | /* extract the current path */ 1887 | char *opath; 1888 | char *cpath; 1889 | bool path_is_modified = false; 1890 | if(curl_url_get(uh, CURLUPART_PATH, &opath, 0)) 1891 | errorf(o, ERROR_MEM, "out of memory"); 1892 | 1893 | /* append path segments */ 1894 | for(p = o->append_path; p; p = p->next) { 1895 | char *apath = p->data; 1896 | char *npath; 1897 | size_t olen; 1898 | 1899 | /* does the existing path end with a slash, then don't 1900 | add one in between */ 1901 | olen = strlen(opath); 1902 | 1903 | /* append the new segment */ 1904 | npath = curl_maprintf("%s%s%s", opath, 1905 | opath[olen-1] == '/' ? 
"" : "/", 1906 | apath); 1907 | curl_free(opath); 1908 | opath = npath; 1909 | path_is_modified = true; 1910 | } 1911 | cpath = canonical_path(opath); 1912 | if(!cpath) 1913 | errorf(o, ERROR_MEM, "out of memory"); 1914 | 1915 | if(strcmp(cpath, opath)) { 1916 | /* updated */ 1917 | path_is_modified = true; 1918 | curl_free(opath); 1919 | opath = cpath; 1920 | } 1921 | else 1922 | curl_free(cpath); 1923 | if(path_is_modified) { 1924 | /* set the new path */ 1925 | if(curl_url_set(uh, CURLUPART_PATH, opath, 0)) 1926 | errorf(o, ERROR_MEM, "out of memory"); 1927 | } 1928 | curl_free(opath); 1929 | 1930 | normalize_part(o, uh, CURLUPART_FRAGMENT); 1931 | normalize_part(o, uh, CURLUPART_USER); 1932 | normalize_part(o, uh, CURLUPART_PASSWORD); 1933 | normalize_part(o, uh, CURLUPART_OPTIONS); 1934 | } 1935 | 1936 | query_is_modified |= extractqpairs(uh, o); 1937 | 1938 | /* trim parts */ 1939 | query_is_modified |= trim(o); 1940 | 1941 | /* replace parts */ 1942 | query_is_modified |= replace(o); 1943 | 1944 | if(first_lap) { 1945 | /* append query segments */ 1946 | for(p = o->append_query; p; p = p->next) { 1947 | addqpair(p->data, strlen(p->data), o->jsonout); 1948 | query_is_modified = true; 1949 | } 1950 | } 1951 | 1952 | /* sort query */ 1953 | query_is_modified |= sortquery(o); 1954 | 1955 | /* put the query back */ 1956 | if(query_is_modified) 1957 | qpair2query(uh, o); 1958 | 1959 | /* make sure the URL is still valid */ 1960 | if(!url || o->redirect || o->set_list || o->append_path) { 1961 | char *ourl = NULL; 1962 | CURLUcode rc = curl_url_get(uh, CURLUPART_URL, &ourl, 0); 1963 | if(rc) { 1964 | if(o->verify) /* only clean up if we're exiting */ 1965 | curl_url_cleanup(uh); 1966 | verify(o, ERROR_URL, "not enough input for a URL"); 1967 | url_is_invalid = true; 1968 | } 1969 | else { 1970 | rc = seturl(o, uh, ourl); 1971 | if(rc) { 1972 | if(o->verify) /* only clean up if we're exiting */ 1973 | curl_url_cleanup(uh); 1974 | verify(o, ERROR_BADURL, "%s [%s]", 
curl_url_strerror(rc), 1975 | ourl); 1976 | url_is_invalid = true; 1977 | } 1978 | else { 1979 | char *nurl = NULL; 1980 | rc = curl_url_get(uh, CURLUPART_URL, &nurl, 0); 1981 | if(!rc) 1982 | curl_free(nurl); 1983 | else { 1984 | if(o->verify) /* only clean up if we're exiting */ 1985 | curl_url_cleanup(uh); 1986 | verify(o, ERROR_BADURL, "url became invalid"); 1987 | url_is_invalid = true; 1988 | } 1989 | } 1990 | curl_free(ourl); 1991 | } 1992 | } 1993 | 1994 | if(iter && iter->next) 1995 | ; 1996 | else if(url_is_invalid) 1997 | ; 1998 | else if(o->jsonout) 1999 | json(o, uh); 2000 | else if(o->format) { 2001 | /* custom output format */ 2002 | get(o, uh); 2003 | } 2004 | else { 2005 | /* default output is full URL */ 2006 | char *nurl = NULL; 2007 | int rc = geturlpart(o, 0, uh, CURLUPART_URL, &nurl); 2008 | if(!rc) { 2009 | printf("%s\n", nurl); 2010 | curl_free(nurl); 2011 | } 2012 | } 2013 | 2014 | fflush(stdout); 2015 | 2016 | freeqpairs(); 2017 | 2018 | o->urls++; 2019 | 2020 | first_lap = false; 2021 | } while(iinfo->ptr); 2022 | if(!iinfo->uh) 2023 | curl_url_cleanup(uh); 2024 | } 2025 | 2026 | int main(int argc, const char **argv) 2027 | { 2028 | int exit_status = 0; 2029 | struct option o; 2030 | struct curl_slist *node; 2031 | memset(&o, 0, sizeof(o)); 2032 | setlocale(LC_ALL, ""); 2033 | curl_global_init(CURL_GLOBAL_ALL); 2034 | 2035 | for(argc--, argv++; argc > 0; argc--, argv++) { 2036 | bool usedarg = false; 2037 | if(!o.end_of_options && argv[0][0] == '-') { 2038 | /* dash-dash prefixed */ 2039 | if(getarg(&o, argv[0], argv[1], &usedarg)) { 2040 | /* if the long option ends with an equals sign, cut it there, 2041 | if it is a short option, show just two letters */ 2042 | size_t not_e = argv[0][1] == '-' ? 
strcspn(argv[0], "=") : 2; 2043 | errorf(&o, ERROR_FLAG, "unknown option: %.*s", (int)not_e, argv[0]); 2044 | } 2045 | } 2046 | else { 2047 | /* this is a URL */ 2048 | urladd(&o, argv[0]); 2049 | } 2050 | if(usedarg) { 2051 | /* skip the parsed argument */ 2052 | argc--; 2053 | argv++; 2054 | } 2055 | } 2056 | if(!o.qsep) 2057 | o.qsep = "&"; 2058 | 2059 | if(o.jsonout) 2060 | putchar('['); 2061 | 2062 | if(o.url) { 2063 | /* this is a file to read URLs from */ 2064 | char buffer[4096]; /* arbitrary max */ 2065 | bool end_of_file = false; 2066 | while(!end_of_file && fgets(buffer, sizeof(buffer), o.url)) { 2067 | char *eol = strchr(buffer, '\n'); 2068 | if(eol && (eol > buffer)) { 2069 | if(eol[-1] == '\r') 2070 | /* CRLF detected */ 2071 | eol--; 2072 | } 2073 | else if(eol == buffer) { 2074 | /* empty line */ 2075 | continue; 2076 | } 2077 | else if(feof(o.url)) { 2078 | /* end of file */ 2079 | eol = strlen(buffer) + buffer; 2080 | end_of_file = true; 2081 | } 2082 | else { 2083 | /* line too long */ 2084 | int ch; 2085 | trurl_warnf(&o, "skipping long line"); 2086 | do { 2087 | ch = getc(o.url); 2088 | } while(ch != EOF && ch != '\n'); 2089 | if(ch == EOF) { 2090 | if(ferror(o.url)) 2091 | trurl_warnf(&o, "getc: %s", strerror(errno)); 2092 | end_of_file = true; 2093 | } 2094 | continue; 2095 | } 2096 | 2097 | /* trim trailing spaces and tabs */ 2098 | while((eol > buffer) && 2099 | ((eol[-1] == ' ') || eol[-1] == '\t')) 2100 | eol--; 2101 | 2102 | if(eol > buffer) { 2103 | /* if there is actual content left to deal with */ 2104 | struct iterinfo iinfo; 2105 | memset(&iinfo, 0, sizeof(iinfo)); 2106 | *eol = 0; /* end of URL */ 2107 | singleurl(&o, buffer, &iinfo, o.iter_list); 2108 | } 2109 | } 2110 | 2111 | if(!end_of_file && ferror(o.url)) 2112 | trurl_warnf(&o, "fgets: %s", strerror(errno)); 2113 | if(o.urlopen) 2114 | fclose(o.url); 2115 | } 2116 | else { 2117 | /* not reading URLs from a file */ 2118 | node = o.url_list; 2119 | do { 2120 | if(node) { 2121 
| const char *url = node->data; 2122 | struct iterinfo iinfo; 2123 | memset(&iinfo, 0, sizeof(iinfo)); 2124 | singleurl(&o, url, &iinfo, o.iter_list); 2125 | node = node->next; 2126 | } 2127 | else { 2128 | struct iterinfo iinfo; 2129 | memset(&iinfo, 0, sizeof(iinfo)); 2130 | o.verify = true; 2131 | singleurl(&o, NULL, &iinfo, o.iter_list); 2132 | } 2133 | } while(node); 2134 | } 2135 | if(o.jsonout) 2136 | printf("%s]\n", o.urls ? "\n" : ""); 2137 | /* we're done with libcurl, so clean it up */ 2138 | trurl_cleanup_options(&o); 2139 | curl_global_cleanup(); 2140 | return exit_status; 2141 | } 2142 | -------------------------------------------------------------------------------- /trurl.md: -------------------------------------------------------------------------------- 1 | --- 2 | c: Copyright (C) Daniel Stenberg, , et al. 3 | SPDX-License-Identifier: curl 4 | Title: trurl 5 | Section: 1 6 | Source: trurl 0.16.1 7 | See-also: 8 | - curl (1) 9 | - wcurl (1) 10 | --- 11 | 12 | # NAME 13 | 14 | trurl - transpose URLs 15 | 16 | # SYNOPSIS 17 | 18 | **trurl [options / URLs]** 19 | 20 | # DESCRIPTION 21 | 22 | **trurl** parses, manipulates and outputs URLs and parts of URLs. 23 | 24 | It uses the RFC 3986 definition of URLs and it uses libcurl's URL parser to do 25 | so, which includes a few "extensions". The URL support is limited to 26 | "hierarchical" URLs, the ones that use `://` separators after the scheme. 27 | 28 | Typically you pass in one or more URLs and decide what of that you want 29 | output. Possibly modifying the URL as well. 30 | 31 | trurl knows URLs and every URL consists of up to ten separate and independent 32 | *components*. These components can be extracted, removed and updated with 33 | trurl and they are referred to by their respective names: scheme, user, 34 | password, options, host, port, path, query, fragment and zoneid. 35 | 36 | # NORMALIZATION 37 | 38 | When provided a URL to work with, trurl "normalizes" it. 
It means that 39 | individual URL components are URL decoded then URL encoded back again and set 40 | in the URL. 41 | 42 | Example: 43 | 44 | $ trurl 'http://ex%61mple:80/%62ath/a/../b?%2e%FF#tes%74' 45 | http://example/bath/b?.%ff#test 46 | 47 | # OPTIONS 48 | 49 | Options start with one or two dashes. Many of the options require an 50 | additional value next to them. 51 | 52 | Any other argument is interpreted as a URL argument, and is treated as if it 53 | was following a `--url` option. 54 | 55 | The first argument that is exactly two dashes (`--`), marks the end of 56 | options; any argument after the end of options is interpreted as a URL 57 | argument even if it starts with a dash. 58 | 59 | Long options can be provided either as `--flag argument` or as 60 | `--flag=argument`. 61 | 62 | ## -a, --append [component]=[data] 63 | 64 | Append data to a component. This can only append data to the path and the 65 | query components. 66 | 67 | For path, this URL encodes and appends the new segment to the path, separated 68 | with a slash. 69 | 70 | For query, this URL encodes and appends the new segment to the query, 71 | separated with an ampersand (&). If the appended segment contains an equal 72 | sign (`=`) that one is kept verbatim and both sides of the first occurrence 73 | are URL encoded separately. 74 | 75 | ## --accept-space 76 | 77 | When set, trurl tries to accept spaces as part of the URL and instead URL 78 | encode such occurrences accordingly. 79 | 80 | According to RFC 3986, a space cannot legally be part of a URL. This option 81 | provides a best-effort to convert the provided string into a valid URL. 82 | 83 | ## --as-idn 84 | 85 | Converts a punycode ASCII hostname to its original International Domain Name 86 | in Unicode. If the hostname is not using punycode then the original hostname 87 | is used. 88 | 89 | ## --curl 90 | 91 | Only accept URL schemes supported by libcurl. 
92 | 93 | ## --default-port 94 | 95 | When set, trurl uses the scheme's default port number for URLs with a known 96 | scheme, and without an explicit port number. 97 | 98 | Note that trurl only knows default port numbers for URL schemes that are 99 | supported by libcurl. 100 | 101 | Since, by default, trurl removes default port numbers from URLs with a known 102 | scheme, this option is pretty much ignored unless one of *--get*, *--json*, 103 | or *--keep-port* is also specified. 104 | 105 | ## -f, --url-file [filename] 106 | 107 | Read URLs to work on from the given file. Use the filename `-` (a single 108 | minus) to tell trurl to read the URLs from stdin. 109 | 110 | Each line needs to be a single valid URL. trurl removes one carriage return 111 | character at the end of the line if present, trims off all the trailing space 112 | and tab characters, and skips all empty (after trimming) lines. 113 | 114 | The maximum line length supported in a file like this is 4094 bytes. Lines 115 | that exceed that length are skipped, and a warning is printed to stderr when 116 | they are encountered. 117 | 118 | ## -g, --get [format] 119 | 120 | Output text and URL data according to the provided format string. Components 121 | from the URL can be output when specified as **{component}** or 122 | **[component]**, with the name of the part shown within curly braces or 123 | brackets. You cannot mix braces and brackets for this purpose in the same 124 | command line. 125 | 126 | The following component names are available (case sensitive): url, scheme, 127 | user, password, options, host, port, path, query, fragment and zoneid. 128 | 129 | **{component}** expands to nothing if the given component does not have a 130 | value. 131 | 132 | Components are shown URL decoded by default. 133 | 134 | URL decoding a component may cause problems to display it. Such problems make 135 | a warning get displayed unless **--quiet** is used. 
136 | 137 | trurl supports a range of different qualifiers, or prefixes, to the component 138 | that changes how it handles it: 139 | 140 | If **url:** is specified, like `{url:path}`, the component gets output URL 141 | encoded. As a shortcut, `url:` also works written as a single colon: 142 | `{:path}`. 143 | 144 | If **strict:** is specified, like `{strict:path}`, URL decode problems are 145 | turned into errors. In this stricter mode, a URL decode problem makes trurl 146 | stop what it is doing and return with exit code 10. 147 | 148 | If **must:** is specified, like `{must:query}`, it makes trurl return an error 149 | if the requested component does not exist in the URL. By default a missing 150 | component will just be shown blank. 151 | 152 | If **default:** is specified, like `{default:url}` or `{default:port}`, and 153 | the port is not explicitly specified in the URL, the scheme's default port is 154 | output if it is known. 155 | 156 | If **puny:** is specified, like `{puny:url}` or `{puny:host}`, the punycoded 157 | version of the hostname is used in the output. This option is mutually 158 | exclusive with **idn:**. 159 | 160 | If **idn:** is specified like `{idn:url}` or `{idn:host}`, the International 161 | Domain Name version of the hostname is used in the output if it is provided 162 | as a correctly encoded punycode version. This option is mutually exclusive 163 | with **puny:**. 164 | 165 | If *--default-port* is specified, all formats are expanded as if they used 166 | *default:*; and if *--punycode* is used, all formats are expanded as if they 167 | used *puny:*. Also note that `{url}` is affected by the *--keep-port* option. 168 | 169 | Hosts provided as IPv6 numerical addresses are provided within square 170 | brackets. Like `[fe80::20c:29ff:fe9c:409b]`. 171 | 172 | Hosts provided as IPv4 numerical addresses are *normalized* and provided as 173 | four dot-separated decimal numbers when output. 
174 | 175 | You can access specific keys in the query string using the format 176 | **{query:key}**. Then the value of the first matching key is output using a 177 | case sensitive match. When extracting a URL decoded query key that contains 178 | `%00`, such octet is replaced with a single period `.` in the output. 179 | 180 | You can access specific keys in the query string and output all values using the 181 | format **{query-all:key}**. This looks for *key* case sensitively and outputs 182 | all values for that key space-separated. 183 | 184 | The *format* string supports the following backslash sequences: 185 | 186 | \\ - backslash 187 | 188 | \\t - tab 189 | 190 | \\n - newline 191 | 192 | \\r - carriage return 193 | 194 | \\{ - an open curly brace that does not start a variable 195 | 196 | \\[ - an open bracket that does not start a variable 197 | 198 | All other text in the format string is shown as-is. 199 | 200 | ## -h, --help 201 | 202 | Show the help output. 203 | 204 | ## --iterate [component]=[item1 item2 ...] 205 | 206 | Set the component to multiple values and output the result once for each 207 | iteration. Several combined iterations are allowed to generate combinations, 208 | but only one *--iterate* option per component. The listed items to iterate 209 | over should be separated by single spaces. 210 | 211 | Example: 212 | 213 | $ trurl example.com --iterate=scheme="ftp https" --iterate=port="22 80" 214 | ftp://example.com:22/ 215 | ftp://example.com:80/ 216 | https://example.com:22/ 217 | https://example.com:80/ 218 | 219 | ## --json 220 | 221 | Outputs all set components of the URLs as JSON objects. All components of the 222 | URL that have data get populated in the parts object using their component 223 | names. See below for details on the format. 224 | 225 | The URL components are provided URL decoded. Change that with **--urlencode**. 
226 | 227 | ## --keep-port 228 | 229 | By default, trurl removes default port numbers from URLs with a known scheme 230 | even if they are explicitly specified in the input URL. This option makes 231 | trurl not remove them. 232 | 233 | Example: 234 | 235 | $ trurl https://example.com:443/ --keep-port 236 | https://example.com:443/ 237 | 238 | ## --no-guess-scheme 239 | 240 | Disables libcurl's scheme guessing feature. URLs that do not contain a scheme 241 | are treated as invalid URLs. 242 | 243 | Example: 244 | 245 | $ trurl example.com --no-guess-scheme 246 | trurl note: Bad scheme [example.com] 247 | 248 | ## --punycode 249 | 250 | Uses the punycode version of the hostname, which is how International Domain 251 | Names are converted into plain ASCII. If the hostname is not using IDN, the 252 | regular ASCII name is used. 253 | 254 | Example: 255 | 256 | $ trurl http://åäö/ --punycode 257 | http://xn--4cab6c/ 258 | 259 | ## --qtrim [what] 260 | 261 | Trims data off a query. 262 | 263 | *what* is specified as a full name of a name/value pair, or as a word prefix 264 | (using a single trailing asterisk (`*`)) which makes trurl remove the tuples 265 | from the query string that match the instruction. 266 | 267 | To match a literal trailing asterisk instead of using a wildcard, escape it 268 | with a backslash in front of it. Like `\\*`. 269 | 270 | ## --query-separator [what] 271 | 272 | Specify the single letter used for separating query pairs. The default is `&` 273 | but at least in the past sometimes semicolons `;` or even colons `:` have been 274 | used for this purpose. If your URL uses something other than the default 275 | letter, setting the right one makes sure trurl can do its query operations 276 | properly. 277 | 278 | Example: 279 | 280 | $ trurl "https://curl.se?b=name:a=age" --sort-query --query-separator ":" 281 | https://curl.se/?a=age:b=name 282 | 283 | ## --quiet 284 | 285 | Suppress (some) notes and warnings. 
286 | 287 | ## --redirect [URL] 288 | 289 | Redirect the URL to this new location. The redirection is performed on the 290 | base URL, so, if no base URL is specified, no redirection is performed. 291 | 292 | Example: 293 | 294 | $ trurl --url https://curl.se/we/are.html --redirect ../here.html 295 | https://curl.se/here.html 296 | 297 | ## --replace [data] 298 | 299 | Replaces a URL query. 300 | 301 | data can either take the form of a single value, or as a key/value pair in the 302 | shape *foo=bar*. If replace is called on an item that is not in the list of 303 | queries trurl ignores that item. 304 | 305 | trurl URL encodes both sides of the `=` character in the given input data 306 | argument. 307 | 308 | ## --replace-append [data] 309 | 310 | Works the same as *--replace*, but trurl appends a missing query string if 311 | it is not in the query list already. 312 | 313 | ## -s, --set [component][:]=[data] 314 | 315 | Set this URL component. Setting blank string (`""`) clears the component from 316 | the URL. 317 | 318 | The following components can be set: url, scheme, user, password, options, 319 | host, port, path, query, fragment and zoneid. 320 | 321 | If a simple `=`-assignment is used, the data is URL encoded when applied. If 322 | `:=` is used, the data is assumed to already be URL encoded and stored as-is. 323 | 324 | If `?=` is used, the set is only performed if the component is not already 325 | set. It avoids overwriting any already set data. 326 | 327 | You can also combine `:` and `?` into `?:=` if desired. 328 | 329 | If no URL or *--url-file* argument is provided, trurl tries to create a URL 330 | using the components provided by the *--set* options. If not enough components 331 | are specified, this fails. 332 | 333 | ## --sort-query 334 | 335 | The "variable=content" tuplets in the query component are sorted in a case 336 | insensitive alphabetical order. 
This helps making URLs identical that 337 | otherwise only had their query pairs in different orders. 338 | 339 | ## --trim [component]=[what] 340 | 341 | Deprecated: use **--qtrim**. 342 | 343 | Trims data off a component. Currently this can only trim a query component. 344 | 345 | *what* is specified as a full word or as a word prefix (using a single 346 | trailing asterisk (`*`)) which makes trurl remove the tuples from the query 347 | string that match the instruction. 348 | 349 | To match a literal trailing asterisk instead of using a wildcard, escape it 350 | with a backslash in front of it. Like `\\*`. 351 | 352 | ## --url [URL] 353 | 354 | Set the input URL to work with. The URL may be provided without a scheme, 355 | which then typically is not actually a legal URL but trurl tries to figure 356 | out what is meant and guess what scheme to use (unless *--no-guess-scheme* 357 | is used). 358 | 359 | Providing multiple URLs makes trurl act on all URLs in a serial fashion. 360 | 361 | If the URL cannot be parsed for whatever reason, trurl simply moves on to 362 | the next provided URL - unless *--verify* is used. 363 | 364 | ## --urlencode 365 | 366 | Outputs URL encoded version of components by default when using *--get* or 367 | *--json*. 368 | 369 | ## -v, --version 370 | 371 | Show version information and exit. 372 | 373 | ## --verify 374 | 375 | When a URL is provided, return error immediately if it does not parse as a 376 | valid URL. In normal cases, trurl can forgive a bad URL input. 377 | 378 | # URL COMPONENTS 379 | 380 | ## scheme 381 | 382 | This is the leading character sequence of a URL, excluding the "://" 383 | separator. It cannot be specified URL encoded. 384 | 385 | A URL cannot exist without a scheme, but unless **--no-guess-scheme** is used 386 | trurl guesses what scheme that was intended if none was provided. 
387 | 388 | Examples: 389 | 390 | $ trurl https://odd/ -g '{scheme}' 391 | https 392 | 393 | $ trurl odd -g '{scheme}' 394 | http 395 | 396 | $ trurl odd -g '{scheme}' --no-guess-scheme 397 | trurl note: Bad scheme [odd] 398 | 399 | ## user 400 | 401 | After the scheme separator, there can be a username provided. If it ends with 402 | a colon (`:`), there is a password provided. If it ends with an at character 403 | (`@`) there is no password provided in the URL. 404 | 405 | Example: 406 | 407 | $ trurl https://user%3a%40:secret@odd/ -g '{user}' 408 | user:@ 409 | 410 | ## password 411 | 412 | If the password ends with a semicolon (`;`) there is an options field 413 | following. This field is only accepted by trurl for URLs using the IMAP 414 | scheme. 415 | 416 | Example: 417 | 418 | $ trurl https://user:secr%65t@odd/ -g '{password}' 419 | secret 420 | 421 | ## options 422 | 423 | This field can only end with an at character (`@`) that separates the options 424 | from the hostname. 425 | 426 | $ trurl 'imap://user:pwd;giraffe@odd' -g '{options}' 427 | giraffe 428 | 429 | If the scheme is not IMAP, the `giraffe` part is instead considered part of 430 | the password: 431 | 432 | $ trurl 'sftp://user:pwd;giraffe@odd' -g '{password}' 433 | pwd;giraffe 434 | 435 | We strongly advise users to %-encode `;`, `:` and `@` in URLs of course to 436 | reduce the risk for confusions. 437 | 438 | ## host 439 | 440 | The host component is the hostname or a numerical IP address. If a hostname is 441 | provided, it can be an International Domain Name with non-ASCII characters. A 442 | hostname can be provided URL encoded. 443 | 444 | trurl provides options for working with the IDN hostnames either as IDN or in 445 | its punycode version. 
446 | 447 | Example, convert an IDN name to punycode in the output: 448 | 449 | $ trurl http://åäö/ --punycode 450 | http://xn--4cab6c/ 451 | 452 | Or the reverse, convert a punycode hostname into its IDN version: 453 | 454 | $ trurl http://xn--4cab6c/ --as-idn 455 | http://åäö/ 456 | 457 | If the URL's hostname starts with an open bracket (`[`) it is a numerical IPv6 458 | address that also must end with a closing bracket (`]`). trurl normalizes IPv6 459 | addresses. 460 | 461 | Example: 462 | 463 | $ trurl 'http://[2001:9b1:0:0:0:0:7b97:364b]/' 464 | http://[2001:9b1::7b97:364b]/ 465 | 466 | A numerical IPv4 address can be specified using one, two, three or four 467 | numbers separated with dots and they can use decimal, octal or hexadecimal. 468 | trurl normalizes provided addresses and uses four dotted decimal numbers in 469 | its output. 470 | 471 | Examples: 472 | 473 | $ trurl http://646464646/ 474 | http://38.136.68.134/ 475 | 476 | $ trurl http://246.646/ 477 | http://246.0.2.134/ 478 | 479 | $ trurl http://246.46.646/ 480 | http://246.46.2.134/ 481 | 482 | $ trurl http://0x14.0xb3022/ 483 | http://20.11.48.34/ 484 | 485 | ## zoneid 486 | 487 | If the provided host is an IPv6 address, it might contain a specific zoneid. A 488 | number or a network interface name normally. 489 | 490 | Example: 491 | 492 | $ trurl 'http://[2001:9b1::f358:1ba4:7b97:364b%enp3s0]/' -g '{zoneid}' 493 | enp3s0 494 | 495 | ## port 496 | 497 | If the host ends with a colon (`:`) then a port number follows. It is a 16 bit 498 | decimal number that may not be URL encoded. 499 | 500 | trurl knows the default port number for many URL schemes so it can show port 501 | numbers for a URL even if none was explicitly used in the URL. With 502 | **--default-port** it can add the default port to a URL even when not provided. 
503 | 504 | Example: 505 | 506 | $ trurl http:/a --default-port 507 | http://a:80/ 508 | 509 | Similarly, trurl normally hides the port number if the given number is the 510 | default. 511 | 512 | Example: 513 | 514 | $ trurl http:/a:80 515 | http://a/ 516 | 517 | But a user can make trurl keep the port even if it is the default, with 518 | **--keep-port**. 519 | 520 | Example: 521 | 522 | $ trurl http:/a:80 --keep-port 523 | http://a:80/ 524 | 525 | ## path 526 | 527 | A URL path is assumed to always start with and contain at least a slash (`/`), 528 | even if none is actually provided in the URL. 529 | 530 | Example: 531 | 532 | $ trurl http://xn--4cab6c -g '[path]' 533 | / 534 | 535 | When setting the path, trurl will inject a leading slash if none is provided: 536 | 537 | $ trurl http://hello -s path="pony" 538 | http://hello/pony 539 | 540 | $ trurl http://hello -s path="/pony" 541 | http://hello/pony 542 | 543 | If the input path contains dotdot or dot-slash sequences, they are normalized 544 | away. 545 | 546 | Example: 547 | 548 | $ trurl http://hej/one/../two/../three/./four 549 | http://hej/three/four 550 | 551 | You can append a new segment to an existing path with **--append** like this: 552 | 553 | $ trurl http://twelve/three?hello --append path=four 554 | http://twelve/three/four?hello 555 | 556 | ## query 557 | 558 | The query part does not include the leading question mark (`?`) separator when 559 | extracted with trurl. 560 | 561 | Example: 562 | 563 | $ trurl http://horse?elephant -g '{query}' 564 | elephant 565 | 566 | Example, if you set the query with a leading question mark: 567 | 568 | $ trurl http://horse?elephant -s "query=?elephant" 569 | http://horse/?%3felephant 570 | 571 | Query parts are often made up of a series of name=value pairs separated with 572 | ampersands (`&`), and trurl offers several ways to work with such. 
573 | 574 | Append a new name value pair to a URL with **--append**: 575 | 576 | $ trurl http://host?name=hello --append query=search=life 577 | http://host/?name=hello&search=life 578 | 579 | You can **--replace** the value of a specific existing name among the pairs: 580 | 581 | $ trurl 'http://alpha?one=real&two=fake' --replace two=alsoreal 582 | http://alpha/?one=real&two=alsoreal 583 | 584 | If the specific name you want to replace perhaps does not exist in the URL, 585 | you can opt to replace *or* append the pair: 586 | 587 | $ trurl 'http://alpha?one=real&two=fake' --replace-append three=alsoreal 588 | http://alpha/?one=real&two=fake&three=alsoreal 589 | 590 | In order to perhaps compare two URLs using query name value pairs, sorting 591 | them first at least increases the chances of it working: 592 | 593 | $ trurl "http://alpha/?one=real&two=fake&three=alsoreal" --sort-query 594 | http://alpha/?one=real&three=alsoreal&two=fake 595 | 596 | Remove name/value pairs from the URL by specifying exact name or wildcard 597 | pattern with **--qtrim**: 598 | 599 | $ trurl 'https://example.com?a12=hej&a23=moo&b12=foo' --qtrim 'a*' 600 | https://example.com/?b12=foo 601 | 602 | ## fragment 603 | 604 | The fragment part does not include the leading hash sign (`#`) separator when 605 | extracted with trurl. 606 | 607 | Example: 608 | 609 | $ trurl http://horse#elephant -g '{fragment}' 610 | elephant 611 | 612 | Example, if you set the fragment with a leading hash sign: 613 | 614 | $ trurl "http://horse#elephant" -s "fragment=#zebra" 615 | http://horse/#%23zebra 616 | 617 | The fragment part of a URL is for local purposes only. The data in there is 618 | never actually sent over the network when a URL is used for transfers. 619 | 620 | ## url 621 | 622 | trurl supports **url** as a named component for **--get** to allow for more 623 | powerful outputs, but of course it is not actually a "component"; it is the 624 | full URL. 
625 | 626 | Example: 627 | 628 | $ trurl ftps://example.com:2021/p%61th -g '{url}' 629 | ftps://example.com:2021/path 630 | 631 | # JSON output format 632 | 633 | The *--json* option outputs a JSON array with one or more objects. One for 634 | each URL. Each URL JSON object contains a number of properties, a series of 635 | key/value pairs. The exact set present depends on the given URL. 636 | 637 | ## url 638 | 639 | This key exists in every object. It is the complete URL. Affected by 640 | *--default-port*, *--keep-port*, and *--punycode*. 641 | 642 | ## parts 643 | 644 | This key exists in every object, and contains an object with a key for each of 645 | the settable URL components. If a component is missing, it means it is not 646 | present in the URL. The parts are URL decoded unless *--urlencode* is used. 647 | 648 | ## parts.scheme 649 | The URL scheme. 650 | 651 | ## parts.user 652 | The username. 653 | 654 | ## parts.password 655 | The password. 656 | 657 | ## parts.options 658 | The options. Note that only a few URL schemes support the "options" 659 | component. 660 | 661 | ## parts.host 662 | The normalized hostname. It might be a UTF-8 name if an IDN name was used. It 663 | can also be a normalized IPv4 or IPv6 address. An IPv6 address always starts 664 | with a bracket (**[**) - and no other hostnames can contain such a symbol. If 665 | *--punycode* is used, the punycode version of the host is outputted instead. 666 | 667 | ## parts.port 668 | The provided port number as a string. If the port number was not provided in 669 | the URL, but the scheme is a known one, and *--default-port* is in use, the 670 | default port for that scheme is provided here. 671 | 672 | ## parts.path 673 | The path. Including the leading slash. 674 | 675 | ## parts.query 676 | The full query, excluding the question mark separator. 677 | 678 | ## parts.fragment 679 | The fragment, excluding the pound sign separator. 
680 | 681 | ## parts.zoneid 682 | The zone id, which can only be present in an IPv6 address. When this key is 683 | present, then **host** is an IPv6 numerical address. 684 | 685 | ## params 686 | 687 | This key contains an array of query key/value objects. Each such pair is 688 | listed with "key" and "value" and their respective contents in the output. 689 | 690 | The key/values are extracted from the query where they are separated by 691 | ampersands (**&**) - or the user sets with **--query-separator**. 692 | 693 | The query pairs are listed in the order of appearance in a left-to-right 694 | order, but can be made alpha-sorted with **--sort-query**. 695 | 696 | It is only present if the URL has a query. 697 | 698 | # EXAMPLES 699 | 700 | ## Replace the hostname of a URL 701 | 702 | ~~~ 703 | $ trurl --url https://curl.se --set host=example.com 704 | https://example.com/ 705 | ~~~ 706 | 707 | ## Create a URL by setting components 708 | 709 | ~~~ 710 | $ trurl --set host=example.com --set scheme=ftp 711 | ftp://example.com/ 712 | ~~~ 713 | 714 | ## Redirect a URL 715 | 716 | ~~~ 717 | $ trurl --url https://curl.se/we/are.html --redirect here.html 718 | https://curl.se/we/here.html 719 | ~~~ 720 | 721 | ## Change port number 722 | 723 | This also shows how trurl removes dot-dot sequences 724 | ~~~ 725 | $ trurl --url https://curl.se/we/../are.html --set port=8080 726 | https://curl.se:8080/are.html 727 | ~~~ 728 | 729 | ## Extract the path from a URL 730 | 731 | ~~~ 732 | $ trurl --url https://curl.se/we/are.html --get '{path}' 733 | /we/are.html 734 | ~~~ 735 | 736 | ## Extract the port from a URL 737 | 738 | This gets the default port based on the scheme if the port is not set in the 739 | URL. 
740 | ~~~ 741 | $ trurl --url https://curl.se/we/are.html --get '{default:port}' 742 | 443 743 | ~~~ 744 | 745 | ## Append a path segment to a URL 746 | 747 | ~~~ 748 | $ trurl --url https://curl.se/hello --append path=you 749 | https://curl.se/hello/you 750 | ~~~ 751 | 752 | ## Append a query segment to a URL 753 | 754 | ~~~ 755 | $ trurl --url "https://curl.se?name=hello" --append query=search=string 756 | https://curl.se/?name=hello&search=string 757 | ~~~ 758 | 759 | ## Read URLs from stdin 760 | 761 | ~~~ 762 | $ cat urllist.txt | trurl --url-file - 763 | \&... 764 | ~~~ 765 | 766 | ## Output JSON 767 | 768 | ~~~ 769 | $ trurl "https://fake.host/search?q=answers&user=me#frag" --json 770 | [ 771 | { 772 | "url": "https://fake.host/search?q=answers&user=me#frag", 773 | "parts": { 774 | "scheme": "https", 775 | "host": "fake.host", 776 | "path": "/search", 777 | "query": "q=answers&user=me", 778 | "fragment": "frag" 779 | }, 780 | "params": [ 781 | { 782 | "key": "q", 783 | "value": "answers" 784 | }, 785 | { 786 | "key": "user", 787 | "value": "me" 788 | } 789 | ] 790 | } 791 | ] 792 | ~~~ 793 | 794 | ## Remove tracking tuples from query 795 | 796 | ~~~ 797 | $ trurl "https://curl.se?search=hey&utm_source=tracker" --qtrim "utm_*" 798 | https://curl.se/?search=hey 799 | ~~~ 800 | 801 | ## Show a specific query key value 802 | 803 | ~~~ 804 | $ trurl "https://example.com?a=home&here=now&thisthen" -g '{query:a}' 805 | home 806 | ~~~ 807 | 808 | ## Sort the key/value pairs in the query component 809 | 810 | ~~~ 811 | $ trurl "https://example.com?b=a&c=b&a=c" --sort-query 812 | https://example.com/?a=c&b=a&c=b 813 | ~~~ 814 | 815 | ## Work with a query that uses a semicolon separator 816 | 817 | ~~~ 818 | $ trurl "https://curl.se?search=fool;page=5" --qtrim "search" --query-separator ";" 819 | https://curl.se/?page=5 820 | ~~~ 821 | 822 | ## Accept spaces in the URL path 823 | 824 | ~~~ 825 | $ trurl "https://curl.se/this has space/index.html" --accept-space 826 | 
https://curl.se/this%20has%20space/index.html 827 | ~~~ 828 | 829 | ## Create multiple variations of a URL with different schemes 830 | 831 | ~~~ 832 | $ trurl "https://curl.se/path/index.html" --iterate "scheme=http ftp sftp" 833 | http://curl.se/path/index.html 834 | ftp://curl.se/path/index.html 835 | sftp://curl.se/path/index.html 836 | ~~~ 837 | 838 | # EXIT CODES 839 | 840 | trurl returns a non-zero exit code to indicate problems. 841 | 842 | ## 1 843 | 844 | A problem with --url-file 845 | 846 | ## 2 847 | 848 | A problem with --append 849 | 850 | ## 3 851 | 852 | A command line option misses an argument 853 | 854 | ## 4 855 | 856 | A command line option mistake or an illegal option combination. 857 | 858 | ## 5 859 | 860 | A problem with --set 861 | 862 | ## 6 863 | 864 | Out of memory 865 | 866 | ## 7 867 | 868 | Could not output a valid URL 869 | 870 | ## 8 871 | 872 | A problem with --qtrim 873 | 874 | ## 9 875 | 876 | If --verify is set and the input URL cannot parse. 877 | 878 | ## 10 879 | 880 | A problem with --get 881 | 882 | ## 11 883 | 884 | A problem with --iterate 885 | 886 | ## 12 887 | 888 | A problem with --replace or --replace-append 889 | 890 | # WWW 891 | 892 | https://curl.se/trurl 893 | -------------------------------------------------------------------------------- /version.h: -------------------------------------------------------------------------------- 1 | #ifndef TRURL_VERSION_H 2 | #define TRURL_VERSION_H 3 | /*************************************************************************** 4 | * _ _ ____ _ 5 | * Project ___| | | | _ \| | 6 | * / __| | | | |_) | | 7 | * | (__| |_| | _ <| |___ 8 | * \___|\___/|_| \_\_____| 9 | * 10 | * Copyright (C) Daniel Stenberg, , et al. 11 | * 12 | * This software is licensed as described in the file COPYING, which 13 | * you should have received as part of this distribution. The terms 14 | * are also available at https://curl.se/docs/copyright.html. 
15 | * 16 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell 17 | * copies of the Software, and permit persons to whom the Software is 18 | * furnished to do so, under the terms of the COPYING file. 19 | * 20 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 21 | * KIND, either express or implied. 22 | * 23 | * SPDX-License-Identifier: curl 24 | * 25 | ***************************************************************************/ 26 | 27 | #define TRURL_VERSION_TXT "0.16.1" 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /winbuild/.vcpkg: -------------------------------------------------------------------------------- 1 | # git clone https://github.com/microsoft/vcpkg.git 2 | # .\vcpkg\bootstrap-vcpkg.bat 3 | 4 | .\vcpkg\vcpkg install curl:x86-windows-static-md 5 | .\vcpkg\vcpkg install curl:x64-windows-static-md 6 | 7 | .\vcpkg\vcpkg integrate install 8 | -------------------------------------------------------------------------------- /winbuild/README.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Building trurl with Microsoft C++ Build Tools 8 | 9 | Download and install [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) 10 | 11 | When installing, choose the `Desktop development with C++` option. 12 | 13 | ## Open a command prompt 14 | 15 | Open the **x64 Native Tools Command Prompt for VS 2022**, or if you are on an x86 platform **x86 Native Tools Command Prompt for VS 2022** 16 | 17 | ## Set up the vcpkg repository 18 | 19 | Note: The location of the vcpkg repository does not necessarily need to correspond to the trurl directory, it can be set up anywhere. But it is recommended to use a short path such as `C:\src\vcpkg` or `C:\dev\vcpkg`, since otherwise you may run into path issues for some port build systems. 
20 | 21 | Once you are in the console, run the below commands to clone the vcpkg repository and set up the curl dependencies: 22 | 23 | ~~~ 24 | git clone https://github.com/microsoft/vcpkg.git 25 | .\vcpkg\bootstrap-vcpkg.bat 26 | .\vcpkg\vcpkg install curl:x86-windows-static-md 27 | .\vcpkg\vcpkg install curl:x64-windows-static-md 28 | .\vcpkg\vcpkg integrate install 29 | ~~~ 30 | 31 | Once the vcpkg repository is set up you do not need to run these commands again. If a newer version of curl is released, you may need to run `git pull` in the vcpkg repository and then `vcpkg upgrade` to fetch the new version. 32 | 33 | ## Build in the console 34 | 35 | Once the vcpkg repository and dependencies are set up, go to the winbuild directory in the trurl sources: 36 | 37 | cd trurl\winbuild 38 | 39 | Then you can call the build command with the desired parameters. The builds will be placed in an output directory as described below. 40 | 41 | ## Parameters 42 | 43 | - The `Configuration` parameter can be set to either `Debug` or `Release` 44 | - The `Platform` parameter can be set to either `x86` or `x64` 45 | 46 | ## Build commands 47 | 48 | - x64 Debug: `msbuild /m /t:Clean,Build /p:Configuration=Debug /p:Platform=x64 trurl.sln` 49 | - x64 Release: `msbuild /m /t:Clean,Build /p:Configuration=Release /p:Platform=x64 trurl.sln` 50 | - x86 Debug: `msbuild /m /t:Clean,Build /p:Configuration=Debug /p:Platform=x86 trurl.sln` 51 | - x86 Release: `msbuild /m /t:Clean,Build /p:Configuration=Release /p:Platform=x86 trurl.sln` 52 | 53 | Note: If you are using the x64 Native Tools Command Prompt you can also run the x86 build commands. 54 | 55 | ## Output directories 56 | 57 | The output files will be placed in: `winbuild\bin\<Platform>\<Configuration>\` 58 | PDB files will be generated in the same directory as the executable for Debug builds, but they will not be generated for release builds. 59 | 60 | Intermediate files will be placed in: `winbuild\obj\<Platform>\<Configuration>\` 61 | These include build logs and obj files. 
62 | 63 | ## Tests 64 | 65 | Tests can be run by going to the directory of the output files in the console and running `python .\..\..\..\..\test.py` 66 | You will need Python 3 installed to run the tests, such as the official [Python for Windows](https://www.python.org/downloads/windows/) distribution 67 | 
-------------------------------------------------------------------------------- /winbuild/trurl.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 16.0 23 | Win32Proj 24 | {575657cf-843f-491c-b15b-881c28df36ca} 25 | trurl 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | $(SolutionDir)bin\$(Platform)\$(Configuration)\ 75 | $(SolutionDir)obj\$(Platform)\$(Configuration)\ 76 | 77 | 78 | $(SolutionDir)bin\$(Platform)\$(Configuration)\ 79 | $(SolutionDir)obj\$(Platform)\$(Configuration)\ 80 | 81 | 82 | $(SolutionDir)bin\$(PlatformShortName)\$(Configuration)\ 83 | $(SolutionDir)obj\$(PlatformShortName)\$(Configuration)\ 84 | 85 | 86 | $(SolutionDir)bin\$(PlatformShortName)\$(Configuration)\ 87 | $(SolutionDir)obj\$(PlatformShortName)\$(Configuration)\ 88 | 89 | 90 | true 91 | 92 | 93 | true 94 | true 95 | x64-windows 96 | 97 | 98 | true 99 | true 100 | x64-windows 101 | 102 | 103 | true 104 | true 105 | x86-windows 106 | 107 | 108 | true 109 | true 110 | x86-windows 111 | 112 | 113 | 114 | Level4 115 | true 116 | WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 117 | true 118 | 119 | 120 | Console 121 | true 122 | ws2_32.lib;wldap32.lib;advapi32.lib;crypt32.lib;Normaliz.lib 123 | 124 | 125 | 126 | 127 | Level4 128 | true 129 | true 130 | true 131 | WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 132 | true 133 | 134 | 135 | Console 136 | true 137 | true 138 | false 139 | 
ws2_32.lib;wldap32.lib;advapi32.lib;crypt32.lib;Normaliz.lib 140 | 141 | 142 | 143 | 144 | Level4 145 | true 146 | _DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 147 | true 148 | 149 | 150 | Console 151 | true 152 | ws2_32.lib;wldap32.lib;advapi32.lib;crypt32.lib;Normaliz.lib 153 | 154 | 155 | 156 | 157 | Level4 158 | true 159 | true 160 | true 161 | NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 162 | true 163 | 164 | 165 | Console 166 | true 167 | true 168 | false 169 | ws2_32.lib;wldap32.lib;advapi32.lib;crypt32.lib;Normaliz.lib 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /winbuild/vcpkg-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "default-registry": { 3 | "kind": "git", 4 | "baseline": "43401f5835f97f48180724bdeb49a8e4a994b848", 5 | "repository": "https://github.com/microsoft/vcpkg" 6 | }, 7 | "registries": [ 8 | { 9 | "kind": "artifact", 10 | "location": "https://aka.ms/vcpkg-ce-default", 11 | "name": "microsoft" 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /winbuild/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "trurl", 3 | "version": "0.x", 4 | "dependencies": [ 5 | "curl" 6 | ] 7 | } 8 | --------------------------------------------------------------------------------