├── .dockerignore ├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Dockerfile ├── LICENSE.md ├── META.json ├── Makefile ├── README.md ├── ci └── pg_hba.conf ├── debian ├── changelog ├── compat ├── control.in ├── copyright ├── gitlab-ci.yml ├── pgversions ├── rules ├── source │ └── format ├── tests │ ├── control │ └── installcheck └── watch ├── expected └── gzip.out ├── gzip--1.0.sql ├── gzip.control ├── pg_gzip.c └── sql └── gzip.sql /.dockerignore: -------------------------------------------------------------------------------- 1 | # Nothing should be passed to the docker build 2 | * 3 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # these are the defaults 7 | [*] 8 | charset = utf-8 9 | end_of_line = lf 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | 13 | # C files want tab indentation 14 | [*.{c,h}] 15 | indent_style = tab 16 | 17 | # YAML, MD files want space indentation 18 | [*.{yml,md}] 19 | indent_style = space 20 | indent_size = 4 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # GitHub Actions for PostGIS 2 | # 3 | # Paul Ramsey 4 | 5 | name: "CI" 6 | on: [push, pull_request] 7 | 8 | jobs: 9 | linux: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | name: "CI" 14 | strategy: 15 | matrix: 16 | ci: 17 | - { PGVER: 12 } 18 | - { PGVER: 13 } 19 | - { PGVER: 14 } 20 | - { PGVER: 15 } 21 | 22 | steps: 23 | 24 | - name: 'Check Out' 25 | uses: actions/checkout@v3 26 | 27 | - name: 'Install PostgreSQL' 28 | run: | 29 | sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg-snapshot main ${{ matrix.ci.PGVER }}" > 
/etc/apt/sources.list.d/pgdg.list' 30 | curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null 31 | sudo apt-get update 32 | sudo apt-get -y install postgresql-${{ matrix.ci.PGVER }} postgresql-server-dev-${{ matrix.ci.PGVER }} 33 | 34 | - name: 'Start PostgreSQL' 35 | run: | 36 | export PGVER=${{ matrix.ci.PGVER }} 37 | export PGDATA=/var/lib/postgresql/$PGVER/main 38 | export PGETC=/etc/postgresql/$PGVER/main 39 | export PGBIN=/usr/lib/postgresql/$PGVER/bin 40 | sudo cp ./ci/pg_hba.conf $PGETC/pg_hba.conf 41 | sudo su postgres -c "$PGBIN/pg_ctl --pgdata $PGDATA start -o '-c config_file=$PGETC/postgresql.conf -p 5432'" 42 | 43 | - name: 'Build & Test' 44 | run: | 45 | export PGVER=${{ matrix.ci.PGVER }} 46 | export PGBIN=/usr/lib/postgresql/$PGVER/bin 47 | export PATH=$PGBIN:$PATH 48 | export PG_CFLAGS=-Werror 49 | export PG_CONFIG=$PGBIN/pg_config 50 | make 51 | sudo -E make PG_CONFIG=$PG_CONFIG install 52 | PGUSER=postgres make installcheck || (cat regression.diffs && /bin/false) 53 | 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.bc 3 | *.dll 4 | *.dylib 5 | *.o 6 | *.obj 7 | *.pc 8 | *.so 9 | build* 10 | debian/.debhelper/ 11 | debian/control 12 | debian/files 13 | debian/postgresql-* 14 | log/ 15 | output_iso/ 16 | regression.diffs 17 | regression.out 18 | results/ 19 | target/ 20 | tmp_check/ 21 | tmp_check_iso/ 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This argument will usually be set by the Makefile. 
Override example: 2 | # make deb-docker base=debian:latest 3 | ARG BASE_IMAGE=debian:sid 4 | FROM ${BASE_IMAGE} 5 | 6 | ARG BASE_IMAGE 7 | RUN set -eux ;\ 8 | echo "Using BASE_IMAGE=$BASE_IMAGE" ;\ 9 | DEBIAN_FRONTEND=noninteractive apt-get update -qq ;\ 10 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 11 | build-essential \ 12 | fakeroot \ 13 | pkg-config \ 14 | debhelper \ 15 | devscripts \ 16 | zlib1g-dev \ 17 | postgresql-server-dev-all 18 | 19 | WORKDIR /build/pgsql-gzip 20 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (C) 2019 Paul Ramsey 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /META.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gzip", 3 | "abstract": "gzip compress/decompress functions", 4 | "description": "compress a bytea to a compressed bytea or decompress same", 5 | "version": "1.0.0", 6 | "maintainer": [ 7 | "Paul Ramsey " 8 | ], 9 | "license": { 10 | "mit": "http://en.wikipedia.org/wiki/MIT_License" 11 | }, 12 | "prereqs": { 13 | "runtime": { 14 | "requires": { 15 | "PostgreSQL": "9.1.0" 16 | } 17 | } 18 | }, 19 | "provides": { 20 | "gzip": { 21 | "file": "gzip--1.0.sql", 22 | "docfile": "README.md", 23 | "version": "1.0.0", 24 | "abstract": "gzip functions" 25 | } 26 | }, 27 | "resources": { 28 | "homepage": "https://github.com/pramsey/pgsql-gzip/", 29 | "bugtracker": { 30 | "web": "https://github.com/pramsey/pgsql-gzip/issues" 31 | }, 32 | "repository": { 33 | "url": "https://github.com/pramsey/pgsql-gzip.git", 34 | "web": "https://github.com/pramsey/pgsql-gzip/", 35 | "type": "git" 36 | } 37 | }, 38 | "generated_by": "Paul Ramsey", 39 | "meta-spec": { 40 | "version": "1.0.0", 41 | "url": "http://pgxn.org/meta/spec.txt" 42 | }, 43 | "tags": [ 44 | "gzip", 45 | "zlib", 46 | "libz", 47 | "gunzip" 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Make sure we do not run any code when using deb-* target 2 | ifeq (,$(findstring deb-,$(MAKECMDGOALS))) 3 | 4 | # Detect pkg-config on the path 5 | PKGCONFIG := $(shell type -p pkg-config || echo NONE) 6 | 7 | ifeq ($(PKGCONFIG), NONE) 8 | # Hard code paths if necessary 9 | ZLIB_PATH = /usr 10 | ZLIB_INC = -I$(ZLIB_PATH)/include 11 | ZLIB_LIB = -L$(ZLIB_PATH)/lib -lz 12 | else 13 | # Use pkg-config to detect zlib if possible 14 | ZLIB_INC = $(shell pkg-config zlib --cflags) 15 | ZLIB_LIB = $(shell 
pkg-config zlib --libs) 16 | endif 17 | 18 | #DEBUG = 1 19 | 20 | # These should not require modification 21 | MODULE_big = gzip 22 | OBJS = pg_gzip.o 23 | EXTENSION = gzip 24 | DATA = gzip--1.0.sql 25 | REGRESS = gzip 26 | EXTRA_CLEAN = 27 | 28 | PG_CONFIG = pg_config 29 | 30 | CFLAGS += $(ZLIB_INC) 31 | LIBS += $(ZLIB_LIB) 32 | SHLIB_LINK := $(LIBS) 33 | 34 | ifdef DEBUG 35 | COPT += -O0 -g 36 | endif 37 | 38 | PGXS := $(shell $(PG_CONFIG) --pgxs) 39 | include $(PGXS) 40 | 41 | endif 42 | 43 | 44 | .PHONY: deb 45 | deb: clean 46 | pg_buildext updatecontrol 47 | dpkg-buildpackage -B 48 | 49 | # Name of the base Docker image to use. Uses debian:sid by default 50 | base ?= debian:sid 51 | 52 | .PHONY: deb-docker 53 | deb-docker: 54 | @echo "*** Using base=$(base)" 55 | docker build "--build-arg=BASE_IMAGE=$(base)" -t pgsql-gzip-$(base) . 56 | # Create a temp dir that we will remove later. Otherwise docker will create a root-owned dir. 57 | mkdir -p "$$(pwd)/target/pgsql-gzip" 58 | docker run --rm -ti -u $$(id -u $${USER}):$$(id -g $${USER}) -v "$$(pwd)/target:/build" -v "$$(pwd):/build/pgsql-gzip" pgsql-gzip-$(base) make deb 59 | rmdir "$$(pwd)/target/pgsql-gzip" || true 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/pramsey/pgsql-gzip/actions/workflows/ci.yml/badge.svg)](https://github.com/pramsey/pgsql-gzip/actions/workflows/ci.yml) 2 | 3 | # PostgreSQL gzip/gunzip Functions 4 | 5 | ## Motivation 6 | 7 | Sometimes you just need to compress your `bytea` object before you return it to the client. 8 | 9 | Sometimes you receive a compressed `bytea` from the client, and you have to uncompress it before you can work with it. 10 | 11 | This extension is for that. 12 | 13 | This extension is **not** for storage compression. 
PostgreSQL already does [tuple compression](https://www.postgresql.org/docs/current/storage-toast.html) on the fly if your tuple gets large enough, so manually pre-compressing your data using this function won't make things smaller. 14 | 15 | ## Examples 16 | 17 | > SELECT gzip('this is my this is my this is my this is my text'); 18 | 19 | gzip 20 | -------------------------------------------------------------------------- 21 | \x1f8b08000000000000132bc9c82c5600a2dc4a851282ccd48a12002e7a22ff30000000 22 | 23 | Wait, what, the compressed output is longer?!? No, it only **looks** that way, because in hex every byte is represented with two hex digits. The original string looks like this in hex: 24 | 25 | > SELECT 'this is my this is my this is my this is my text'::bytea; 26 | 27 | bytea 28 | ---------------------------------------------------------------------------------------------------- 29 | \x74686973206973206d792074686973206973206d792074686973206973206d792074686973206973206d792074657874 30 | 31 | For really long, repetitive things, compression naturally works like a charm: 32 | 33 | > SELECT gzip(repeat('this is my ', 100)); 34 | 35 | gzip 36 | ---------------------------------------------------------------------------------------------------- 37 | \x1f8b08000000000000132bc9c82c5600a2dc4a859251e628739439ca24970900d1341c5c4c040000 38 | 39 | To convert a `bytea` back into an equivalent `text` you must use the `encode()` function with the `escape` encoding. 
40 | 41 | > SELECT encode('test text'::bytea, 'escape'); 42 | encode 43 | ----------- 44 | test text 45 | 46 | > SELECT encode(gunzip(gzip('this text has been compressed and then decompressed')), 'escape'); 47 | 48 | encode 49 | ----------------------------------------------------- 50 | this text has been compressed and then decompressed 51 | 52 | 53 | ## Functions 54 | 55 | * `gzip(uncompressed BYTEA, [compression_level INTEGER])` returns `BYTEA` 56 | * `gzip(uncompressed TEXT, [compression_level INTEGER])` returns `BYTEA` 57 | * `gunzip(compressed BYTEA)` returns `BYTEA` 58 | 59 | 60 | ## Installation 61 | 62 | ### UNIX 63 | 64 | If you have PostgreSQL devel packages (`postgresql-server-dev-all`) and zlib (`zlib1g-dev`) installed, you should have `pg_config` on your path, so you should be able to just run `make`, then `make install`, then in your database `CREATE EXTENSION gzip`. 65 | 66 | If your `libz` is installed in a non-standard location, you may need to edit `ZLIB_PATH` in the `Makefile`. 67 | 68 | ### Debian/Ubuntu 69 | 70 | ```bash 71 | sudo apt-get install build-essential zlib1g-dev postgresql-server-dev-all pkg-config 72 | make 73 | make install 74 | psql ... -c "CREATE EXTENSION gzip" 75 | ``` 76 | 77 | To build the DEB package you will also need `fakeroot` and `devscripts`. See [Dockerfile](./Dockerfile) for a full list. 78 | 79 | ```bash 80 | sudo apt-get install build-essential zlib1g-dev postgresql-server-dev-all pkg-config fakeroot devscripts 81 | make 82 | make deb 83 | dpkg -i <package>.deb 84 | ``` 85 | 86 | Running `make deb` produces the package, which can then be installed with `dpkg -i <package>.deb`. 87 | 88 | 89 | #### deb package build using Docker 90 | 91 | The Makefile has a target for building the DEB package inside a Docker image, and the base image is configurable. This approach only requires `make` and `docker` to be available on the host. 
92 | 93 | ```bash 94 | make deb-docker # Uses debian:sid 95 | ``` 96 | 97 | To build the package on a different base image, pass it with the `base` parameter: 98 | 99 | ```bash 100 | make deb-docker base=debian:latest 101 | ``` 102 | -------------------------------------------------------------------------------- /ci/pg_hba.conf: -------------------------------------------------------------------------------- 1 | # TYPE DATABASE USER ADDRESS METHOD 2 | 3 | # "local" is for Unix domain socket connections only 4 | local all postgres trust 5 | # IPv4 local connections: 6 | host all postgres 127.0.0.1/32 trust 7 | # IPv6 local connections: 8 | host all postgres ::1/128 trust 9 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | pgsql-gzip (1.0.1) unstable; urgency=medium 2 | * DEB package cleanup, docker build improvements, readme updates [Yuri Astrakhan ] 3 | -- Yuri Astrakhan Tue, 21 Apr 2020 22:21:00 +0000 4 | 5 | pgsql-gzip (1.0.0) unstable; urgency=medium 6 | * Initial release of the DEB package 7 | -- Oleksandr Kylymnychenko Fri, 20 Mar 2020 13:18:29 +0000 8 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /debian/control.in: -------------------------------------------------------------------------------- 1 | Source: pgsql-gzip 2 | Section: database 3 | Priority: optional 4 | Maintainer: Paul Ramsey 5 | Uploaders: Oleksandr Kylymnychenko , 6 | Build-Depends: debhelper (>= 9), postgresql-server-dev-all (>= 153~) 7 | Standards-Version: 4.5.0 8 | Vcs-Browser: https://github.com/pramsey/pgsql-gzip 9 | Vcs-Git: https://github.com/pramsey/pgsql-gzip.git 10 | 11 | Package: postgresql-PGVERSION-gzip 12 | Architecture: any 13 | 
Depends: postgresql-PGVERSION, ${misc:Depends}, ${shlibs:Depends} 14 | Description: PostgreSQL extension code 15 | Sometimes you just need to compress your bytea object before you return it to the client. 16 | Sometimes you receive a compressed bytea from the client, and you have to uncompress it before you can work with it. 17 | This extension is for that. 18 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: pgsql-gzip 3 | #Source: @URL@ 4 | 5 | Files: * 6 | Copyright: Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 7 | Portions Copyright (c) 1994, The Regents of the University of California 8 | License: PostgreSQL 9 | Permission to use, copy, modify, and distribute this software and its 10 | documentation for any purpose, without fee, and without a written agreement 11 | is hereby granted, provided that the above copyright notice and this 12 | paragraph and the following two paragraphs appear in all copies. 13 | . 14 | IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR 15 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 16 | LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS 17 | DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE 18 | POSSIBILITY OF SUCH DAMAGE. 19 | . 20 | THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 21 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 22 | AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 | ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO 24 | PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
25 | -------------------------------------------------------------------------------- /debian/gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | include: https://salsa.debian.org/postgresql/postgresql-common/raw/master/gitlab/gitlab-ci.yml 2 | -------------------------------------------------------------------------------- /debian/pgversions: -------------------------------------------------------------------------------- 1 | all 2 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # Makefile only uses -Wno-maybe-uninitialized (needed on Ubuntu ppc64el) if CC=gcc 4 | export CC = gcc 5 | 6 | include /usr/share/postgresql-common/pgxs_debian_control.mk 7 | 8 | override_dh_auto_build: 9 | 10 | override_dh_auto_test: 11 | 12 | override_dh_auto_install: 13 | +pg_buildext loop postgresql-%v-gzip 14 | 15 | override_dh_installdocs: 16 | dh_installdocs --all README.* 17 | 18 | %: 19 | dh $@ 20 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /debian/tests/control: -------------------------------------------------------------------------------- 1 | Depends: @, postgresql-server-dev-all 2 | Tests: installcheck 3 | Restrictions: allow-stderr 4 | -------------------------------------------------------------------------------- /debian/tests/installcheck: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pg_buildext installcheck 3 | -------------------------------------------------------------------------------- /debian/watch: 
-------------------------------------------------------------------------------- 1 | #version=4 2 | #https://github.com/pramsey/pgsql-gzip/releases .*/v(.*).tar.gz 3 | -------------------------------------------------------------------------------- /expected/gzip.out: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION gzip; 2 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text'::bytea)), 'escape') AS gzip_bytea; 3 | gzip_bytea 4 | -------------------------------------------------- 5 | this is my this is my this is my this is my text 6 | (1 row) 7 | 8 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text'::text)), 'escape') AS gzip_text; 9 | gzip_text 10 | -------------------------------------------------- 11 | this is my this is my this is my this is my text 12 | (1 row) 13 | 14 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text')), 'escape') AS gzip_roundtrip; 15 | gzip_roundtrip 16 | -------------------------------------------------- 17 | this is my this is my this is my this is my text 18 | (1 row) 19 | 20 | SELECT gunzip(gzip('\x00000000000000000000'::bytea)) AS gzip_roundtrip_zero; 21 | gzip_roundtrip_zero 22 | ------------------------ 23 | \x00000000000000000000 24 | (1 row) 25 | 26 | WITH str AS ( 27 | SELECT repeat('this is my ', 10000) AS str 28 | ) 29 | SELECT encode(gunzip(gzip(str)), 'escape') = str AS gzip_long FROM str; 30 | gzip_long 31 | ----------- 32 | t 33 | (1 row) 34 | 35 | WITH strs AS ( 36 | SELECT repeat('pack my box with five dozen liquor jugs ', generate_series(0, 1000)) AS str 37 | ) 38 | SELECT sum((str = encode(gunzip(gzip(str)), 'escape'))::integer) AS gzip_sizes 39 | FROM strs; 40 | gzip_sizes 41 | ------------ 42 | 1001 43 | (1 row) 44 | 45 | SELECT gzip(NULL) AS gzip_null; 46 | gzip_null 47 | ----------- 48 | 49 | (1 row) 50 | 51 | SELECT gunzip(gzip('')) AS gzip_empty; 52 | gzip_empty 53 | ------------ 54 
| \x 55 | (1 row) 56 | 57 | SELECT gunzip(gzip('\x00'::bytea)) AS gzip_zero; 58 | gzip_zero 59 | ----------- 60 | \x00 61 | (1 row) 62 | 63 | SELECT gunzip(NULL) AS gunzip_null; 64 | gunzip_null 65 | ------------- 66 | 67 | (1 row) 68 | 69 | SELECT gunzip('') AS gunzip_empty; 70 | ERROR: decompression error: 71 | SELECT gunzip('\x00'::bytea) AS gunzip_zero; 72 | ERROR: decompression error: 73 | SELECT gunzip('\x0000'::bytea) AS gunzip_zerozero; 74 | ERROR: decompression error: unknown compression method 75 | SELECT gunzip('not a gzip bytea'::bytea) AS gunzip_invalid; 76 | ERROR: decompression error: incorrect header check 77 | -------------------------------------------------------------------------------- /gzip--1.0.sql: -------------------------------------------------------------------------------- 1 | 2 | CREATE OR REPLACE FUNCTION gzip(uncompressed bytea, compression_level integer default -1) 3 | RETURNS bytea 4 | AS 'MODULE_PATHNAME', 'pg_gzip' 5 | LANGUAGE 'c' 6 | IMMUTABLE STRICT 7 | PARALLEL SAFE; 8 | 9 | CREATE OR REPLACE FUNCTION gzip(uncompressed text, compression_level integer default -1) 10 | RETURNS bytea 11 | AS 'MODULE_PATHNAME', 'pg_gzip' 12 | LANGUAGE 'c' 13 | IMMUTABLE STRICT 14 | PARALLEL SAFE; 15 | 16 | CREATE OR REPLACE FUNCTION gunzip(compressed bytea) 17 | RETURNS bytea 18 | AS 'MODULE_PATHNAME', 'pg_gunzip' 19 | LANGUAGE 'c' 20 | IMMUTABLE STRICT 21 | PARALLEL SAFE; 22 | -------------------------------------------------------------------------------- /gzip.control: -------------------------------------------------------------------------------- 1 | default_version = '1.0' 2 | module_pathname = '$libdir/gzip' 3 | relocatable = true 4 | comment = 'gzip and gunzip functions.' 
5 | -------------------------------------------------------------------------------- /pg_gzip.c: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | * 3 | * Project: PgSQL gzip/gunzip 4 | * Purpose: Main file. 5 | * 6 | *********************************************************************** 7 | * Copyright 2019 Paul Ramsey 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a 10 | * copy of this software and associated documentation files (the 11 | * "Software"), to deal in the Software without restriction, including 12 | * without limitation the rights to use, copy, modify, merge, publish, 13 | * distribute, sublicense, and/or sell copies of the Software, and to 14 | * permit persons to whom the Software is furnished to do so, subject to 15 | * the following conditions: 16 | * 17 | * The above copyright notice and this permission notice shall be included 18 | * in all copies or substantial portions of the Software. 19 | * 20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 21 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
27 | * 28 | ***********************************************************************/ 29 | 30 | /* Constants */ 31 | #define ZCHUNK 262144 /* 256K */ 32 | 33 | /* System */ 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | /* PostgreSQL */ 40 | #include 41 | #include 42 | #include 43 | 44 | /* LibZ */ 45 | #include 46 | 47 | /* Set up PgSQL */ 48 | PG_MODULE_MAGIC; 49 | 50 | /** 51 | * Wrap palloc in a signature that matches what zalloc expects 52 | */ 53 | static void* 54 | pg_gzip_alloc(void* opaque, unsigned int items, unsigned int itemsize) 55 | { 56 | return palloc(items * itemsize); 57 | } 58 | 59 | /** 60 | * Wrap pfree in a signature that matches what zfree expects 61 | */ 62 | static void 63 | pg_gzip_free(void* opaque, void* ptr) 64 | { 65 | pfree(ptr); 66 | return; 67 | } 68 | 69 | /* Zlib defines */ 70 | #define WINDOW_BITS 15 71 | #define ENABLE_ZLIB_GZIP 32 72 | #define GZIP_ENCODING 16 73 | 74 | /** 75 | * gzip an uncompressed bytea 76 | */ 77 | Datum pg_gzip(PG_FUNCTION_ARGS); 78 | PG_FUNCTION_INFO_V1(pg_gzip); 79 | Datum pg_gzip(PG_FUNCTION_ARGS) 80 | { 81 | StringInfoData si; 82 | int zs_rv; 83 | z_stream zs; 84 | uint8 out[ZCHUNK]; 85 | bytea* compressed; 86 | 87 | bytea* uncompressed = PG_GETARG_BYTEA_P(0); 88 | int32 compression_level = PG_GETARG_INT32(1); 89 | uint8* in = (uint8*)(VARDATA(uncompressed)); 90 | size_t in_size = VARSIZE_ANY_EXHDR(uncompressed); 91 | 92 | /* compression level -1 is default best effort (approx 6) */ 93 | /* level 0 is no compression, 1-9 are lowest to highest */ 94 | if (compression_level < -1 || compression_level > 9) 95 | elog(ERROR, "invalid compression level: %d", compression_level); 96 | 97 | /* Prepare the z_stream state */ 98 | zs.zalloc = pg_gzip_alloc; 99 | zs.zfree = pg_gzip_free; 100 | zs.opaque = Z_NULL; 101 | zs.next_in = in; 102 | zs.avail_in = in_size; 103 | 104 | if (deflateInit2(&zs, 105 | compression_level, Z_DEFLATED, 106 | WINDOW_BITS|GZIP_ENCODING, /* Magic to initialize in gzip 
mode */ 107 | 8, Z_DEFAULT_STRATEGY) != Z_OK) 108 | elog(ERROR, "failed to deflateInit2"); 109 | 110 | zs.next_out = out; 111 | zs.avail_out = ZCHUNK; 112 | 113 | /* Compress until deflate stops returning output */ 114 | initStringInfo(&si); 115 | zs_rv = Z_OK; 116 | while (zs_rv == Z_OK) 117 | { 118 | if (zs.avail_out == 0) 119 | { 120 | /* build up output in stringinfo */ 121 | appendBinaryStringInfo(&si, (char*)out, ZCHUNK); 122 | zs.avail_out = ZCHUNK; 123 | zs.next_out = out; 124 | } 125 | zs_rv = deflate(&zs, Z_FINISH); 126 | } 127 | if (zs_rv != Z_STREAM_END) 128 | elog(ERROR, "compression error: %s", zs.msg ? zs.msg : ""); 129 | 130 | appendBinaryStringInfo(&si, (char*)out, ZCHUNK - zs.avail_out); 131 | 132 | /* Construct output bytea */ 133 | compressed = palloc(si.len + VARHDRSZ); 134 | memcpy(VARDATA(compressed), si.data, si.len); 135 | SET_VARSIZE(compressed, si.len + VARHDRSZ); 136 | PG_FREE_IF_COPY(uncompressed, 0); 137 | PG_RETURN_POINTER(compressed); 138 | } 139 | 140 | 141 | Datum pg_gunzip(PG_FUNCTION_ARGS); 142 | PG_FUNCTION_INFO_V1(pg_gunzip); 143 | Datum pg_gunzip(PG_FUNCTION_ARGS) 144 | { 145 | StringInfoData si; 146 | int zs_rv; 147 | z_stream zs; 148 | uint8 out[ZCHUNK]; 149 | bytea* uncompressed; 150 | 151 | bytea* compressed = PG_GETARG_BYTEA_P(0); 152 | uint8* in = (uint8*)(VARDATA(compressed)); 153 | size_t in_size = VARSIZE_ANY_EXHDR(compressed); 154 | 155 | /* Prepare the z_stream state */ 156 | zs.zalloc = pg_gzip_alloc; 157 | zs.zfree = pg_gzip_free; 158 | zs.opaque = Z_NULL; 159 | /* Magic to initialize in gzip mode */ 160 | if (inflateInit2(&zs, WINDOW_BITS|ENABLE_ZLIB_GZIP) != Z_OK) 161 | elog(ERROR, "failed to inflateInit"); 162 | 163 | /* Point z_stream to input and output buffers */ 164 | zs.next_in = in; 165 | zs.avail_in = in_size; 166 | zs.next_out = out; 167 | zs.avail_out = ZCHUNK; 168 | 169 | /* Decompress until inflate stops returning output */ 170 | initStringInfo(&si); 171 | zs_rv = Z_OK; 172 | while (zs_rv == Z_OK) 
173 | { 174 | if (zs.avail_out == 0) 175 | { 176 | /* build up output in stringinfo */ 177 | appendBinaryStringInfo(&si, (char*)out, ZCHUNK); 178 | zs.avail_out = ZCHUNK; 179 | zs.next_out = out; 180 | } 181 | zs_rv = inflate(&zs, Z_SYNC_FLUSH); 182 | } 183 | 184 | if (zs_rv != Z_STREAM_END) 185 | elog(ERROR, "decompression error: %s", zs.msg ? zs.msg : ""); 186 | 187 | appendBinaryStringInfo(&si, (char*)out, ZCHUNK - zs.avail_out); 188 | 189 | /* Construct output bytea */ 190 | uncompressed = palloc(si.len + VARHDRSZ); 191 | memcpy(VARDATA(uncompressed), si.data, si.len); 192 | SET_VARSIZE(uncompressed, si.len + VARHDRSZ); 193 | PG_FREE_IF_COPY(compressed, 0); 194 | PG_RETURN_POINTER(uncompressed); 195 | } 196 | -------------------------------------------------------------------------------- /sql/gzip.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION gzip; 2 | 3 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text'::bytea)), 'escape') AS gzip_bytea; 4 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text'::text)), 'escape') AS gzip_text; 5 | SELECT encode(gunzip(gzip('this is my this is my this is my this is my text')), 'escape') AS gzip_roundtrip; 6 | SELECT gunzip(gzip('\x00000000000000000000'::bytea)) AS gzip_roundtrip_zero; 7 | 8 | WITH str AS ( 9 | SELECT repeat('this is my ', 10000) AS str 10 | ) 11 | SELECT encode(gunzip(gzip(str)), 'escape') = str AS gzip_long FROM str; 12 | 13 | WITH strs AS ( 14 | SELECT repeat('pack my box with five dozen liquor jugs ', generate_series(0, 1000)) AS str 15 | ) 16 | SELECT sum((str = encode(gunzip(gzip(str)), 'escape'))::integer) AS gzip_sizes 17 | FROM strs; 18 | 19 | SELECT gzip(NULL) AS gzip_null; 20 | SELECT gunzip(gzip('')) AS gzip_empty; 21 | SELECT gunzip(gzip('\x00'::bytea)) AS gzip_zero; 22 | 23 | SELECT gunzip(NULL) AS gunzip_null; 24 | SELECT gunzip('') AS gunzip_empty; 25 | SELECT gunzip('\x00'::bytea) AS 
gunzip_zero; 26 | SELECT gunzip('\x0000'::bytea) AS gunzip_zerozero; 27 | SELECT gunzip('not a gzip bytea'::bytea) AS gunzip_invalid; 28 | 29 | --------------------------------------------------------------------------------