├── Makefile
├── cmd
    └── unipdf
    │   └── main.go
├── LICENSE
├── LICENSE.md
├── internal
    └── cli
    │   ├── form.go
    │   ├── extract.go
    │   ├── version.go
    │   ├── merge.go
    │   ├── license_info.go
    │   ├── decrypt.go
    │   ├── search.go
    │   ├── split.go
    │   ├── passwd.go
    │   ├── extract_text.go
    │   ├── organize.go
    │   ├── root.go
    │   ├── replace.go
    │   ├── info.go
    │   ├── form_export.go
    │   ├── const.go
    │   ├── grayscale.go
    │   ├── explode.go
    │   ├── watermark.go
    │   ├── rotate.go
    │   ├── extract_images.go
    │   ├── encrypt.go
    │   ├── render.go
    │   ├── form_flatten.go
    │   ├── form_fdfmerge.go
    │   ├── form_fill.go
    │   ├── optimize.go
    │   └── utils.go
├── .gitignore
├── pkg
    └── pdf
    │   ├── version.go
    │   ├── decrypt.go
    │   ├── split.go
    │   ├── passwd.go
    │   ├── search.go
    │   ├── encrypt.go
    │   ├── pdf.go
    │   ├── explode.go
    │   ├── watermark.go
    │   ├── info.go
    │   ├── rotate.go
    │   ├── optimize.go
    │   ├── render.go
    │   ├── form.go
    │   ├── organize.go
    │   ├── extract.go
    │   ├── utils.go
    │   ├── merge.go
    │   ├── replace.go
    │   └── grayscale.go
├── .goreleaser.yml
├── .golangci.yml
├── .github
    └── workflows
    │   └── build.yml
├── go.mod
├── go.sum
└── README.md


/Makefile:
--------------------------------------------------------------------------------
 1 | # Every target below is a command, not a file to be produced. Declaring them
 2 | # phony keeps make from skipping a target when a same-named path (for example
 3 | # a ./build directory) exists in the working tree.
 4 | .PHONY: all build build-all release clean
 5 | 
 6 | # Default target: build the CLI.
 7 | all: build
 8 | 
 9 | # Compile the unipdf binary into ./bin.
10 | build:
11 | 	GO111MODULE=on go build -o ./bin/unipdf ./cmd/unipdf/main.go
12 | 
13 | # Produce local snapshot artifacts with goreleaser without publishing them.
14 | build-all:
15 | 	goreleaser --snapshot --skip-publish --rm-dist
16 | 
17 | # Run a goreleaser release.
18 | release:
19 | 	goreleaser release
20 | 
21 | # Remove the build output directory.
22 | clean:
23 | 	rm -rf ./bin
24 | 


--------------------------------------------------------------------------------
/cmd/unipdf/main.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package main
 7 | 
 8 | import (
 9 | 	"github.com/unidoc/unipdf-cli/internal/cli"
10 | )
11 | 
12 | func main() { // Program entry point of the unipdf command.
13 | 	cli.Execute() // All command handling is delegated to the internal cli package.
14 | }
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ## Licensing Information
 2 | 
 3 | This software package is a commercial product and requires a license
 4 | code to operate.
 5 | 
 6 | The use of this software package is governed by the end-user license agreement
 7 | (EULA) available at: https://unidoc.io/eula/
 8 | 
 9 | To get a free metered code to evaluate the software, please visit
10 | https://unidoc.io/
11 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ## Licensing Information
 2 | 
 3 | This software package is a commercial product and requires a license
 4 | code to operate.
 5 | 
 6 | The use of this software package is governed by the end-user license agreement
 7 | (EULA) available at: https://unidoc.io/eula/
 8 | 
 9 | To get a free metered code to evaluate the software, please visit
10 | https://unidoc.io/
11 | 


--------------------------------------------------------------------------------
/internal/cli/form.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"github.com/spf13/cobra"
10 | )
11 | 
12 | // formCmdDesc is the long help text of the form command.
13 | const formCmdDesc = `PDF form operations.`
14 | 
15 | // formCmd is the "form" command; it defines no Run handler of its own.
16 | var formCmd = &cobra.Command{
17 | 	Long:  formCmdDesc,
18 | 	Short: "PDF form operations",
19 | 	Use:   "form [FLAG]... COMMAND",
20 | }
21 | 
22 | // Attach the form command to the root command.
23 | func init() { rootCmd.AddCommand(formCmd) }
24 | 


--------------------------------------------------------------------------------
/internal/cli/extract.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"github.com/spf13/cobra"
10 | )
11 | 
12 | // extractCmdDesc is the long help text of the extract command.
13 | const extractCmdDesc = `Extract PDF resources.`
14 | 
15 | // extractCmd is the "extract" command; it defines no Run handler of its own.
16 | var extractCmd = &cobra.Command{
17 | 	Long:  extractCmdDesc,
18 | 	Short: "Extract PDF resources",
19 | 	Use:   "extract [FLAG]... COMMAND",
20 | }
21 | 
22 | // Attach the extract command to the root command.
23 | func init() { rootCmd.AddCommand(extractCmd) }
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | cmd/unipdf/unipdf
 2 | *.pdf
 3 | *.fdf
 4 | *.zip
 5 | *.gox
 6 | .idea
 7 | *.mdb
 8 | *.userprefs
 9 | *.pidb
10 | *.suo
11 | *.out
12 | *.pyc
13 | *.wixobj
14 | *.msi
15 | *.wixpdb
16 | build
17 | dist
18 | setuptools-*
19 | .DS_Store
20 | *.so
21 | nohup.out
22 | *.orig
23 | *.rej
24 | *~
25 | *.o
26 | *.pyo
27 | tests/*.err
28 | *.swp
29 | *.swo
30 | store/*
31 | *.log
32 | *.egg-info
33 | dist/
34 | doc/_build/
35 | distribute-*
36 | pip-log.txt
37 | .coverage
38 | data/
39 | *.egg
40 | .tox
41 | out.txt
42 | junit*.xml
43 | .ropeproject
44 | .cache
45 | tmp/
46 | bin/
47 | *.sublime-project
48 | *.sublime-workspace
49 | *.pprof
50 | gin-bin
51 | pkg/buildinfo/buildinfo.go
52 | temp/
53 | buildinfo.json
54 | pdf/font.go
55 | fuzz.go
56 | 


--------------------------------------------------------------------------------
/pkg/pdf/version.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	unicommon "github.com/unidoc/unipdf/v4/common"
10 | 	unilicense "github.com/unidoc/unipdf/v4/common/license"
11 | )
12 | 
13 | // VersionInfo holds the version of the UniDoc library together with a
14 | // textual description of the license key currently loaded.
15 | type VersionInfo struct {
16 | 	Lib     string // library version string
17 | 	License string // string form of the license key; empty when none is loaded
18 | }
19 | 
20 | // Version returns version and license information about the Unidoc library.
21 | func Version() VersionInfo {
22 | 	var license string
23 | 	if key := unilicense.GetLicenseKey(); key != nil {
24 | 		license = key.ToString()
25 | 	}
26 | 
27 | 	return VersionInfo{
28 | 		Lib:     unicommon.Version,
29 | 		License: license,
30 | 	}
31 | }
32 | 


--------------------------------------------------------------------------------
/pkg/pdf/decrypt.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import unipdf "github.com/unidoc/unipdf/v4/model"
 9 | 
10 | // Decrypt opens the PDF file at inputPath with the given password, copies
11 | // its contents into a fresh writer and stores the result at outputPath. When
12 | // both paths are equal, the safe flag of writePDF is set.
13 | func Decrypt(inputPath, outputPath, password string) error {
14 | 	// Open the source document; only the reader is needed here.
15 | 	reader, _, _, _, err := readPDF(inputPath, password)
16 | 	if err != nil {
17 | 		return err
18 | 	}
19 | 
20 | 	// Transfer the document to a new writer (nil page list).
21 | 	writer := unipdf.NewPdfWriter()
22 | 	err = readerToWriter(reader, &writer, nil)
23 | 	if err != nil {
24 | 		return err
25 | 	}
26 | 
27 | 	// Overwriting the input file in place sets the safe flag.
28 | 	return writePDF(outputPath, &writer, inputPath == outputPath)
29 | }
30 | 


--------------------------------------------------------------------------------
/internal/cli/version.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"fmt"
10 | 
11 | 	"github.com/spf13/cobra"
12 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
13 | )
14 | 
15 | var versionCmdExample = fmt.Sprintf("%s\n",
16 | 	fmt.Sprintf("%s version", appName),
17 | )
18 | 
19 | // versionCmd is the "version" command; it prints CLI, library and license info.
20 | var versionCmd = &cobra.Command{
21 | 	Use:                   "version",
22 | 	Example:               versionCmdExample,
23 | 	Short:                 "Output version information and exit",
24 | 	DisableFlagsInUseLine: true,
25 | 	Run: func(_ *cobra.Command, _ []string) {
26 | 		info := pdf.Version()
27 | 
28 | 		fmt.Printf("%s CLI v%s\n", appName, appVersion)
29 | 		fmt.Printf("Powered by unipdf v%s\n", info.Lib)
30 | 		fmt.Printf("\nLicense info\n%s", info.License)
31 | 	},
32 | }
33 | 
34 | func init() { // Attach the version command to the root command.
35 | 	rootCmd.AddCommand(versionCmd)
36 | }
37 | 


--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
 1 | project_name: unipdf-cli
 2 | 
 3 | release:
 4 |   github:
 5 |     owner: unidoc
 6 |     name: unipdf-cli
 7 | before:
 8 |   hooks:
 9 |     - go mod download
10 | builds:
11 | - binary: unipdf
12 |   goos:
13 |   - darwin
14 |   - windows
15 |   - linux
16 |   goarch:
17 |   - amd64
18 |   - 386
19 |   env:
20 |   - CGO_ENABLED=0
21 |   - GO111MODULE=on
22 |   main: ./cmd/unipdf/main.go
23 | archives:
24 |   - id: default
25 |     format: tar.gz
26 |     wrap_in_directory: true
27 |     format_overrides:
28 |     - goos: windows
29 |       format: zip
30 |     name_template: '{{ .Binary }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}'
31 |     files:
32 |     - LICENSE
33 |     - README.md
34 | dist: bin
35 | snapshot:
36 |   name_template: SNAPSHOT-{{ .Commit }}
37 | checksum:
38 |   name_template: '{{ .ProjectName }}-{{ .Version }}-checksums.txt'
39 | changelog:
40 |   sort: asc
41 |   filters:
42 |     exclude:
43 |     - '^docs:'
44 |     - '^test:'
45 |     - '^dev:'
46 |     - 'README'
47 |     - Merge pull request
48 |     - Merge branch
49 | 


--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
 1 | linters:
 2 |   enable-all: false
 3 |   enable:
 4 |     - staticcheck
 5 |     - govet
 6 |     - gosimple
 7 |     - nakedret
 8 |     - typecheck
 9 |     - gosec
10 |     - revive
11 |     - unconvert
12 |     - misspell
13 |   disable:
14 |     - ineffassign
15 |     - errcheck
16 |     - unused
17 |     - lll
18 |     - gofmt
19 |     - goconst
20 | run:
21 |   concurrency: 4
22 |   timeout: 3m
23 |   # Keep exit code 0 until we fix all this, i.e. get the baseline set.
24 |   issues-exit-code: 0
25 |   # Test files are analyzed; note that issues.exclude-files below still filters out *_test.go.
26 |   tests: true
27 | 
28 | issues:
29 |   exclude-dirs:
30 |     - testdata
31 | 
32 |   exclude-files:
33 |     - ".*_test.go$"
34 | 
35 |   exclude-rules:
36 |     # We don't control the ciphers as we are just implementing standards.
37 |     - linters:
38 |       - gosec
39 |       text: "weak cryptographic primitive"
40 | 
41 | # output configuration options
42 | output:
43 |   format: colored-line-number
44 |   print-issued-lines: true
45 |   print-linter-name: true
46 |   uniq-by-line: true
47 |   path-prefix: ""
48 | 


--------------------------------------------------------------------------------
/pkg/pdf/split.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	unipdf "github.com/unidoc/unipdf/v4/model"
10 | )
11 | 
12 | // Split copies the pages listed in the pages parameter from the PDF file at
13 | // inputPath into a new document and saves it at outputPath. The password
14 | // parameter is used for opening encrypted input files.
15 | // A nil or empty pages slice selects every page of the input file.
16 | func Split(inputPath, outputPath, password string, pages []int) error {
17 | 	// Open the source document; only the reader is needed here.
18 | 	reader, _, _, _, err := readPDF(inputPath, password)
19 | 	if err != nil {
20 | 		return err
21 | 	}
22 | 
23 | 	// Transfer the requested pages to a new writer.
24 | 	writer := unipdf.NewPdfWriter()
25 | 	err = readerToWriter(reader, &writer, pages)
26 | 	if err != nil {
27 | 		return err
28 | 	}
29 | 
30 | 	// The safe flag of writePDF is set when the input file is being
31 | 	// overwritten in place.
32 | 	overwrite := inputPath == outputPath
33 | 	return writePDF(outputPath, &writer, overwrite)
34 | }
35 | 


--------------------------------------------------------------------------------
/pkg/pdf/passwd.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	unipdf "github.com/unidoc/unipdf/v4/model"
10 | )
11 | 
12 | // Passwd replaces the owner and user passwords of the PDF file at inputPath.
13 | // The file is opened with ownerPassword, re-encrypted with the new passwords
14 | // using the permissions reported by readPDF, and saved at outputPath.
15 | func Passwd(inputPath, outputPath, ownerPassword, newOwnerPassword, newUserPassword string) error {
16 | 	// Open the source document, keeping its permissions for re-encryption.
17 | 	reader, _, _, permissions, err := readPDF(inputPath, ownerPassword)
18 | 	if err != nil {
19 | 		return err
20 | 	}
21 | 
22 | 	// Transfer the document to a new writer (nil page list).
23 | 	writer := unipdf.NewPdfWriter()
24 | 	err = readerToWriter(reader, &writer, nil)
25 | 	if err != nil {
26 | 		return err
27 | 	}
28 | 
29 | 	// Protect the output with the new passwords.
30 | 	userPass, ownerPass := []byte(newUserPassword), []byte(newOwnerPassword)
31 | 	opts := &unipdf.EncryptOptions{Permissions: permissions}
32 | 	if err = writer.Encrypt(userPass, ownerPass, opts); err != nil {
33 | 		return err
34 | 	}
35 | 
36 | 	// The safe flag of writePDF is set when the input file is being
37 | 	// overwritten in place.
38 | 	overwrite := inputPath == outputPath
39 | 
40 | 	return writePDF(outputPath, &writer, overwrite)
41 | }
42 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   lint:
11 |     name: golangci-lint
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Check out code into the Go module directory
15 |         uses: actions/checkout@v2
16 |       - name: Get golangci-lint
17 |         run: |
18 |           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.64.6
19 |       - name: Run golangci-lint
20 |         run: |
21 |           ./bin/golangci-lint run --out-format=github-actions --issues-exit-code=1
22 | 
23 |   build:
24 |     name: Build Go ${{ matrix.go }}
25 |     runs-on: ubuntu-latest
26 |     strategy:
27 |       matrix:
28 |         go: ['1.25', '1.24', '1.23']
29 |     env:
30 |       CGO_ENABLED: 0
31 |     steps:
32 |     - name: Check out code into the Go module directory
33 |       uses: actions/checkout@v2
34 | 
35 |     - name: Setup go
36 |       uses: actions/setup-go@v1
37 |       with:
38 |         go-version: ${{ matrix.go }}
39 | 
40 |     - name: Get dependencies
41 |       run: |
42 |         go get -v -t -d ./...
43 | 
44 |     - name: go vet
45 |       run: go vet ./...
46 | 
47 |     - name: Test
48 |       run: go test -tags=test -count=1 ./...
49 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/unidoc/unipdf-cli
 2 | 
 3 | go 1.23.0
 4 | 
 5 | require (
 6 | 	github.com/spf13/cobra v1.9.1
 7 | 	github.com/unidoc/unipdf/v4 v4.3.0
 8 | )
 9 | 
10 | require (
11 | 	github.com/adrg/strutil v0.3.1 // indirect
12 | 	github.com/adrg/sysfont v0.1.2 // indirect
13 | 	github.com/adrg/xdg v0.5.3 // indirect
14 | 	github.com/davecgh/go-spew v1.1.1 // indirect
15 | 	github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46 // indirect
16 | 	github.com/h2non/filetype v1.1.3 // indirect
17 | 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
18 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
19 | 	github.com/sirupsen/logrus v1.9.3 // indirect
20 | 	github.com/spf13/pflag v1.0.6 // indirect
21 | 	github.com/stretchr/testify v1.10.0 // indirect
22 | 	github.com/unidoc/freetype v0.2.3 // indirect
23 | 	github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11 // indirect
24 | 	github.com/unidoc/pkcs7 v0.3.0 // indirect
25 | 	github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a // indirect
26 | 	github.com/unidoc/unichart v0.5.1 // indirect
27 | 	github.com/unidoc/unitype v0.5.1 // indirect
28 | 	golang.org/x/crypto v0.41.0 // indirect
29 | 	golang.org/x/image v0.30.0 // indirect
30 | 	golang.org/x/net v0.43.0 // indirect
31 | 	golang.org/x/sys v0.35.0 // indirect
32 | 	golang.org/x/text v0.28.0 // indirect
33 | 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
34 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
35 | )
36 | 


--------------------------------------------------------------------------------
/internal/cli/merge.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | // mergeCmdDesc is the long help text of the merge command.
17 | const mergeCmdDesc = `Merge the provided input files and save the result to the
18 | specified output file.`
19 | 
20 | var mergeCmdExample = fmt.Sprintf(
21 | 	"%s merge output_file.pdf input_file1.pdf input_file2.pdf\n", appName)
22 | 
23 | // mergeCmd is the "merge" command; it takes the output path followed by the input paths.
24 | var mergeCmd = &cobra.Command{
25 | 	Use:                   "merge [FLAG]... OUTPUT_FILE INPUT_FILE...",
26 | 	Example:               mergeCmdExample,
27 | 	Long:                  mergeCmdDesc,
28 | 	Short:                 "Merge PDF files",
29 | 	DisableFlagsInUseLine: true,
30 | 	Args: func(_ *cobra.Command, args []string) error {
31 | 		if len(args) >= 3 {
32 | 			return nil
33 | 		}
34 | 		return errors.New("must provide the output file and at least two input files")
35 | 	},
36 | 	Run: func(_ *cobra.Command, args []string) {
37 | 		outputPath, inputPaths := args[0], args[1:]
38 | 
39 | 		err := pdf.Merge(inputPaths, outputPath)
40 | 		if err != nil {
41 | 			printErr("Could not merge the input files: %s\n", err)
42 | 		}
43 | 
44 | 		fmt.Printf("Successfully merged input files\n")
45 | 		fmt.Printf("Output file saved to %s\n", outputPath)
46 | 	},
47 | }
48 | 
49 | func init() {
50 | 	// Attach the merge command to the root command.
51 | 	rootCmd.AddCommand(mergeCmd)
52 | }
53 | 


--------------------------------------------------------------------------------
/internal/cli/license_info.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"fmt"
10 | 	"os"
11 | 	"strings"
12 | 
13 | 	"github.com/spf13/cobra"
14 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
15 | )
16 | 
17 | // licenseInfoCmdDesc is the long help text of the license_info command.
18 | const licenseInfoCmdDesc = `Outputs information about the license key.`
19 | 
20 | var licenseInfoCmdExample = strings.Join(
21 | 	[]string{fmt.Sprintf("%s license_info", appName)}, "\n")
22 | 
23 | // licenseInfoCmd is the "license_info" command. It prints the loaded license
24 | // key and, when a metered API key is configured, the metered state as well.
25 | var licenseInfoCmd = &cobra.Command{
26 | 	Use:                   "license_info",
27 | 	Example:               licenseInfoCmdExample,
28 | 	Long:                  licenseInfoCmdDesc,
29 | 	Short:                 "Output license key information",
30 | 	DisableFlagsInUseLine: true,
31 | 	Run: func(_ *cobra.Command, _ []string) {
32 | 		apiKey := os.Getenv("UNIDOC_LICENSE_API_KEY")
33 | 		licenseFile := os.Getenv("UNIDOC_LICENSE_FILE")
34 | 
35 | 		switch {
36 | 		case apiKey != "":
37 | 			// Metered key (free sign-up: https://cloud.unidoc.io).
38 | 			// NOTE(review): the old note cited UniOffice v1.9.0 - confirm for unipdf.
39 | 			fmt.Printf("License: %s\n", pdf.GetLicenseKey())
40 | 
41 | 			// Freshly checks the state, contacting the licensing server.
42 | 			pdf.GetMeteredState()
43 | 		case licenseFile != "":
44 | 			// License configured through UNIDOC_LICENSE_FILE.
45 | 			fmt.Printf("License: %s\n", pdf.GetLicenseKey())
46 | 		default:
47 | 			// Neither variable is set: nothing is printed.
48 | 		}
49 | 	},
50 | }
51 | 
52 | func init() { // Attach the license_info command to the root command.
53 | 	rootCmd.AddCommand(licenseInfoCmd)
54 | }
55 | 


--------------------------------------------------------------------------------
/pkg/pdf/search.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	"strings"
10 | 
11 | 	uniextractor "github.com/unidoc/unipdf/v4/extractor"
12 | )
13 | 
14 | // SearchResult describes the matches of a search term on a single PDF page.
15 | type SearchResult struct {
16 | 	// Page is the 1-based number of the page the search term was found on.
17 | 	Page int
18 | 
19 | 	// Occurrences is the number of times the search term appears in the page text.
20 | 	Occurrences int
21 | }
22 | 
23 | // Search looks for text in the PDF file at inputPath and returns one result
24 | // per page containing it, in page order. The password parameter is used for
25 | // opening encrypted input files. Matching is exact (case-sensitive) and
26 | // counts non-overlapping occurrences in the text extracted from each page.
27 | // An empty search text yields no results.
28 | func Search(inputPath, text, password string) ([]*SearchResult, error) {
29 | 	// Read input file.
30 | 	r, pages, _, _, err := readPDF(inputPath, password)
31 | 	if err != nil {
32 | 		return nil, err
33 | 	}
34 | 
35 | 	// strings.Count reports 1 + the number of runes for an empty substring,
36 | 	// which would show up as bogus matches on every page.
37 | 	if text == "" {
38 | 		return nil, nil
39 | 	}
40 | 
41 | 	var results []*SearchResult
42 | 	for numPage := 1; numPage <= pages; numPage++ {
43 | 		page, err := r.GetPage(numPage)
44 | 		if err != nil {
45 | 			return nil, err
46 | 		}
47 | 
48 | 		// Extract page text.
49 | 		extractor, err := uniextractor.New(page)
50 | 		if err != nil {
51 | 			return nil, err
52 | 		}
53 | 
54 | 		pageText, err := extractor.ExtractText()
55 | 		if err != nil {
56 | 			return nil, err
57 | 		}
58 | 
59 | 		// Only pages with at least one match are reported.
60 | 		if n := strings.Count(pageText, text); n > 0 {
61 | 			results = append(results, &SearchResult{Page: numPage, Occurrences: n})
62 | 		}
63 | 	}
64 | 
65 | 	return results, nil
66 | }
67 | 


--------------------------------------------------------------------------------
/pkg/pdf/encrypt.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	unisecurity "github.com/unidoc/unipdf/v4/core/security"
10 | 	unipdf "github.com/unidoc/unipdf/v4/model"
11 | )
12 | 
13 | // EncryptOpts holds the settings Encrypt applies when protecting a PDF file.
14 | type EncryptOpts struct {
15 | 	// OwnerPassword is the owner password set on the encrypted file.
16 | 	OwnerPassword string
17 | 
18 | 	// UserPassword is the user password set on the encrypted file.
19 | 	UserPassword string
20 | 
21 | 	// Algorithm selects the encryption algorithm passed to the PDF writer.
22 | 	Algorithm unipdf.EncryptionAlgorithm
23 | 
24 | 	// Permissions specifies the operations a user is allowed to perform on
25 | 	// the encrypted PDF file.
26 | 	Permissions unisecurity.Permissions
27 | }
28 | 
29 | // Encrypt protects the PDF file at inputPath using the passwords, algorithm
30 | // and permissions from opts, and saves the result at outputPath. The input
31 | // file is opened with an empty password. opts must not be nil.
32 | func Encrypt(inputPath, outputPath string, opts *EncryptOpts) error {
33 | 	// Open the source document; only the reader is needed here.
34 | 	reader, _, _, _, err := readPDF(inputPath, "")
35 | 	if err != nil {
36 | 		return err
37 | 	}
38 | 
39 | 	// Transfer the document to a new writer (nil page list).
40 | 	writer := unipdf.NewPdfWriter()
41 | 	err = readerToWriter(reader, &writer, nil)
42 | 	if err != nil {
43 | 		return err
44 | 	}
45 | 
46 | 	// Apply the requested protection to the output.
47 | 	userPass, ownerPass := []byte(opts.UserPassword), []byte(opts.OwnerPassword)
48 | 	err = writer.Encrypt(userPass, ownerPass, &unipdf.EncryptOptions{
49 | 		Algorithm:   opts.Algorithm,
50 | 		Permissions: opts.Permissions,
51 | 	})
52 | 	if err != nil {
53 | 		return err
54 | 	}
55 | 
56 | 	// The safe flag of writePDF is set when the input file is being
57 | 	// overwritten in place.
58 | 	return writePDF(outputPath, &writer, inputPath == outputPath)
59 | }
60 | 


--------------------------------------------------------------------------------
/pkg/pdf/pdf.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	"fmt"
10 | 	"os"
11 | 
12 | 	unicommon "github.com/unidoc/unipdf/v4/common"
13 | 	unilicense "github.com/unidoc/unipdf/v4/common/license"
14 | )
15 | 
16 | // SetLicense reads the license key stored in the file at licensePath and
17 | // registers it with the UniDoc library for the given customer name.
18 | func SetLicense(licensePath string, customer string) error {
19 | 	licenseKey, err := os.ReadFile(licensePath)
20 | 	if err != nil {
21 | 		return err
22 | 	}
23 | 
24 | 	return unilicense.SetLicenseKey(string(licenseKey), customer)
25 | }
26 | 
27 | // SetMeteredKey registers a metered API key as the license for the UniDoc library.
28 | func SetMeteredKey(apiKey string) error {
29 | 	return unilicense.SetMeteredKey(apiKey)
30 | }
31 | 
32 | // GetLicenseKey returns the string form of the loaded license key, or a
33 | // fixed failure message when the library reports no key.
34 | func GetLicenseKey() string {
35 | 	if lk := unilicense.GetLicenseKey(); lk != nil {
36 | 		return lk.ToString()
37 | 	}
38 | 	return "Failed retrieving license key"
39 | }
40 | 
41 | // GetMeteredState asks the licensing server for the metered state and prints it.
42 | func GetMeteredState() {
43 | 	state, err := unilicense.GetMeteredState()
44 | 	if err != nil {
45 | 		fmt.Printf("ERROR getting metered state: %+v\n", err)
46 | 		return
47 | 	}
48 | 
49 | 	verdict := "State is not OK\n"
50 | 	if state.OK {
51 | 		verdict = "State is OK\n"
52 | 	}
53 | 	fmt.Printf("State: %+v\n", state)
54 | 	fmt.Print(verdict)
55 | 	fmt.Printf("Credits: %v\n", state.Credits)
56 | 	fmt.Printf("Used credits: %v\n", state.Used)
57 | }
58 | 
59 | // SetLogLevel installs a console logger with the given level as the UniDoc library logger.
60 | func SetLogLevel(level unicommon.LogLevel) {
61 | 	unicommon.SetLogger(unicommon.NewConsoleLogger(level))
62 | }
63 | 


--------------------------------------------------------------------------------
/internal/cli/decrypt.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const decryptCmdDesc = `Decrypt PDF files`
17 | 
18 | // decryptCmdExample lists the usage examples shown in the command help.
19 | var decryptCmdExample = fmt.Sprintf(
20 | 	"%[1]s decrypt -p pass input_file.pdf\n"+
21 | 		"%[1]s decrypt -p pass -o output_file.pdf input_file.pdf\n", appName)
22 | 
23 | // decryptCmd is the "decrypt" command. The decrypted file replaces the
24 | // input file unless an output path is given through the output-file flag.
25 | var decryptCmd = &cobra.Command{
26 | 	Use:                   "decrypt [FLAG]... INPUT_FILE",
27 | 	Example:               decryptCmdExample,
28 | 	Long:                  decryptCmdDesc,
29 | 	Short:                 "Decrypt PDF files",
30 | 	DisableFlagsInUseLine: true,
31 | 	Args: func(_ *cobra.Command, args []string) error {
32 | 		if len(args) >= 1 {
33 | 			return nil
34 | 		}
35 | 		return errors.New("must provide the PDF file to decrypt")
36 | 	},
37 | 	Run: func(cmd *cobra.Command, args []string) {
38 | 		flags := cmd.Flags()
39 | 		inputPath := args[0]
40 | 		password, _ := flags.GetString("password")
41 | 
42 | 		// Without an explicit output path the input file is overwritten.
43 | 		outputPath, _ := flags.GetString("output-file")
44 | 		if outputPath == "" {
45 | 			outputPath = inputPath
46 | 		}
47 | 
48 | 		err := pdf.Decrypt(inputPath, outputPath, password)
49 | 		if err != nil {
50 | 			printErr("Could not decrypt input file: %s\n", err)
51 | 		}
52 | 
53 | 		fmt.Printf("Successfully decrypted %s\n", inputPath)
54 | 		fmt.Printf("Output file saved to %s\n", outputPath)
55 | 	},
56 | }
57 | 
58 | // Declare the decrypt flags and attach the command to the root command.
59 | func init() {
60 | 	decryptCmd.Flags().StringP("output-file", "o", "", "output file")
61 | 	decryptCmd.Flags().StringP("password", "p", "", "input file password")
62 | 	rootCmd.AddCommand(decryptCmd)
63 | }
64 | 


--------------------------------------------------------------------------------
/internal/cli/search.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const searchCmdDesc = `Search text in PDF files`
17 | 
18 | // searchCmdExample lists the usage examples shown in the command help.
19 | var searchCmdExample = fmt.Sprintf(
20 | 	"%[1]s search input_file.pdf text_to_search\n"+
21 | 		"%[1]s search -p pass input_file.pdf text_to_search\n", appName)
22 | 
23 | // searchCmd is the "search" command. It prints the number of occurrences
24 | // of the given text for each matching page, followed by the overall total.
25 | var searchCmd = &cobra.Command{
26 | 	Use:                   "search [FLAG]... INPUT_FILE TEXT",
27 | 	Example:               searchCmdExample,
28 | 	Long:                  searchCmdDesc,
29 | 	Short:                 "Search text in PDF files",
30 | 	DisableFlagsInUseLine: true,
31 | 	Args: func(_ *cobra.Command, args []string) error {
32 | 		if len(args) >= 2 {
33 | 			return nil
34 | 		}
35 | 
36 | 		return errors.New("must provide a PDF file and the text to search")
37 | 	},
38 | 	Run: func(cmd *cobra.Command, args []string) {
39 | 		inputPath, text := args[0], args[1]
40 | 		password, _ := cmd.Flags().GetString("password")
41 | 
42 | 		// Look the text up in the input file.
43 | 		matches, err := pdf.Search(inputPath, text, password)
44 | 		if err != nil {
45 | 			printErr("Could not search the specified text: %s\n", err)
46 | 		}
47 | 
48 | 		// Report per-page counts while accumulating the total.
49 | 		fmt.Printf("Search results for term: %s\n", text)
50 | 
51 | 		var total int
52 | 		for i := range matches {
53 | 			match := matches[i]
54 | 			total += match.Occurrences
55 | 			fmt.Printf("Page %d: %d occurrences\n", match.Page, match.Occurrences)
56 | 		}
57 | 
58 | 		fmt.Printf("Total occurrences: %d\n", total)
59 | 	},
60 | }
61 | 
62 | // Declare the search flag and attach the command to the root command.
63 | func init() {
64 | 	searchCmd.Flags().StringP("password", "p", "", "input file password")
65 | 	rootCmd.AddCommand(searchCmd)
66 | }
67 | 


--------------------------------------------------------------------------------
/pkg/pdf/explode.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	"archive/zip"
10 | 	"fmt"
11 | 	"os"
12 | 	"path/filepath"
13 | 	"strings"
14 | 
15 | 	unipdf "github.com/unidoc/unipdf/v4/model"
16 | )
17 | 
18 | // Explode splits the PDF file specified by the inputPath parameter into single
19 | // page PDF files. The extracted collection of PDF files is saved as a ZIP
20 | // archive at the location specified by the outputPath parameter.
21 | // A password can be passed in, if the input file is encrypted.
22 | // If the pages parameter is nil or an empty slice, all pages are extracted.
23 | func Explode(inputPath, outputPath, password string, pages []int) (string, error) {
24 | 	dir, inputFile := filepath.Split(inputPath)
25 | 	// Use input file directory if no output path is specified.
26 | 	inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile))
27 | 	if outputPath == "" {
28 | 		outputPath = filepath.Join(dir, inputFile+".zip")
29 | 	}
30 | 
31 | 	// Read input file.
32 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
33 | 	if err != nil {
34 | 		return "", err
35 | 	}
36 | 
37 | 	// Prepare output archive.
38 | 	outputFile, err := os.Create(outputPath)
39 | 	if err != nil {
40 | 		return "", err
41 | 	}
42 | 	defer outputFile.Close()
43 | 
44 | 	// Extract pages.
45 | 	if len(pages) == 0 {
46 | 		pages = createPageRange(pageCount)
47 | 	}
48 | 
49 | 	zw := zip.NewWriter(outputFile)
50 | 	for _, numPage := range pages {
51 | 		w := unipdf.NewPdfWriter()
52 | 		if err := readerToWriter(r, &w, []int{numPage}); err != nil {
53 | 			return "", err
54 | 		}
55 | 
56 | 		// Add page to zip file.
57 | 		file, err := zw.Create(fmt.Sprintf("%s_%d.pdf", inputFile, numPage))
58 | 		if err != nil {
59 | 			return "", err
60 | 		}
61 | 
62 | 		if err = w.Write(file); err != nil {
63 | 			return "", err
64 | 		}
65 | 	}
66 | 
67 | 	return outputPath, zw.Close()
68 | }
69 | 


--------------------------------------------------------------------------------
/pkg/pdf/watermark.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	unicreator "github.com/unidoc/unipdf/v4/creator"
10 | )
11 | 
12 | // Watermark adds the watermark image specified by the watermarkPath parameter
13 | // to the pages of the PDF file specified by the inputPath parameter.
14 | // A password can be passed in for encrypted input files.
15 | // The resulting file is saved at the location specified by the outputPath
16 | // parameter.
17 | // Also, a list of pages to add watermark to can be passed in. Every page that
18 | // is not included in the pages slice is left intact.
19 | // If the pages parameter is nil or an empty slice, all the pages of the input
20 | // file are watermarked.
21 | func Watermark(inputPath, outputPath, watermarkPath, password string, pages []int) error {
22 | 	// Read input file.
23 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
24 | 	if err != nil {
25 | 		return err
26 | 	}
27 | 
28 | 	// Open watermark image.
29 | 	c := unicreator.New()
30 | 
31 | 	watermark, err := c.NewImageFromFile(watermarkPath)
32 | 	if err != nil {
33 | 		return err
34 | 	}
35 | 
36 | 	// Add pages.
37 | 	if len(pages) == 0 {
38 | 		pages = createPageRange(pageCount)
39 | 	}
40 | 
41 | 	for i := 0; i < pageCount; i++ {
42 | 		numPage := i + 1
43 | 
44 | 		page, err := r.GetPage(numPage)
45 | 		if err != nil {
46 | 			return err
47 | 		}
48 | 
49 | 		var hasWatermark bool
50 | 		for _, page := range pages {
51 | 			if page == numPage {
52 | 				hasWatermark = true
53 | 				break
54 | 			}
55 | 		}
56 | 
57 | 		if err = c.AddPage(page); err != nil {
58 | 			return err
59 | 		}
60 | 
61 | 		if !hasWatermark {
62 | 			continue
63 | 		}
64 | 
65 | 		watermark.ScaleToWidth(c.Context().PageWidth)
66 | 		watermark.SetPos(0, (c.Context().PageHeight-watermark.Height())/2)
67 | 		watermark.SetOpacity(0.5)
68 | 
69 | 		if err = c.Draw(watermark); err != nil {
70 | 			return err
71 | 		}
72 | 	}
73 | 
74 | 	// Add forms.
75 | 	if r.AcroForm != nil {
76 | 		c.SetForms(r.AcroForm)
77 | 	}
78 | 
79 | 	// Write output file.
80 | 	safe := inputPath == outputPath
81 | 	return writeCreatorPDF(outputPath, c, safe)
82 | }
83 | 


--------------------------------------------------------------------------------
/pkg/pdf/info.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	"os"
10 | 	"sort"
11 | )
12 | 
// FileStat contains basic information about a file.
// It is embedded in FileInfo, so its fields are promoted to that type.
type FileStat struct {
	// Name represents the name of the file. Info sets it to the input path
	// exactly as it was passed in.
	Name string

	// Size specifies the size in bytes of the file.
	Size int64
}
21 | 
// FileInfo contains information about a PDF file.
// Values of this type are produced by the Info function.
type FileInfo struct {
	// FileStat holds the file name and the file size.
	FileStat

	// Pages represents the number of pages the PDF file has.
	Pages int

	// Objects contains the types of objects the PDF file contains, along
	// with the count for each object type. Being a map, it has no defined
	// iteration order.
	Objects map[string]int

	// Version specifies the PDF version of the file.
	Version string

	// Encrypted specifies if the file is encrypted.
	Encrypted bool

	// EncryptionAlgo contains the name of the encryption algorithm used
	// to encrypt the PDF file. The field is empty for non-encrypted files.
	EncryptionAlgo string
}
43 | 
44 | // Info returns information about the PDF file specified by the inputPath
45 | // parameter. A password can be passed in for encrypted input files.
46 | func Info(inputPath string, password string) (*FileInfo, error) {
47 | 	info := &FileInfo{}
48 | 	info.Name = inputPath
49 | 
50 | 	// Get file stat.
51 | 	fi, err := os.Stat(inputPath)
52 | 	if err != nil {
53 | 		return nil, err
54 | 	}
55 | 	info.Size = fi.Size()
56 | 
57 | 	// Read input file.
58 | 	r, pages, encrypted, _, err := readPDF(inputPath, password)
59 | 	if err != nil {
60 | 		return nil, err
61 | 	}
62 | 
63 | 	info.Encrypted = encrypted
64 | 	if encrypted {
65 | 		info.EncryptionAlgo = r.GetEncryptionMethod()
66 | 	}
67 | 
68 | 	info.Version = r.PdfVersion().String()
69 | 	info.Pages = pages
70 | 
71 | 	// Read PDF objects.
72 | 	objTypes, err := r.Inspect()
73 | 	if err != nil {
74 | 		return nil, err
75 | 	}
76 | 
77 | 	keys := []string{}
78 | 	for key := range objTypes {
79 | 		keys = append(keys, key)
80 | 	}
81 | 	sort.Strings(keys)
82 | 
83 | 	objects := map[string]int{}
84 | 	for _, key := range keys {
85 | 		objects[key] = objTypes[key]
86 | 	}
87 | 	info.Objects = objects
88 | 
89 | 	return info, nil
90 | }
91 | 


--------------------------------------------------------------------------------
/internal/cli/split.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const splitCmdDesc = `Split PDF files.
17 | 
18 | The command is used to extract one or more page ranges from the input file
19 | and save the result as the output file.
20 | If no page range is specified, all the pages from the input file will be
21 | copied to the output file.
22 | 
23 | An example of the pages parameter: 1-3,4,6-7
24 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
25 | while page number 5 is skipped.
26 | `
27 | 
28 | var splitCmdExample = fmt.Sprintf("%s\n%s\n",
29 | 	fmt.Sprintf("%s split input_file.pdf output_file.pdf 1-2", appName),
30 | 	fmt.Sprintf("%s split -p pass input_file.pd output_file.pdf 1-2,4", appName),
31 | )
32 | 
33 | // splitCmd represents the split command.
34 | var splitCmd = &cobra.Command{
35 | 	Use:                   "split [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]",
36 | 	Short:                 "Split PDF files",
37 | 	Long:                  splitCmdDesc,
38 | 	Example:               splitCmdExample,
39 | 	DisableFlagsInUseLine: true,
40 | 	Run: func(cmd *cobra.Command, args []string) {
41 | 		inputPath := args[0]
42 | 		outputPath := args[1]
43 | 		password, _ := cmd.Flags().GetString("password")
44 | 
45 | 		// Parse page range.
46 | 		var err error
47 | 		var pages []int
48 | 
49 | 		if len(args) > 2 {
50 | 			if pages, err = parsePageRange(args[2]); err != nil {
51 | 				printUsageErr(cmd, "Invalid page range specified\n")
52 | 			}
53 | 		}
54 | 
55 | 		err = pdf.Split(inputPath, outputPath, password, pages)
56 | 		if err != nil {
57 | 			printErr("Error: %v\n", err)
58 | 		}
59 | 
60 | 		fmt.Printf("Successfully split file %s\n", inputPath)
61 | 		fmt.Printf("Output file saved to %s\n", outputPath)
62 | 	},
63 | 	Args: func(_ *cobra.Command, args []string) error {
64 | 		if len(args) < 2 {
65 | 			return errors.New("must provide at least the input and output files")
66 | 		}
67 | 
68 | 		return nil
69 | 	},
70 | }
71 | 
72 | func init() {
73 | 	rootCmd.AddCommand(splitCmd)
74 | 
75 | 	splitCmd.Flags().StringP("password", "p", "", "input file password")
76 | }
77 | 


--------------------------------------------------------------------------------
/internal/cli/passwd.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const passwdCmdDesc = `Change owner and user passwords of PDF files.`
17 | 
18 | var passwdCmdExample = fmt.Sprintf("%s\n%s\n%s\n",
19 | 	fmt.Sprintf("%s passwd -p pass input_file.pdf new_owner_pass", appName),
20 | 	fmt.Sprintf("%s passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass", appName),
21 | 	fmt.Sprintf("%s passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass new_user_pass", appName),
22 | )
23 | 
24 | // passwdCmd represents the passwd command.
25 | var passwdCmd = &cobra.Command{
26 | 	Use:                   "passwd [FLAG]... INPUT_FILE NEW_OWNER_PASSWORD [NEW_USER_PASSWORD]",
27 | 	Short:                 "Change PDF passwords",
28 | 	Long:                  passwdCmdDesc,
29 | 	Example:               passwdCmdExample,
30 | 	DisableFlagsInUseLine: true,
31 | 	Run: func(cmd *cobra.Command, args []string) {
32 | 		// Parse input parameters.
33 | 		inputPath := args[0]
34 | 		newOwnerPassword := args[1]
35 | 		ownerPassword, _ := cmd.Flags().GetString("password")
36 | 
37 | 		newUserPassword := ""
38 | 		if len(args) > 2 {
39 | 			newUserPassword = args[2]
40 | 		}
41 | 
42 | 		// Parse output file.
43 | 		outputPath, _ := cmd.Flags().GetString("output-file")
44 | 		if outputPath == "" {
45 | 			outputPath = inputPath
46 | 		}
47 | 
48 | 		// Change input file password.
49 | 		err := pdf.Passwd(inputPath, outputPath, ownerPassword, newOwnerPassword, newUserPassword)
50 | 		if err != nil {
51 | 			printErr("Could not change input file password: %s\n", err)
52 | 		}
53 | 
54 | 		fmt.Printf("Password successfully changed\n")
55 | 		fmt.Printf("Output file saved to %s\n", outputPath)
56 | 	},
57 | 	Args: func(_ *cobra.Command, args []string) error {
58 | 		if len(args) < 2 {
59 | 			return errors.New("must provide the input file and the new owner password")
60 | 		}
61 | 
62 | 		return nil
63 | 	},
64 | }
65 | 
66 | func init() {
67 | 	rootCmd.AddCommand(passwdCmd)
68 | 
69 | 	passwdCmd.Flags().StringP("output-file", "o", "", "output file")
70 | 	passwdCmd.Flags().StringP("password", "p", "", "input file password")
71 | }
72 | 


--------------------------------------------------------------------------------
/internal/cli/extract_text.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const extractTextCmdDesc = `Extracts PDF text.
17 | 
18 | The extracted text is always printed to STDOUT.
19 | 
20 | The command can be configured to extract text only from the specified pages
21 | using the --pages parameter.
22 | 
23 | An example of the pages parameter: 1-3,4,6-7
24 | Text will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page
25 | number 5 is skipped.
26 | `
27 | 
28 | var extractTextCmdExample = fmt.Sprintf("%s\n%s\n%s\n",
29 | 	fmt.Sprintf("%s extract text input_file.pdf", appName),
30 | 	fmt.Sprintf("%s extract text -P 1-3 input_file.pdf", appName),
31 | 	fmt.Sprintf("%s extract text -P 1-3 -p pass input_file.pdf", appName),
32 | )
33 | 
34 | // extractTextCmd represents the extract text command.
35 | var extractTextCmd = &cobra.Command{
36 | 	Use:                   "text [FLAG]... INPUT_FILE",
37 | 	Short:                 "Extract PDF text",
38 | 	Long:                  extractTextCmdDesc,
39 | 	Example:               extractTextCmdExample,
40 | 	DisableFlagsInUseLine: true,
41 | 	Run: func(cmd *cobra.Command, args []string) {
42 | 		// Parse input parameters.
43 | 		inputPath := args[0]
44 | 		password, _ := cmd.Flags().GetString("password")
45 | 
46 | 		// Parse page range.
47 | 		pageRange, _ := cmd.Flags().GetString("pages")
48 | 
49 | 		pages, err := parsePageRange(pageRange)
50 | 		if err != nil {
51 | 			printUsageErr(cmd, "Invalid page range specified\n")
52 | 		}
53 | 
54 | 		// Extract text.
55 | 		text, err := pdf.ExtractText(inputPath, password, pages)
56 | 		if err != nil {
57 | 			printErr("Could not extract text: %s\n", err)
58 | 		}
59 | 
60 | 		fmt.Println(text)
61 | 	},
62 | 	Args: func(_ *cobra.Command, args []string) error {
63 | 		if len(args) < 1 {
64 | 			return errors.New("must provide the input file")
65 | 		}
66 | 
67 | 		return nil
68 | 	},
69 | }
70 | 
71 | func init() {
72 | 	extractCmd.AddCommand(extractTextCmd)
73 | 
74 | 	extractTextCmd.Flags().StringP("password", "p", "", "input file password")
75 | 	extractTextCmd.Flags().StringP("pages", "P", "", "pages to extract text from")
76 | }
77 | 


--------------------------------------------------------------------------------
/internal/cli/organize.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const organizeCmdDesc = `Split PDF files.
17 | 
18 | The command is used to organize one or more page ranges from the input file
19 | and save the result as the output file.
20 | If no page range is specified, all the pages from the input file will be
21 | copied to the output file.
22 | 
23 | An example of the pages parameter: 1-3,4,6-7
24 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
25 | while page number 5 is skipped.
26 | `
27 | 
28 | var organizeCmdExample = fmt.Sprintf("%s\n%s\n",
29 | 	fmt.Sprintf("%s organize input_file.pdf output_file.pdf 1-2", appName),
30 | 	fmt.Sprintf("%s organize -p pass input_file.pd output_file.pdf 1-2,4", appName),
31 | )
32 | 
33 | // organizeCmd represents the split command.
34 | var organizeCmd = &cobra.Command{
35 | 	Use:                   "organize [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]",
36 | 	Short:                 "Organize PDF files",
37 | 	Long:                  organizeCmdDesc,
38 | 	Example:               organizeCmdExample,
39 | 	DisableFlagsInUseLine: true,
40 | 	Run: func(cmd *cobra.Command, args []string) {
41 | 		inputPath := args[0]
42 | 		outputPath := args[1]
43 | 		password, _ := cmd.Flags().GetString("password")
44 | 
45 | 		// Parse page range.
46 | 		var err error
47 | 		var pages []int
48 | 
49 | 		if len(args) > 2 {
50 | 			if pages, err = parsePageRangeUnsorted(args[2]); err != nil {
51 | 				printUsageErr(cmd, "Invalid page range specified\n")
52 | 			}
53 | 		}
54 | 
55 | 		if err := pdf.Organize(inputPath, outputPath, password, pages); err != nil {
56 | 			printErr("Error: %s\n", err)
57 | 		}
58 | 
59 | 		fmt.Printf("Successfully organized file %s\n", inputPath)
60 | 		fmt.Printf("Output file saved to %s\n", outputPath)
61 | 	},
62 | 	Args: func(_ *cobra.Command, args []string) error {
63 | 		if len(args) < 2 {
64 | 			return errors.New("must provide at least the input and output files")
65 | 		}
66 | 
67 | 		return nil
68 | 	},
69 | }
70 | 
71 | func init() {
72 | 	rootCmd.AddCommand(organizeCmd)
73 | 
74 | 	organizeCmd.Flags().StringP("password", "p", "", "input file password")
75 | }
76 | 


--------------------------------------------------------------------------------
/pkg/pdf/rotate.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package pdf
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 	"os"
12 | 	"path/filepath"
13 | 	"strings"
14 | 
15 | 	unicreator "github.com/unidoc/unipdf/v4/creator"
16 | )
17 | 
18 | // Rotate rotates the pages of the PDF file specified by the inputPath
19 | // by the angle specified by the angle parameter. The rotated PDF file is saved
20 | // at the location specified by the outputPath parameter.
21 | // A password can be passed in, if the input file is encrypted.
22 | // If the pages parameter is nil or an empty slice, all pages are rotated.
23 | func Rotate(inputPath, outputPath string, angle int, password string, pages []int) (string, error) {
24 | 	if angle%90 != 0 {
25 | 		return "", errors.New("rotation angle must be a multiple of 90 degrees")
26 | 	}
27 | 
28 | 	// Generate output path from the input path, if no output path is specified.
29 | 	dir, inputFile := filepath.Split(inputPath)
30 | 
31 | 	inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile))
32 | 	if outputPath == "" {
33 | 		outputPath = filepath.Join(dir, fmt.Sprintf("%s_rotated.pdf", inputFile))
34 | 	}
35 | 
36 | 	// Read input file.
37 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
38 | 	if err != nil {
39 | 		return "", err
40 | 	}
41 | 
42 | 	// Prepare output archive.
43 | 	outputFile, err := os.Create(outputPath)
44 | 	if err != nil {
45 | 		return "", err
46 | 	}
47 | 	defer outputFile.Close()
48 | 
49 | 	// Rotate pages.
50 | 	if len(pages) == 0 {
51 | 		pages = createPageRange(pageCount)
52 | 	}
53 | 
54 | 	selectedPages := map[int]bool{}
55 | 	for _, page := range pages {
56 | 		selectedPages[page] = true
57 | 	}
58 | 
59 | 	c := unicreator.New()
60 | 	for i := 0; i < pageCount; i++ {
61 | 		numPage := i + 1
62 | 
63 | 		page, err := r.GetPage(numPage)
64 | 		if err != nil {
65 | 			return "", err
66 | 		}
67 | 
68 | 		if err = c.AddPage(page); err != nil {
69 | 			return "", err
70 | 		}
71 | 
72 | 		rotate := selectedPages[numPage]
73 | 		if !rotate || angle == 0 {
74 | 			continue
75 | 		}
76 | 
77 | 		if err = c.RotateDeg(int64(angle)); err != nil {
78 | 			return "", err
79 | 		}
80 | 	}
81 | 
82 | 	// Add forms.
83 | 	if r.AcroForm != nil {
84 | 		c.SetForms(r.AcroForm)
85 | 	}
86 | 
87 | 	// Write output file.
88 | 	safe := inputPath == outputPath
89 | 	return outputPath, writeCreatorPDF(outputPath, c, safe)
90 | }
91 | 


--------------------------------------------------------------------------------
/internal/cli/root.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"os"
10 | 
11 | 	"github.com/spf13/cobra"
12 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
13 | 	unicommon "github.com/unidoc/unipdf/v4/common"
14 | )
15 | 
const (
	// appName is the name of the application, used in the usage and
	// example texts of the commands.
	appName = "unipdf"

	// appVersion is the version of the application.
	appVersion = "0.6.0"
)

// rootCmdDesc is the long description of the root command. The application
// name is prepended to it when the root command is defined.
const rootCmdDesc = ` is a CLI application for working with PDF files.
It supports the most common PDF operations. A full list of the supported
operations can be found below.

If you have a license for Unidoc, you can set it through the
UNIDOC_LICENSE_FILE and UNIDOC_LICENSE_CUSTOMER environment variables.

export UNIDOC_LICENSE_FILE="PATH_TO_LICENSE_FILE"
export UNIDOC_LICENSE_CUSTOMER="CUSTOMER_NAME"

Or alternatively, you can set the Metered API Key license through the
UNIDOC_LICENSE_API_KEY environment variable.

export UNIDOC_LICENSE_API_KEY="YOUR_API_KEY_HERE"

By default, the application only displays error messages on command execution
failure. To change the verbosity of the output, set the UNIDOC_LOG_LEVEL
environment variable.

export UNIDOC_LOG_LEVEL="DEBUG"

Supported log levels: trace, debug, info, notice, warning, error (default)
`
42 | 
// rootCmd represents the base command of the application. The other commands
// are attached to it (directly or through intermediate commands) from the
// init functions of their respective files.
var rootCmd = &cobra.Command{
	Use:  appName,
	Long: appName + rootCmdDesc,
}
47 | 
48 | // Execute represents the entry point of the application.
49 | // The method parses the command line arguments and executes the appropriate
50 | // action.
51 | func Execute() {
52 | 	readEnv()
53 | 
54 | 	if err := rootCmd.Execute(); err != nil {
55 | 		printErr("%s\n", err)
56 | 	}
57 | }
58 | 
59 | func readEnv() {
60 | 	// Set license key.
61 | 	licensePath := os.Getenv("UNIDOC_LICENSE_FILE")
62 | 	licenseCustomer := os.Getenv("UNIDOC_LICENSE_CUSTOMER")
63 | 	if licensePath != "" {
64 | 		pdf.SetLicense(licensePath, licenseCustomer)
65 | 	}
66 | 
67 | 	// OR... alternatively... load a License API key.
68 | 
69 | 	// Set license key using metered api key.
70 | 	licenseMeteredKey := os.Getenv("UNIDOC_LICENSE_API_KEY")
71 | 	if licenseMeteredKey != "" {
72 | 		pdf.SetMeteredKey(licenseMeteredKey)
73 | 	}
74 | 
75 | 	// Set log level.
76 | 	logLevel, err := parseLogLevel(os.Getenv("UNIDOC_LOG_LEVEL"))
77 | 	if err != nil {
78 | 		logLevel = unicommon.LogLevelError
79 | 	}
80 | 
81 | 	pdf.SetLogLevel(logLevel)
82 | }
83 | 


--------------------------------------------------------------------------------
/internal/cli/replace.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
16 | const replaceCmdDesc = `Replace text in PDF files`
17 | 
18 | var replaceCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n",
19 | 	fmt.Sprintf("%s replace input_file.pdf text_to_search", appName),
20 | 	fmt.Sprintf("%s replace -o output_file input_file.pdf text_to_search", appName),
21 | 	fmt.Sprintf("%s replace -o output_file -r new_text input_file.pdf text_to_search", appName),
22 | 	fmt.Sprintf("%s replace -o output_file  -r new_text -p pass input_file.pdf text_to_search", appName),
23 | )
24 | 
25 | // replaceCmd represents the replace command.
26 | var replaceCmd = &cobra.Command{
27 | 	Use:                   "replace [FLAG]... INPUT_FILE TEXT",
28 | 	Short:                 "Replace text in PDF files",
29 | 	Long:                  replaceCmdDesc,
30 | 	Example:               replaceCmdExample,
31 | 	DisableFlagsInUseLine: true,
32 | 	Run: func(cmd *cobra.Command, args []string) {
33 | 		// Parse input parameters.
34 | 		inputPath := args[0]
35 | 		text := args[1]
36 | 		password, _ := cmd.Flags().GetString("password")
37 | 
38 | 		// Parse output file.
39 | 		outputPath, _ := cmd.Flags().GetString("output-file")
40 | 		if outputPath == "" {
41 | 			outputPath = inputPath
42 | 		}
43 | 
44 | 		// Parse replaceText.
45 | 		replaceText, _ := cmd.Flags().GetString("replace-text")
46 | 		if replaceText == "" {
47 | 			replaceText = text
48 | 		}
49 | 
50 | 		// Search text.
51 | 		err := pdf.Replace(inputPath, outputPath, text, replaceText, password)
52 | 		if err != nil {
53 | 			printErr("Could not replace the specified text: %s\n", err)
54 | 		}
55 | 
56 | 		fmt.Printf("Successfully replaced text %s with %s\n", text, replaceText)
57 | 		fmt.Printf("Output file saved to %s\n", outputPath)
58 | 	},
59 | 	Args: func(_ *cobra.Command, args []string) error {
60 | 		if len(args) < 2 {
61 | 			return errors.New("must provide a PDF file and the text to search")
62 | 		}
63 | 
64 | 		return nil
65 | 	},
66 | }
67 | 
68 | func init() {
69 | 	rootCmd.AddCommand(replaceCmd)
70 | 
71 | 	replaceCmd.Flags().StringP("output-file", "o", "", "output file")
72 | 	replaceCmd.Flags().StringP("replace-text", "r", "", "replacement text")
73 | 	replaceCmd.Flags().StringP("password", "p", "", "input file password")
74 | }
75 | 


--------------------------------------------------------------------------------
/internal/cli/info.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
import (
	"errors"
	"fmt"
	"sort"

	"github.com/spf13/cobra"
	"github.com/unidoc/unipdf-cli/pkg/pdf"
)
15 | 
16 | const infoCmdDesc = `Outputs information about the input file.
17 | Also provides basic validation.
18 | `
19 | 
20 | var infoCmdExample = fmt.Sprintf("%s\n%s\n",
21 | 	fmt.Sprintf("%s info input_file.pdf", appName),
22 | 	fmt.Sprintf("%s info -p pass input_file.pdf", appName),
23 | )
24 | 
25 | // infoCmd represents the info command.
26 | var infoCmd = &cobra.Command{
27 | 	Use:                   "info [FLAG]... INPUT_FILE",
28 | 	Short:                 "Output PDF information",
29 | 	Long:                  infoCmdDesc,
30 | 	Example:               infoCmdExample,
31 | 	DisableFlagsInUseLine: true,
32 | 	Run: func(cmd *cobra.Command, args []string) {
33 | 		inputFile := args[0]
34 | 		password, _ := cmd.Flags().GetString("password")
35 | 
36 | 		info, err := pdf.Info(inputFile, password)
37 | 		if err != nil {
38 | 			printErr("Could not retrieve input file information: %s\n", err)
39 | 		}
40 | 
41 | 		// Print basic PDF info
42 | 		fmt.Println("Info")
43 | 		fmt.Printf("Name: %s\n", inputFile)
44 | 		fmt.Printf("Size: %d bytes\n", info.Size)
45 | 		fmt.Printf("Pages: %d\n", info.Pages)
46 | 		fmt.Printf("PDF Version: %s\n", info.Version)
47 | 
48 | 		if info.Encrypted {
49 | 			fmt.Printf("Encryption: encrypted with %s algorithm\n", info.EncryptionAlgo)
50 | 		} else {
51 | 			fmt.Println("Encryption: none")
52 | 		}
53 | 
54 | 		// Print PDF objects
55 | 		fmt.Println("\nObjects")
56 | 
57 | 		var malicious bool
58 | 		for key, val := range info.Objects {
59 | 			maliciousStr := ""
60 | 			if key == "JavaScript" || key == "Flash" || key == "Video" {
61 | 				maliciousStr = " (potentially malicious)"
62 | 				malicious = true
63 | 			}
64 | 
65 | 			fmt.Printf("%s objects: %d%s\n", key, val, maliciousStr)
66 | 		}
67 | 
68 | 		if malicious {
69 | 			fmt.Println("\nFile contains potentially malicious objects!")
70 | 		} else {
71 | 			fmt.Println("\nFile is safe")
72 | 		}
73 | 	},
74 | 	Args: func(_ *cobra.Command, args []string) error {
75 | 		if len(args) < 1 {
76 | 			return errors.New("must provide the input file")
77 | 		}
78 | 
79 | 		return nil
80 | 	},
81 | }
82 | 
83 | func init() {
84 | 	rootCmd.AddCommand(infoCmd)
85 | 
86 | 	infoCmd.Flags().StringP("password", "p", "", "input file password")
87 | }
88 | 


--------------------------------------------------------------------------------
/internal/cli/form_export.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 	"os"
12 | 
13 | 	"github.com/spf13/cobra"
14 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
15 | )
16 | 
17 | const formExportCmdDesc = `Export JSON representation of form fields.
18 | 
19 | By default, the resulting JSON content is printed to STDOUT. The output can be
20 | saved to a file by using the --output-file flag (see the Examples section).
21 | 
22 | The exported JSON template can be used to fill PDF forms using the
23 | "form fill" command.
24 | `
25 | 
26 | var formExportCmdExample = fmt.Sprintf("%s\n%s\n%s\n",
27 | 	fmt.Sprintf("%s form export in_file.pdf", appName),
28 | 	fmt.Sprintf("%s form export in_file.pdf > out_file.json", appName),
29 | 	fmt.Sprintf("%s form export -o out_file.json in_file.pdf", appName),
30 | )
31 | 
32 | // formExportCmd represents the form export command.
33 | var formExportCmd = &cobra.Command{
34 | 	Use:                   "export [FLAG]... INPUT_FILE",
35 | 	Short:                 "Export form fields as JSON",
36 | 	Long:                  formExportCmdDesc,
37 | 	Example:               formExportCmdExample,
38 | 	DisableFlagsInUseLine: true,
39 | 	Run: func(cmd *cobra.Command, args []string) {
40 | 		// Parse input parameters.
41 | 		inputPath := args[0]
42 | 		outputPath, _ := cmd.Flags().GetString("output-file")
43 | 
44 | 		// Export form fields.
45 | 		json, err := pdf.FormExport(inputPath)
46 | 		if err != nil {
47 | 			printErr("Could not export form fields: %s\n", err)
48 | 			return
49 | 		}
50 | 		if json == "" {
51 | 			fmt.Println("Could not find any form fields to export.")
52 | 			return
53 | 		}
54 | 
55 | 		// Write exported data.
56 | 		if outputPath == "" {
57 | 			fmt.Println(json)
58 | 			return
59 | 		}
60 | 
61 | 		// #nosec G306
62 | 		err = os.WriteFile(outputPath, []byte(json), 0644)
63 | 		if err != nil {
64 | 			printErr("Could not export form fields: %s\n", err)
65 | 		}
66 | 
67 | 		fmt.Printf("Form fields successfully exported from %s\n", inputPath)
68 | 		fmt.Printf("Output file saved to %s\n", outputPath)
69 | 	},
70 | 	Args: func(_ *cobra.Command, args []string) error {
71 | 		if len(args) < 1 {
72 | 			return errors.New("must provide the input file")
73 | 		}
74 | 
75 | 		return nil
76 | 	},
77 | }
78 | 
79 | func init() {
80 | 	formCmd.AddCommand(formExportCmd)
81 | 
82 | 	formExportCmd.Flags().StringP("output-file", "o", "", "output file")
83 | }
84 | 


--------------------------------------------------------------------------------
/internal/cli/const.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"strings"
 11 | 
 12 | 	unicommon "github.com/unidoc/unipdf/v4/common"
 13 | 	unisecurity "github.com/unidoc/unipdf/v4/core/security"
 14 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 15 | )
 16 | 
// encryptAlgoMap maps the encryption mode names accepted by
// parseEncryptionMode to the corresponding unipdf encryption algorithms.
var encryptAlgoMap = map[string]unipdf.EncryptionAlgorithm{
	"rc4":    unipdf.RC4_128bit,
	"aes128": unipdf.AES_128bit,
	"aes256": unipdf.AES_256bit,
}
 22 | 
// logLevelMap maps the log level names accepted by parseLogLevel to the
// corresponding unipdf log levels. Keys are matched exactly (lowercase).
var logLevelMap = map[string]unicommon.LogLevel{
	"trace":   unicommon.LogLevelTrace,
	"debug":   unicommon.LogLevelDebug,
	"info":    unicommon.LogLevelInfo,
	"notice":  unicommon.LogLevelNotice,
	"warning": unicommon.LogLevelWarning,
	"error":   unicommon.LogLevelError,
}
 31 | 
 32 | var imageFormats = map[string]struct{}{
 33 | 	"jpeg": struct{}{},
 34 | 	"png":  struct{}{},
 35 | }
 36 | 
 37 | func parseEncryptionMode(mode string) (unipdf.EncryptionAlgorithm, error) {
 38 | 	algo, ok := encryptAlgoMap[mode]
 39 | 	if !ok {
 40 | 		return 0, errors.New("invalid encryption mode")
 41 | 	}
 42 | 
 43 | 	return algo, nil
 44 | }
 45 | 
 46 | func parseLogLevel(levelStr string) (unicommon.LogLevel, error) {
 47 | 	levelStr = strings.TrimSpace(levelStr)
 48 | 	if levelStr == "" {
 49 | 		return unicommon.LogLevelError, nil
 50 | 	}
 51 | 
 52 | 	level, ok := logLevelMap[levelStr]
 53 | 	if !ok {
 54 | 		return 0, errors.New("invalid log level")
 55 | 	}
 56 | 
 57 | 	return level, nil
 58 | }
 59 | 
 60 | func parsePermissionList(permStr string) (unisecurity.Permissions, error) {
 61 | 	permStr = removeSpaces(permStr)
 62 | 	if permStr == "" {
 63 | 		return 0, nil
 64 | 	}
 65 | 	permList := strings.Split(permStr, ",")
 66 | 
 67 | 	perms := unisecurity.Permissions(0)
 68 | 	for _, perm := range permList {
 69 | 		if perm == "" {
 70 | 			continue
 71 | 		}
 72 | 
 73 | 		switch perm {
 74 | 		case "all":
 75 | 			perms = unisecurity.PermOwner
 76 | 		case "none":
 77 | 			perms = unisecurity.Permissions(0)
 78 | 		case "print-low-res":
 79 | 			perms |= unisecurity.PermPrinting
 80 | 		case "print-high-res":
 81 | 			perms |= unisecurity.PermFullPrintQuality
 82 | 		case "modify":
 83 | 			perms |= unisecurity.PermModify
 84 | 		case "extract-graphics":
 85 | 			perms |= unisecurity.PermExtractGraphics
 86 | 		case "annotate":
 87 | 			perms |= unisecurity.PermAnnotate
 88 | 		case "fill-forms":
 89 | 			perms |= unisecurity.PermFillForms
 90 | 		case "rotate":
 91 | 			perms |= unisecurity.PermRotateInsert
 92 | 
 93 | 		default:
 94 | 			return 0, errors.New("invalid permission")
 95 | 		}
 96 | 	}
 97 | 
 98 | 	return perms, nil
 99 | }
100 | 


--------------------------------------------------------------------------------
/internal/cli/grayscale.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
// grayscaleCmdDesc is the long help text of the grayscale command.
const grayscaleCmdDesc = `Converts the input file to grayscale.

The command can be configured to convert only the specified
pages to grayscale using the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be converted to grayscale, while
page number 5 is skipped.
`
25 | 
// grayscaleCmdExample holds the usage examples shown in the help of the
// grayscale command, one invocation per line, each prefixed with appName.
var grayscaleCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n",
	fmt.Sprintf("%s grayscale input_file.pdf", appName),
	fmt.Sprintf("%s grayscale -o output_file input_file.pdf", appName),
	fmt.Sprintf("%s grayscale -o output_file -P 1-3 input_file.pdf", appName),
	fmt.Sprintf("%s grayscale -o output_file -P 1-3 -p pass input_file.pdf", appName),
)
32 | 
33 | // grayscaleCmd represents the grayscale command.
34 | var grayscaleCmd = &cobra.Command{
35 | 	Use:                   "grayscale [FLAG]... INPUT_FILE",
36 | 	Short:                 "Convert PDF to grayscale",
37 | 	Long:                  grayscaleCmdDesc,
38 | 	Example:               grayscaleCmdExample,
39 | 	DisableFlagsInUseLine: true,
40 | 	Run: func(cmd *cobra.Command, args []string) {
41 | 		// Parse input parameters.
42 | 		inputPath := args[0]
43 | 		password, _ := cmd.Flags().GetString("password")
44 | 
45 | 		// Parse output file.
46 | 		outputPath, _ := cmd.Flags().GetString("output-file")
47 | 		if outputPath == "" {
48 | 			outputPath = inputPath
49 | 		}
50 | 
51 | 		// Parse page range.
52 | 		pageRange, _ := cmd.Flags().GetString("pages")
53 | 
54 | 		pages, err := parsePageRange(pageRange)
55 | 		if err != nil {
56 | 			printUsageErr(cmd, "Invalid page range specified\n")
57 | 		}
58 | 
59 | 		// Convert file to grayscale.
60 | 		err = pdf.Grayscale(inputPath, outputPath, password, pages)
61 | 		if err != nil {
62 | 			printErr("Could not convert input file to grayscale: %s\n", err)
63 | 		}
64 | 
65 | 		fmt.Printf("Successfully converted %s to grayscale\n", inputPath)
66 | 		fmt.Printf("Output file saved to %s\n", outputPath)
67 | 	},
68 | 	Args: func(_ *cobra.Command, args []string) error {
69 | 		if len(args) < 1 {
70 | 			return errors.New("must provide the input file")
71 | 		}
72 | 
73 | 		return nil
74 | 	},
75 | }
76 | 
77 | func init() {
78 | 	rootCmd.AddCommand(grayscaleCmd)
79 | 
80 | 	grayscaleCmd.Flags().StringP("output-file", "o", "", "output file")
81 | 	grayscaleCmd.Flags().StringP("password", "p", "", "input file password")
82 | 	grayscaleCmd.Flags().StringP("pages", "P", "", "pages to convert to grayscale")
83 | }
84 | 


--------------------------------------------------------------------------------
/internal/cli/explode.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
// explodeCmdDesc is the long help text of the explode command.
const explodeCmdDesc = `Splits the input file into separate single page PDF files.

The resulting PDF files are saved in a ZIP archive at the location specified
by the --output-file parameter. If no output file is specified, the ZIP file
is saved in the same directory as the input file.

The command can be configured to extract only the specified pages using
the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be extracted, while page
number 5 is skipped.
`
29 | 
// explodeCmdExample holds the usage examples shown in the help of the
// explode command, one invocation per line, each prefixed with appName.
var explodeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n",
	fmt.Sprintf("%s explode input_file.pdf", appName),
	fmt.Sprintf("%s explode -o pages.zip input_file.pdf", appName),
	fmt.Sprintf("%s explode -o pages.zip -P 1-3 input_file.pdf", appName),
	fmt.Sprintf("%s explode -o pages.zip -P 1-3 -p pass input_file.pdf", appName),
)
36 | 
37 | // explodeCmd represents the explode command.
38 | var explodeCmd = &cobra.Command{
39 | 	Use:                   "explode [FLAG]... INPUT_FILE",
40 | 	Short:                 "Explodes the input file into separate single page PDF files",
41 | 	Long:                  explodeCmdDesc,
42 | 	Example:               explodeCmdExample,
43 | 	DisableFlagsInUseLine: true,
44 | 	Run: func(cmd *cobra.Command, args []string) {
45 | 		// Parse input parameters.
46 | 		inputPath := args[0]
47 | 		password, _ := cmd.Flags().GetString("password")
48 | 		outputPath, _ := cmd.Flags().GetString("output-file")
49 | 
50 | 		// Parse page range.
51 | 		pageRange, _ := cmd.Flags().GetString("pages")
52 | 
53 | 		pages, err := parsePageRange(pageRange)
54 | 		if err != nil {
55 | 			printUsageErr(cmd, "Invalid page range specified\n")
56 | 		}
57 | 
58 | 		// Explode file.
59 | 		outputPath, err = pdf.Explode(inputPath, outputPath, password, pages)
60 | 		if err != nil {
61 | 			printErr("Could not explode input file: %s\n", err)
62 | 			return
63 | 		}
64 | 
65 | 		fmt.Printf("File %s successfully exploded\n", inputPath)
66 | 		fmt.Printf("Output file saved to %s\n", outputPath)
67 | 	},
68 | 	Args: func(_ *cobra.Command, args []string) error {
69 | 		if len(args) < 1 {
70 | 			return errors.New("must provide the input file")
71 | 		}
72 | 
73 | 		return nil
74 | 	},
75 | }
76 | 
77 | func init() {
78 | 	rootCmd.AddCommand(explodeCmd)
79 | 
80 | 	explodeCmd.Flags().StringP("password", "p", "", "input file password")
81 | 	explodeCmd.Flags().StringP("output-file", "o", "", "output file")
82 | 	explodeCmd.Flags().StringP("pages", "P", "", "pages to extract from the input file")
83 | }
84 | 


--------------------------------------------------------------------------------
/internal/cli/watermark.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 
12 | 	"github.com/spf13/cobra"
13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
14 | )
15 | 
// watermarkCmdDesc is the long help text of the watermark command.
const watermarkCmdDesc = `Add watermark to PDF files.

The command can be configured to apply the watermark image only to the specified
pages using the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Watermark will only be applied to pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page
number 5 is skipped.
`
25 | 
26 | var watermarkCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n",
27 | 	fmt.Sprintf("%s watermark input_file.pdf watermark.png", appName),
28 | 	fmt.Sprintf("%s watermark -o output file.png input_file.pdf watermark.png", appName),
29 | 	fmt.Sprintf("%s watermark -o output file.png -P 1-3 input_file.pdf watermark.png", appName),
30 | 	fmt.Sprintf("%s watermark -o output file.png -P 1-3 -p pass input_file.pdf watermark.png", appName),
31 | )
32 | 
33 | // watermarkCmd represents the watermark command.
34 | var watermarkCmd = &cobra.Command{
35 | 	Use:                   "watermark [FLAG]... INPUT_FILE WATERMARK_IMAGE",
36 | 	Short:                 "Add watermark to PDF files",
37 | 	Long:                  watermarkCmdDesc,
38 | 	Example:               watermarkCmdExample,
39 | 	DisableFlagsInUseLine: true,
40 | 	Run: func(cmd *cobra.Command, args []string) {
41 | 		// Parse input parameters.
42 | 		inputPath := args[0]
43 | 		watermarkPath := args[1]
44 | 		password, _ := cmd.Flags().GetString("password")
45 | 
46 | 		// Parse output file.
47 | 		outputPath, _ := cmd.Flags().GetString("output-file")
48 | 		if outputPath == "" {
49 | 			outputPath = inputPath
50 | 		}
51 | 
52 | 		// Parse page range.
53 | 		pageRange, _ := cmd.Flags().GetString("pages")
54 | 
55 | 		pages, err := parsePageRange(pageRange)
56 | 		if err != nil {
57 | 			printUsageErr(cmd, "Invalid page range specified\n")
58 | 		}
59 | 
60 | 		// Apply watermark.
61 | 		err = pdf.Watermark(inputPath, outputPath, watermarkPath, password, pages)
62 | 		if err != nil {
63 | 			printErr("Could not apply watermark to the input file: %s\n", err)
64 | 		}
65 | 
66 | 		fmt.Printf("Watermark successfully applied to %s\n", inputPath)
67 | 		fmt.Printf("Output file saved to %s\n", outputPath)
68 | 	},
69 | 	Args: func(_ *cobra.Command, args []string) error {
70 | 		if len(args) < 2 {
71 | 			return errors.New("must provide the input file and the watermark image")
72 | 		}
73 | 
74 | 		return nil
75 | 	},
76 | }
77 | 
78 | func init() {
79 | 	rootCmd.AddCommand(watermarkCmd)
80 | 
81 | 	watermarkCmd.Flags().StringP("output-file", "o", "", "output file")
82 | 	watermarkCmd.Flags().StringP("password", "p", "", "input file password")
83 | 	watermarkCmd.Flags().StringP("pages", "P", "", "pages on which to add watermark")
84 | }
85 | 


--------------------------------------------------------------------------------
/internal/cli/rotate.go:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is subject to the terms and conditions defined in
 3 |  * file 'LICENSE.md', which is part of this source code package.
 4 |  */
 5 | 
 6 | package cli
 7 | 
 8 | import (
 9 | 	"errors"
10 | 	"fmt"
11 | 	"strconv"
12 | 
13 | 	"github.com/spf13/cobra"
14 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
15 | )
16 | 
// rotateCmdDesc is the long help text of the rotate command.
const rotateCmdDesc = `Rotate PDF file pages by a specified angle.
The angle argument is specified in degrees and it must be a multiple of 90.

The command can be configured to rotate only the specified pages
using the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rotated, while
page number 5 is skipped.
`
27 | 
// rotateCmdExample holds the usage examples shown in the help of the rotate
// command, one invocation per line, each prefixed with appName. The second
// example places "--" before the arguments so that the negative angle is not
// parsed as a flag.
var rotateCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n",
	fmt.Sprintf("%s rotate input_file.pdf 90", appName),
	fmt.Sprintf("%s rotate -- input_file.pdf -270", appName),
	fmt.Sprintf("%s rotate -o output_file.pdf input_file.pdf 90", appName),
	fmt.Sprintf("%s rotate -o output_file.pdf -P 1-3 input_file.pdf 90", appName),
	fmt.Sprintf("%s rotate -o output_file.pdf -P 1-3 -p pass input_file.pdf 90", appName),
)
35 | 
36 | // rotateCmd represents the rotate command.
37 | var rotateCmd = &cobra.Command{
38 | 	Use:                   "rotate [FLAG]... INPUT_FILE ANGLE",
39 | 	Short:                 "Rotate PDF file pages",
40 | 	Long:                  rotateCmdDesc,
41 | 	Example:               rotateCmdExample,
42 | 	DisableFlagsInUseLine: true,
43 | 	Run: func(cmd *cobra.Command, args []string) {
44 | 		// Parse input parameters.
45 | 		inputPath := args[0]
46 | 		password, _ := cmd.Flags().GetString("password")
47 | 
48 | 		// Parse angle parameter.
49 | 		angle, err := strconv.Atoi(args[1])
50 | 		if err != nil {
51 | 			printUsageErr(cmd, "Invalid rotation angle specified\n")
52 | 		}
53 | 
54 | 		// Parse output file.
55 | 		outputPath, _ := cmd.Flags().GetString("output-file")
56 | 		if outputPath == "" {
57 | 			outputPath = inputPath
58 | 		}
59 | 
60 | 		// Parse page range.
61 | 		pageRange, _ := cmd.Flags().GetString("pages")
62 | 
63 | 		pages, err := parsePageRange(pageRange)
64 | 		if err != nil {
65 | 			printUsageErr(cmd, "Invalid page range specified\n")
66 | 		}
67 | 
68 | 		// Rotate file.
69 | 		outputPath, err = pdf.Rotate(inputPath, outputPath, angle, password, pages)
70 | 		if err != nil {
71 | 			printErr("Could not rotate input file pages: %s\n", err)
72 | 		}
73 | 
74 | 		fmt.Printf("Successfully rotated %s\n", inputPath)
75 | 		fmt.Printf("Output file saved to %s\n", outputPath)
76 | 	},
77 | 	Args: func(_ *cobra.Command, args []string) error {
78 | 		if len(args) < 2 {
79 | 			return errors.New("must provide the input file and the rotation angle")
80 | 		}
81 | 
82 | 		return nil
83 | 	},
84 | }
85 | 
86 | func init() {
87 | 	rootCmd.AddCommand(rotateCmd)
88 | 
89 | 	rotateCmd.Flags().StringP("pages", "P", "", "pages to rotate")
90 | 	rotateCmd.Flags().StringP("output-file", "o", "", "putput file")
91 | 	rotateCmd.Flags().StringP("password", "p", "", "input file password")
92 | }
93 | 


--------------------------------------------------------------------------------
/pkg/pdf/optimize.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"os"
 10 | 	"time"
 11 | 
 12 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 13 | 	unioptimize "github.com/unidoc/unipdf/v4/model/optimize"
 14 | )
 15 | 
// OptimizeOpts represents the options used for optimizing PDF files.
// Optimize forwards both fields unchanged to the unipdf optimizer options.
type OptimizeOpts struct {
	// ImageQuality specifies the quality of the optimized images.
	// Optimize uses 100 when it is called without options.
	ImageQuality int

	// ImagePPI specifies the maximum pixels per inch of the optimized images.
	// It is passed to the optimizer as its upper PPI limit.
	ImagePPI float64
}
 24 | 
// OptimizeResult contains information about the optimization process, as
// returned by Optimize.
type OptimizeResult struct {
	// Original contains information about the original file
	// (the input path and its size before optimization).
	Original FileStat

	// Optimized contains information about the optimized file
	// (the output path and its size after optimization).
	Optimized FileStat

	// Duration specifies the optimization processing time in nanoseconds.
	// It is measured from the start of the Optimize call.
	Duration time.Duration
}
 36 | 
 37 | // Optimize optimizes the PDF file specified by the inputPath parameter, using
 38 | // the provided options and saves the result at the location specified by the
 39 | // outputPath parameter. A password can be specified for encrypted input files.
 40 | func Optimize(inputPath, outputPath, password string, opts *OptimizeOpts) (*OptimizeResult, error) {
 41 | 	// Initialize starting time.
 42 | 	start := time.Now()
 43 | 
 44 | 	// Get input file stat.
 45 | 	inputFileInfo, err := os.Stat(inputPath)
 46 | 	if err != nil {
 47 | 		return nil, err
 48 | 	}
 49 | 
 50 | 	// Read input file.
 51 | 	r, _, _, _, err := readPDF(inputPath, password)
 52 | 	if err != nil {
 53 | 		return nil, err
 54 | 	}
 55 | 
 56 | 	// Copy input file contents to the output file.
 57 | 	w := unipdf.NewPdfWriter()
 58 | 	if err = readerToWriter(r, &w, nil); err != nil {
 59 | 		return nil, err
 60 | 	}
 61 | 
 62 | 	// Add optimizer.
 63 | 	if opts == nil {
 64 | 		opts = &OptimizeOpts{
 65 | 			ImageQuality: 100,
 66 | 		}
 67 | 	}
 68 | 
 69 | 	w.SetOptimizer(unioptimize.New(unioptimize.Options{
 70 | 		CombineDuplicateDirectObjects:   true,
 71 | 		CombineIdenticalIndirectObjects: true,
 72 | 		CombineDuplicateStreams:         true,
 73 | 		CompressStreams:                 true,
 74 | 		UseObjectStreams:                true,
 75 | 		ImageQuality:                    opts.ImageQuality,
 76 | 		ImageUpperPPI:                   opts.ImagePPI,
 77 | 	}))
 78 | 
 79 | 	// Write output file.
 80 | 	safe := inputPath == outputPath
 81 | 	if err = writePDF(outputPath, &w, safe); err != nil {
 82 | 		return nil, err
 83 | 	}
 84 | 
 85 | 	// Get output file stat.
 86 | 	outputFileInfo, err := os.Stat(outputPath)
 87 | 	if err != nil {
 88 | 		return nil, err
 89 | 	}
 90 | 
 91 | 	return &OptimizeResult{
 92 | 		Original: FileStat{
 93 | 			Name: inputPath,
 94 | 			Size: inputFileInfo.Size(),
 95 | 		},
 96 | 		Optimized: FileStat{
 97 | 			Name: outputPath,
 98 | 			Size: outputFileInfo.Size(),
 99 | 		},
100 | 		Duration: time.Since(start),
101 | 	}, nil
102 | }
103 | 


--------------------------------------------------------------------------------
/pkg/pdf/render.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"archive/zip"
 10 | 	"fmt"
 11 | 	"image"
 12 | 	"image/jpeg"
 13 | 	"image/png"
 14 | 	"io"
 15 | 	"os"
 16 | 	"path/filepath"
 17 | 	"strings"
 18 | 
 19 | 	"github.com/unidoc/unipdf/v4/render"
 20 | )
 21 | 
// RenderOpts represents the options used for rendering PDF pages to images.
type RenderOpts struct {
	// ImageFormat specifies the file format of the rendered images.
	// Supported formats: jpeg, png. Render returns an error for any
	// other value.
	ImageFormat string

	// ImageQuality specifies the quality of the rendered images.
	// Only applies to rendered JPEG images. Render uses 100 in place of
	// values outside the 0-100 range.
	ImageQuality int
}
 32 | 
 33 | // Render renders the pages of the PDF file specified by the inputPath parameter
 34 | // to image targets. The rendered images are saved as a ZIP archive at the
 35 | // location specified by the outputPath parameter.
 36 | // A password can be passed in, if the input file is encrypted.
 37 | // If the pages parameter is nil or an empty slice, all pages are rendered.
 38 | func Render(inputPath, outputPath, password string, pages []int, opts *RenderOpts) (string, error) {
 39 | 	// Use input file directory if no output path is specified.
 40 | 	dir, inputFile := filepath.Split(inputPath)
 41 | 
 42 | 	inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile))
 43 | 	if outputPath == "" {
 44 | 		outputPath = filepath.Join(dir, inputFile+".zip")
 45 | 	}
 46 | 
 47 | 	// Read input file.
 48 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
 49 | 	if err != nil {
 50 | 		return "", err
 51 | 	}
 52 | 
 53 | 	// Extract pages.
 54 | 	if len(pages) == 0 {
 55 | 		pages = createPageRange(pageCount)
 56 | 	}
 57 | 
 58 | 	// Create render options, if none are specified.
 59 | 	if opts == nil {
 60 | 		opts = &RenderOpts{ImageFormat: "jpeg", ImageQuality: 100}
 61 | 	}
 62 | 	if opts.ImageQuality < 0 || opts.ImageQuality > 100 {
 63 | 		opts.ImageQuality = 100
 64 | 	}
 65 | 
 66 | 	// Create image encode function.
 67 | 	var encodeFunc func(w io.Writer, img image.Image) error
 68 | 	imgExt := "jpg"
 69 | 
 70 | 	switch opts.ImageFormat {
 71 | 	case "jpeg":
 72 | 		encodeFunc = func(w io.Writer, img image.Image) error {
 73 | 			return jpeg.Encode(w, img, &jpeg.Options{Quality: opts.ImageQuality})
 74 | 		}
 75 | 	case "png":
 76 | 		imgExt = "png"
 77 | 		encodeFunc = func(w io.Writer, img image.Image) error {
 78 | 			return png.Encode(w, img)
 79 | 		}
 80 | 	default:
 81 | 		return "", fmt.Errorf("unsupported image format: %s", opts.ImageFormat)
 82 | 	}
 83 | 
 84 | 	// Prepare output archive.
 85 | 	outputFile, err := os.Create(outputPath)
 86 | 	if err != nil {
 87 | 		return "", err
 88 | 	}
 89 | 	defer outputFile.Close()
 90 | 
 91 | 	zw := zip.NewWriter(outputFile)
 92 | 
 93 | 	// Render pages.
 94 | 	device := render.NewImageDevice()
 95 | 	for _, numPage := range pages {
 96 | 		// Get page.
 97 | 		page, err := r.GetPage(numPage)
 98 | 		if err != nil {
 99 | 			return "", err
100 | 		}
101 | 
102 | 		// Render page to image.
103 | 		img, err := device.Render(page)
104 | 		if err != nil {
105 | 			return "", err
106 | 		}
107 | 
108 | 		// Add rendered image to zip file.
109 | 		file, err := zw.Create(fmt.Sprintf("%s_%d.%s", inputFile, numPage, imgExt))
110 | 		if err != nil {
111 | 			return "", err
112 | 		}
113 | 		if err := encodeFunc(file, img); err != nil {
114 | 			return "", err
115 | 		}
116 | 	}
117 | 
118 | 	return outputPath, zw.Close()
119 | }
120 | 


--------------------------------------------------------------------------------
/internal/cli/extract_images.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 
 12 | 	"github.com/spf13/cobra"
 13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 14 | 
 15 | 	uniextractor "github.com/unidoc/unipdf/v4/extractor"
 16 | )
 17 | 
// extractImagesCmdDesc is the long help text of the extract images command.
const extractImagesCmdDesc = `Extracts PDF images.

The images are extracted in a ZIP file and saved at the destination specified
by the --output-file parameter. If no output file is specified, the ZIP
archive is saved in the same directory as the input file.

The command can be configured to extract images only from the specified
pages using the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Images will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page
number 5 is skipped.
`
 31 | 
// extractImagesCmdExample holds the usage examples shown in the help of the
// extract images command, one invocation per line, each prefixed with appName.
var extractImagesCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n",
	fmt.Sprintf("%s extract images input_file.pdf", appName),
	fmt.Sprintf("%s extract images -o images.zip input_file.pdf", appName),
	fmt.Sprintf("%s extract images -P 1-3 -p pass -o images.zip input_file.pdf", appName),
	fmt.Sprintf("%s extract images -P 1-3 -p pass -o images.zip -S input_file.pdf", appName),
)
 38 | 
 39 | // extractImagesCmd represents the extract images command.
 40 | var extractImagesCmd = &cobra.Command{
 41 | 	Use:                   "images [FLAG]... INPUT_FILE",
 42 | 	Short:                 "Extract PDF images",
 43 | 	Long:                  extractImagesCmdDesc,
 44 | 	Example:               extractImagesCmdExample,
 45 | 	DisableFlagsInUseLine: true,
 46 | 	Run: func(cmd *cobra.Command, args []string) {
 47 | 		// Parse input parameters.
 48 | 		inputPath := args[0]
 49 | 		password, _ := cmd.Flags().GetString("password")
 50 | 		outputPath, _ := cmd.Flags().GetString("output-file")
 51 | 
 52 | 		// Parse image extraction options.
 53 | 		includeSM, _ := cmd.Flags().GetBool("include-inline-stencil-masks")
 54 | 
 55 | 		extractOptions := &uniextractor.ImageExtractOptions{
 56 | 			IncludeInlineStencilMasks: includeSM,
 57 | 		}
 58 | 
 59 | 		// Parse page range.
 60 | 		pageRange, _ := cmd.Flags().GetString("pages")
 61 | 
 62 | 		pages, err := parsePageRange(pageRange)
 63 | 		if err != nil {
 64 | 			printUsageErr(cmd, "Invalid page range specified\n")
 65 | 		}
 66 | 
 67 | 		// Extract images.
 68 | 		outputPath, count, err := pdf.ExtractImages(
 69 | 			inputPath,
 70 | 			outputPath,
 71 | 			password,
 72 | 			pages,
 73 | 			extractOptions,
 74 | 		)
 75 | 		if err != nil {
 76 | 			printErr("Could not extract images: %s\n", err)
 77 | 			return
 78 | 		}
 79 | 
 80 | 		if count == 0 {
 81 | 			fmt.Printf("%s does not contain any images to extract\n", inputPath)
 82 | 		} else {
 83 | 			fmt.Printf("Images successfully extracted to %s\n", outputPath)
 84 | 		}
 85 | 	},
 86 | 	Args: func(_ *cobra.Command, args []string) error {
 87 | 		if len(args) < 1 {
 88 | 			return errors.New("must provide the input file")
 89 | 		}
 90 | 
 91 | 		return nil
 92 | 	},
 93 | }
 94 | 
 95 | func init() {
 96 | 	extractCmd.AddCommand(extractImagesCmd)
 97 | 
 98 | 	extractImagesCmd.Flags().StringP("password", "p", "", "input file password")
 99 | 	extractImagesCmd.Flags().StringP("output-file", "o", "", "output file")
100 | 	extractImagesCmd.Flags().StringP("pages", "P", "", "pages to extract images from")
101 | 	extractImagesCmd.Flags().BoolP("include-inline-stencil-masks", "S", false, "include inline stencil masks")
102 | }
103 | 


--------------------------------------------------------------------------------
/internal/cli/encrypt.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 
 12 | 	"github.com/spf13/cobra"
 13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 14 | )
 15 | 
// encryptCmdDesc is the long help text of the encrypt command. It lists the
// values accepted by the --mode and --perms flags.
const encryptCmdDesc = `Encrypts the input file using the specified owner password.

The algorithm used for the file encryption is configurable.

Supported encryption algorithms:
  - rc4 (default)
  - aes128
  - aes256

A user password along with a set of permissions can also be specified.

Supported user permissions:
  - all (default)
  - none
  - print-low-res
  - print-high-res
  - modify
  - extract
  - extract-graphics
  - annotate
  - fill-forms
  - rotate
`
 39 | 
// encryptCmdExample holds the usage examples shown in the help of the encrypt
// command, one invocation per line, each prefixed with appName.
var encryptCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n",
	fmt.Sprintf("%s encrypt input_file.pdf owner_pass", appName),
	fmt.Sprintf("%s encrypt input_file.pdf owner_pass user_pass", appName),
	fmt.Sprintf("%s encrypt -o output_file.pdf -m aes256 input_file.pdf owner_pass user_pass", appName),
	fmt.Sprintf("%s encrypt -o output_file.pdf -P none -m aes256 input_file.pdf owner_pass user_pass", appName),
	fmt.Sprintf("%s encrypt -o output_file.pdf -P modify,annotate -m aes256 input_file.pdf owner_pass user_pass", appName),
)
 47 | 
 48 | // encryptCmd represents the encrypt command.
 49 | var encryptCmd = &cobra.Command{
 50 | 	Use:                   "encrypt [FLAG]... INPUT_FILE OWNER_PASSWORD [USER_PASSWORD]",
 51 | 	Short:                 "Encrypt PDF files",
 52 | 	Long:                  encryptCmdDesc,
 53 | 	Example:               encryptCmdExample,
 54 | 	DisableFlagsInUseLine: true,
 55 | 	Run: func(cmd *cobra.Command, args []string) {
 56 | 		// Parse input parameters.
 57 | 		inputPath := args[0]
 58 | 		ownerPassword := args[1]
 59 | 
 60 | 		// Parse user password.
 61 | 		var userPassword string
 62 | 		if len(args) > 2 {
 63 | 			userPassword = args[2]
 64 | 		}
 65 | 
 66 | 		// Parse output file.
 67 | 		outputPath, _ := cmd.Flags().GetString("output-file")
 68 | 		if outputPath == "" {
 69 | 			outputPath = inputPath
 70 | 		}
 71 | 
 72 | 		// Parse encryption mode.
 73 | 		mode, _ := cmd.Flags().GetString("mode")
 74 | 
 75 | 		algorithm, err := parseEncryptionMode(mode)
 76 | 		if err != nil {
 77 | 			printUsageErr(cmd, "Invalid encryption mode\n")
 78 | 		}
 79 | 
 80 | 		// Parse user permissions.
 81 | 		permList, _ := cmd.Flags().GetString("perms")
 82 | 
 83 | 		perms, err := parsePermissionList(permList)
 84 | 		if err != nil {
 85 | 			printUsageErr(cmd, "Invalid user permission values\n")
 86 | 		}
 87 | 
 88 | 		opts := &pdf.EncryptOpts{
 89 | 			OwnerPassword: ownerPassword,
 90 | 			UserPassword:  userPassword,
 91 | 			Algorithm:     algorithm,
 92 | 			Permissions:   perms,
 93 | 		}
 94 | 
 95 | 		// Encrypt file.
 96 | 		if err := pdf.Encrypt(inputPath, outputPath, opts); err != nil {
 97 | 			printErr("Could not encrypt file: %s\n", err)
 98 | 		}
 99 | 
100 | 		fmt.Printf("File %s successfully encrypted\n", inputPath)
101 | 		fmt.Printf("Output file saved to %s\n", outputPath)
102 | 	},
103 | 	Args: func(_ *cobra.Command, args []string) error {
104 | 		if len(args) < 2 {
105 | 			return errors.New("must provide the input file and the owner password")
106 | 		}
107 | 
108 | 		return nil
109 | 	},
110 | }
111 | 
112 | func init() {
113 | 	rootCmd.AddCommand(encryptCmd)
114 | 
115 | 	encryptCmd.Flags().StringP("output-file", "o", "", "output file")
116 | 	encryptCmd.Flags().StringP("perms", "P", "all", "user permissions")
117 | 	encryptCmd.Flags().StringP("mode", "m", "rc4", "algorithm to use for encrypting the file")
118 | }
119 | 


--------------------------------------------------------------------------------
/internal/cli/render.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 
 12 | 	"github.com/spf13/cobra"
 13 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 14 | )
 15 | 
// renderCmdDesc is the long help text of the render command.
const renderCmdDesc = `Renders the pages of the input file to image targets.

The rendered image files are saved in a ZIP archive at the location specified
by the --output-file parameter. If no output file is specified, the ZIP file
is saved in the same directory as the input file.

The command can be configured to render only the specified pages using
the --pages parameter.

An example of the pages parameter: 1-3,4,6-7
Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rendered, while page
number 5 is skipped.

The format of the rendered image files can be specified using
the --image-format flag (default jpeg).

Supported image formats:
  - jpeg (default)
  - png

The quality of the rendered image files can be configured through
the --image-quality flag (default 100). Only applies to JPEG images.
`
 39 | 
 40 | var renderCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n",
 41 | 	fmt.Sprintf("%s render input_file.pdf", appName),
 42 | 	fmt.Sprintf("%s render -o images.zip input_file.pdf", appName),
 43 | 	fmt.Sprintf("%s render -o images.zip -P 1-3 input_file.pdf", appName),
 44 | 	fmt.Sprintf("%s render -o images.zip -P 1-3 -p pass input_file.pdf", appName),
 45 | 	fmt.Sprintf("%s render -o images.zip -P 1-3 -p pass -f jpeg -q 100 input_file.pdf", appName),
 46 | )
 47 | 
 48 | // renderCmd represents the render command.
 49 | var renderCmd = &cobra.Command{
 50 | 	Use:                   "render [FLAG]... INPUT_FILE",
 51 | 	Short:                 "Render PDF pages to images",
 52 | 	Long:                  renderCmdDesc,
 53 | 	Example:               renderCmdExample,
 54 | 	DisableFlagsInUseLine: true,
 55 | 	Run: func(cmd *cobra.Command, args []string) {
 56 | 		// Parse input parameters.
 57 | 		inputPath := args[0]
 58 | 		password, _ := cmd.Flags().GetString("password")
 59 | 		outputPath, _ := cmd.Flags().GetString("output-file")
 60 | 
 61 | 		// Parse page range.
 62 | 		pageRange, _ := cmd.Flags().GetString("pages")
 63 | 
 64 | 		pages, err := parsePageRange(pageRange)
 65 | 		if err != nil {
 66 | 			printUsageErr(cmd, "Invalid page range specified\n")
 67 | 		}
 68 | 
 69 | 		// Parse render options.
 70 | 		imageFormat, _ := cmd.Flags().GetString("image-format")
 71 | 		if _, ok := imageFormats[imageFormat]; !ok {
 72 | 			imageFormat = "jpeg"
 73 | 		}
 74 | 
 75 | 		imageQuality, err := cmd.Flags().GetInt("image-quality")
 76 | 		if err != nil {
 77 | 			imageQuality = 100
 78 | 		}
 79 | 
 80 | 		opts := &pdf.RenderOpts{
 81 | 			ImageFormat:  imageFormat,
 82 | 			ImageQuality: imageQuality,
 83 | 		}
 84 | 
 85 | 		// Render file.
 86 | 		outputPath, err = pdf.Render(inputPath, outputPath, password, pages, opts)
 87 | 		if err != nil {
 88 | 			printErr("Could not render input file: %s\n", err)
 89 | 			return
 90 | 		}
 91 | 
 92 | 		fmt.Printf("File %s successfully rendered\n", inputPath)
 93 | 		fmt.Printf("Output file saved to %s\n", outputPath)
 94 | 	},
 95 | 	Args: func(_ *cobra.Command, args []string) error {
 96 | 		if len(args) < 1 {
 97 | 			return errors.New("must provide the input file")
 98 | 		}
 99 | 
100 | 		return nil
101 | 	},
102 | }
103 | 
104 | func init() {
105 | 	rootCmd.AddCommand(renderCmd)
106 | 
107 | 	renderCmd.Flags().StringP("password", "p", "", "input file password")
108 | 	renderCmd.Flags().StringP("output-file", "o", "", "output file")
109 | 	renderCmd.Flags().StringP("pages", "P", "", "pages to render from the input file")
110 | 	renderCmd.Flags().StringP("image-format", "f", "jpeg", "format of the output images")
111 | 	renderCmd.Flags().IntP("image-quality", "q", 100, "quality of the output images")
112 | }
113 | 


--------------------------------------------------------------------------------
/pkg/pdf/form.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"github.com/unidoc/unipdf/v4/annotator"
 10 | 	"github.com/unidoc/unipdf/v4/fdf"
 11 | 	"github.com/unidoc/unipdf/v4/fjson"
 12 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 13 | )
 14 | 
 15 | // FormExport exports all form field values from the PDF file specified
 16 | // by the inputPath parameters, as JSON.
 17 | func FormExport(inputPath string) (string, error) {
 18 | 	fieldData, err := fjson.LoadFromPDFFile(inputPath)
 19 | 	if err != nil {
 20 | 		return "", err
 21 | 	}
 22 | 	if fieldData == nil {
 23 | 		return "", nil
 24 | 	}
 25 | 
 26 | 	return fieldData.JSON()
 27 | }
 28 | 
 29 | // FormFillJSON fills the form field values from the PDF file specified by the
 30 | // inputPath parameter, using the values from the JSON file specified by the
 31 | // jsonPath parameter. The output PDF file is saved at the location specified
 32 | // by the outputPath parameter. The output file form annotations can be
 33 | // flattened by using the flatten parameter.
 34 | // A password can be specified for encrypted input files.
 35 | func FormFillJSON(inputPath, jsonPath, outputPath, password string, flatten bool) error {
 36 | 	// Read JSON field data.
 37 | 	fieldData, err := fjson.LoadFromJSONFile(jsonPath)
 38 | 	if err != nil {
 39 | 		return err
 40 | 	}
 41 | 
 42 | 	return formFill(inputPath, fieldData, outputPath, password, flatten)
 43 | }
 44 | 
 45 | // FormFillFDF fills the form field values from the PDF file specified by the
 46 | // inputPath parameter, using the values from the FDF file specified by the
 47 | // fdfPath parameter. The output PDF file is saved at the location specified
 48 | // by the outputPath parameter. The output file form annotations can be
 49 | // flattened by using the flatten parameter.
 50 | // A password can be specified for encrypted input files.
 51 | func FormFillFDF(inputPath, fdfPath, outputPath, password string, flatten bool) error {
 52 | 	// Read field data.
 53 | 	fieldData, err := fdf.LoadFromPath(fdfPath)
 54 | 	if err != nil {
 55 | 		return err
 56 | 	}
 57 | 
 58 | 	return formFill(inputPath, fieldData, outputPath, password, flatten)
 59 | }
 60 | 
 61 | // FormFlatten flattens all the form annotation from the PDF file specified by
 62 | // the inputPath parameter. The output PDF file is saved at the location
 63 | // specified by the outputPath parameter.
 64 | // A password can be specified for encrypted input files.
 65 | func FormFlatten(inputPath, outputPath, password string) error {
 66 | 	// Read input file.
 67 | 	r, _, _, _, err := readPDF(inputPath, password)
 68 | 	if err != nil {
 69 | 		return err
 70 | 	}
 71 | 
 72 | 	// Flatten form.
 73 | 	fieldAppearance := annotator.FieldAppearance{
 74 | 		OnlyIfMissing: true,
 75 | 	}
 76 | 
 77 | 	if err = r.FlattenFields(true, fieldAppearance); err != nil {
 78 | 		return err
 79 | 	}
 80 | 	r.AcroForm = nil
 81 | 
 82 | 	// Copy input file contents.
 83 | 	w := unipdf.NewPdfWriter()
 84 | 	if err := readerToWriter(r, &w, nil); err != nil {
 85 | 		return err
 86 | 	}
 87 | 
 88 | 	// Save output file.
 89 | 	safe := inputPath == outputPath
 90 | 	return writePDF(outputPath, &w, safe)
 91 | }
 92 | 
 93 | func formFill(inputPath string, provider unipdf.FieldValueProvider, outputPath, password string, flatten bool) error {
 94 | 	// Read input file.
 95 | 	r, _, _, _, err := readPDF(inputPath, password)
 96 | 	if err != nil {
 97 | 		return err
 98 | 	}
 99 | 
100 | 	// Populate the form data.
101 | 	if err = r.AcroForm.Fill(provider); err != nil {
102 | 		return err
103 | 	}
104 | 
105 | 	// Flatten form.
106 | 	if flatten {
107 | 		fieldAppearance := annotator.FieldAppearance{
108 | 			OnlyIfMissing:        true,
109 | 			RegenerateTextFields: true,
110 | 		}
111 | 
112 | 		if err = r.FlattenFields(true, fieldAppearance); err != nil {
113 | 			return err
114 | 		}
115 | 		r.AcroForm = nil
116 | 	}
117 | 
118 | 	// Copy input file contents.
119 | 	w := unipdf.NewPdfWriter()
120 | 	if err := readerToWriter(r, &w, nil); err != nil {
121 | 		return err
122 | 	}
123 | 
124 | 	// Save output file.
125 | 	safe := inputPath == outputPath
126 | 	return writePDF(outputPath, &w, safe)
127 | }
128 | 


--------------------------------------------------------------------------------
/internal/cli/form_flatten.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 	"os"
 12 | 	"strings"
 13 | 
 14 | 	"github.com/spf13/cobra"
 15 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 16 | )
 17 | 
// formFlattenCmdDesc is the long description of the form flatten command.
// It is assigned to the Long field of formFlattenCmd.
const formFlattenCmdDesc = `Flatten PDF file form annotations.

The flattening process makes the form fields of the output files read-only by
appending the form field annotation XObject Form data to the page content
stream, thus making it part of the page contents.

The command can take multiple files and directories as input parameters.
By default, each PDF file is saved in the same location as the original file,
appending the "_flattened" suffix to the file name. Use the --overwrite flag
to overwrite the original files.
In addition, the flattened output files can be saved to a different directory
by using the --target-dir flag.
The command can search for PDF files inside the subdirectories of the
specified input directories by using the --recursive flag.
`
 33 | 
 34 | var formFlattenCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n",
 35 | 	fmt.Sprintf("%s form flatten file_1.pdf file_n.pdf", appName),
 36 | 	fmt.Sprintf("%s form flatten -O file_1.pdf file_n.pdf", appName),
 37 | 	fmt.Sprintf("%s form flatten -O -r file_1.pdf file_n.pdf dir_1 dir_n", appName),
 38 | 	fmt.Sprintf("%s form flatten -t out_dir file_1.pdf file_n.pdf dir_1 dir_n", appName),
 39 | 	fmt.Sprintf("%s form flatten -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n", appName),
 40 | 	fmt.Sprintf("%s form flatten -t out_dir -r -p pass file_1.pdf file_n.pdf dir_1 dir_n", appName),
 41 | )
 42 | 
 43 | // formFlattenCmd represents the form flatten command.
 44 | var formFlattenCmd = &cobra.Command{
 45 | 	Use:                   "flatten [FLAG]... INPUT_FILES...",
 46 | 	Short:                 "Flatten form annotations",
 47 | 	Long:                  formFlattenCmdDesc,
 48 | 	Example:               formFlattenCmdExample,
 49 | 	DisableFlagsInUseLine: true,
 50 | 	Run: func(cmd *cobra.Command, args []string) {
 51 | 		// Parse flags.
 52 | 		outputDir, _ := cmd.Flags().GetString("target-dir")
 53 | 		overwrite, _ := cmd.Flags().GetBool("overwrite")
 54 | 		recursive, _ := cmd.Flags().GetBool("recursive")
 55 | 		password, _ := cmd.Flags().GetString("password")
 56 | 
 57 | 		// Parse input parameters.
 58 | 		inputPaths, err := parseInputPaths(args, recursive, pdfMatcher)
 59 | 		if err != nil {
 60 | 			printErr("Could not parse input files: %s\n", err)
 61 | 		}
 62 | 
 63 | 		// Create output directory, if it does not exist.
 64 | 		if outputDir != "" {
 65 | 			if overwrite {
 66 | 				printErr("The --target-dir and the --overwrite flags are mutually exclusive")
 67 | 			}
 68 | 			if err = os.MkdirAll(outputDir, os.ModePerm); err != nil {
 69 | 				printErr("Could not create output directory: %s\n", err)
 70 | 			}
 71 | 		}
 72 | 
 73 | 		// Flatten PDF files form annotations.
 74 | 		for _, inputPath := range inputPaths {
 75 | 			fmt.Printf("Flattening %s\n", inputPath)
 76 | 
 77 | 			// Generate output path.
 78 | 			outputPath := generateOutputPath(inputPath, outputDir, "flattened", overwrite)
 79 | 
 80 | 			// Flatten input file form fields.
 81 | 			err := pdf.FormFlatten(inputPath, outputPath, password)
 82 | 			if err != nil {
 83 | 				printErr("Could not flatten input file form annotations: %s\n", err)
 84 | 			}
 85 | 
 86 | 			fmt.Printf("Original: %s\n", inputPath)
 87 | 			fmt.Printf("Flattened: %s\n", outputPath)
 88 | 			fmt.Println("Status: success")
 89 | 			fmt.Println(strings.Repeat("-", 10))
 90 | 		}
 91 | 	},
 92 | 	Args: func(_ *cobra.Command, args []string) error {
 93 | 		if len(args) < 1 {
 94 | 			return errors.New("must provide the at least on input file or directory")
 95 | 		}
 96 | 
 97 | 		return nil
 98 | 	},
 99 | }
100 | 
101 | func init() {
102 | 	formCmd.AddCommand(formFlattenCmd)
103 | 
104 | 	formFlattenCmd.Flags().StringP("target-dir", "t", "", "output directory")
105 | 	formFlattenCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files")
106 | 	formFlattenCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories")
107 | 	formFlattenCmd.Flags().StringP("password", "p", "", "input file password")
108 | }
109 | 


--------------------------------------------------------------------------------
/pkg/pdf/organize.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"github.com/unidoc/unipdf/v4/common"
 10 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 11 | )
 12 | 
 13 | // Organize extracts the provided page list from PDF file specified by the
 14 | // inputPath parameter then merges the individual pages and saves the
 15 | // resulting file at the location specified by the outputPath parameter.
 16 | // A password can be passed in for encrypted input files.
 17 | func Organize(inputPath, outputPath, password string, pages []int) error {
 18 | 	// Read input file.
 19 | 	pdfReader, _, _, _, err := readPDF(inputPath, password)
 20 | 	if err != nil {
 21 | 		return err
 22 | 	}
 23 | 
 24 | 	// Add selected pages to the writer.
 25 | 	pdfWriter := unipdf.NewPdfWriter()
 26 | 
 27 | 	for i := 0; i < len(pages); i++ {
 28 | 		page, err := pdfReader.GetPage(pages[i])
 29 | 		if err != nil {
 30 | 			return err
 31 | 		}
 32 | 
 33 | 		err = pdfWriter.AddPage(page)
 34 | 		if err != nil {
 35 | 			return err
 36 | 		}
 37 | 	}
 38 | 
 39 | 	// Copy PDF version.
 40 | 	version := pdfReader.PdfVersion()
 41 | 	pdfWriter.SetVersion(version.Major, version.Minor)
 42 | 
 43 | 	// Copy PDF info.
 44 | 	info, err := pdfReader.GetPdfInfo()
 45 | 	if err != nil {
 46 | 		common.Log.Debug("ERROR: %v", err)
 47 | 	} else {
 48 | 		pdfWriter.SetDocInfo(info)
 49 | 	}
 50 | 
 51 | 	// Copy Catalog Metadata.
 52 | 	if meta, ok := pdfReader.GetCatalogMetadata(); ok {
 53 | 		if err := pdfWriter.SetCatalogMetadata(meta); err != nil {
 54 | 			return err
 55 | 		}
 56 | 	}
 57 | 
 58 | 	// Copy catalog mark information.
 59 | 	if markInfo, ok := pdfReader.GetCatalogMarkInfo(); ok {
 60 | 		if err := pdfWriter.SetCatalogMarkInfo(markInfo); err != nil {
 61 | 			return err
 62 | 		}
 63 | 	}
 64 | 
 65 | 	// Copy AcroForm.
 66 | 	err = pdfWriter.SetForms(pdfReader.AcroForm)
 67 | 	if err != nil {
 68 | 		common.Log.Debug("ERROR: %v", err)
 69 | 		return err
 70 | 	}
 71 | 
 72 | 	// Copy viewer preferences.
 73 | 	if pref, ok := pdfReader.GetCatalogViewerPreferences(); ok {
 74 | 		if err := pdfWriter.SetCatalogViewerPreferences(pref); err != nil {
 75 | 			return err
 76 | 		}
 77 | 	}
 78 | 
 79 | 	// Copy language preferences.
 80 | 	if lang, ok := pdfReader.GetCatalogLanguage(); ok {
 81 | 		if err := pdfWriter.SetCatalogLanguage(lang); err != nil {
 82 | 			return err
 83 | 		}
 84 | 	}
 85 | 
 86 | 	// Copy document outlines.
 87 | 	pdfWriter.AddOutlineTree(pdfReader.GetOutlineTree())
 88 | 
 89 | 	// Copy OC Properties.
 90 | 	props, err := pdfReader.GetOCProperties()
 91 | 	if err != nil {
 92 | 		common.Log.Debug("ERROR: %v", err)
 93 | 	} else {
 94 | 		err = pdfWriter.SetOCProperties(props)
 95 | 		if err != nil {
 96 | 			common.Log.Debug("ERROR: %v", err)
 97 | 		}
 98 | 	}
 99 | 
100 | 	// Copy page labels.
101 | 	labelObj, err := pdfReader.GetPageLabels()
102 | 	if err != nil {
103 | 		common.Log.Debug("ERROR: %v", err)
104 | 	} else {
105 | 		err = pdfWriter.SetPageLabels(labelObj)
106 | 		if err != nil {
107 | 			common.Log.Debug("ERROR: %v", err)
108 | 		}
109 | 	}
110 | 
111 | 	// Copy named destinations.
112 | 	namedDest, err := pdfReader.GetNamedDestinations()
113 | 	if err != nil {
114 | 		common.Log.Debug("ERROR: %v", err)
115 | 	} else {
116 | 		err = pdfWriter.SetNamedDestinations(namedDest)
117 | 		if err != nil {
118 | 			common.Log.Debug("ERROR: %v", err)
119 | 		}
120 | 	}
121 | 
122 | 	// Copy name dictionary.
123 | 	nameDict, err := pdfReader.GetNameDictionary()
124 | 	if err != nil {
125 | 		common.Log.Debug("ERROR: %v", err)
126 | 	} else {
127 | 		err = pdfWriter.SetNameDictionary(nameDict)
128 | 		if err != nil {
129 | 			common.Log.Debug("ERROR: %v", err)
130 | 		}
131 | 	}
132 | 
133 | 	// Copy StructTreeRoot dictionary.
134 | 	structTreeRoot, found := pdfReader.GetCatalogStructTreeRoot()
135 | 	if found {
136 | 		err := pdfWriter.SetCatalogStructTreeRoot(structTreeRoot)
137 | 		if err != nil {
138 | 			common.Log.Debug("ERROR: %v", err)
139 | 		}
140 | 	}
141 | 
142 | 	// Copy global page rotation.
143 | 	if pdfReader.Rotate != nil {
144 | 		if err := pdfWriter.SetRotation(*pdfReader.Rotate); err != nil {
145 | 			common.Log.Debug("ERROR: %v", err)
146 | 		}
147 | 	}
148 | 
149 | 	// Write output file.
150 | 	safe := inputPath == outputPath
151 | 	return writePDF(outputPath, &pdfWriter, safe)
152 | }
153 | 


--------------------------------------------------------------------------------
/pkg/pdf/extract.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"archive/zip"
 10 | 	"bytes"
 11 | 	"fmt"
 12 | 	"image/jpeg"
 13 | 	"io"
 14 | 	"os"
 15 | 	"path/filepath"
 16 | 	"strings"
 17 | 	"time"
 18 | 
 19 | 	uniextractor "github.com/unidoc/unipdf/v4/extractor"
 20 | )
 21 | 
 22 | // ExtractText returns all text content from the PDF file specified by the
 23 | // inputPath parameter. A password can be specified for encrypted PDF files.
 24 | // Also, a list of pages from which to extract text can be passed in.
 25 | // If the pages parameter is nil or an empty slice, the text is extracted from
 26 | // all the pages of the file.
 27 | func ExtractText(inputPath, password string, pages []int) (string, error) {
 28 | 	// Read input file.
 29 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
 30 | 	if err != nil {
 31 | 		return "", err
 32 | 	}
 33 | 
 34 | 	// Extract text.
 35 | 	if len(pages) == 0 {
 36 | 		pages = createPageRange(pageCount)
 37 | 	}
 38 | 
 39 | 	var text string
 40 | 	for _, numPage := range pages {
 41 | 		// Get page.
 42 | 		page, err := r.GetPage(numPage)
 43 | 		if err != nil {
 44 | 			return "", err
 45 | 		}
 46 | 
 47 | 		// Extract page text.
 48 | 		extractor, err := uniextractor.New(page)
 49 | 		if err != nil {
 50 | 			return "", err
 51 | 		}
 52 | 
 53 | 		pageText, err := extractor.ExtractText()
 54 | 		if err != nil {
 55 | 			return "", err
 56 | 		}
 57 | 
 58 | 		text += pageText
 59 | 	}
 60 | 
 61 | 	return text, nil
 62 | }
 63 | 
 64 | // ExtractImages extracts all image content from the PDF file specified by the
 65 | // inputPath parameter. The extracted collection of images is saved as a ZIP
 66 | // archive at the location specified by the outputPath parameter.
 67 | // A password can be passed in, if the input file is encrypted.
 68 | // Also, a list of pages from which to extract images can be passed in.
 69 | // If the pages parameter is nil or an empty slice, the images are extracted
 70 | // from all the pages of the file.
 71 | // In addition, the image extraction process can be controlled by using the
 72 | // options parameter. If the options parameter is nil, the default image
 73 | // extraction options are used.
 74 | func ExtractImages(inputPath, outputPath, password string, pages []int,
 75 | 	options *uniextractor.ImageExtractOptions) (string, int, error) {
 76 | 	// Use input file directory if no output path is specified.
 77 | 	if outputPath == "" {
 78 | 		dir, name := filepath.Split(inputPath)
 79 | 		name = strings.TrimSuffix(name, filepath.Ext(name)) + ".zip"
 80 | 		outputPath = filepath.Join(dir, name)
 81 | 	}
 82 | 
 83 | 	// Read input file.
 84 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
 85 | 	if err != nil {
 86 | 		return "", 0, err
 87 | 	}
 88 | 
 89 | 	// Extract images.
 90 | 	if len(pages) == 0 {
 91 | 		pages = createPageRange(pageCount)
 92 | 	}
 93 | 
 94 | 	// Create zip file.
 95 | 	zipBuffer := bytes.NewBuffer(nil)
 96 | 	w := zip.NewWriter(zipBuffer)
 97 | 	now := time.Now()
 98 | 	var countImages int
 99 | 
100 | 	for _, numPage := range pages {
101 | 		// Get page.
102 | 		page, err := r.GetPage(numPage)
103 | 		if err != nil {
104 | 			return "", 0, err
105 | 		}
106 | 
107 | 		// Extract page images.
108 | 		extractor, err := uniextractor.New(page)
109 | 		if err != nil {
110 | 			return "", 0, err
111 | 		}
112 | 
113 | 		pageImages, err := extractor.ExtractPageImages(options)
114 | 		if err != nil {
115 | 			return "", 0, err
116 | 		}
117 | 
118 | 		// Add images to zip file.
119 | 		images := pageImages.Images
120 | 		countImages += len(images)
121 | 
122 | 		for i, pageImage := range images {
123 | 			img, err := pageImage.Image.ToGoImage()
124 | 			if err != nil {
125 | 				return "", 0, err
126 | 			}
127 | 
128 | 			filename, err := w.CreateHeader(&zip.FileHeader{
129 | 				Name:     (fmt.Sprintf("p%d_%d.jpg", numPage, i)),
130 | 				Modified: now,
131 | 			})
132 | 			if err != nil {
133 | 				return "", 0, err
134 | 			}
135 | 
136 | 			err = jpeg.Encode(filename, img, &jpeg.Options{Quality: 100})
137 | 			if err != nil {
138 | 				return "", 0, err
139 | 			}
140 | 		}
141 | 	}
142 | 
143 | 	if err := w.Close(); err != nil {
144 | 		return "", 0, nil
145 | 	}
146 | 
147 | 	if countImages == 0 {
148 | 		return "", 0, nil
149 | 	}
150 | 
151 | 	// Write output file.
152 | 	outputFile, err := os.Create(outputPath)
153 | 	if err != nil {
154 | 		return "", 0, err
155 | 	}
156 | 	defer outputFile.Close()
157 | 
158 | 	if _, err := io.Copy(outputFile, zipBuffer); err != nil {
159 | 		return "", 0, err
160 | 	}
161 | 
162 | 	return outputPath, countImages, nil
163 | }
164 | 


--------------------------------------------------------------------------------
/internal/cli/form_fdfmerge.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 	"os"
 12 | 	"strings"
 13 | 
 14 | 	"github.com/spf13/cobra"
 15 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 16 | )
 17 | 
// formFDFMergeCmdDesc is the long description of the form fdfmerge command.
// It is assigned to the Long field of formFDFMergeCmd.
const formFDFMergeCmdDesc = `Fill form fields from FDF file.

The field values specified in the FDF file template are used to fill the form
fields in the input PDF files. In addition, the output file form fields can be
flattened by using the --flatten flag. The flattening process makes the form
fields of the output files read-only by appending the form field annotation
XObject Form data to the page content stream, thus making it part of the page
contents.

The command can take multiple files and directories as input parameters.
By default, each PDF file is saved in the same location as the original file,
appending the "_filled" suffix to the file name. Use the --overwrite flag
to overwrite the original files.
In addition, the filled output files can be saved to a different directory
by using the --target-dir flag.
The command can search for PDF files inside the subdirectories of the
specified input directories by using the --recursive flag.
`
 36 | 
 37 | var formFDFMergeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n",
 38 | 	fmt.Sprintf("%s form fdfmerge fields.fdf file_1.pdf file_n.pdf", appName),
 39 | 	fmt.Sprintf("%s form fdfmerge -O fields.fdf file_1.pdf file_n.pdf", appName),
 40 | 	fmt.Sprintf("%s form fdfmerge -O -r -f fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName),
 41 | 	fmt.Sprintf("%s form fdfmerge -t out_dir fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName),
 42 | 	fmt.Sprintf("%s form fdfmerge -t out_dir -r fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName),
 43 | 	fmt.Sprintf("%s form fdfmerge -t out_dir -r -p pass fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName),
 44 | )
 45 | 
 46 | // formFDFMergeCmd represents the form fdfmerge command.
 47 | var formFDFMergeCmd = &cobra.Command{
 48 | 	Use:                   "fdfmerge [FLAG]... FDF_FILE INPUT_FILES...",
 49 | 	Short:                 "Fill form fields from FDF file",
 50 | 	Long:                  formFDFMergeCmdDesc,
 51 | 	Example:               formFDFMergeCmdExample,
 52 | 	DisableFlagsInUseLine: true,
 53 | 	Run: func(cmd *cobra.Command, args []string) {
 54 | 		// Parse input flags.
 55 | 		outputDir, _ := cmd.Flags().GetString("target-dir")
 56 | 		overwrite, _ := cmd.Flags().GetBool("overwrite")
 57 | 		recursive, _ := cmd.Flags().GetBool("recursive")
 58 | 		password, _ := cmd.Flags().GetString("password")
 59 | 		flatten, _ := cmd.Flags().GetBool("flatten")
 60 | 
 61 | 		// Parse input parameters.
 62 | 		fdfPath := args[0]
 63 | 
 64 | 		inputPaths, err := parseInputPaths(args[1:], recursive, pdfMatcher)
 65 | 		if err != nil {
 66 | 			printErr("Could not parse input files: %s\n", err)
 67 | 		}
 68 | 
 69 | 		// Create output directory, if it does not exist.
 70 | 		if outputDir != "" {
 71 | 			if overwrite {
 72 | 				printErr("The --target-dir and the --overwrite flags are mutually exclusive")
 73 | 			}
 74 | 			if err = os.MkdirAll(outputDir, os.ModePerm); err != nil {
 75 | 				printErr("Could not create output directory: %s\n", err)
 76 | 			}
 77 | 		}
 78 | 
 79 | 		// Fill form fields.
 80 | 		for _, inputPath := range inputPaths {
 81 | 			fmt.Printf("Filling form values for %s\n", inputPath)
 82 | 
 83 | 			// Generate output path.
 84 | 			outputPath := generateOutputPath(inputPath, outputDir, "filled", overwrite)
 85 | 
 86 | 			// Fill input file form fields.
 87 | 			err := pdf.FormFillFDF(inputPath, fdfPath, outputPath, password, flatten)
 88 | 			if err != nil {
 89 | 				printErr("Could not fill form fields: %s\n", err)
 90 | 			}
 91 | 
 92 | 			fmt.Printf("Original: %s\n", inputPath)
 93 | 			fmt.Printf("Filled: %s\n", outputPath)
 94 | 			fmt.Println("Status: success")
 95 | 			fmt.Println(strings.Repeat("-", 10))
 96 | 		}
 97 | 	},
 98 | 	Args: func(_ *cobra.Command, args []string) error {
 99 | 		if len(args) < 2 {
100 | 			return errors.New("must provide the FDF file and at least one input file")
101 | 		}
102 | 
103 | 		return nil
104 | 	},
105 | }
106 | 
107 | func init() {
108 | 	formCmd.AddCommand(formFDFMergeCmd)
109 | 
110 | 	formFDFMergeCmd.Flags().StringP("target-dir", "t", "", "output directory")
111 | 	formFDFMergeCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files")
112 | 	formFDFMergeCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories")
113 | 	formFDFMergeCmd.Flags().StringP("password", "p", "", "input file password")
114 | 	formFDFMergeCmd.Flags().BoolP("flatten", "f", false, "flatten form annotations")
115 | }
116 | 


--------------------------------------------------------------------------------
/internal/cli/form_fill.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 	"os"
 12 | 	"strings"
 13 | 
 14 | 	"github.com/spf13/cobra"
 15 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 16 | )
 17 | 
// formFillCmdDesc is the long description of the form fill command. It is
// assigned to the Long field of formFillCmd.
const formFillCmdDesc = `Fill form fields from JSON file.

The field values specified in the JSON file template are used to fill the form
fields in the input PDF files. In addition, the output file form fields can be
flattened by using the --flatten flag. The flattening process makes the form
fields of the output files read-only by appending the form field annotation
XObject Form data to the page content stream, thus making it part of the page
contents.

The command can take multiple files and directories as input parameters.
By default, each PDF file is saved in the same location as the original file,
appending the "_filled" suffix to the file name. Use the --overwrite flag
to overwrite the original files.
In addition, the filled output files can be saved to a different directory
by using the --target-dir flag.
The command can search for PDF files inside the subdirectories of the
specified input directories by using the --recursive flag.

The "form export" command can be used to generate the JSON form fields template
for a PDF file.
`
 39 | 
 40 | var formFillCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n",
 41 | 	fmt.Sprintf("%s form fill fields.json file_1.pdf file_n.pdf", appName),
 42 | 	fmt.Sprintf("%s form fill -O fields.json file_1.pdf file_n.pdf", appName),
 43 | 	fmt.Sprintf("%s form fill -O -r -f fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName),
 44 | 	fmt.Sprintf("%s form fill -t out_dir fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName),
 45 | 	fmt.Sprintf("%s form fill -t out_dir -r fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName),
 46 | 	fmt.Sprintf("%s form fill -t out_dir -r -p pass fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName),
 47 | )
 48 | 
 49 | // formFillCmd represents the form fill command.
 50 | var formFillCmd = &cobra.Command{
 51 | 	Use:                   "fill [FLAG]... JSON_FILE INPUT_FILES...",
 52 | 	Short:                 "Fill form fields from JSON file",
 53 | 	Long:                  formFillCmdDesc,
 54 | 	Example:               formFillCmdExample,
 55 | 	DisableFlagsInUseLine: true,
 56 | 	Run: func(cmd *cobra.Command, args []string) {
 57 | 		// Parse input flags.
 58 | 		outputDir, _ := cmd.Flags().GetString("target-dir")
 59 | 		overwrite, _ := cmd.Flags().GetBool("overwrite")
 60 | 		recursive, _ := cmd.Flags().GetBool("recursive")
 61 | 		password, _ := cmd.Flags().GetString("password")
 62 | 		flatten, _ := cmd.Flags().GetBool("flatten")
 63 | 
 64 | 		// Parse input parameters.
 65 | 		jsonPath := args[0]
 66 | 
 67 | 		inputPaths, err := parseInputPaths(args[1:], recursive, pdfMatcher)
 68 | 		if err != nil {
 69 | 			printErr("Could not parse input files: %s\n", err)
 70 | 		}
 71 | 
 72 | 		// Create output directory, if it does not exist.
 73 | 		if outputDir != "" {
 74 | 			if overwrite {
 75 | 				printErr("The --target-dir and the --overwrite flags are mutually exclusive")
 76 | 			}
 77 | 			if err = os.MkdirAll(outputDir, os.ModePerm); err != nil {
 78 | 				printErr("Could not create output directory: %s\n", err)
 79 | 			}
 80 | 		}
 81 | 
 82 | 		// Fill form fields.
 83 | 		for _, inputPath := range inputPaths {
 84 | 			fmt.Printf("Filling form values for %s\n", inputPath)
 85 | 
 86 | 			// Generate output path.
 87 | 			outputPath := generateOutputPath(inputPath, outputDir, "filled", overwrite)
 88 | 
 89 | 			// Fill input file form fields.
 90 | 			err := pdf.FormFillJSON(inputPath, jsonPath, outputPath, password, flatten)
 91 | 			if err != nil {
 92 | 				printErr("Could not fill form fields: %s\n", err)
 93 | 			}
 94 | 
 95 | 			fmt.Printf("Original: %s\n", inputPath)
 96 | 			fmt.Printf("Filled: %s\n", outputPath)
 97 | 			fmt.Println("Status: success")
 98 | 			fmt.Println(strings.Repeat("-", 10))
 99 | 		}
100 | 	},
101 | 	Args: func(_ *cobra.Command, args []string) error {
102 | 		if len(args) < 2 {
103 | 			return errors.New("must provide the JSON file and at least one input file")
104 | 		}
105 | 
106 | 		return nil
107 | 	},
108 | }
109 | 
110 | func init() {
111 | 	formCmd.AddCommand(formFillCmd)
112 | 
113 | 	formFillCmd.Flags().StringP("target-dir", "t", "", "output directory")
114 | 	formFillCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files")
115 | 	formFillCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories")
116 | 	formFillCmd.Flags().StringP("password", "p", "", "input file password")
117 | 	formFillCmd.Flags().BoolP("flatten", "f", false, "flatten form annotations")
118 | }
119 | 


--------------------------------------------------------------------------------
/internal/cli/optimize.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 	"os"
 12 | 	"strings"
 13 | 	"time"
 14 | 
 15 | 	"github.com/spf13/cobra"
 16 | 	"github.com/unidoc/unipdf-cli/pkg/pdf"
 17 | )
 18 | 
 19 | const optimizeCmdDesc = `Optimize PDF files by optimizing structure, compression and image quality.
 20 | 
 21 | The command can take multiple files and directories as input parameters.
 22 | By default, each PDF file is saved in the same location as the original file,
 23 | appending the "_optimized" suffix to the file name. Use the --overwrite flag
 24 | to overwrite the original files.
 25 | In addition, the optimized output files can be saved to a different directory
 26 | by using the --target-dir flag.
 27 | The command can search for PDF files inside the subdirectories of the
 28 | specified input directories by using the --recursive flag.
 29 | 
 30 | The quality of the images in the output files can be configured through
 31 | the --image-quality flag (default 90).
 32 | The resolution of the output images can be controlled using the --image-ppi flag.
 33 | Common pixels per inch values are 100 (screen), 150-300 (print), 600 (art). If
 34 | not specified, the PPI of the output images is 100.
 35 | `
 36 | 
 37 | var optimizeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
 38 | 	fmt.Sprintf("%s optimize file_1.pdf file_n.pdf", appName),
 39 | 	fmt.Sprintf("%s optimize -O file_1.pdf file_n.pdf", appName),
 40 | 	fmt.Sprintf("%s optimize -O -r file_1.pdf file_n.pdf dir_1 dir_n", appName),
 41 | 	fmt.Sprintf("%s optimize -t out_dir file_1.pdf file_n.pdf dir_1 dir_n", appName),
 42 | 	fmt.Sprintf("%s optimize -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n", appName),
 43 | 	fmt.Sprintf("%s optimize -t out_dir -r -q 75 file_1.pdf file_n.pdf dir_1 dir_n", appName),
 44 | 	fmt.Sprintf("%s optimize -t out_dir -r -q 75 -P 100 file_1.pdf file_n.pdf dir_1 dir_n", appName),
 45 | 	fmt.Sprintf("%s optimize -t out_dir -r -q 75 -P 100 -p pass file_1.pdf file_n.pdf dir_1 dir_n", appName),
 46 | )
 47 | 
 48 | // optimizeCmd represents the optimize command.
 49 | var optimizeCmd = &cobra.Command{
 50 | 	Use:                   "optimize [FLAG]... INPUT_FILES...",
 51 | 	Short:                 "Optimize PDF files",
 52 | 	Long:                  optimizeCmdDesc,
 53 | 	Example:               optimizeCmdExample,
 54 | 	DisableFlagsInUseLine: true,
 55 | 	Run: func(cmd *cobra.Command, args []string) {
 56 | 		// Parse flags.
 57 | 		outputDir, _ := cmd.Flags().GetString("target-dir")
 58 | 		overwrite, _ := cmd.Flags().GetBool("overwrite")
 59 | 		recursive, _ := cmd.Flags().GetBool("recursive")
 60 | 		password, _ := cmd.Flags().GetString("password")
 61 | 
 62 | 		// Parse optimization parameters.
 63 | 		imageQuality, err := cmd.Flags().GetInt("image-quality")
 64 | 		if err != nil {
 65 | 			imageQuality = 90
 66 | 		}
 67 | 
 68 | 		imagePPI, err := cmd.Flags().GetFloat64("image-ppi")
 69 | 		if err != nil {
 70 | 			imagePPI = 100
 71 | 		}
 72 | 
 73 | 		opts := &pdf.OptimizeOpts{
 74 | 			ImageQuality: clampInt(imageQuality, 10, 100),
 75 | 			ImagePPI:     imagePPI,
 76 | 		}
 77 | 
 78 | 		// Parse input parameters.
 79 | 		inputPaths, err := parseInputPaths(args, recursive, pdfMatcher)
 80 | 		if err != nil {
 81 | 			printErr("Could not parse input files: %s\n", err)
 82 | 		}
 83 | 
 84 | 		// Create output directory, if it does not exist.
 85 | 		if outputDir != "" {
 86 | 			if overwrite {
 87 | 				printErr("The --target-dir and the --overwrite flags are mutually exclusive")
 88 | 			}
 89 | 
 90 | 			if err = os.MkdirAll(outputDir, os.ModePerm); err != nil {
 91 | 				printErr("Could not create output directory: %s\n", err)
 92 | 			}
 93 | 		}
 94 | 
 95 | 		// Optimize PDF files.
 96 | 		for _, inputPath := range inputPaths {
 97 | 			fmt.Printf("Optimizing %s\n", inputPath)
 98 | 
 99 | 			// Generate output path.
100 | 			outputPath := generateOutputPath(inputPath, outputDir, "optimized", overwrite)
101 | 
102 | 			// Optimize input file.
103 | 			res, err := pdf.Optimize(inputPath, outputPath, password, opts)
104 | 			if err != nil {
105 | 				printErr("Could not optimize input file: %s\n", err)
106 | 			}
107 | 
108 | 			inSize := res.Original.Size
109 | 			outSize := res.Optimized.Size
110 | 			ratio := 100.0 - (float64(outSize) / float64(inSize) * 100.0)
111 | 			duration := float64(res.Duration) / float64(time.Millisecond)
112 | 
113 | 			fmt.Printf("Original: %s\n", res.Original.Name)
114 | 			fmt.Printf("Original size: %d bytes\n", inSize)
115 | 			fmt.Printf("Optimized: %s\n", res.Optimized.Name)
116 | 			fmt.Printf("Optimized size: %d bytes\n", outSize)
117 | 			fmt.Printf("Compression ratio: %.2f%%\n", ratio)
118 | 			fmt.Printf("Processing time: %.2f ms\n", duration)
119 | 			fmt.Println("Status: success")
120 | 			fmt.Println(strings.Repeat("-", 10))
121 | 		}
122 | 	},
123 | 	Args: func(_ *cobra.Command, args []string) error {
124 | 		if len(args) < 1 {
125 | 			return errors.New("must provide at least one input file")
126 | 		}
127 | 
128 | 		return nil
129 | 	},
130 | }
131 | 
132 | func init() {
133 | 	rootCmd.AddCommand(optimizeCmd)
134 | 
135 | 	optimizeCmd.Flags().StringP("target-dir", "t", "", "output directory")
136 | 	optimizeCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files")
137 | 	optimizeCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories")
138 | 	optimizeCmd.Flags().StringP("password", "p", "", "file password")
139 | 	optimizeCmd.Flags().IntP("image-quality", "q", 90, "output JPEG image quality")
140 | 	optimizeCmd.Flags().Float64P("image-ppi", "P", 100, "output images pixels per inch")
141 | }
142 | 


--------------------------------------------------------------------------------
/pkg/pdf/utils.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"os"
 11 | 	"path/filepath"
 12 | 
 13 | 	unisecurity "github.com/unidoc/unipdf/v4/core/security"
 14 | 	unicreator "github.com/unidoc/unipdf/v4/creator"
 15 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 16 | )
 17 | 
 18 | func readPDF(filename, password string) (*unipdf.PdfReader, int, bool, unisecurity.Permissions, error) {
 19 | 	// Open input file.
 20 | 	f, err := os.Open(filename)
 21 | 	if err != nil {
 22 | 		return nil, 0, false, 0, err
 23 | 	}
 24 | 	defer f.Close()
 25 | 
 26 | 	// Read input file.
 27 | 	r, err := unipdf.NewPdfReader(f)
 28 | 	if err != nil {
 29 | 		return nil, 0, false, 0, err
 30 | 	}
 31 | 
 32 | 	// Check if file is encrypted.
 33 | 	encrypted, err := r.IsEncrypted()
 34 | 	if err != nil {
 35 | 		return nil, 0, false, 0, err
 36 | 	}
 37 | 
 38 | 	// Decrypt using the specified password, if necessary.
 39 | 	perms := unisecurity.PermOwner
 40 | 	if encrypted {
 41 | 		passwords := []string{password}
 42 | 		if password != "" {
 43 | 			passwords = append(passwords, "")
 44 | 		}
 45 | 
 46 | 		// Extract use permissions
 47 | 		_, perms, err = r.CheckAccessRights([]byte(password))
 48 | 		if err != nil {
 49 | 			perms = unisecurity.Permissions(0)
 50 | 		}
 51 | 
 52 | 		var decrypted bool
 53 | 		for _, p := range passwords {
 54 | 			if auth, err := r.Decrypt([]byte(p)); err != nil || !auth {
 55 | 				continue
 56 | 			}
 57 | 
 58 | 			decrypted = true
 59 | 			break
 60 | 		}
 61 | 
 62 | 		if !decrypted {
 63 | 			return nil, 0, false, 0, errors.New("could not decrypt file with the provided password")
 64 | 		}
 65 | 	}
 66 | 
 67 | 	// Get number of pages.
 68 | 	pages, err := r.GetNumPages()
 69 | 	if err != nil {
 70 | 		return nil, 0, false, 0, err
 71 | 	}
 72 | 
 73 | 	return r, pages, encrypted, perms, nil
 74 | }
 75 | 
 76 | func writePDF(filename string, w *unipdf.PdfWriter, safe bool) error {
 77 | 	var err error
 78 | 	if safe {
 79 | 		// Make a copy of the original file and restore it if
 80 | 		// any error occurs while writing the new file.
 81 | 		if _, err = os.Stat(filename); !os.IsNotExist(err) {
 82 | 			tempPath := filepath.Join(os.TempDir(), "unipdf_"+filepath.Base(filename))
 83 | 			if err = os.Rename(filename, tempPath); err != nil {
 84 | 				return err
 85 | 			}
 86 | 			defer func() error {
 87 | 				if err == nil {
 88 | 					return nil
 89 | 				}
 90 | 				if err = os.Rename(tempPath, filename); err != nil {
 91 | 					return err
 92 | 				}
 93 | 
 94 | 				return os.Remove(tempPath)
 95 | 			}()
 96 | 		}
 97 | 	}
 98 | 
 99 | 	// Create output file.
100 | 	of, err := os.Create(filename)
101 | 	if err != nil {
102 | 		return err
103 | 	}
104 | 	defer of.Close()
105 | 
106 | 	// Write output file.
107 | 	err = w.Write(of)
108 | 	if err != nil {
109 | 		return err
110 | 	}
111 | 
112 | 	return nil
113 | }
114 | 
115 | func writeCreatorPDF(filename string, c *unicreator.Creator, safe bool) error {
116 | 	var err error
117 | 	if safe {
118 | 		// Make a copy of the original file and restore it if
119 | 		// any error occurs while writing the new file.
120 | 		if _, err = os.Stat(filename); !os.IsNotExist(err) {
121 | 			tempPath := filepath.Join(os.TempDir(), "unipdf_"+filepath.Base(filename))
122 | 			if err = os.Rename(filename, tempPath); err != nil {
123 | 				return err
124 | 			}
125 | 			defer func() error {
126 | 				if err == nil {
127 | 					return nil
128 | 				}
129 | 				if err = os.Rename(tempPath, filename); err != nil {
130 | 					return err
131 | 				}
132 | 
133 | 				return os.Remove(tempPath)
134 | 			}()
135 | 		}
136 | 	}
137 | 
138 | 	// Create output file.
139 | 	of, err := os.Create(filename)
140 | 	if err != nil {
141 | 		return err
142 | 	}
143 | 	defer of.Close()
144 | 
145 | 	// Write output file.
146 | 	return c.Write(of)
147 | }
148 | 
149 | func readerToWriter(r *unipdf.PdfReader, w *unipdf.PdfWriter, pages []int) error {
150 | 	if r == nil {
151 | 		return errors.New("source PDF cannot be null")
152 | 	}
153 | 	if w == nil {
154 | 		return errors.New("destination PDF cannot be null")
155 | 	}
156 | 
157 | 	// Get number of pages.
158 | 	pageCount, err := r.GetNumPages()
159 | 	if err != nil {
160 | 		return err
161 | 	}
162 | 
163 | 	// Add optional properties
164 | 	if ocProps, err := r.GetOCProperties(); err == nil {
165 | 		w.SetOCProperties(ocProps)
166 | 	}
167 | 
168 | 	// Add pages.
169 | 	if len(pages) == 0 {
170 | 		pages = createPageRange(pageCount)
171 | 	}
172 | 
173 | 	for _, numPage := range pages {
174 | 		if numPage < 1 || numPage > pageCount {
175 | 			continue
176 | 		}
177 | 
178 | 		page, err := r.GetPage(numPage)
179 | 		if err != nil {
180 | 			return err
181 | 		}
182 | 
183 | 		if err = w.AddPage(page); err != nil {
184 | 			return err
185 | 		}
186 | 	}
187 | 
188 | 	// Add forms.
189 | 	if r.AcroForm != nil {
190 | 		w.SetForms(r.AcroForm)
191 | 	}
192 | 
193 | 	return nil
194 | }
195 | 
196 | func readerToCreator(r *unipdf.PdfReader, w *unicreator.Creator, pages []int, rotationAngle int) error {
197 | 	if r == nil {
198 | 		return errors.New("source PDF cannot be null")
199 | 	}
200 | 	if w == nil {
201 | 		return errors.New("destination PDF cannot be null")
202 | 	}
203 | 
204 | 	// Get number of pages.
205 | 	pageCount, err := r.GetNumPages()
206 | 	if err != nil {
207 | 		return err
208 | 	}
209 | 
210 | 	// Add pages.
211 | 	if len(pages) == 0 {
212 | 		pages = createPageRange(pageCount)
213 | 	}
214 | 
215 | 	for _, numPage := range pages {
216 | 		if numPage < 1 || numPage > pageCount {
217 | 			continue
218 | 		}
219 | 
220 | 		page, err := r.GetPage(numPage)
221 | 		if err != nil {
222 | 			return err
223 | 		}
224 | 
225 | 		if err = w.AddPage(page); err != nil {
226 | 			return err
227 | 		}
228 | 
229 | 		if rotationAngle != 0 {
230 | 			if err = w.RotateDeg(int64(rotationAngle)); err != nil {
231 | 				return err
232 | 			}
233 | 		}
234 | 	}
235 | 
236 | 	// Add forms.
237 | 	if r.AcroForm != nil {
238 | 		w.SetForms(r.AcroForm)
239 | 	}
240 | 
241 | 	return nil
242 | }
243 | 
// createPageRange returns the page numbers 1 through count, in ascending
// order. An empty, non-nil slice is returned if count is not positive.
func createPageRange(count int) []int {
	if count <= 0 {
		return []int{}
	}

	pages := make([]int, count)
	for i := range pages {
		pages[i] = i + 1
	}

	return pages
}
256 | 


--------------------------------------------------------------------------------
/pkg/pdf/merge.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"fmt"
 10 | 
 11 | 	unicommon "github.com/unidoc/unipdf/v4/common"
 12 | 	unicore "github.com/unidoc/unipdf/v4/core"
 13 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 14 | )
 15 | 
 16 | // Merge merges all the PDF files specified by the inputPaths parameter and
 17 | // saves the result at the location specified by the outputPath parameter.
 18 | func Merge(inputPaths []string, outputPath string) error {
 19 | 	w := unipdf.NewPdfWriter()
 20 | 
 21 | 	var forms *unipdf.PdfAcroForm
 22 | 	for index, inputPath := range inputPaths {
 23 | 		// Read file.
 24 | 		r, pages, _, _, err := readPDF(inputPath, "")
 25 | 		if err != nil {
 26 | 			return err
 27 | 		}
 28 | 
 29 | 		// Add pages.
 30 | 		for i := 0; i < pages; i++ {
 31 | 			page, err := r.GetPage(i + 1)
 32 | 			if err != nil {
 33 | 				return err
 34 | 			}
 35 | 
 36 | 			err = w.AddPage(page)
 37 | 			if err != nil {
 38 | 				return err
 39 | 			}
 40 | 		}
 41 | 
 42 | 		// Handle forms.
 43 | 		if r.AcroForm != nil {
 44 | 			if forms == nil {
 45 | 				forms = r.AcroForm
 46 | 			} else {
 47 | 				forms, err = mergeForms(forms, r.AcroForm, index+1)
 48 | 				if err != nil {
 49 | 					return err
 50 | 				}
 51 | 			}
 52 | 		}
 53 | 	}
 54 | 
 55 | 	// Set the merged forms object.
 56 | 	if forms != nil {
 57 | 		w.SetForms(forms)
 58 | 	}
 59 | 
 60 | 	// Write output file.
 61 | 	return writePDF(outputPath, &w, false)
 62 | }
 63 | 
 64 | func mergeResources(r, r2 *unipdf.PdfPageResources) (*unipdf.PdfPageResources, error) {
 65 | 	// Merge XObject resources.
 66 | 	if r.XObject == nil {
 67 | 		r.XObject = r2.XObject
 68 | 	} else {
 69 | 		xobjs := getDict(r.XObject)
 70 | 		if r2.XObject != nil {
 71 | 			xobjs2 := getDict(r2.XObject)
 72 | 			for _, key := range xobjs2.Keys() {
 73 | 				val := xobjs2.Get(key)
 74 | 				xobjs.Set(key, val)
 75 | 			}
 76 | 		}
 77 | 	}
 78 | 
 79 | 	// Merge Colorspace resources.
 80 | 	colorspaces, err := r.GetColorspaces()
 81 | 	if err != nil {
 82 | 		return nil, err
 83 | 	}
 84 | 	colorspaces2, err := r2.GetColorspaces()
 85 | 	if err != nil {
 86 | 		return nil, err
 87 | 	}
 88 | 
 89 | 	if colorspaces == nil {
 90 | 		r.SetColorSpace(colorspaces2)
 91 | 	} else {
 92 | 		if colorspaces2 != nil {
 93 | 			for key, val := range colorspaces2.Colorspaces {
 94 | 				// Add the r2 colorspaces to r. Overwrite if duplicate.
 95 | 				// Ensure only present once in Names.
 96 | 				if _, has := colorspaces.Colorspaces[key]; !has {
 97 | 					colorspaces.Names = append(colorspaces.Names, key)
 98 | 				}
 99 | 				r.SetColorspaceByName(unicore.PdfObjectName(key), val)
100 | 			}
101 | 		}
102 | 	}
103 | 
104 | 	// Merge ExtGState resources.
105 | 	if r.ExtGState == nil {
106 | 		r.ExtGState = r2.ExtGState
107 | 	} else {
108 | 		extgstates := getDict(r.ExtGState)
109 | 
110 | 		if r2.ExtGState != nil {
111 | 			extgstates2 := getDict(r2.ExtGState)
112 | 			for _, key := range extgstates2.Keys() {
113 | 				val := extgstates2.Get(key)
114 | 				extgstates.Set(key, val)
115 | 			}
116 | 		}
117 | 	}
118 | 
119 | 	if r.Shading == nil {
120 | 		r.Shading = r2.Shading
121 | 	} else {
122 | 		shadings := getDict(r.Shading)
123 | 		if r2.Shading != nil {
124 | 			shadings2 := getDict(r2.Shading)
125 | 			for _, key := range shadings2.Keys() {
126 | 				val := shadings2.Get(key)
127 | 				shadings.Set(key, val)
128 | 			}
129 | 		}
130 | 	}
131 | 
132 | 	if r.Pattern == nil {
133 | 		r.Pattern = r2.Pattern
134 | 	} else {
135 | 		shadings := getDict(r.Pattern)
136 | 		if r2.Pattern != nil {
137 | 			patterns2 := getDict(r2.Pattern)
138 | 			for _, key := range patterns2.Keys() {
139 | 				val := patterns2.Get(key)
140 | 				shadings.Set(key, val)
141 | 			}
142 | 		}
143 | 	}
144 | 
145 | 	if r.Font == nil {
146 | 		r.Font = r2.Font
147 | 	} else {
148 | 		fonts := getDict(r.Font)
149 | 		if r2.Font != nil {
150 | 			fonts2 := getDict(r2.Font)
151 | 			for _, key := range fonts2.Keys() {
152 | 				val := fonts2.Get(key)
153 | 				fonts.Set(key, val)
154 | 			}
155 | 		}
156 | 	}
157 | 
158 | 	if r.ProcSet == nil {
159 | 		r.ProcSet = r2.ProcSet
160 | 	} else {
161 | 		procsets := getDict(r.ProcSet)
162 | 		if r2.ProcSet != nil {
163 | 			procsets2 := getDict(r2.ProcSet)
164 | 			for _, key := range procsets2.Keys() {
165 | 				val := procsets2.Get(key)
166 | 				procsets.Set(key, val)
167 | 			}
168 | 		}
169 | 	}
170 | 
171 | 	if r.Properties == nil {
172 | 		r.Properties = r2.Properties
173 | 	} else {
174 | 		props := getDict(r.Properties)
175 | 		if r2.Properties != nil {
176 | 			props2 := getDict(r2.Properties)
177 | 			for _, key := range props2.Keys() {
178 | 				val := props2.Get(key)
179 | 				props.Set(key, val)
180 | 			}
181 | 		}
182 | 	}
183 | 
184 | 	return r, nil
185 | }
186 | 
187 | // mergeForms merges two interactive forms.
188 | func mergeForms(form, form2 *unipdf.PdfAcroForm, docNum int) (*unipdf.PdfAcroForm, error) {
189 | 	if form.NeedAppearances == nil {
190 | 		form.NeedAppearances = form2.NeedAppearances
191 | 	}
192 | 
193 | 	if form.SigFlags == nil {
194 | 		form.SigFlags = form2.SigFlags
195 | 	}
196 | 
197 | 	if form.CO == nil {
198 | 		form.CO = form2.CO
199 | 	}
200 | 
201 | 	if form.DR == nil {
202 | 		form.DR = form2.DR
203 | 	} else if form2.DR != nil {
204 | 		dr, err := mergeResources(form.DR, form2.DR)
205 | 		if err != nil {
206 | 			return nil, err
207 | 		}
208 | 		form.DR = dr
209 | 	}
210 | 
211 | 	if form.DA == nil {
212 | 		form.DA = form2.DA
213 | 	}
214 | 
215 | 	if form.Q == nil {
216 | 		form.Q = form2.Q
217 | 	}
218 | 
219 | 	if form.XFA == nil {
220 | 		form.XFA = form2.XFA
221 | 	} else {
222 | 		if form2.XFA != nil {
223 | 			unicommon.Log.Debug("TODO: Handle XFA merging - Currently just using first one that is encountered")
224 | 		}
225 | 	}
226 | 
227 | 	// Fields.
228 | 	if form.Fields == nil {
229 | 		form.Fields = form2.Fields
230 | 	} else {
231 | 		field := unipdf.NewPdfField()
232 | 		field.T = unicore.MakeString(fmt.Sprintf("doc%d", docNum))
233 | 		field.Kids = []*unipdf.PdfField{}
234 | 		if form2.Fields != nil {
235 | 			for _, subfield := range *form2.Fields {
236 | 				// Update parent.
237 | 				subfield.Parent = field
238 | 				field.Kids = append(field.Kids, subfield)
239 | 			}
240 | 
241 | 		}
242 | 		*form.Fields = append(*form.Fields, field)
243 | 	}
244 | 
245 | 	return form, nil
246 | }
247 | 
248 | func getDict(obj unicore.PdfObject) *unicore.PdfObjectDictionary {
249 | 	if obj == nil {
250 | 		return nil
251 | 	}
252 | 
253 | 	obj = unicore.TraceToDirectObject(obj)
254 | 	dict, ok := obj.(*unicore.PdfObjectDictionary)
255 | 	if !ok {
256 | 		unicommon.Log.Debug("Error type check error (got %T)", obj)
257 | 		return nil
258 | 	}
259 | 
260 | 	return dict
261 | }
262 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/adrg/strutil v0.2.2/go.mod h1:EF2fjOFlGTepljfI+FzgTG13oXthR7ZAil9/aginnNQ=
 2 | github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4=
 3 | github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA=
 4 | github.com/adrg/sysfont v0.1.2 h1:MSU3KREM4RhsQ+7QgH7wPEPTgAgBIz0Hw6Nd4u7QgjE=
 5 | github.com/adrg/sysfont v0.1.2/go.mod h1:6d3l7/BSjX9VaeXWJt9fcrftFaD/t7l11xgSywCPZGk=
 6 | github.com/adrg/xdg v0.3.0/go.mod h1:7I2hH/IT30IsupOpKZ5ue7/qNi3CoKzD6tL3HwpaRMQ=
 7 | github.com/adrg/xdg v0.5.3 h1:xRnxJXne7+oWDatRhR1JLnvuccuIeCoBu2rtuLqQB78=
 8 | github.com/adrg/xdg v0.5.3/go.mod h1:nlTsY+NNiCBGCK2tpm09vRqfVzrc2fLmXGpBLF0zlTQ=
 9 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
11 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
12 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
13 | github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46 h1:N+R2A3fGIr5GucoRMu2xpqyQWQlfY31orbofBCdjMz8=
14 | github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46/go.mod h1:2Yoiy15Cf7Q3NFwfaJquh7Mk1uGI09ytcD7CUhn8j7s=
15 | github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg=
16 | github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY=
17 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
18 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
19 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
20 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
21 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
22 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
23 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
24 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
25 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
26 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
27 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
28 | github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
29 | github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
30 | github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
31 | github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
32 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
33 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
34 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
35 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
36 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
37 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
38 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
39 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
40 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
41 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
42 | github.com/unidoc/freetype v0.2.3 h1:uPqW+AY0vXN6K2tvtg8dMAtHTEvvHTN52b72XpZU+3I=
43 | github.com/unidoc/freetype v0.2.3/go.mod h1:mJ/Q7JnqEoWtajJVrV6S1InbRv0K/fJerPB5SQs32KI=
44 | github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11 h1:kExUKrbi429KdVVuAc85z4P+W/Rk4bjGWB5KzZLl/l8=
45 | github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11/go.mod h1:SX63w9Ww4+Z7E96B01OuG59SleQUb+m+dmapZ8o1Jac=
46 | github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8=
47 | github.com/unidoc/pkcs7 v0.3.0 h1:+RCopNCR8UoZtlf4bu4Y88O3j1MbvrLcOuQj/tbPLoU=
48 | github.com/unidoc/pkcs7 v0.3.0/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8=
49 | github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl66m7MJ8OqBjq8jpWBXPK6/RKtqeTkc=
50 | github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw=
51 | github.com/unidoc/unichart v0.5.1 h1:qnYavwBV5sg9NUF59KbMOqJdh2kA454nVxdDTPPtSz8=
52 | github.com/unidoc/unichart v0.5.1/go.mod h1:/8yJsL49OqBOyG53JFVZOwwDXDquo/ZRMkfz9fNsVgc=
53 | github.com/unidoc/unipdf/v4 v4.3.0 h1:eA4zjRHTULtV5thy3MausfFYDP1i59qGdsfxe709oUY=
54 | github.com/unidoc/unipdf/v4 v4.3.0/go.mod h1:oR0EX7TmS7KaAuzFQPA9t9HjbU4f2NbWMvzXNqtXo70=
55 | github.com/unidoc/unitype v0.5.1 h1:UwTX15K6bktwKocWVvLoijIeu4JAVEAIeFqMOjvxqQs=
56 | github.com/unidoc/unitype v0.5.1/go.mod h1:3dxbRL+f1otNqFQIRHho8fxdg3CcUKrqS8w1SXTsqcI=
57 | golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
58 | golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
59 | golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM=
60 | golang.org/x/image v0.30.0 h1:jD5RhkmVAnjqaCUXfbGBrn3lpxbknfN9w2UhHHU+5B4=
61 | golang.org/x/image v0.30.0/go.mod h1:SAEUTxCCMWSrJcCy/4HwavEsfZZJlYxeHLc6tTiAe/c=
62 | golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
63 | golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
64 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
65 | golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
66 | golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
67 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
68 | golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
69 | golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
70 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
71 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
72 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
73 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
74 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
75 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
76 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
77 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
78 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
79 | 


--------------------------------------------------------------------------------
/pkg/pdf/replace.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"strings"
 10 | 
 11 | 	"github.com/unidoc/unipdf/v4/common"
 12 | 	"github.com/unidoc/unipdf/v4/contentstream"
 13 | 	"github.com/unidoc/unipdf/v4/core"
 14 | 	"github.com/unidoc/unipdf/v4/model"
 15 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 16 | )
 17 | 
 18 | type textChunk struct {
 19 | 	font   *model.PdfFont
 20 | 	strObj *core.PdfObjectString
 21 | 	val    string
 22 | 	idx    int
 23 | }
 24 | 
 25 | func (tc *textChunk) encode() {
 26 | 	var encoded string
 27 | 	if font := tc.font; font != nil {
 28 | 		encodedBytes, numMisses := font.StringToCharcodeBytes(tc.val)
 29 | 		if numMisses != 0 {
 30 | 			common.Log.Debug("WARN: some runes could not be encoded.\n\t%s -> %v")
 31 | 		}
 32 | 		encoded = string(encodedBytes)
 33 | 	}
 34 | 
 35 | 	*tc.strObj = *core.MakeString(encoded)
 36 | }
 37 | 
 38 | type textChunks struct {
 39 | 	text   string
 40 | 	chunks []*textChunk
 41 | }
 42 | 
 43 | func (tc *textChunks) replace(search, replacement string) {
 44 | 	text := tc.text
 45 | 	chunks := tc.chunks
 46 | 
 47 | 	// Steps:
 48 | 	// 1. Search for the first index of the search term in the text.
 49 | 	// 2. Use the found index to match the text chunk which contains
 50 | 	//    (or partly contains) the search term.
 51 | 	// 3. Replace the search term in the found text chunk. The search term
 52 | 	//    will not always start at the beginning of the text chunk. Also,
 53 | 	//    the search term could be split in multiple text chunks. If that's
 54 | 	//    the case, replace the portion of the search term in the found
 55 | 	//    chunk and continue removing characters from the following chunks
 56 | 	//    until the search term has been completely erased.
 57 | 	// 4. Offset the text chunks slice to the last processed text chunk from
 58 | 	//    the previous step, if the text chunk was not completely erased, or
 59 | 	//    to the next one otherwise. This is necessary so that the visited
 60 | 	//    text chunks are skipped when searching for the next occurrence of the
 61 | 	//    search term.
 62 | 	// 5. Discard the part of the text up to (and including) the index found
 63 | 	//    in step one.
 64 | 	// 6. Move to step 1 in order to search for the search term in the remaining
 65 | 	//    text.
 66 | 	var chunkOffset int
 67 | 	matchIdx := strings.Index(text, search)
 68 | 	for currMatchIdx := matchIdx; matchIdx != -1; {
 69 | 		for i, chunk := range chunks[chunkOffset:] {
 70 | 			idx, lenChunk := chunk.idx, len(chunk.val)
 71 | 			if currMatchIdx < idx || currMatchIdx > idx+lenChunk-1 {
 72 | 				continue
 73 | 			}
 74 | 			chunkOffset += i + 1
 75 | 
 76 | 			start := currMatchIdx - idx
 77 | 			remaining := len(search) - (lenChunk - start)
 78 | 
 79 | 			replaceVal := chunk.val[:start] + replacement
 80 | 			if remaining < 0 {
 81 | 				replaceVal += chunk.val[lenChunk+remaining:]
 82 | 				chunkOffset--
 83 | 			}
 84 | 
 85 | 			chunk.val = replaceVal
 86 | 			chunk.encode()
 87 | 
 88 | 			for j := chunkOffset; remaining > 0; j++ {
 89 | 				c := chunks[j]
 90 | 				l := len(c.val)
 91 | 
 92 | 				if l > remaining {
 93 | 					c.val = c.val[remaining:]
 94 | 				} else {
 95 | 					c.val = ""
 96 | 					chunkOffset++
 97 | 				}
 98 | 
 99 | 				c.encode()
100 | 				remaining -= l
101 | 			}
102 | 
103 | 			break
104 | 		}
105 | 
106 | 		text = text[matchIdx+1:]
107 | 		matchIdx = strings.Index(text, search)
108 | 		currMatchIdx += matchIdx + 1
109 | 	}
110 | 
111 | 	tc.text = strings.Replace(tc.text, search, replacement, -1)
112 | }
113 | 
114 | // Replace searches the provided text in the PDF file specified by the inputPath
115 | // parameter and replaces it by the newText. A password can be passed in for encrypted input files.
116 | // The result is saved to outputPath.
117 | func Replace(inputPath, outputPath, text, replaceText, password string) error {
118 | 	// Read input file.
119 | 	r, pages, _, _, err := readPDF(inputPath, password)
120 | 	if err != nil {
121 | 		return err
122 | 	}
123 | 
124 | 	w := unipdf.NewPdfWriter()
125 | 
126 | 	// Search specified text.
127 | 	for i := 0; i < pages; i++ {
128 | 		// Get page.
129 | 		numPage := i + 1
130 | 
131 | 		page, err := r.GetPage(numPage)
132 | 		if err != nil {
133 | 			return err
134 | 		}
135 | 
136 | 		err = searchReplacePageText(page, text, replaceText)
137 | 		if err != nil {
138 | 			return err
139 | 		}
140 | 
141 | 		err = w.AddPage(page)
142 | 		if err != nil {
143 | 			return err
144 | 		}
145 | 	}
146 | 
147 | 	// Write output file.
148 | 	safe := inputPath == outputPath
149 | 	return writePDF(outputPath, &w, safe)
150 | }
151 | 
152 | func searchReplacePageText(page *model.PdfPage, searchText, replaceText string) error {
153 | 	contents, err := page.GetAllContentStreams()
154 | 	if err != nil {
155 | 		return err
156 | 	}
157 | 
158 | 	ops, err := contentstream.NewContentStreamParser(contents).Parse()
159 | 	if err != nil {
160 | 		return err
161 | 	}
162 | 
163 | 	// Generate text chunks.
164 | 	var currFont *model.PdfFont
165 | 	tc := textChunks{}
166 | 
167 | 	textProcFunc := func(objptr *core.PdfObject) {
168 | 		strObj, ok := core.GetString(*objptr)
169 | 		if !ok {
170 | 			common.Log.Debug("Invalid parameter, skipping")
171 | 			return
172 | 		}
173 | 
174 | 		str := strObj.String()
175 | 		if currFont != nil {
176 | 			decoded, _, numMisses := currFont.CharcodeBytesToUnicode(strObj.Bytes())
177 | 			if numMisses != 0 {
178 | 				common.Log.Debug("WARN: some charcodes could not be decoded.\n\t%v -> %s", strObj.Bytes(), decoded)
179 | 			}
180 | 			str = decoded
181 | 		}
182 | 
183 | 		tc.chunks = append(tc.chunks, &textChunk{
184 | 			font:   currFont,
185 | 			strObj: strObj,
186 | 			val:    str,
187 | 			idx:    len(tc.text),
188 | 		})
189 | 		tc.text += str
190 | 	}
191 | 
192 | 	processor := contentstream.NewContentStreamProcessor(*ops)
193 | 	processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "",
194 | 		func(op *contentstream.ContentStreamOperation, _ contentstream.GraphicsState, resources *model.PdfPageResources) error {
195 | 			switch op.Operand {
196 | 			case `Tj`, `'`:
197 | 				if len(op.Params) != 1 {
198 | 					common.Log.Debug("Invalid: Tj/' with invalid set of parameters - skip")
199 | 					return nil
200 | 				}
201 | 				textProcFunc(&op.Params[0])
202 | 			case `''`:
203 | 				if len(op.Params) != 3 {
204 | 					common.Log.Debug("Invalid: '' with invalid set of parameters - skip")
205 | 					return nil
206 | 				}
207 | 				textProcFunc(&op.Params[3])
208 | 			case `TJ`:
209 | 				if len(op.Params) != 1 {
210 | 					common.Log.Debug("Invalid: TJ with invalid set of parameters - skip")
211 | 					return nil
212 | 				}
213 | 				arr, _ := core.GetArray(op.Params[0])
214 | 				for i := range arr.Elements() {
215 | 					obj := arr.Get(i)
216 | 					textProcFunc(&obj)
217 | 					arr.Set(i, obj)
218 | 				}
219 | 			case "Tf":
220 | 				if len(op.Params) != 2 {
221 | 					common.Log.Debug("Invalid: Tf with invalid set of parameters - skip")
222 | 					return nil
223 | 				}
224 | 
225 | 				fname, ok := core.GetName(op.Params[0])
226 | 				if !ok || fname == nil {
227 | 					common.Log.Debug("ERROR: could not get font name")
228 | 					return nil
229 | 				}
230 | 
231 | 				fObj, has := resources.GetFontByName(*fname)
232 | 				if !has {
233 | 					common.Log.Debug("ERROR: font %s not found", fname.String())
234 | 					return nil
235 | 				}
236 | 
237 | 				pdfFont, err := model.NewPdfFontFromPdfObject(fObj)
238 | 				if err != nil {
239 | 					common.Log.Debug("ERROR: loading font")
240 | 					return nil
241 | 				}
242 | 				currFont = pdfFont
243 | 			}
244 | 
245 | 			return nil
246 | 		})
247 | 
248 | 	if err = processor.Process(page.Resources); err != nil {
249 | 		return err
250 | 	}
251 | 
252 | 	tc.replace(searchText, replaceText)
253 | 	return page.SetContentStreams([]string{ops.String()}, core.NewFlateEncoder())
254 | }
255 | 


--------------------------------------------------------------------------------
/internal/cli/utils.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package cli
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 	"os"
 12 | 	"path/filepath"
 13 | 	"sort"
 14 | 	"strconv"
 15 | 	"strings"
 16 | 	"unicode"
 17 | 
 18 | 	"github.com/spf13/cobra"
 19 | )
 20 | 
 21 | type fileMatcher func(string) bool
 22 | 
 23 | func pdfMatcher(inputPath string) bool {
 24 | 	return strings.ToLower(filepath.Ext(inputPath)) == ".pdf"
 25 | }
 26 | 
 27 | // parsePageRange parses a string of page ranges separated by commas and
 28 | // returns a slice of integer page numbers.
 29 | // Example page range string: 1-3,4,6-7
 30 | // The returned slice of pages contains pages 1,2,3 (1-3), 4 and 6,7 (6-7),
 31 | // while page number 5 is skipped.
 32 | func parsePageRange(pageRange string) ([]int, error) {
 33 | 	var pages []int
 34 | 
 35 | 	rngs := strings.Split(removeSpaces(pageRange), ",")
 36 | 	for _, rng := range rngs {
 37 | 		if rng == "" {
 38 | 			continue
 39 | 		}
 40 | 
 41 | 		indices := strings.Split(rng, "-")
 42 | 		lenIndices := len(indices)
 43 | 		if lenIndices > 2 {
 44 | 			return nil, errors.New("invalid page range")
 45 | 		}
 46 | 		if lenIndices == 2 {
 47 | 			start, err := strconv.Atoi(indices[0])
 48 | 			if err != nil {
 49 | 				return nil, errors.New("invalid page number")
 50 | 			}
 51 | 			if start < 1 {
 52 | 				return nil, errors.New("page range start must be greater than 0")
 53 | 			}
 54 | 
 55 | 			end, err := strconv.Atoi(indices[1])
 56 | 			if err != nil {
 57 | 				return nil, errors.New("invalid page number")
 58 | 			}
 59 | 			if end < 1 {
 60 | 				return nil, errors.New("page range end must be greater than 0")
 61 | 			}
 62 | 
 63 | 			if start > end {
 64 | 				return nil, errors.New("page range end must be greater than the start")
 65 | 			}
 66 | 
 67 | 			for page := start; page <= end; page++ {
 68 | 				pages = append(pages, page)
 69 | 			}
 70 | 
 71 | 			continue
 72 | 		}
 73 | 
 74 | 		page, err := strconv.Atoi(indices[0])
 75 | 		if err != nil {
 76 | 			return nil, errors.New("invalid page number")
 77 | 		}
 78 | 
 79 | 		pages = append(pages, page)
 80 | 	}
 81 | 
 82 | 	pages = uniqueIntSlice(pages)
 83 | 	sort.Ints(pages)
 84 | 
 85 | 	return pages, nil
 86 | }
 87 | 
 88 | func parsePageRangeUnsorted(pageRange string) ([]int, error) {
 89 | 	var pages []int
 90 | 
 91 | 	rngs := strings.Split(removeSpaces(pageRange), ",")
 92 | 	for _, rng := range rngs {
 93 | 		if rng == "" {
 94 | 			continue
 95 | 		}
 96 | 
 97 | 		indices := strings.Split(rng, "-")
 98 | 		lenIndices := len(indices)
 99 | 		if lenIndices > 2 {
100 | 			return nil, errors.New("invalid page range")
101 | 		}
102 | 		if lenIndices == 2 {
103 | 			start, err := strconv.Atoi(indices[0])
104 | 			if err != nil {
105 | 				return nil, errors.New("invalid start page number")
106 | 			}
107 | 			if start < 1 {
108 | 				return nil, errors.New("page range start must be greater than 0")
109 | 			}
110 | 
111 | 			end, err := strconv.Atoi(indices[1])
112 | 			if err != nil {
113 | 				return nil, errors.New("invalid end page number")
114 | 			}
115 | 			if end < 1 {
116 | 				return nil, errors.New("page range end must be greater than 0")
117 | 			}
118 | 
119 | 			if start > end {
120 | 				return nil, errors.New("page range end must be greater than the start")
121 | 			}
122 | 
123 | 			for page := start; page <= end; page++ {
124 | 				pages = append(pages, page)
125 | 			}
126 | 
127 | 			continue
128 | 		}
129 | 
130 | 		page, err := strconv.Atoi(indices[0])
131 | 		if err != nil {
132 | 			return nil, errors.New("invalid page number")
133 | 		}
134 | 
135 | 		pages = append(pages, page)
136 | 	}
137 | 
138 | 	pages = uniqueIntSlice(pages)
139 | 
140 | 	return pages, nil
141 | }
142 | 
143 | func parseInputPaths(inputPaths []string, recursive bool, matcher fileMatcher) ([]string, error) {
144 | 	var err error
145 | 	var files []string
146 | 	acc := map[string]bool{}
147 | 
148 | 	for _, inputPath := range inputPaths {
149 | 		// Convert relative paths to absolute ones.
150 | 		if !filepath.IsAbs(inputPath) {
151 | 			inputPath, err = filepath.Abs(inputPath)
152 | 			if err != nil {
153 | 				return nil, err
154 | 			}
155 | 		}
156 | 
157 | 		// Add visited file to the accumulator.
158 | 		if _, ok := acc[inputPath]; ok {
159 | 			continue
160 | 		}
161 | 		acc[inputPath] = true
162 | 
163 | 		// Get file info.
164 | 		inputFile, err := os.Lstat(inputPath)
165 | 		if err != nil {
166 | 			return nil, err
167 | 		}
168 | 
169 | 		// Check file type.
170 | 		switch mode := inputFile.Mode(); {
171 | 		case mode.IsRegular():
172 | 			if matcher == nil || matcher(inputPath) {
173 | 				files = append(files, inputPath)
174 | 			}
175 | 		case mode.IsDir():
176 | 			dirFiles, err := parseInputDir(inputPath, recursive, acc, matcher)
177 | 			if err != nil {
178 | 				return nil, err
179 | 			}
180 | 			files = append(files, dirFiles...)
181 | 		}
182 | 	}
183 | 
184 | 	return files, nil
185 | }
186 | 
187 | func parseInputDir(dir string, recursive bool, acc map[string]bool, matcher fileMatcher) ([]string, error) {
188 | 	inputPaths, err := dirFiles(dir)
189 | 	if err != nil {
190 | 		return nil, err
191 | 	}
192 | 
193 | 	if acc == nil {
194 | 		acc = map[string]bool{}
195 | 	}
196 | 
197 | 	var files []string
198 | 	for _, inputPath := range inputPaths {
199 | 		// Convert relative paths to absolute ones.
200 | 		inputPath = filepath.Join(dir, inputPath)
201 | 		if !filepath.IsAbs(inputPath) {
202 | 			inputPath, err = filepath.Abs(inputPath)
203 | 			if err != nil {
204 | 				return nil, err
205 | 			}
206 | 		}
207 | 
208 | 		// Add visited file to the accumulator.
209 | 		if _, ok := acc[inputPath]; ok {
210 | 			continue
211 | 		}
212 | 		acc[inputPath] = true
213 | 
214 | 		// Get file info.
215 | 		inputFile, err := os.Lstat(inputPath)
216 | 		if err != nil {
217 | 			return nil, err
218 | 		}
219 | 
220 | 		// Check file type.
221 | 		switch mode := inputFile.Mode(); {
222 | 		case mode.IsRegular():
223 | 			if matcher == nil || matcher(inputPath) {
224 | 				files = append(files, inputPath)
225 | 			}
226 | 		case mode.IsDir():
227 | 			if !recursive {
228 | 				continue
229 | 			}
230 | 
231 | 			subdirFiles, err := parseInputDir(inputPath, recursive, acc, matcher)
232 | 			if err != nil {
233 | 				return nil, err
234 | 			}
235 | 			files = append(files, subdirFiles...)
236 | 		}
237 | 	}
238 | 
239 | 	return files, nil
240 | }
241 | 
// dirFiles returns the names of all the entries of the directory dir.
// The names are relative to dir and are not sorted.
func dirFiles(dir string) ([]string, error) {
	handle, err := os.Open(dir)
	if err != nil {
		return nil, err
	}
	defer handle.Close()

	// A negative count reads all the directory entries in a single call.
	names, err := handle.Readdirnames(-1)
	return names, err
}
251 | 
// generateOutputPath returns the path at which the result of processing
// inputPath should be saved:
//   - if overwrite is true, inputPath itself is returned.
//   - if outputDir is not empty, the input file name is placed inside
//     outputDir, unchanged (nameSuffix is not used in this case).
//   - otherwise, the file is placed next to the input file, having its
//     extension replaced by "_<nameSuffix>.pdf".
func generateOutputPath(inputPath, outputDir, nameSuffix string, overwrite bool) string {
	if overwrite {
		return inputPath
	}

	parent, base := filepath.Split(inputPath)
	switch outputDir {
	case "":
		stem := strings.TrimSuffix(base, filepath.Ext(base))
		return filepath.Join(parent, fmt.Sprintf("%s_%s.pdf", stem, nameSuffix))
	default:
		return filepath.Join(outputDir, base)
	}
}
265 | 
// clampInt restricts val to the [minimum, maximum] interval. The lower bound
// is checked first: minimum is returned if val is lower than it, maximum is
// returned if val is greater than it and val is returned otherwise.
func clampInt(val, minimum, maximum int) int {
	switch {
	case val < minimum:
		return minimum
	case val > maximum:
		return maximum
	default:
		return val
	}
}
276 | 
// removeSpaces returns s without its leading and trailing whitespace, as
// defined by unicode.IsSpace. Whitespace found inside the string is kept.
func removeSpaces(s string) string {
	return strings.TrimFunc(s, unicode.IsSpace)
}
282 | 
// uniqueIntSlice returns a new slice containing the distinct values of items,
// in the order of their first occurrence. The input slice is not modified.
func uniqueIntSlice(items []int) []int {
	result := make([]int, 0, len(items))

	seen := make(map[int]struct{})
	for _, value := range items {
		if _, duplicate := seen[value]; !duplicate {
			seen[value] = struct{}{}
			result = append(result, value)
		}
	}

	return result
}
300 | 
// printErr writes the formatted message to standard output and terminates
// the program with exit code 1. No newline is appended to the message.
func printErr(format string, a ...interface{}) {
	fmt.Fprintf(os.Stdout, format, a...)
	os.Exit(1)
}
305 | 
306 | func printUsageErr(cmd *cobra.Command, format string, a ...interface{}) {
307 | 	fmt.Printf("Error: "+format+"\n", a...)
308 | 	cmd.Help()
309 | 	os.Exit(1)
310 | }
311 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # unipdf-cli
  2 | 
  3 | [![Build Status](https://travis-ci.org/unidoc/unipdf-cli.svg?branch=master)](https://travis-ci.org/unidoc/unipdf-cli)
  4 | [![GoDoc](https://godoc.org/github.com/unidoc/unipdf-cli?status.svg)](https://godoc.org/github.com/unidoc/unipdf-cli)
  5 | [![Go Report Card](https://goreportcard.com/badge/github.com/unidoc/unipdf-cli)](https://goreportcard.com/report/github.com/unidoc/unipdf-cli)
  6 | 
  7 | unipdf-cli is a CLI tool which makes working with PDF files very easy. It supports
  8 | the most common PDF operations. The application is written in Golang and is
  9 | powered by the [UniPDF](https://github.com/unidoc/unipdf) PDF library.
 10 | 
 11 | ## Features
 12 | 
 13 | - [Merge PDF files](#merge)
 14 | - [Split PDF files](#split)
 15 | - [Explode PDF files](#explode)
 16 | - [Encrypt PDF files](#encrypt)
 17 | - [Decrypt PDF files](#decrypt)
 18 | - [Change user/owner password](#passwd)
 19 | - [Optimize PDF files](#optimize)
 20 | - [Rotate PDF pages](#rotate)
 21 | - [Add watermark images to PDF files](#watermark)
 22 | - [Convert PDF files to grayscale](#grayscale)
 23 | - [Validate and print PDF file information](#info)
 24 | - [Extract text from PDF files](#extract-text)
 25 | - [Extract images from PDF files](#extract-images)
 26 | - [Search text in PDF files](#search)
 27 | - [Replace text in PDF files](#replace)
 28 | - [Export PDF form fields as JSON](#form-export)
 29 | - [Fill PDF form fields from JSON file](#form-fill)
 30 | - [Fill PDF form fields from FDF file](#fdf-merge)
 31 | - [Flatten PDF form fields](#form-flatten)
 32 | - [Render PDF pages to images](#render)
 33 | 
 34 | ## Short demo
 35 | 
 36 | [![asciicast](https://i.imgur.com/nQZq6T7.png)](https://asciinema.org/a/220314)
 37 | 
 38 | ## Installation
 39 | 
 40 | Minimum required Go version: 1.23. We officially support the 3 latest minor versions of Go.
 41 | 
 42 | ```
 43 | git clone git@github.com:unidoc/unipdf-cli.git
 44 | cd unipdf-cli/cmd/unipdf
 45 | go build
 46 | ```
 47 | 
 48 | ## Showcase
 49 | 
 50 | #### Grayscale conversion
 51 | 
 52 | ![grayscale example](https://i.imgur.com/9QgXWUc.png)
 53 | 
 54 | #### Add watermark
 55 | 
 56 | ![watermark example](https://i.imgur.com/GIRsTnT.png)
 57 | 
 58 | ## Usage
 59 | 
 60 | #### Merge
 61 | 
 62 | Merge multiple PDF files into a single output file.
 63 | 
 64 | ```
 65 | unipdf merge OUTPUT_FILE INPUT_FILE...
 66 | 
 67 | Examples:
 68 | unipdf merge output_file.pdf input_file1.pdf input_file2.pdf
 69 | ```
 70 | 
 71 | #### Split
 72 | 
 73 | Extract one or more page ranges from PDF file and save the result as a
 74 | single output file.
 75 | 
 76 | ```
 77 | unipdf split [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]
 78 | 
 79 | Flags:
 80 | -p, --password string   PDF file password
 81 | 
 82 | Examples:
 83 | unipdf split input_file.pdf output_file.pdf 1-2
 84 | unipdf split -p pass input_file.pdf output_file.pdf 1-2,4
 85 | 
 86 | PAGES argument example: 1-3,4,6-7
 87 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
 88 | while page number 5 is skipped.
 89 | ```
 90 | 
 91 | #### Explode
 92 | 
 93 | Splits the input file into separate single page PDF files and saves the result
 94 | as a ZIP archive.
 95 | 
 96 | ```
 97 | Usage:
 98 | unipdf explode [FLAG]... INPUT_FILE
 99 | 
100 | Flags:
101 | -o, --output-file string   Output file
102 | -P, --pages string         Pages to extract from the input file
103 | -p, --password string      Input file password
104 | 
105 | Examples:
106 | unipdf explode input_file.pdf
107 | unipdf explode -o pages.zip input_file.pdf
108 | unipdf explode -o pages.zip -P 1-3 input_file.pdf
109 | unipdf explode -o pages.zip -P 1-3 -p pass input_file.pdf
110 | 
111 | Pages flag example: 1-3,4,6-7
112 | Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be extracted, while page
113 | number 5 is skipped.
114 | ```
115 | 
116 | #### Encrypt
117 | 
118 | Add password protection to PDF files. Owner and user passwords can be
119 | specified, along with a set of user permissions. The encryption algorithm
120 | used for protecting the file is configurable.
121 | 
122 | ```
123 | unipdf encrypt [FLAG]... INPUT_FILE OWNER_PASSWORD [USER_PASSWORD]
124 | 
125 | Flags:
126 | -m, --mode string         Algorithm to use for encrypting the file (default "rc4")
127 | -o, --output-file string  Output file
128 | -P, --perms string        User permissions (default "all")
129 | 
130 | Examples:
131 | unipdf encrypt input_file.pdf owner_pass
132 | unipdf encrypt input_file.pdf owner_pass user_pass
133 | unipdf encrypt -o output_file.pdf -m aes256 input_file.pdf owner_pass user_pass
134 | unipdf encrypt -o output_file.pdf -P none -m aes256 input_file.pdf owner_pass user_pass
135 | unipdf encrypt -o output_file.pdf -P modify,annotate -m aes256 input_file.pdf owner_pass user_pass
136 | 
137 | Supported encryption algorithms:
138 | - rc4 (default)
139 | - aes128
140 | - aes256
141 | 
142 | Supported user permissions:
143 | - all (default)
144 | - none
145 | - print-low-res
146 | - print-high-res
147 | - modify
148 | - extract
149 | - extract-graphics
150 | - annotate
151 | - fill-forms
152 | - rotate
153 | ```
154 | 
155 | #### Decrypt
156 | 
157 | Remove password protection from PDF files.
158 | 
159 | ```
160 | unipdf decrypt [FLAG]... INPUT_FILE
161 | 
162 | Flags:
163 | -o, --output-file string   Output file
164 | -p, --password string      PDF file password
165 | 
166 | Examples:
167 | unipdf decrypt -p pass input_file.pdf
168 | unipdf decrypt -p pass -o output_file.pdf input_file.pdf
169 | ```
170 | 
171 | #### Passwd
172 | 
173 | Change protected PDF user/owner password.
174 | 
175 | ```
176 | unipdf passwd [FLAG]... INPUT_FILE NEW_OWNER_PASSWORD [NEW_USER_PASSWORD]
177 | 
178 | Flags:
179 | -o, --output-file string   Output file
180 | -p, --password string      PDF file password
181 | 
182 | Examples:
183 | unipdf passwd -p pass input_file.pdf new_owner_pass
184 | unipdf passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass
185 | unipdf passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass new_user_pass
186 | ```
187 | 
188 | #### Optimize
189 | 
190 | Optimize PDF files by optimizing structure, compression and image quality.
191 | 
192 | The command can take multiple files and directories as input parameters.
193 | By default, each PDF file is saved in the same location as the original file,
194 | appending the "_optimized" suffix to the file name. Use the --overwrite flag
195 | to overwrite the original files.
196 | In addition, the optimized output files can be saved to a different directory
197 | by using the --target-dir flag.
198 | The command can search for PDF files inside the subdirectories of the
199 | specified input directories by using the --recursive flag.
200 | 
201 | The quality of the images in the output files can be configured through
202 | the --image-quality flag (default 90).
203 | The resolution of the output images can be controlled using the --image-ppi flag.
204 | Common pixels per inch values are 100 (screen), 150-300 (print), 600 (art). If
205 | not specified, the PPI of the output images is 100.
206 | 
207 | ```
208 | unipdf optimize [FLAG]... INPUT_FILES...
209 | 
210 | Flags:
211 | -P, --image-ppi float     output images pixels per inch (default 100)
212 | -q, --image-quality int   output JPEG image quality (default 90)
213 | -O, --overwrite           overwrite input files
214 | -p, --password string     file password
215 | -r, --recursive           search PDF files in subdirectories
216 | -t, --target-dir string   output directory
217 | 
218 | Examples:
219 | unipdf optimize file_1.pdf file_n.pdf
220 | unipdf optimize -O file_1.pdf file_n.pdf
221 | unipdf optimize -O -r file_1.pdf file_n.pdf dir_1 dir_n
222 | unipdf optimize -t out_dir file_1.pdf file_n.pdf dir_1 dir_n
223 | unipdf optimize -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n
224 | unipdf optimize -t out_dir -r -q 75 file_1.pdf file_n.pdf dir_1 dir_n
225 | unipdf optimize -t out_dir -r -q 75 -P 100 file_1.pdf file_n.pdf dir_1 dir_n
226 | unipdf optimize -t out_dir -r -q 75 -P 100 -p pass file_1.pdf file_n.pdf dir_1 dir_n
227 | ```
228 | 
229 | #### Rotate
230 | 
231 | Rotate PDF file pages by a specified angle. The angle argument is specified in
232 | degrees and it must be a multiple of 90.
233 | 
234 | ```
235 | unipdf rotate [FLAG]... INPUT_FILE ANGLE
236 | 
237 | Flags:
238 | -o, --output-file string   Output file
239 | -P, --pages string         Pages to rotate
240 | -p, --password string      PDF file password
241 | 
242 | Examples:
243 | unipdf rotate input_file.pdf 90
244 | unipdf rotate -- input_file.pdf -270
245 | unipdf rotate -o output_file.pdf input_file.pdf 90
246 | unipdf rotate -o output_file.pdf -P 1-3 input_file.pdf 90
247 | unipdf rotate -o output_file.pdf -P 1-3 -p pass input_file.pdf 90
248 | 
249 | Pages flag example: 1-3,4,6-7
250 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rotated, while
251 | page number 5 is skipped.
252 | ```
253 | 
254 | #### Watermark
255 | 
256 | Add watermark images to PDF files.
257 | 
258 | ```
259 | unipdf watermark [FLAG]... INPUT_FILE WATERMARK_IMAGE
260 | 
261 | Flags:
262 | -o, --output-file string   Output file
263 | -P, --pages string         Pages on which to add watermark
264 | -p, --password string      PDF file password
265 | 
266 | Examples:
267 | unipdf watermark input_file.pdf watermark.png
268 | unipdf watermark -o output_file.pdf input_file.pdf watermark.png
269 | unipdf watermark -o output_file.pdf -P 1-3 input_file.pdf watermark.png
270 | unipdf watermark -o output_file.pdf -P 1-3 -p pass input_file.pdf watermark.png
271 | 
272 | Pages flag example: 1-3,4,6-7
273 | Watermark will only be applied to pages 1,2,3 (1-3), 4 and 6,7 (6-7), while
274 | page number 5 is skipped.
275 | ```
276 | 
277 | #### Grayscale
278 | 
279 | Convert PDF files to grayscale.
280 | 
281 | ```
282 | unipdf grayscale [FLAG]... INPUT_FILE
283 | 
284 | Flags:
285 | -o, --output-file string   Output file
286 | -P, --pages string         Pages to convert to grayscale
287 | -p, --password string      PDF file password
288 | 
289 | Examples:
290 | unipdf grayscale input_file.pdf
291 | unipdf grayscale -o output_file.pdf input_file.pdf
292 | unipdf grayscale -o output_file.pdf -P 1-3 input_file.pdf
293 | unipdf grayscale -o output_file.pdf -P 1-3 -p pass input_file.pdf
294 | 
295 | Pages flag example: 1-3,4,6-7
296 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be converted to grayscale, while
297 | page number 5 is skipped.
298 | ```
299 | 
300 | #### Info
301 | 
302 | Outputs file information. Also does some basic validation.
303 | 
304 | ```
305 | unipdf info [FLAG]... INPUT_FILE
306 | 
307 | Flags:
308 | -p, --password string   PDF file password
309 | 
310 | Examples:
311 | unipdf info input_file.pdf
312 | unipdf info -p pass input_file.pdf
313 | ```
314 | 
315 | #### Extract text
316 | 
317 | Extracts PDF text. The extracted text is always printed to STDOUT.
318 | 
319 | ```
320 | unipdf extract text [FLAG]... INPUT_FILE
321 | 
322 | Flags:
323 | -P, --pages string           Pages to extract text from
324 | -p, --user-password string   Input file password
325 | 
326 | Examples:
327 | unipdf extract text input_file.pdf
328 | unipdf extract text -P 1-3 input_file.pdf
329 | unipdf extract text -P 1-3 -p pass input_file.pdf
330 | 
331 | Pages flag example: 1-3,4,6-7
332 | Text will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while
333 | page number 5 is skipped.
334 | ```
335 | 
336 | #### Extract images
337 | 
338 | Extracts PDF images. The images are extracted in a ZIP file and saved at the
339 | destination specified by the --output-file parameter. If no output file is
340 | specified, the ZIP archive is saved in the same directory as the input file.
341 | 
342 | ```
343 | unipdf extract images [FLAG]... INPUT_FILE
344 | 
345 | Flags:
346 | -S, --include-inline-stencil-masks   Include inline stencil masks
347 | -o, --output-file string             Output file
348 | -P, --pages string                   Pages to extract images from
349 | -p, --password string                Input file password
350 | 
351 | Examples:
352 | unipdf extract images input_file.pdf
353 | unipdf extract images -o images.zip input_file.pdf
354 | unipdf extract images -P 1-3 -p pass -o images.zip input_file.pdf
355 | 
356 | Pages flag example: 1-3,4,6-7
357 | Images will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while
358 | page number 5 is skipped.
359 | ```
360 | 
361 | #### Search
362 | 
363 | Search text in PDF files.
364 | 
365 | ```
366 | unipdf search [FLAG]... INPUT_FILE TEXT
367 | 
368 | Flags:
369 | -p, --password string   PDF file password
370 | 
371 | Examples:
372 | unipdf search input_file.pdf text_to_search
373 | unipdf search -p pass input_file.pdf text_to_search
374 | ```
375 | 
376 | #### Replace
377 | 
378 | Replace text in PDF files.
379 | 
380 | ```
381 | unipdf replace [FLAG]... INPUT_FILE TEXT
382 | 
383 | Flags:
384 | -o, --output-file string    output file
385 | -r, --replace-text string   replacement text
386 | -p, --password string       PDF file password
387 | 
388 | Examples:
389 | unipdf replace input_file.pdf text_to_search
390 | unipdf replace -o output_file.pdf input_file.pdf text_to_search
391 | unipdf replace -o output_file.pdf -r replacement_text input_file.pdf text_to_search
392 | unipdf replace -o output_file.pdf -r replacement_text -p pass input_file.pdf text_to_search
393 | ```
394 | 
395 | 
396 | #### Form Export
397 | 
398 | Export JSON representation of form fields.
399 | 
400 | By default, the resulting JSON content is printed to STDOUT. The output can be
401 | saved to a file by using the --output-file flag.
402 | 
403 | ```
404 | unipdf form export [FLAG]... INPUT_FILE
405 | 
406 | Flags:
407 | -o, --output-file string   output file
408 | 
409 | Examples:
410 | unipdf form export in_file.pdf
411 | unipdf form export in_file.pdf > out_file.json
412 | unipdf form export -o out_file.json in_file.pdf
413 | ```
414 | 
415 | #### Form Fill
416 | 
417 | Fill form fields from JSON file.
418 | 
419 | The field values specified in the JSON file template are used to fill the form
420 | fields in the input PDF files. In addition, the output file form fields can be
421 | flattened by using the --flatten flag. The flattening process makes the form
422 | fields of the output files read-only by appending the form field annotation
423 | XObject Form data to the page content stream, thus making it part of the page
424 | contents.
425 | 
426 | The command can take multiple files and directories as input parameters.
427 | By default, each PDF file is saved in the same location as the original file,
428 | appending the "_filled" suffix to the file name. Use the --overwrite flag
429 | to overwrite the original files.
430 | In addition, the filled output files can be saved to a different directory
431 | by using the --target-dir flag.
432 | The command can search for PDF files inside the subdirectories of the
433 | specified input directories by using the --recursive flag.
434 | 
435 | ```
436 | unipdf form fill [FLAG]... JSON_FILE INPUT_FILES...
437 | 
438 | Flags:
439 | -f, --flatten             flatten form annotations
440 | -O, --overwrite           overwrite input files
441 | -p, --password string     input file password
442 | -r, --recursive           search PDF files in subdirectories
443 | -t, --target-dir string   output directory
444 | 
445 | Examples:
446 | unipdf form fill fields.json file_1.pdf file_n.pdf
447 | unipdf form fill -O fields.json file_1.pdf file_n.pdf
448 | unipdf form fill -O -r -f fields.json file_1.pdf file_n.pdf dir_1 dir_n
449 | unipdf form fill -t out_dir fields.json file_1.pdf file_n.pdf dir_1 dir_n
450 | unipdf form fill -t out_dir -r fields.json file_1.pdf file_n.pdf dir_1 dir_n
451 | unipdf form fill -t out_dir -r -p pass fields.json file_1.pdf file_n.pdf dir_1 dir_n
452 | ```
453 | #### FDF Merge
454 | 
455 | Fill form fields from FDF file.
456 | 
457 | The field values specified in the FDF file template are used to fill the form
458 | fields in the input PDF files. In addition, the output file form fields can be
459 | flattened by using the --flatten flag. The flattening process makes the form
460 | fields of the output files read-only by appending the form field annotation
461 | XObject Form data to the page content stream, thus making it part of the page
462 | contents.
463 | 
464 | The command can take multiple files and directories as input parameters.
465 | By default, each PDF file is saved in the same location as the original file,
466 | appending the "_filled" suffix to the file name. Use the --overwrite flag
467 | to overwrite the original files.
468 | In addition, the filled output files can be saved to a different directory
469 | by using the --target-dir flag.
470 | The command can search for PDF files inside the subdirectories of the
471 | specified input directories by using the --recursive flag.
472 | 
473 | ```
474 | Usage:
475 | unipdf form fdfmerge [FLAG]... FDF_FILE INPUT_FILES...
476 | 
477 | Flags:
478 | -f, --flatten             flatten form annotations
479 | -O, --overwrite           overwrite input files
480 | -p, --password string     input file password
481 | -r, --recursive           search PDF files in subdirectories
482 | -t, --target-dir string   output directory
483 | 
484 | Examples:
485 | unipdf form fdfmerge fields.fdf file_1.pdf file_n.pdf
486 | unipdf form fdfmerge -O fields.fdf file_1.pdf file_n.pdf
487 | unipdf form fdfmerge -O -r -f fields.fdf file_1.pdf file_n.pdf dir_1 dir_n
488 | unipdf form fdfmerge -t out_dir fields.fdf file_1.pdf file_n.pdf dir_1 dir_n
489 | unipdf form fdfmerge -t out_dir -r fields.fdf file_1.pdf file_n.pdf dir_1 dir_n
490 | unipdf form fdfmerge -t out_dir -r -p pass fields.fdf file_1.pdf file_n.pdf dir_1 dir_n
491 | ```
492 | 
493 | #### Form Flatten
494 | 
495 | Flatten PDF file form annotations.
496 | 
497 | The flattening process makes the form fields of the output files read-only by
498 | appending the form field annotation XObject Form data to the page content
499 | stream, thus making it part of the page contents.
500 | 
501 | The command can take multiple files and directories as input parameters.
502 | By default, each PDF file is saved in the same location as the original file,
503 | appending the "_flattened" suffix to the file name. Use the --overwrite flag
504 | to overwrite the original files.
505 | In addition, the flattened output files can be saved to a different directory
506 | by using the --target-dir flag.
507 | The command can search for PDF files inside the subdirectories of the
508 | specified input directories by using the --recursive flag.
509 | 
510 | ```
511 | unipdf form flatten [FLAG]... INPUT_FILES...
512 | 
513 | Flags:
514 | -O, --overwrite           overwrite input files
515 | -p, --password string     input file password
516 | -r, --recursive           search PDF files in subdirectories
517 | -t, --target-dir string   output directory
518 | 
519 | Examples:
520 | unipdf form flatten file_1.pdf file_n.pdf
521 | unipdf form flatten -O file_1.pdf file_n.pdf
522 | unipdf form flatten -O -r file_1.pdf file_n.pdf dir_1 dir_n
523 | unipdf form flatten -t out_dir file_1.pdf file_n.pdf dir_1 dir_n
524 | unipdf form flatten -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n
525 | unipdf form flatten -t out_dir -r -p pass file_1.pdf file_n.pdf dir_1 dir_n
526 | ```
527 | 
528 | #### Render
529 | 
530 | Render PDF pages to image targets.
531 | 
532 | The rendered image files are saved in a ZIP file, at the location specified
533 | by the --output-file parameter. If no output file is specified, the ZIP file
534 | is saved in the same directory as the input file.
535 | 
536 | The format of the rendered image files can be specified using
537 | the --image-format flag (default jpeg). The quality of the image files can be
538 | configured through the --image-quality flag (default 100, only applies to
539 | JPEG images).
540 | ```
541 | unipdf render [FLAG]... INPUT_FILE
542 | 
543 | Flags:
544 | -f, --image-format string   format of the output images (default "jpeg")
545 | -q, --image-quality int     quality of the output images (default 100)
546 | -o, --output-file string    output file
547 | -P, --pages string          pages to render from the input file
548 | -p, --password string       input file password
549 | 
550 | Examples:
551 | unipdf render in_file.pdf
552 | unipdf render -o images.zip in_file.pdf
553 | unipdf render -o images.zip -P 1-3 in_file.pdf
554 | unipdf render -o images.zip -P 1-3 -p pass in_file.pdf
555 | unipdf render -o images.zip -P 1-3 -p pass -f jpeg -q 100 in_file.pdf
556 | 
557 | Pages flag example: 1-3,4,6-7
558 | Images will only be rendered for pages 1,2,3 (1-3), 4 and 6,7 (6-7), while
559 | page number 5 is skipped.
560 | 
561 | Supported image formats:
562 |   - jpeg (default)
563 |   - png
564 | ```
565 | 
566 | #### License Info
567 | 
568 | Get information about the license key that is loaded by unipdf-cli.
569 | 
570 | ```
571 | Example:
572 | unipdf license_info
573 | ```
574 | 
575 | ## License
576 | 
577 | unipdf-cli requires a license code to operate. There are two options:
578 | - Metered License API keys: Free ones can be obtained at https://cloud.unidoc.io
579 | - Offline Perpetual codes: Can be purchased at https://unidoc.io/pricing
580 | 
581 | ## Offline License
582 | Offline licenses are cryptography based and contain full signed information that is verified based on signatures without making any outbound connections,
583 | hence the name "offline". This kind of license is suitable for users deploying OEM products to their customers or where there are strict restrictions
584 | on outbound connections due to firewalls and/or compliance requirements.
585 | 
586 | If you have a license for [UniPDF](https://github.com/unidoc/unipdf), you can
587 | set it through the UNIDOC_LICENSE_FILE and UNIDOC_LICENSE_CUSTOMER environment
588 | variables.
589 | 
590 | ```
591 | export UNIDOC_LICENSE_FILE="PATH_TO_LICENSE_FILE"
592 | export UNIDOC_LICENSE_CUSTOMER="CUSTOMER_NAME"
593 | ```
594 | 
595 | ## Metered License (API keys)
596 | The metered license is the most convenient way to get started with UniDoc products and the Free tier enables a powerful way to get started for free.
597 | Anyone can get a free metered API key by signing up at https://cloud.unidoc.io/
598 | 
599 | If you have a metered license (API keys), you can set it through the UNIDOC_LICENSE_API_KEY environment variable.
600 | 
601 | ```
602 | export UNIDOC_LICENSE_API_KEY="unidoc_metered_api_key"
603 | ```
604 | 


--------------------------------------------------------------------------------
/pkg/pdf/grayscale.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is subject to the terms and conditions defined in
  3 |  * file 'LICENSE.md', which is part of this source code package.
  4 |  */
  5 | 
  6 | package pdf
  7 | 
  8 | import (
  9 | 	"errors"
 10 | 	"fmt"
 11 | 
 12 | 	unicommon "github.com/unidoc/unipdf/v4/common"
 13 | 	unicontent "github.com/unidoc/unipdf/v4/contentstream"
 14 | 	unicore "github.com/unidoc/unipdf/v4/core"
 15 | 	unipdf "github.com/unidoc/unipdf/v4/model"
 16 | 	"github.com/unidoc/unipdf/v4/ps"
 17 | )
 18 | 
 19 | // Grayscale converts the pages of the PDF file specified by the inputPath
 20 | // parameter to grayscale. A password can be specified for encrypted PDF files.
 21 | // A list of pages to convert to grayscale can be passed in. Every page that
 22 | // is not included in the pages slice is left intact.
 23 | // If the pages parameter is nil or an empty slice, all the pages of the input
 24 | // file are converted to grayscale.
 25 | func Grayscale(inputPath, outputPath, password string, pages []int) error {
 26 | 	// Read input file.
 27 | 	r, pageCount, _, _, err := readPDF(inputPath, password)
 28 | 	if err != nil {
 29 | 		return err
 30 | 	}
 31 | 
 32 | 	// Add pages.
 33 | 	if len(pages) == 0 {
 34 | 		pages = createPageRange(pageCount)
 35 | 	}
 36 | 
 37 | 	w := unipdf.NewPdfWriter()
 38 | 	for i := 0; i < pageCount; i++ {
 39 | 		numPage := i + 1
 40 | 
 41 | 		page, err := r.GetPage(numPage)
 42 | 		if err != nil {
 43 | 			return err
 44 | 		}
 45 | 
 46 | 		var convert bool
 47 | 		for _, page := range pages {
 48 | 			if page == numPage {
 49 | 				convert = true
 50 | 				break
 51 | 			}
 52 | 		}
 53 | 
 54 | 		if convert {
 55 | 			if err = convertPageToGrayscale(page); err != nil {
 56 | 				return err
 57 | 			}
 58 | 		}
 59 | 
 60 | 		if err = w.AddPage(page); err != nil {
 61 | 			return err
 62 | 		}
 63 | 	}
 64 | 
 65 | 	// Write output file.
 66 | 	safe := inputPath == outputPath
 67 | 	return writePDF(outputPath, &w, safe)
 68 | }
 69 | 
 70 | // convertPageToGrayscale replaces color objects on the page with grayscale
 71 | // ones. Also references XObject Images and Forms to convert those to grayscale.
 72 | func convertPageToGrayscale(page *unipdf.PdfPage) error {
 73 | 	// For each page, we go through the resources and look for the images.
 74 | 	contents, err := page.GetAllContentStreams()
 75 | 	if err != nil {
 76 | 		return err
 77 | 	}
 78 | 
 79 | 	grayContent, err := transformContentStreamToGrayscale(contents, page.Resources)
 80 | 	if err != nil {
 81 | 		return err
 82 | 	}
 83 | 	page.SetContentStreams([]string{string(grayContent)}, unicore.NewFlateEncoder())
 84 | 
 85 | 	// fmt.Printf("Processed contents: %s\n", grayContent)
 86 | 
 87 | 	return nil
 88 | }
 89 | 
 90 | // Check if colorspace represents a Pattern colorspace.
 91 | func isPatternCS(cs unipdf.PdfColorspace) bool {
 92 | 	_, isPattern := cs.(*unipdf.PdfColorspaceSpecialPattern)
 93 | 	return isPattern
 94 | }
 95 | 
 96 | func transformContentStreamToGrayscale(contents string, resources *unipdf.PdfPageResources) ([]byte, error) {
 97 | 	cstreamParser := unicontent.NewContentStreamParser(contents)
 98 | 	operations, err := cstreamParser.Parse()
 99 | 	if err != nil {
100 | 		return nil, err
101 | 	}
102 | 	processedOperations := &unicontent.ContentStreamOperations{}
103 | 
104 | 	transformedPatterns := map[unicore.PdfObjectName]bool{} // List of already transformed patterns. Avoid multiple conversions.
105 | 	transformedShadings := map[unicore.PdfObjectName]bool{} // List of already transformed shadings. Avoid multiple conversions.
106 | 
107 | 	// The content stream processor keeps track of the graphics state and we can make our own handlers to process certain commands,
108 | 	// using the AddHandler method.  In this case, we hook up to color related operands, and for image and form handling.
109 | 	processor := unicontent.NewContentStreamProcessor(*operations)
110 | 	// Add handlers for colorspace related functionality.
111 | 	processor.AddHandler(unicontent.HandlerConditionEnumAllOperands, "",
112 | 		func(op *unicontent.ContentStreamOperation, gs unicontent.GraphicsState, resources *unipdf.PdfPageResources) error {
113 | 			operand := op.Operand
114 | 			switch operand {
115 | 			case "CS": // Set colorspace operands (stroking).
116 | 				if isPatternCS(gs.ColorspaceStroking) {
117 | 					// If referring to a pattern colorspace with an external definition, need to update the definition.
118 | 					// If has an underlying colorspace, then go and change it to DeviceGray.
119 | 					// Needs to be specified externally in the colorspace resources.
120 | 
121 | 					csname := op.Params[0].(*unicore.PdfObjectName)
122 | 					if *csname != "Pattern" {
123 | 						// Update if referring to an external colorspace in resources.
124 | 						cs, ok := resources.GetColorspaceByName(*csname)
125 | 						if !ok {
126 | 							unicommon.Log.Debug("Undefined colorspace for pattern (%s)", csname)
127 | 							return errors.New("colorspace not defined")
128 | 						}
129 | 
130 | 						patternCS, ok := cs.(*unipdf.PdfColorspaceSpecialPattern)
131 | 						if !ok {
132 | 							return errors.New("type error")
133 | 						}
134 | 
135 | 						if patternCS.UnderlyingCS != nil {
136 | 							// Swap out for a gray colorspace.
137 | 							patternCS.UnderlyingCS = unipdf.NewPdfColorspaceDeviceGray()
138 | 						}
139 | 
140 | 						resources.SetColorspaceByName(*csname, patternCS)
141 | 					}
142 | 					*processedOperations = append(*processedOperations, op)
143 | 					return nil
144 | 				}
145 | 
146 | 				op := unicontent.ContentStreamOperation{}
147 | 				op.Operand = operand
148 | 				op.Params = []unicore.PdfObject{unicore.MakeName("DeviceGray")}
149 | 				*processedOperations = append(*processedOperations, &op)
150 | 				return nil
151 | 			case "cs": // Set colorspace operands (non-stroking).
152 | 				if isPatternCS(gs.ColorspaceNonStroking) {
153 | 					// If referring to a pattern colorspace with an external definition, need to update the definition.
154 | 					// If has an underlying colorspace, then go and change it to DeviceGray.
155 | 					// Needs to be specified externally in the colorspace resources.
156 | 
157 | 					csname := op.Params[0].(*unicore.PdfObjectName)
158 | 					if *csname != "Pattern" {
159 | 						// Update if referring to an external colorspace in resources.
160 | 						cs, ok := resources.GetColorspaceByName(*csname)
161 | 						if !ok {
162 | 							unicommon.Log.Debug("Undefined colorspace for pattern (%s)", csname)
163 | 							return errors.New("colorspace not defined")
164 | 						}
165 | 
166 | 						patternCS, ok := cs.(*unipdf.PdfColorspaceSpecialPattern)
167 | 						if !ok {
168 | 							return errors.New("type error")
169 | 						}
170 | 
171 | 						if patternCS.UnderlyingCS != nil {
172 | 							// Swap out for a gray colorspace.
173 | 							patternCS.UnderlyingCS = unipdf.NewPdfColorspaceDeviceGray()
174 | 						}
175 | 
176 | 						resources.SetColorspaceByName(*csname, patternCS)
177 | 					}
178 | 					*processedOperations = append(*processedOperations, op)
179 | 					return nil
180 | 				}
181 | 
182 | 				op := unicontent.ContentStreamOperation{}
183 | 				op.Operand = operand
184 | 				op.Params = []unicore.PdfObject{unicore.MakeName("DeviceGray")}
185 | 				*processedOperations = append(*processedOperations, &op)
186 | 				return nil
187 | 
188 | 			case "SC", "SCN": // Set stroking color.  Includes pattern colors.
189 | 				if isPatternCS(gs.ColorspaceStroking) {
190 | 					op := unicontent.ContentStreamOperation{}
191 | 					op.Operand = operand
192 | 					op.Params = []unicore.PdfObject{}
193 | 
194 | 					patternColor, ok := gs.ColorStroking.(*unipdf.PdfColorPattern)
195 | 					if !ok {
196 | 						return errors.New("invalid stroking color type")
197 | 					}
198 | 
199 | 					if patternColor.Color != nil {
200 | 						color, err := gs.ColorspaceStroking.ColorToRGB(patternColor.Color)
201 | 						if err != nil {
202 | 							fmt.Printf("Error: %v\n", err)
203 | 							return err
204 | 						}
205 | 						rgbColor := color.(*unipdf.PdfColorDeviceRGB)
206 | 						grayColor := rgbColor.ToGray()
207 | 
208 | 						op.Params = append(op.Params, unicore.MakeFloat(grayColor.Val()))
209 | 					}
210 | 
211 | 					if _, has := transformedPatterns[patternColor.PatternName]; has {
212 | 						// Already processed, need not change anything, except underlying color if used.
213 | 						op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName)))
214 | 						*processedOperations = append(*processedOperations, &op)
215 | 						return nil
216 | 					}
217 | 					transformedPatterns[patternColor.PatternName] = true
218 | 
219 | 					// Look up the pattern name and convert it.
220 | 					pattern, found := resources.GetPatternByName(patternColor.PatternName)
221 | 					if !found {
222 | 						return errors.New("undefined pattern name")
223 | 					}
224 | 
225 | 					grayPattern, err := convertPatternToGray(pattern)
226 | 					if err != nil {
227 | 						unicommon.Log.Debug("Unable to convert pattern to grayscale: %v", err)
228 | 						return err
229 | 					}
230 | 					resources.SetPatternByName(patternColor.PatternName, grayPattern.ToPdfObject())
231 | 
232 | 					op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName)))
233 | 					*processedOperations = append(*processedOperations, &op)
234 | 				} else {
235 | 					color, err := gs.ColorspaceStroking.ColorToRGB(gs.ColorStroking)
236 | 					if err != nil {
237 | 						fmt.Printf("Error with ColorToRGB: %v\n", err)
238 | 						return err
239 | 					}
240 | 					rgbColor := color.(*unipdf.PdfColorDeviceRGB)
241 | 					grayColor := rgbColor.ToGray()
242 | 
243 | 					op := unicontent.ContentStreamOperation{}
244 | 					op.Operand = operand
245 | 					op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())}
246 | 					*processedOperations = append(*processedOperations, &op)
247 | 				}
248 | 
249 | 				return nil
250 | 			case "sc", "scn": // Set nonstroking color.
251 | 				if isPatternCS(gs.ColorspaceNonStroking) {
252 | 					op := unicontent.ContentStreamOperation{}
253 | 					op.Operand = operand
254 | 					op.Params = []unicore.PdfObject{}
255 | 
256 | 					patternColor, ok := gs.ColorNonStroking.(*unipdf.PdfColorPattern)
257 | 					if !ok {
258 | 						return errors.New("invalid stroking color type")
259 | 					}
260 | 
261 | 					if patternColor.Color != nil {
262 | 						color, err := gs.ColorspaceNonStroking.ColorToRGB(patternColor.Color)
263 | 						if err != nil {
264 | 							fmt.Printf("Error : %v\n", err)
265 | 							return err
266 | 						}
267 | 						rgbColor := color.(*unipdf.PdfColorDeviceRGB)
268 | 						grayColor := rgbColor.ToGray()
269 | 
270 | 						op.Params = append(op.Params, unicore.MakeFloat(grayColor.Val()))
271 | 					}
272 | 
273 | 					if _, has := transformedPatterns[patternColor.PatternName]; has {
274 | 						// Already processed, need not change anything, except underlying color if used.
275 | 						op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName)))
276 | 						*processedOperations = append(*processedOperations, &op)
277 | 						return nil
278 | 					}
279 | 					transformedPatterns[patternColor.PatternName] = true
280 | 
281 | 					// Look up the pattern name and convert it.
282 | 					pattern, found := resources.GetPatternByName(patternColor.PatternName)
283 | 					if !found {
284 | 						return errors.New("undefined pattern name")
285 | 					}
286 | 
287 | 					grayPattern, err := convertPatternToGray(pattern)
288 | 					if err != nil {
289 | 						unicommon.Log.Debug("Unable to convert pattern to grayscale: %v", err)
290 | 						return err
291 | 					}
292 | 					resources.SetPatternByName(patternColor.PatternName, grayPattern.ToPdfObject())
293 | 
294 | 					op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName)))
295 | 					*processedOperations = append(*processedOperations, &op)
296 | 				} else {
297 | 					color, err := gs.ColorspaceNonStroking.ColorToRGB(gs.ColorNonStroking)
298 | 					if err != nil {
299 | 						fmt.Printf("Error: %v\n", err)
300 | 						return err
301 | 					}
302 | 					rgbColor := color.(*unipdf.PdfColorDeviceRGB)
303 | 					grayColor := rgbColor.ToGray()
304 | 
305 | 					op := unicontent.ContentStreamOperation{}
306 | 					op.Operand = operand
307 | 					op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())}
308 | 
309 | 					*processedOperations = append(*processedOperations, &op)
310 | 				}
311 | 				return nil
312 | 			case "RG", "K": // Set RGB or CMYK stroking color.
313 | 				color, err := gs.ColorspaceStroking.ColorToRGB(gs.ColorStroking)
314 | 				if err != nil {
315 | 					fmt.Printf("Error: %v\n", err)
316 | 					return err
317 | 				}
318 | 				rgbColor := color.(*unipdf.PdfColorDeviceRGB)
319 | 				grayColor := rgbColor.ToGray()
320 | 
321 | 				op := unicontent.ContentStreamOperation{}
322 | 				op.Operand = "G"
323 | 				op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())}
324 | 
325 | 				*processedOperations = append(*processedOperations, &op)
326 | 				return nil
327 | 			case "rg", "k": // Set RGB or CMYK as nonstroking color.
328 | 				color, err := gs.ColorspaceNonStroking.ColorToRGB(gs.ColorNonStroking)
329 | 				if err != nil {
330 | 					fmt.Printf("Error: %v\n", err)
331 | 					return err
332 | 				}
333 | 				rgbColor := color.(*unipdf.PdfColorDeviceRGB)
334 | 				grayColor := rgbColor.ToGray()
335 | 
336 | 				op := unicontent.ContentStreamOperation{}
337 | 				op.Operand = "g"
338 | 				op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())}
339 | 
340 | 				*processedOperations = append(*processedOperations, &op)
341 | 				return nil
342 | 			case "sh": // Paints the shape and color defined by shading dict.
343 | 				if len(op.Params) != 1 {
344 | 					return errors.New("params to sh operator should be 1")
345 | 				}
346 | 				shname, ok := op.Params[0].(*unicore.PdfObjectName)
347 | 				if !ok {
348 | 					return errors.New("sh parameter should be a name")
349 | 				}
350 | 				if _, has := transformedShadings[*shname]; has {
351 | 					// Already processed, no need to do anything.
352 | 					*processedOperations = append(*processedOperations, op)
353 | 					return nil
354 | 				}
355 | 				transformedShadings[*shname] = true
356 | 
357 | 				shading, found := resources.GetShadingByName(*shname)
358 | 				if !found {
359 | 					return errors.New("shading not defined in resources")
360 | 				}
361 | 
362 | 				grayShading, err := convertShadingToGray(shading)
363 | 				if err != nil {
364 | 					return err
365 | 				}
366 | 
367 | 				resources.SetShadingByName(*shname, grayShading.GetContext().ToPdfObject())
368 | 			}
369 | 			*processedOperations = append(*processedOperations, op)
370 | 
371 | 			return nil
372 | 		})
373 | 	// Add handler for image related handling.  Note that inline images are completely stored with a ContentStreamInlineImage
374 | 	// object as the parameter for BI.
375 | 	processor.AddHandler(unicontent.HandlerConditionEnumOperand, "BI",
376 | 		func(op *unicontent.ContentStreamOperation, _ unicontent.GraphicsState, resources *unipdf.PdfPageResources) error {
377 | 			if len(op.Params) != 1 {
378 | 				fmt.Printf("BI Error invalid number of params\n")
379 | 				return errors.New("invalid number of parameters")
380 | 			}
381 | 			// Inline image.
382 | 			iimg, ok := op.Params[0].(*unicontent.ContentStreamInlineImage)
383 | 			if !ok {
384 | 				fmt.Printf("Error: Invalid handling for inline image\n")
385 | 				return errors.New("invalid inline image parameter")
386 | 			}
387 | 
388 | 			img, err := iimg.ToImage(resources)
389 | 			if err != nil {
390 | 				fmt.Printf("Error converting inline image to image: %v\n", err)
391 | 				return err
392 | 			}
393 | 
394 | 			cs, err := iimg.GetColorSpace(resources)
395 | 			if err != nil {
396 | 				fmt.Printf("Error getting color space for inline image: %v\n", err)
397 | 				return err
398 | 			}
399 | 			rgbImg, err := cs.ImageToRGB(*img)
400 | 			if err != nil {
401 | 				fmt.Printf("Error converting image to rgb: %v\n", err)
402 | 				return err
403 | 			}
404 | 			rgbColorSpace := unipdf.NewPdfColorspaceDeviceRGB()
405 | 			grayImage, err := rgbColorSpace.ImageToGray(rgbImg)
406 | 			if err != nil {
407 | 				fmt.Printf("Error converting img to gray: %v\n", err)
408 | 				return err
409 | 			}
410 | 
411 | 			// Update the XObject image.
412 | 			// Use same encoder as input data.  Make sure for DCT filter it is updated to 1 color component.
413 | 			encoder, err := iimg.GetEncoder()
414 | 			if err != nil {
415 | 				fmt.Printf("Error getting encoder for inline image: %v\n", err)
416 | 				return err
417 | 			}
418 | 			if dctEncoder, is := encoder.(*unicore.DCTEncoder); is {
419 | 				dctEncoder.ColorComponents = 1
420 | 			}
421 | 
422 | 			grayInlineImg, err := unicontent.NewInlineImageFromImage(grayImage, encoder)
423 | 			if err != nil {
424 | 				if err == unicore.ErrUnsupportedEncodingParameters {
425 | 					// Unsupported encoding parameters, revert to a basic flate encoder without predictor.
426 | 					encoder = unicore.NewFlateEncoder()
427 | 				}
428 | 				// Try again, fail on error.
429 | 				grayInlineImg, err = unicontent.NewInlineImageFromImage(grayImage, encoder)
430 | 				if err != nil {
431 | 					fmt.Printf("Error making a new inline image object: %v\n", err)
432 | 					return err
433 | 				}
434 | 			}
435 | 
436 | 			// Replace inline image data with the gray image.
437 | 			pOp := unicontent.ContentStreamOperation{}
438 | 			pOp.Operand = "BI"
439 | 			pOp.Params = []unicore.PdfObject{grayInlineImg}
440 | 			*processedOperations = append(*processedOperations, &pOp)
441 | 
442 | 			return nil
443 | 		})
444 | 
445 | 	// Handler for XObject Image and Forms.
446 | 	processedXObjects := map[string]bool{} // Keep track of processed XObjects to avoid repetition.
447 | 
448 | 	processor.AddHandler(unicontent.HandlerConditionEnumOperand, "Do",
449 | 		func(op *unicontent.ContentStreamOperation, _ unicontent.GraphicsState, resources *unipdf.PdfPageResources) error {
450 | 			if len(op.Params) < 1 {
451 | 				fmt.Printf("ERROR: Invalid number of params for Do object.\n")
452 | 				return errors.New("range check")
453 | 			}
454 | 
455 | 			// XObject.
456 | 			name := op.Params[0].(*unicore.PdfObjectName)
457 | 
458 | 			// Only process each one once.
459 | 			_, has := processedXObjects[string(*name)]
460 | 			if has {
461 | 				return nil
462 | 			}
463 | 			processedXObjects[string(*name)] = true
464 | 
465 | 			_, xtype := resources.GetXObjectByName(*name)
466 | 			if xtype == unipdf.XObjectTypeImage {
467 | 				// fmt.Printf(" XObject Image: %s\n", *name)
468 | 
469 | 				ximg, err := resources.GetXObjectImageByName(*name)
470 | 				if err != nil {
471 | 					fmt.Printf("Error w/GetXObjectImageByName : %v\n", err)
472 | 					return err
473 | 				}
474 | 
475 | 				img, err := ximg.ToImage()
476 | 				if err != nil {
477 | 					fmt.Printf("Error w/ToImage: %v\n", err)
478 | 					return err
479 | 				}
480 | 
481 | 				rgbImg, err := ximg.ColorSpace.ImageToRGB(*img)
482 | 				if err != nil {
483 | 					fmt.Printf("Error ImageToRGB: %v\n", err)
484 | 					return err
485 | 				}
486 | 
487 | 				rgbColorSpace := unipdf.NewPdfColorspaceDeviceRGB()
488 | 				grayImage, err := rgbColorSpace.ImageToGray(rgbImg)
489 | 				if err != nil {
490 | 					fmt.Printf("Error ImageToGray: %v\n", err)
491 | 					return err
492 | 				}
493 | 
494 | 				// Update the XObject image.
495 | 				// Use same encoder as input data.  Make sure for DCT filter it is updated to 1 color component.
496 | 				encoder := ximg.Filter
497 | 				if dctEncoder, is := encoder.(*unicore.DCTEncoder); is {
498 | 					dctEncoder.ColorComponents = 1
499 | 				}
500 | 
501 | 				ximgGray, err := unipdf.NewXObjectImageFromImage(&grayImage, nil, encoder)
502 | 				if err != nil {
503 | 					if err == unicore.ErrUnsupportedEncodingParameters {
504 | 						// Unsupported encoding parameters, revert to a basic flate encoder without predictor.
505 | 						encoder = unicore.NewFlateEncoder()
506 | 					}
507 | 
508 | 					// Try again, fail if error.
509 | 					ximgGray, err = unipdf.NewXObjectImageFromImage(&grayImage, nil, encoder)
510 | 					if err != nil {
511 | 						fmt.Printf("Error creating image: %v\n", err)
512 | 						return err
513 | 					}
514 | 				}
515 | 
516 | 				// Update the entry.
517 | 				err = resources.SetXObjectImageByName(*name, ximgGray)
518 | 				if err != nil {
519 | 					fmt.Printf("Failed setting x object: %v (%s)\n", err, string(*name))
520 | 					return err
521 | 				}
522 | 			} else if xtype == unipdf.XObjectTypeForm {
523 | 				// fmt.Printf(" XObject Form: %s\n", *name)
524 | 
525 | 				// Go through the XObject Form content stream.
526 | 				xform, err := resources.GetXObjectFormByName(*name)
527 | 				if err != nil {
528 | 					fmt.Printf("Error: %v\n", err)
529 | 					return err
530 | 				}
531 | 
532 | 				formContent, err := xform.GetContentStream()
533 | 				if err != nil {
534 | 					fmt.Printf("Error: %v\n", err)
535 | 					return err
536 | 				}
537 | 
538 | 				// Process the content stream in the Form object too:
539 | 				// XXX/TODO/Consider: Use either form resources (priority) and fall back to page resources alternatively if not found.
540 | 				// Have not come into cases where needed yet.
541 | 				formResources := xform.Resources
542 | 				if formResources == nil {
543 | 					formResources = resources
544 | 				}
545 | 
546 | 				// Process the content stream in the Form object too:
547 | 				grayContent, err := transformContentStreamToGrayscale(string(formContent), formResources)
548 | 				if err != nil {
549 | 					fmt.Printf("Error: %v\n", err)
550 | 					return err
551 | 				}
552 | 
553 | 				xform.SetContentStream(grayContent, nil)
554 | 
555 | 				// Update the resource entry.
556 | 				resources.SetXObjectFormByName(*name, xform)
557 | 			}
558 | 
559 | 			return nil
560 | 		})
561 | 
562 | 	err = processor.Process(resources)
563 | 	if err != nil {
564 | 		fmt.Printf("Error processing: %v\n", err)
565 | 		return nil, err
566 | 	}
567 | 
568 | 	// For debug purposes: (high level logging).
569 | 	//
570 | 	// fmt.Printf("=== Unprocessed - Full list\n")
571 | 	// for idx, op := range operations {
572 | 	//	fmt.Printf("U. Operation %d: %s - Params: %v\n", idx+1, op.Operand, op.Params)
573 | 	// }
574 | 	// fmt.Printf("=== Processed - Full list\n")
575 | 	// for idx, op := range *processedOperations {
576 | 	//	fmt.Printf("P. Operation %d: %s - Params: %v\n", idx+1, op.Operand, op.Params)
577 | 	// }
578 | 
579 | 	return processedOperations.Bytes(), nil
580 | }
581 | 
582 | // Convert a pattern to grayscale (tiling or shading pattern).
583 | func convertPatternToGray(pattern *unipdf.PdfPattern) (*unipdf.PdfPattern, error) {
584 | 	// Case 1: Colored tiling patterns.  Need to process the content stream and replace.
585 | 	if pattern.IsTiling() {
586 | 		tilingPattern := pattern.GetAsTilingPattern()
587 | 
588 | 		if tilingPattern.IsColored() {
589 | 			// A colored tiling pattern can use color operators in its stream, need to process the stream.
590 | 
591 | 			content, err := tilingPattern.GetContentStream()
592 | 			if err != nil {
593 | 				return nil, err
594 | 			}
595 | 
596 | 			grayContents, err := transformContentStreamToGrayscale(string(content), tilingPattern.Resources)
597 | 			if err != nil {
598 | 				return nil, err
599 | 			}
600 | 
601 | 			tilingPattern.SetContentStream(grayContents, nil)
602 | 
603 | 			// Update in-memory pdf objects.
604 | 			_ = tilingPattern.ToPdfObject()
605 | 		}
606 | 	} else if pattern.IsShading() {
607 | 		// Case 2: Shading patterns.  Need to create a new colorspace that can map from N=3,4 colorspaces to grayscale.
608 | 		shadingPattern := pattern.GetAsShadingPattern()
609 | 
610 | 		grayShading, err := convertShadingToGray(shadingPattern.Shading)
611 | 		if err != nil {
612 | 			return nil, err
613 | 		}
614 | 		shadingPattern.Shading = grayShading
615 | 
616 | 		// Update in-memory pdf objects.
617 | 		_ = shadingPattern.ToPdfObject()
618 | 	}
619 | 
620 | 	return pattern, nil
621 | }
622 | 
623 | // Convert shading to grayscale.
624 | // This one is slightly involved as a shading defines a color as function of position, i.e. color(x,y) = F(x,y).
625 | // Since the function can be challenging to change, we define new DeviceN colorspace with a color conversion
626 | // function.
627 | func convertShadingToGray(shading *unipdf.PdfShading) (*unipdf.PdfShading, error) {
628 | 	cs := shading.ColorSpace
629 | 
630 | 	if cs.GetNumComponents() == 1 {
631 | 		// Already grayscale, should be fine. No action taken.
632 | 		return shading, nil
633 | 	} else if cs.GetNumComponents() == 3 {
634 | 		// Create a new DeviceN colorspace that converts R,G,B -> Grayscale
635 | 		// Use: gray := 0.3*R + 0.59G + 0.11B
636 | 		// PS program: { 0.11 mul exch 0.59 mul add exch 0.3 mul add }.
637 | 		transformFunc := &unipdf.PdfFunctionType4{}
638 | 		transformFunc.Domain = []float64{0, 1, 0, 1, 0, 1}
639 | 		transformFunc.Range = []float64{0, 1}
640 | 		rgbToGrayPsProgram := ps.NewPSProgram()
641 | 		rgbToGrayPsProgram.Append(ps.MakeReal(0.11))
642 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("mul"))
643 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("exch"))
644 | 		rgbToGrayPsProgram.Append(ps.MakeReal(0.59))
645 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("mul"))
646 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("add"))
647 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("exch"))
648 | 		rgbToGrayPsProgram.Append(ps.MakeReal(0.3))
649 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("mul"))
650 | 		rgbToGrayPsProgram.Append(ps.MakeOperand("add"))
651 | 		transformFunc.Program = rgbToGrayPsProgram
652 | 
653 | 		// Define the DeviceN colorspace that performs the R,G,B -> Gray conversion for us.
654 | 		transformcs := unipdf.NewPdfColorspaceDeviceN()
655 | 		transformcs.AlternateSpace = unipdf.NewPdfColorspaceDeviceGray()
656 | 		transformcs.ColorantNames = unicore.MakeArray(unicore.MakeName("R"), unicore.MakeName("G"), unicore.MakeName("B"))
657 | 		transformcs.TintTransform = transformFunc
658 | 
659 | 		// Replace the old colorspace with the new.
660 | 		shading.ColorSpace = transformcs
661 | 
662 | 		return shading, nil
663 | 	} else if cs.GetNumComponents() == 4 {
664 | 		// Create a new DeviceN colorspace that converts C,M,Y,K -> Grayscale.
665 | 		// Use: gray = 1.0 - min(1.0, 0.3*C + 0.59*M + 0.11*Y + K)  ; where BG(k) = k simply.
666 | 		// PS program: {exch 0.11 mul add exch 0.59 mul add exch 0.3 mul add dup 1.0 ge { pop 1.0 } if}
667 | 		transformFunc := &unipdf.PdfFunctionType4{}
668 | 		transformFunc.Domain = []float64{0, 1, 0, 1, 0, 1, 0, 1}
669 | 		transformFunc.Range = []float64{0, 1}
670 | 
671 | 		cmykToGrayPsProgram := ps.NewPSProgram()
672 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("exch"))
673 | 		cmykToGrayPsProgram.Append(ps.MakeReal(0.11))
674 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("mul"))
675 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("add"))
676 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("exch"))
677 | 		cmykToGrayPsProgram.Append(ps.MakeReal(0.59))
678 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("mul"))
679 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("add"))
680 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("exch"))
681 | 		cmykToGrayPsProgram.Append(ps.MakeReal(0.30))
682 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("mul"))
683 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("add"))
684 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("dup"))
685 | 		cmykToGrayPsProgram.Append(ps.MakeReal(1.0))
686 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("ge"))
687 | 
688 | 		// Add sub procedure.
689 | 		subProc := ps.NewPSProgram()
690 | 		subProc.Append(ps.MakeOperand("pop"))
691 | 		subProc.Append(ps.MakeReal(1.0))
692 | 		cmykToGrayPsProgram.Append(subProc)
693 | 		cmykToGrayPsProgram.Append(ps.MakeOperand("if"))
694 | 		transformFunc.Program = cmykToGrayPsProgram
695 | 
696 | 		// Define the DeviceN colorspace that performs the R,G,B -> Gray conversion for us.
697 | 		transformcs := unipdf.NewPdfColorspaceDeviceN()
698 | 		transformcs.AlternateSpace = unipdf.NewPdfColorspaceDeviceGray()
699 | 		transformcs.ColorantNames = unicore.MakeArray(unicore.MakeName("C"), unicore.MakeName("M"), unicore.MakeName("Y"), unicore.MakeName("K"))
700 | 		transformcs.TintTransform = transformFunc
701 | 
702 | 		// Replace the old colorspace with the new.
703 | 		shading.ColorSpace = transformcs
704 | 
705 | 		return shading, nil
706 | 	}
707 | 
708 | 	unicommon.Log.Debug("Cannot convert to shading pattern grayscale, color space N = %d", cs.GetNumComponents())
709 | 	return nil, errors.New("unsupported pattern colorspace for grayscale conversion")
710 | }
711 | 


--------------------------------------------------------------------------------