├── Makefile ├── cmd └── unipdf │ └── main.go ├── LICENSE ├── LICENSE.md ├── internal └── cli │ ├── form.go │ ├── extract.go │ ├── version.go │ ├── merge.go │ ├── license_info.go │ ├── decrypt.go │ ├── search.go │ ├── split.go │ ├── passwd.go │ ├── extract_text.go │ ├── organize.go │ ├── root.go │ ├── replace.go │ ├── info.go │ ├── form_export.go │ ├── const.go │ ├── grayscale.go │ ├── explode.go │ ├── watermark.go │ ├── rotate.go │ ├── extract_images.go │ ├── encrypt.go │ ├── render.go │ ├── form_flatten.go │ ├── form_fdfmerge.go │ ├── form_fill.go │ ├── optimize.go │ └── utils.go ├── .gitignore ├── pkg └── pdf │ ├── version.go │ ├── decrypt.go │ ├── split.go │ ├── passwd.go │ ├── search.go │ ├── encrypt.go │ ├── pdf.go │ ├── explode.go │ ├── watermark.go │ ├── info.go │ ├── rotate.go │ ├── optimize.go │ ├── render.go │ ├── form.go │ ├── organize.go │ ├── extract.go │ ├── utils.go │ ├── merge.go │ ├── replace.go │ └── grayscale.go ├── .goreleaser.yml ├── .golangci.yml ├── .github └── workflows │ └── build.yml ├── go.mod ├── go.sum └── README.md /Makefile: -------------------------------------------------------------------------------- 1 | all: build 2 | build: 3 | GO111MODULE=on go build -o ./bin/unipdf ./cmd/unipdf/main.go 4 | build-all: 5 | goreleaser --snapshot --skip-publish --rm-dist 6 | release: 7 | goreleaser release 8 | clean: 9 | rm -rf ./bin 10 | -------------------------------------------------------------------------------- /cmd/unipdf/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "github.com/unidoc/unipdf-cli/internal/cli" 10 | ) 11 | 12 | func main() { 13 | cli.Execute() 14 | } 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ## Licensing Information 2 | 3 | This software package is a commercial product and requires a license 4 | code to operate. 5 | 6 | The use of this software package is governed by the end-user license agreement 7 | (EULA) available at: https://unidoc.io/eula/ 8 | 9 | To get a free metered code to evaluate the software, please visit 10 | https://unidoc.io/ 11 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## Licensing Information 2 | 3 | This software package is a commercial product and requires a license 4 | code to operate. 5 | 6 | The use of this software package is governed by the end-user license agreement 7 | (EULA) available at: https://unidoc.io/eula/ 8 | 9 | To get a free metered code to evaluate the software, please visit 10 | https://unidoc.io/ 11 | -------------------------------------------------------------------------------- /internal/cli/form.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | const formCmdDesc = `PDF form operations.` 13 | 14 | // formCmd represents the form command. 15 | var formCmd = &cobra.Command{ 16 | Use: "form [FLAG]... 
COMMAND", 17 | Short: "PDF form operations", 18 | Long: formCmdDesc, 19 | } 20 | 21 | func init() { 22 | rootCmd.AddCommand(formCmd) 23 | } 24 | -------------------------------------------------------------------------------- /internal/cli/extract.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | const extractCmdDesc = `Extract PDF resources.` 13 | 14 | // extractCmd represents the extract command. 15 | var extractCmd = &cobra.Command{ 16 | Use: "extract [FLAG]... COMMAND", 17 | Short: "Extract PDF resources", 18 | Long: extractCmdDesc, 19 | } 20 | 21 | func init() { 22 | rootCmd.AddCommand(extractCmd) 23 | } 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/unipdf/unipdf 2 | *.pdf 3 | *.fdf 4 | *.zip 5 | *.gox 6 | .idea 7 | *.mdb 8 | *.userprefs 9 | *.pidb 10 | *.suo 11 | *.out 12 | *.pyc 13 | *.wixobj 14 | *.msi 15 | *.wixpdb 16 | build 17 | dist 18 | setuptools-* 19 | .DS_Store 20 | *.so 21 | nohup.out 22 | *.orig 23 | *.rej 24 | *~ 25 | *.o 26 | *.pyo 27 | tests/*.err 28 | *.swp 29 | *.swo 30 | store/* 31 | *.log 32 | *.egg-info 33 | dist/ 34 | doc/_build/ 35 | distribute-* 36 | pip-log.txt 37 | .coverage 38 | data/ 39 | *.egg 40 | .tox 41 | out.txt 42 | junit*.xml 43 | .ropeproject 44 | .cache 45 | tmp/ 46 | bin/ 47 | *.sublime-project 48 | *.sublime-workspace 49 | *.pprof 50 | gin-bin 51 | pkg/buildinfo/buildinfo.go 52 | temp/ 53 | buildinfo.json 54 | pdf/font.go 55 | fuzz.go 56 | -------------------------------------------------------------------------------- /pkg/pdf/version.go: -------------------------------------------------------------------------------- 1 
| /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | unicommon "github.com/unidoc/unipdf/v4/common" 10 | unilicense "github.com/unidoc/unipdf/v4/common/license" 11 | ) 12 | 13 | // VersionInfo contains version and license information 14 | // about the Unidoc library. 15 | type VersionInfo struct { 16 | Lib string 17 | License string 18 | } 19 | 20 | // Version returns version and license information about the Unidoc library. 21 | func Version() VersionInfo { 22 | var license string 23 | if key := unilicense.GetLicenseKey(); key != nil { 24 | license = key.ToString() 25 | } 26 | 27 | return VersionInfo{ 28 | Lib: unicommon.Version, 29 | License: license, 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /pkg/pdf/decrypt.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import unipdf "github.com/unidoc/unipdf/v4/model" 9 | 10 | // Decrypt decrypts the PDF file specified by the inputPath parameter, 11 | // using the specified password and saves the result to the destination 12 | // specified by the outputPath parameter. 13 | func Decrypt(inputPath, outputPath, password string) error { 14 | // Read input file. 15 | r, _, _, _, err := readPDF(inputPath, password) 16 | if err != nil { 17 | return err 18 | } 19 | 20 | // Copy input file contents. 21 | w := unipdf.NewPdfWriter() 22 | if err := readerToWriter(r, &w, nil); err != nil { 23 | return err 24 | } 25 | 26 | // Save output file. 
27 | safe := inputPath == outputPath 28 | return writePDF(outputPath, &w, safe) 29 | } 30 | -------------------------------------------------------------------------------- /internal/cli/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "fmt" 10 | 11 | "github.com/spf13/cobra" 12 | "github.com/unidoc/unipdf-cli/pkg/pdf" 13 | ) 14 | 15 | var versionCmdExample = fmt.Sprintf("%s\n", 16 | fmt.Sprintf("%s version", appName), 17 | ) 18 | 19 | // versionCmd represents the version command. 20 | var versionCmd = &cobra.Command{ 21 | Use: "version", 22 | Short: "Output version information and exit", 23 | Example: versionCmdExample, 24 | DisableFlagsInUseLine: true, 25 | Run: func(_ *cobra.Command, _ []string) { 26 | version := pdf.Version() 27 | 28 | fmt.Printf("%s CLI v%s\n", appName, appVersion) 29 | fmt.Printf("Powered by unipdf v%s\n", version.Lib) 30 | fmt.Printf("\nLicense info\n%s", version.License) 31 | }, 32 | } 33 | 34 | func init() { 35 | rootCmd.AddCommand(versionCmd) 36 | } 37 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: unipdf-cli 2 | 3 | release: 4 | github: 5 | owner: unidoc 6 | name: unipdf-cli 7 | before: 8 | hooks: 9 | - go mod download 10 | builds: 11 | - binary: unipdf 12 | goos: 13 | - darwin 14 | - windows 15 | - linux 16 | goarch: 17 | - amd64 18 | - 386 19 | env: 20 | - CGO_ENABLED=0 21 | - GO111MODULE=on 22 | main: ./cmd/unipdf/main.go 23 | archives: 24 | - id: default 25 | format: tar.gz 26 | wrap_in_directory: true 27 | format_overrides: 28 | - goos: windows 29 | format: zip 30 | name_template: '{{ .Binary }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm 
}}{{ end }}' 31 | files: 32 | - LICENSE 33 | - README.md 34 | dist: bin 35 | snapshot: 36 | name_template: SNAPSHOT-{{ .Commit }} 37 | checksum: 38 | name_template: '{{ .ProjectName }}-{{ .Version }}-checksums.txt' 39 | changelog: 40 | sort: asc 41 | filters: 42 | exclude: 43 | - '^docs:' 44 | - '^test:' 45 | - '^dev:' 46 | - 'README' 47 | - Merge pull request 48 | - Merge branch 49 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable-all: false 3 | enable: 4 | - staticcheck 5 | - govet 6 | - gosimple 7 | - nakedret 8 | - typecheck 9 | - gosec 10 | - revive 11 | - unconvert 12 | - misspell 13 | disable: 14 | - ineffassign 15 | - errcheck 16 | - unused 17 | - lll 18 | - gofmt 19 | - goconst 20 | run: 21 | concurrency: 4 22 | timeout: 3m 23 | # Keep exit code 0 until we fix all this, i.e. get the baseline set. 24 | issues-exit-code: 0 25 | # Keep this false until we address all the non-test files and set baseline. 26 | tests: true 27 | 28 | issues: 29 | exclude-dirs: 30 | - testdata 31 | 32 | exclude-files: 33 | - ".*_test.go$" 34 | 35 | exclude-rules: 36 | # We don't control the ciphers as we are just implementing standards. 37 | - linters: 38 | - gosec 39 | text: "weak cryptographic primitive" 40 | 41 | # output configuration options 42 | output: 43 | format: colored-line-number 44 | print-issued-lines: true 45 | print-linter-name: true 46 | uniq-by-line: true 47 | path-prefix: "" 48 | -------------------------------------------------------------------------------- /pkg/pdf/split.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | unipdf "github.com/unidoc/unipdf/v4/model" 10 | ) 11 | 12 | // Split extracts the provided page list from PDF file specified by the 13 | // inputPath parameter and saves the resulting file at the location 14 | // specified by the outputPath parameter. A password can be passed in for 15 | // encrypted input files. 16 | // If the pages parameter is nil or an empty slice, all the pages of the input 17 | // file are copied to the output file. 18 | func Split(inputPath, outputPath, password string, pages []int) error { 19 | // Read input file. 20 | r, _, _, _, err := readPDF(inputPath, password) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | // Add selected pages to the writer. 26 | w := unipdf.NewPdfWriter() 27 | if err = readerToWriter(r, &w, pages); err != nil { 28 | return err 29 | } 30 | 31 | // Write output file. 32 | safe := inputPath == outputPath 33 | return writePDF(outputPath, &w, safe) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/pdf/passwd.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | unipdf "github.com/unidoc/unipdf/v4/model" 10 | ) 11 | 12 | // Passwd changes the owner and user password of an encrypted PDF file. 13 | // The resulting PDF file is saved at the location specified by the outputPath 14 | // parameter. 15 | func Passwd(inputPath, outputPath, ownerPassword, newOwnerPassword, newUserPassword string) error { 16 | // Read input file. 17 | r, _, _, perms, err := readPDF(inputPath, ownerPassword) 18 | if err != nil { 19 | return err 20 | } 21 | 22 | // Copy input file contents. 23 | w := unipdf.NewPdfWriter() 24 | if err := readerToWriter(r, &w, nil); err != nil { 25 | return err 26 | } 27 | 28 | // Encrypt output file. 
29 | encryptOpts := &unipdf.EncryptOptions{ 30 | Permissions: perms, 31 | } 32 | 33 | err = w.Encrypt([]byte(newUserPassword), []byte(newOwnerPassword), encryptOpts) 34 | if err != nil { 35 | return err 36 | } 37 | 38 | // Save output file. 39 | safe := inputPath == outputPath 40 | return writePDF(outputPath, &w, safe) 41 | } 42 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | lint: 11 | name: golangci-lint 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out code into the Go module directory 15 | uses: actions/checkout@v2 16 | - name: Get golangci-lint 17 | run: | 18 | curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.64.6 19 | - name: Run golangci-lint 20 | run: | 21 | ./bin/golangci-lint run --out-format=github-actions --issues-exit-code=1 22 | 23 | build: 24 | name: Build Go ${{ matrix.go }} 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | go: ['1.25', '1.24', '1.23'] 29 | env: 30 | CGO_ENABLED: 0 31 | steps: 32 | - name: Check out code into the Go module directory 33 | uses: actions/checkout@v2 34 | 35 | - name: Setup go 36 | uses: actions/setup-go@v1 37 | with: 38 | go-version: ${{ matrix.go }} 39 | 40 | - name: Get dependencies 41 | run: | 42 | go get -v -t -d ./... 43 | 44 | - name: go vet 45 | run: go vet ./... 46 | 47 | - name: Test 48 | run: go test -tags=test -count=1 ./... 
49 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/unidoc/unipdf-cli 2 | 3 | go 1.23.0 4 | 5 | require ( 6 | github.com/spf13/cobra v1.9.1 7 | github.com/unidoc/unipdf/v4 v4.3.0 8 | ) 9 | 10 | require ( 11 | github.com/adrg/strutil v0.3.1 // indirect 12 | github.com/adrg/sysfont v0.1.2 // indirect 13 | github.com/adrg/xdg v0.5.3 // indirect 14 | github.com/davecgh/go-spew v1.1.1 // indirect 15 | github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46 // indirect 16 | github.com/h2non/filetype v1.1.3 // indirect 17 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 18 | github.com/pmezard/go-difflib v1.0.0 // indirect 19 | github.com/sirupsen/logrus v1.9.3 // indirect 20 | github.com/spf13/pflag v1.0.6 // indirect 21 | github.com/stretchr/testify v1.10.0 // indirect 22 | github.com/unidoc/freetype v0.2.3 // indirect 23 | github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11 // indirect 24 | github.com/unidoc/pkcs7 v0.3.0 // indirect 25 | github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a // indirect 26 | github.com/unidoc/unichart v0.5.1 // indirect 27 | github.com/unidoc/unitype v0.5.1 // indirect 28 | golang.org/x/crypto v0.41.0 // indirect 29 | golang.org/x/image v0.30.0 // indirect 30 | golang.org/x/net v0.43.0 // indirect 31 | golang.org/x/sys v0.35.0 // indirect 32 | golang.org/x/text v0.28.0 // indirect 33 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 34 | gopkg.in/yaml.v3 v3.0.1 // indirect 35 | ) 36 | -------------------------------------------------------------------------------- /internal/cli/merge.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const mergeCmdDesc = `Merge the provided input files and save the result to the 17 | specified output file.` 18 | 19 | var mergeCmdExample = fmt.Sprintf("%s\n", 20 | fmt.Sprintf("%s merge output_file.pdf input_file1.pdf input_file2.pdf", appName), 21 | ) 22 | 23 | var mergeCmd = &cobra.Command{ 24 | Use: "merge [FLAG]... OUTPUT_FILE INPUT_FILE...", 25 | Short: "Merge PDF files", 26 | Long: mergeCmdDesc, 27 | Example: mergeCmdExample, 28 | DisableFlagsInUseLine: true, 29 | Run: func(_ *cobra.Command, args []string) { 30 | outputPath := args[0] 31 | inputPaths := args[1:] 32 | 33 | if err := pdf.Merge(inputPaths, outputPath); err != nil { 34 | printErr("Could not merge the input files: %s\n", err) 35 | } 36 | 37 | fmt.Printf("Successfully merged input files\n") 38 | fmt.Printf("Output file saved to %s\n", outputPath) 39 | }, 40 | Args: func(_ *cobra.Command, args []string) error { 41 | if len(args) < 3 { 42 | return errors.New("must provide the output file and at least two input files") 43 | } 44 | 45 | return nil 46 | }, 47 | } 48 | 49 | func init() { 50 | // Add current command to parent. 51 | rootCmd.AddCommand(mergeCmd) 52 | } 53 | -------------------------------------------------------------------------------- /internal/cli/license_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | "strings" 12 | 13 | "github.com/spf13/cobra" 14 | "github.com/unidoc/unipdf-cli/pkg/pdf" 15 | ) 16 | 17 | const licenseInfoCmdDesc = `Outputs information about the license key.` 18 | 19 | var licenseInfoCmdExample = strings.Join([]string{ 20 | fmt.Sprintf("%s license_info", appName), 21 | }, "\n") 22 | 23 | // licenseInfoCmd represents the license info command. 24 | var licenseInfoCmd = &cobra.Command{ 25 | Use: "license_info", 26 | Short: "Output license key information", 27 | Long: licenseInfoCmdDesc, 28 | Example: licenseInfoCmdExample, 29 | DisableFlagsInUseLine: true, 30 | Run: func(_ *cobra.Command, _ []string) { 31 | licenseKey := os.Getenv("UNIDOC_LICENSE_API_KEY") 32 | if licenseKey != "" { 33 | // To get your free API key for metered license, sign up on: https://cloud.unidoc.io 34 | // Make sure to be using UniOffice v1.9.0 or newer for Metered API key support 35 | lk := pdf.GetLicenseKey() 36 | fmt.Printf("License: %s\n", lk) 37 | 38 | // GetMeteredState freshly checks the state, contacting the licensing server. 39 | pdf.GetMeteredState() 40 | return 41 | } 42 | 43 | licensePath := os.Getenv("UNIDOC_LICENSE_FILE") 44 | if licensePath != "" { 45 | lk := pdf.GetLicenseKey() 46 | fmt.Printf("License: %s\n", lk) 47 | return 48 | } 49 | }, 50 | } 51 | 52 | func init() { 53 | rootCmd.AddCommand(licenseInfoCmd) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/pdf/search.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "strings" 10 | 11 | uniextractor "github.com/unidoc/unipdf/v4/extractor" 12 | ) 13 | 14 | // SearchResult contains information about a found search term inside a PDF page. 
15 | type SearchResult struct { 16 | // The page the search term was found on. 17 | Page int 18 | 19 | // The number of occurrences of the search term inside the page. 20 | Occurrences int 21 | } 22 | 23 | // Search searches the provided text in the PDF file specified by the inputPath 24 | // parameter. A password can be passed in for encrypted input files. 25 | func Search(inputPath, text, password string) ([]*SearchResult, error) { 26 | // Read input file. 27 | r, pages, _, _, err := readPDF(inputPath, password) 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | // Search specified text. 33 | var results []*SearchResult 34 | for i := 0; i < pages; i++ { 35 | // Get page. 36 | numPage := i + 1 37 | 38 | page, err := r.GetPage(numPage) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | // Extract page text. 44 | extractor, err := uniextractor.New(page) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | pageText, err := extractor.ExtractText() 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | occurrences := strings.Count(pageText, text) 55 | if occurrences == 0 { 56 | continue 57 | } 58 | 59 | results = append(results, &SearchResult{ 60 | Page: numPage, 61 | Occurrences: occurrences, 62 | }) 63 | } 64 | 65 | return results, nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/pdf/encrypt.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | unisecurity "github.com/unidoc/unipdf/v4/core/security" 10 | unipdf "github.com/unidoc/unipdf/v4/model" 11 | ) 12 | 13 | // EncryptOpts contains settings for encrypting a PDF file. 14 | type EncryptOpts struct { 15 | // OwnerPassword represents the owner password used to encrypt the file. 
16 | OwnerPassword string 17 | 18 | // UserPassword represents the user password used to encrypt the file. 19 | UserPassword string 20 | 21 | // Algorithm represents the encryption algorithm used to encrypt the file. 22 | Algorithm unipdf.EncryptionAlgorithm 23 | 24 | // Permissions specifies the operations the user can execute on 25 | // the encrypted PDF file. 26 | Permissions unisecurity.Permissions 27 | } 28 | 29 | // Encrypt encrypts the PDF file specified by the inputPath parameter, 30 | // using the specified options and saves the result at the location 31 | // specified by the outputPath parameter. 32 | func Encrypt(inputPath, outputPath string, opts *EncryptOpts) error { 33 | // Read input file. 34 | r, _, _, _, err := readPDF(inputPath, "") 35 | if err != nil { 36 | return err 37 | } 38 | 39 | // Copy input file contents. 40 | w := unipdf.NewPdfWriter() 41 | if err := readerToWriter(r, &w, nil); err != nil { 42 | return err 43 | } 44 | 45 | // Encrypt output file. 46 | encryptOpts := &unipdf.EncryptOptions{ 47 | Algorithm: opts.Algorithm, 48 | Permissions: opts.Permissions, 49 | } 50 | 51 | err = w.Encrypt([]byte(opts.UserPassword), []byte(opts.OwnerPassword), encryptOpts) 52 | if err != nil { 53 | return err 54 | } 55 | 56 | // Save output file. 57 | safe := inputPath == outputPath 58 | return writePDF(outputPath, &w, safe) 59 | } 60 | -------------------------------------------------------------------------------- /pkg/pdf/pdf.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | 12 | unicommon "github.com/unidoc/unipdf/v4/common" 13 | unilicense "github.com/unidoc/unipdf/v4/common/license" 14 | ) 15 | 16 | // SetLicense sets the license for using the UniDoc library. 
17 | func SetLicense(licensePath string, customer string) error { 18 | // Read license file 19 | content, err := os.ReadFile(licensePath) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | return unilicense.SetLicenseKey(string(content), customer) 25 | } 26 | 27 | // SetMeteredKey sets the license key for using the UniDoc library with metered api key. 28 | func SetMeteredKey(apiKey string) error { 29 | return unilicense.SetMeteredKey(apiKey) 30 | } 31 | 32 | // GetLicenseKey get information about user license key. 33 | func GetLicenseKey() string { 34 | lk := unilicense.GetLicenseKey() 35 | if lk == nil { 36 | return "Failed retrieving license key" 37 | } 38 | return lk.ToString() 39 | } 40 | 41 | // GetMeteredState freshly checks the state, contacting the licensing server. 42 | func GetMeteredState() { 43 | // GetMeteredState freshly checks the state, contacting the licensing server. 44 | state, err := unilicense.GetMeteredState() 45 | if err != nil { 46 | fmt.Printf("ERROR getting metered state: %+v\n", err) 47 | return 48 | } 49 | fmt.Printf("State: %+v\n", state) 50 | if state.OK { 51 | fmt.Printf("State is OK\n") 52 | } else { 53 | fmt.Printf("State is not OK\n") 54 | } 55 | fmt.Printf("Credits: %v\n", state.Credits) 56 | fmt.Printf("Used credits: %v\n", state.Used) 57 | } 58 | 59 | // SetLogLevel sets the verbosity of the output produced by the UniDoc library. 60 | func SetLogLevel(level unicommon.LogLevel) { 61 | unicommon.SetLogger(unicommon.NewConsoleLogger(level)) 62 | } 63 | -------------------------------------------------------------------------------- /internal/cli/decrypt.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const decryptCmdDesc = `Decrypt PDF files` 17 | 18 | var decryptCmdExample = fmt.Sprintf("%s\n%s\n", 19 | fmt.Sprintf("%s decrypt -p pass input_file.pdf", appName), 20 | fmt.Sprintf("%s decrypt -p pass -o output_file.pdf input_file.pdf", appName), 21 | ) 22 | 23 | // decryptCmd represents the decrypt command. 24 | var decryptCmd = &cobra.Command{ 25 | Use: "decrypt [FLAG]... INPUT_FILE", 26 | Short: "Decrypt PDF files", 27 | Long: decryptCmdDesc, 28 | Example: decryptCmdExample, 29 | DisableFlagsInUseLine: true, 30 | Run: func(cmd *cobra.Command, args []string) { 31 | // Parse input parameters. 32 | inputPath := args[0] 33 | password, _ := cmd.Flags().GetString("password") 34 | 35 | // Parse output path. 36 | outputPath, _ := cmd.Flags().GetString("output-file") 37 | if outputPath == "" { 38 | outputPath = inputPath 39 | } 40 | 41 | // Decrypt input file. 
42 | if err := pdf.Decrypt(inputPath, outputPath, password); err != nil { 43 | printErr("Could not decrypt input file: %s\n", err) 44 | } 45 | 46 | fmt.Printf("Successfully decrypted %s\n", inputPath) 47 | fmt.Printf("Output file saved to %s\n", outputPath) 48 | }, 49 | Args: func(_ *cobra.Command, args []string) error { 50 | if len(args) < 1 { 51 | return errors.New("must provide the PDF file to decrypt") 52 | } 53 | 54 | return nil 55 | }, 56 | } 57 | 58 | func init() { 59 | rootCmd.AddCommand(decryptCmd) 60 | 61 | decryptCmd.Flags().StringP("password", "p", "", "input file password") 62 | decryptCmd.Flags().StringP("output-file", "o", "", "output file") 63 | } 64 | -------------------------------------------------------------------------------- /internal/cli/search.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const searchCmdDesc = `Search text in PDF files` 17 | 18 | var searchCmdExample = fmt.Sprintf("%s\n%s\n", 19 | fmt.Sprintf("%s search input_file.pdf text_to_search", appName), 20 | fmt.Sprintf("%s search -p pass input_file.pdf text_to_search", appName), 21 | ) 22 | 23 | // searchCmd represents the search command. 24 | var searchCmd = &cobra.Command{ 25 | Use: "search [FLAG]... INPUT_FILE TEXT", 26 | Short: "Search text in PDF files", 27 | Long: searchCmdDesc, 28 | Example: searchCmdExample, 29 | DisableFlagsInUseLine: true, 30 | Run: func(cmd *cobra.Command, args []string) { 31 | // Parse input parameters. 32 | inputPath := args[0] 33 | text := args[1] 34 | password, _ := cmd.Flags().GetString("password") 35 | 36 | // Search text. 
37 | results, err := pdf.Search(inputPath, text, password) 38 | if err != nil { 39 | printErr("Could not search the specified text: %s\n", err) 40 | } 41 | 42 | // Print results. 43 | fmt.Printf("Search results for term: %s\n", text) 44 | 45 | totalOccurrences := 0 46 | for _, result := range results { 47 | totalOccurrences += result.Occurrences 48 | fmt.Printf("Page %d: %d occurrences\n", result.Page, result.Occurrences) 49 | } 50 | 51 | fmt.Printf("Total occurrences: %d\n", totalOccurrences) 52 | }, 53 | Args: func(_ *cobra.Command, args []string) error { 54 | if len(args) < 2 { 55 | return errors.New("must provide a PDF file and the text to search") 56 | } 57 | 58 | return nil 59 | }, 60 | } 61 | 62 | func init() { 63 | rootCmd.AddCommand(searchCmd) 64 | 65 | searchCmd.Flags().StringP("password", "p", "", "input file password") 66 | } 67 | -------------------------------------------------------------------------------- /pkg/pdf/explode.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "archive/zip" 10 | "fmt" 11 | "os" 12 | "path/filepath" 13 | "strings" 14 | 15 | unipdf "github.com/unidoc/unipdf/v4/model" 16 | ) 17 | 18 | // Explode splits the PDF file specified by the inputPath parameter into single 19 | // page PDF files. The extracted collection of PDF files is saved as a ZIP 20 | // archive at the location specified by the outputPath parameter. 21 | // A password can be passed in, if the input file is encrypted. 22 | // If the pages parameter is nil or an empty slice, all pages are extracted. 23 | func Explode(inputPath, outputPath, password string, pages []int) (string, error) { 24 | dir, inputFile := filepath.Split(inputPath) 25 | // Use input file directory if no output path is specified. 
26 | inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile)) 27 | if outputPath == "" { 28 | outputPath = filepath.Join(dir, inputFile+".zip") 29 | } 30 | 31 | // Read input file. 32 | r, pageCount, _, _, err := readPDF(inputPath, password) 33 | if err != nil { 34 | return "", err 35 | } 36 | 37 | // Prepare output archive. 38 | outputFile, err := os.Create(outputPath) 39 | if err != nil { 40 | return "", err 41 | } 42 | defer outputFile.Close() 43 | 44 | // Extract pages. 45 | if len(pages) == 0 { 46 | pages = createPageRange(pageCount) 47 | } 48 | 49 | zw := zip.NewWriter(outputFile) 50 | for _, numPage := range pages { 51 | w := unipdf.NewPdfWriter() 52 | if err := readerToWriter(r, &w, []int{numPage}); err != nil { 53 | return "", err 54 | } 55 | 56 | // Add page to zip file. 57 | file, err := zw.Create(fmt.Sprintf("%s_%d.pdf", inputFile, numPage)) 58 | if err != nil { 59 | return "", err 60 | } 61 | 62 | if err = w.Write(file); err != nil { 63 | return "", err 64 | } 65 | } 66 | 67 | return outputPath, zw.Close() 68 | } 69 | -------------------------------------------------------------------------------- /pkg/pdf/watermark.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | unicreator "github.com/unidoc/unipdf/v4/creator" 10 | ) 11 | 12 | // Watermark adds the watermark image specified by the watermarkPath parameter 13 | // to the pages of the PDF file specified by the inputPath parameter. 14 | // A password can be passed in for encrypted input files. 15 | // The resulting file is saved at the location specified by the outputPath 16 | // parameter. 17 | // Also, a list of pages to add watermark to can be passed in. Every page that 18 | // is not included in the pages slice is left intact. 
19 | // If the pages parameter is nil or an empty slice, all the pages of the input 20 | // file are watermarked. 21 | func Watermark(inputPath, outputPath, watermarkPath, password string, pages []int) error { 22 | // Read input file. 23 | r, pageCount, _, _, err := readPDF(inputPath, password) 24 | if err != nil { 25 | return err 26 | } 27 | 28 | // Open watermark image. 29 | c := unicreator.New() 30 | 31 | watermark, err := c.NewImageFromFile(watermarkPath) 32 | if err != nil { 33 | return err 34 | } 35 | 36 | // Add pages. 37 | if len(pages) == 0 { 38 | pages = createPageRange(pageCount) 39 | } 40 | 41 | for i := 0; i < pageCount; i++ { 42 | numPage := i + 1 43 | 44 | page, err := r.GetPage(numPage) 45 | if err != nil { 46 | return err 47 | } 48 | 49 | var hasWatermark bool 50 | for _, page := range pages { 51 | if page == numPage { 52 | hasWatermark = true 53 | break 54 | } 55 | } 56 | 57 | if err = c.AddPage(page); err != nil { 58 | return err 59 | } 60 | 61 | if !hasWatermark { 62 | continue 63 | } 64 | 65 | watermark.ScaleToWidth(c.Context().PageWidth) 66 | watermark.SetPos(0, (c.Context().PageHeight-watermark.Height())/2) 67 | watermark.SetOpacity(0.5) 68 | 69 | if err = c.Draw(watermark); err != nil { 70 | return err 71 | } 72 | } 73 | 74 | // Add forms. 75 | if r.AcroForm != nil { 76 | c.SetForms(r.AcroForm) 77 | } 78 | 79 | // Write output file. 80 | safe := inputPath == outputPath 81 | return writeCreatorPDF(outputPath, c, safe) 82 | } 83 | -------------------------------------------------------------------------------- /pkg/pdf/info.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "os" 10 | "sort" 11 | ) 12 | 13 | // FileStat contains basic information about a file. 
14 | type FileStat struct { 15 | // Name represents the name of the file. 16 | Name string 17 | 18 | // Size specifies the size in bytes of the file. 19 | Size int64 20 | } 21 | 22 | // FileInfo contains information about a PDF file. 23 | type FileInfo struct { 24 | FileStat 25 | 26 | // Pages represents the number of pages the PDF file has. 27 | Pages int 28 | 29 | // Objects contains the types of objects the PDF file contains, along 30 | // with the count for each object type. 31 | Objects map[string]int 32 | 33 | // Version specifies the PDF version of the file. 34 | Version string 35 | 36 | // Encrypted specifies if the file is encrypted. 37 | Encrypted bool 38 | 39 | // EncryptionAlgo contains the name of the encryption algorithm used 40 | // to encrypt the PDF file. The field is empty for non-encrypted files. 41 | EncryptionAlgo string 42 | } 43 | 44 | // Info returns information about the PDF file specified by the inputPath 45 | // parameter. A password can be passed in for encrypted input files. 46 | func Info(inputPath string, password string) (*FileInfo, error) { 47 | info := &FileInfo{} 48 | info.Name = inputPath 49 | 50 | // Get file stat. 51 | fi, err := os.Stat(inputPath) 52 | if err != nil { 53 | return nil, err 54 | } 55 | info.Size = fi.Size() 56 | 57 | // Read input file. 58 | r, pages, encrypted, _, err := readPDF(inputPath, password) 59 | if err != nil { 60 | return nil, err 61 | } 62 | 63 | info.Encrypted = encrypted 64 | if encrypted { 65 | info.EncryptionAlgo = r.GetEncryptionMethod() 66 | } 67 | 68 | info.Version = r.PdfVersion().String() 69 | info.Pages = pages 70 | 71 | // Read PDF objects. 
72 | objTypes, err := r.Inspect() 73 | if err != nil { 74 | return nil, err 75 | } 76 | 77 | keys := []string{} 78 | for key := range objTypes { 79 | keys = append(keys, key) 80 | } 81 | sort.Strings(keys) 82 | 83 | objects := map[string]int{} 84 | for _, key := range keys { 85 | objects[key] = objTypes[key] 86 | } 87 | info.Objects = objects 88 | 89 | return info, nil 90 | } 91 | -------------------------------------------------------------------------------- /internal/cli/split.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const splitCmdDesc = `Split PDF files. 17 | 18 | The command is used to extract one or more page ranges from the input file 19 | and save the result as the output file. 20 | If no page range is specified, all the pages from the input file will be 21 | copied to the output file. 22 | 23 | An example of the pages parameter: 1-3,4,6-7 24 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file, 25 | while page number 5 is skipped. 26 | ` 27 | 28 | var splitCmdExample = fmt.Sprintf("%s\n%s\n", 29 | fmt.Sprintf("%s split input_file.pdf output_file.pdf 1-2", appName), 30 | fmt.Sprintf("%s split -p pass input_file.pdf output_file.pdf 1-2,4", appName), 31 | ) 32 | 33 | // splitCmd represents the split command. 34 | var splitCmd = &cobra.Command{ 35 | Use: "split [FLAG]...
INPUT_FILE OUTPUT_FILE [PAGES]", 36 | Short: "Split PDF files", 37 | Long: splitCmdDesc, 38 | Example: splitCmdExample, 39 | DisableFlagsInUseLine: true, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | inputPath := args[0] 42 | outputPath := args[1] 43 | password, _ := cmd.Flags().GetString("password") 44 | 45 | // Parse page range. 46 | var err error 47 | var pages []int 48 | 49 | if len(args) > 2 { 50 | if pages, err = parsePageRange(args[2]); err != nil { 51 | printUsageErr(cmd, "Invalid page range specified\n") 52 | } 53 | } 54 | 55 | err = pdf.Split(inputPath, outputPath, password, pages) 56 | if err != nil { 57 | printErr("Error: %v\n", err) 58 | } 59 | 60 | fmt.Printf("Successfully split file %s\n", inputPath) 61 | fmt.Printf("Output file saved to %s\n", outputPath) 62 | }, 63 | Args: func(_ *cobra.Command, args []string) error { 64 | if len(args) < 2 { 65 | return errors.New("must provide at least the input and output files") 66 | } 67 | 68 | return nil 69 | }, 70 | } 71 | 72 | func init() { 73 | rootCmd.AddCommand(splitCmd) 74 | 75 | splitCmd.Flags().StringP("password", "p", "", "input file password") 76 | } 77 | -------------------------------------------------------------------------------- /internal/cli/passwd.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const passwdCmdDesc = `Change owner and user passwords of PDF files.` 17 | 18 | var passwdCmdExample = fmt.Sprintf("%s\n%s\n%s\n", 19 | fmt.Sprintf("%s passwd -p pass input_file.pdf new_owner_pass", appName), 20 | fmt.Sprintf("%s passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass", appName), 21 | fmt.Sprintf("%s passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass new_user_pass", appName), 22 | ) 23 | 24 | // passwdCmd represents the passwd command. 25 | var passwdCmd = &cobra.Command{ 26 | Use: "passwd [FLAG]... INPUT_FILE NEW_OWNER_PASSWORD [NEW_USER_PASSWORD]", 27 | Short: "Change PDF passwords", 28 | Long: passwdCmdDesc, 29 | Example: passwdCmdExample, 30 | DisableFlagsInUseLine: true, 31 | Run: func(cmd *cobra.Command, args []string) { 32 | // Parse input parameters. 33 | inputPath := args[0] 34 | newOwnerPassword := args[1] 35 | ownerPassword, _ := cmd.Flags().GetString("password") 36 | 37 | newUserPassword := "" 38 | if len(args) > 2 { 39 | newUserPassword = args[2] 40 | } 41 | 42 | // Parse output file. 43 | outputPath, _ := cmd.Flags().GetString("output-file") 44 | if outputPath == "" { 45 | outputPath = inputPath 46 | } 47 | 48 | // Change input file password. 
49 | err := pdf.Passwd(inputPath, outputPath, ownerPassword, newOwnerPassword, newUserPassword) 50 | if err != nil { 51 | printErr("Could not change input file password: %s\n", err) 52 | } 53 | 54 | fmt.Printf("Password successfully changed\n") 55 | fmt.Printf("Output file saved to %s\n", outputPath) 56 | }, 57 | Args: func(_ *cobra.Command, args []string) error { 58 | if len(args) < 2 { 59 | return errors.New("must provide the input file and the new owner password") 60 | } 61 | 62 | return nil 63 | }, 64 | } 65 | 66 | func init() { 67 | rootCmd.AddCommand(passwdCmd) 68 | 69 | passwdCmd.Flags().StringP("output-file", "o", "", "output file") 70 | passwdCmd.Flags().StringP("password", "p", "", "input file password") 71 | } 72 | -------------------------------------------------------------------------------- /internal/cli/extract_text.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const extractTextCmdDesc = `Extracts PDF text. 17 | 18 | The extracted text is always printed to STDOUT. 19 | 20 | The command can be configured to extract text only from the specified pages 21 | using the --pages parameter. 22 | 23 | An example of the pages parameter: 1-3,4,6-7 24 | Text will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page 25 | number 5 is skipped. 26 | ` 27 | 28 | var extractTextCmdExample = fmt.Sprintf("%s\n%s\n%s\n", 29 | fmt.Sprintf("%s extract text input_file.pdf", appName), 30 | fmt.Sprintf("%s extract text -P 1-3 input_file.pdf", appName), 31 | fmt.Sprintf("%s extract text -P 1-3 -p pass input_file.pdf", appName), 32 | ) 33 | 34 | // extractTextCmd represents the extract text command. 
35 | var extractTextCmd = &cobra.Command{ 36 | Use: "text [FLAG]... INPUT_FILE", 37 | Short: "Extract PDF text", 38 | Long: extractTextCmdDesc, 39 | Example: extractTextCmdExample, 40 | DisableFlagsInUseLine: true, 41 | Run: func(cmd *cobra.Command, args []string) { 42 | // Parse input parameters. 43 | inputPath := args[0] 44 | password, _ := cmd.Flags().GetString("password") 45 | 46 | // Parse page range. 47 | pageRange, _ := cmd.Flags().GetString("pages") 48 | 49 | pages, err := parsePageRange(pageRange) 50 | if err != nil { 51 | printUsageErr(cmd, "Invalid page range specified\n") 52 | } 53 | 54 | // Extract text. 55 | text, err := pdf.ExtractText(inputPath, password, pages) 56 | if err != nil { 57 | printErr("Could not extract text: %s\n", err) 58 | } 59 | 60 | fmt.Println(text) 61 | }, 62 | Args: func(_ *cobra.Command, args []string) error { 63 | if len(args) < 1 { 64 | return errors.New("must provide the input file") 65 | } 66 | 67 | return nil 68 | }, 69 | } 70 | 71 | func init() { 72 | extractCmd.AddCommand(extractTextCmd) 73 | 74 | extractTextCmd.Flags().StringP("password", "p", "", "input file password") 75 | extractTextCmd.Flags().StringP("pages", "P", "", "pages to extract text from") 76 | } 77 | -------------------------------------------------------------------------------- /internal/cli/organize.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const organizeCmdDesc = `Organize PDF files. 17 | 18 | The command is used to organize one or more page ranges from the input file 19 | and save the result as the output file.
20 | If no page range is specified, all the pages from the input file will be 21 | copied to the output file. 22 | 23 | An example of the pages parameter: 1-3,4,6-7 24 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file, 25 | while page number 5 is skipped. 26 | ` 27 | 28 | var organizeCmdExample = fmt.Sprintf("%s\n%s\n", 29 | fmt.Sprintf("%s organize input_file.pdf output_file.pdf 1-2", appName), 30 | fmt.Sprintf("%s organize -p pass input_file.pdf output_file.pdf 1-2,4", appName), 31 | ) 32 | 33 | // organizeCmd represents the organize command. 34 | var organizeCmd = &cobra.Command{ 35 | Use: "organize [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]", 36 | Short: "Organize PDF files", 37 | Long: organizeCmdDesc, 38 | Example: organizeCmdExample, 39 | DisableFlagsInUseLine: true, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | inputPath := args[0] 42 | outputPath := args[1] 43 | password, _ := cmd.Flags().GetString("password") 44 | 45 | // Parse page range. 46 | var err error 47 | var pages []int 48 | 49 | if len(args) > 2 { 50 | if pages, err = parsePageRangeUnsorted(args[2]); err != nil { 51 | printUsageErr(cmd, "Invalid page range specified\n") 52 | } 53 | } 54 | 55 | if err := pdf.Organize(inputPath, outputPath, password, pages); err != nil { 56 | printErr("Error: %s\n", err) 57 | } 58 | 59 | fmt.Printf("Successfully organized file %s\n", inputPath) 60 | fmt.Printf("Output file saved to %s\n", outputPath) 61 | }, 62 | Args: func(_ *cobra.Command, args []string) error { 63 | if len(args) < 2 { 64 | return errors.New("must provide at least the input and output files") 65 | } 66 | 67 | return nil 68 | }, 69 | } 70 | 71 | func init() { 72 | rootCmd.AddCommand(organizeCmd) 73 | 74 | organizeCmd.Flags().StringP("password", "p", "", "input file password") 75 | } 76 | -------------------------------------------------------------------------------- /pkg/pdf/rotate.go:
-------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "path/filepath" 13 | "strings" 14 | 15 | unicreator "github.com/unidoc/unipdf/v4/creator" 16 | ) 17 | 18 | // Rotate rotates the pages of the PDF file specified by the inputPath 19 | // by the angle specified by the angle parameter. The rotated PDF file is saved 20 | // at the location specified by the outputPath parameter. 21 | // A password can be passed in, if the input file is encrypted. 22 | // If the pages parameter is nil or an empty slice, all pages are rotated. 23 | func Rotate(inputPath, outputPath string, angle int, password string, pages []int) (string, error) { 24 | if angle%90 != 0 { 25 | return "", errors.New("rotation angle must be a multiple of 90 degrees") 26 | } 27 | 28 | // Generate output path from the input path, if no output path is specified. 29 | dir, inputFile := filepath.Split(inputPath) 30 | 31 | inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile)) 32 | if outputPath == "" { 33 | outputPath = filepath.Join(dir, fmt.Sprintf("%s_rotated.pdf", inputFile)) 34 | } 35 | 36 | // Read input file. 37 | r, pageCount, _, _, err := readPDF(inputPath, password) 38 | if err != nil { 39 | return "", err 40 | } 41 | 42 | // Create the output file. NOTE(review): outputFile is only closed, never written to; the PDF is written by writeCreatorPDF below. os.Create truncates an existing file, so confirm this is safe when outputPath equals inputPath. 43 | outputFile, err := os.Create(outputPath) 44 | if err != nil { 45 | return "", err 46 | } 47 | defer outputFile.Close() 48 | 49 | // Rotate pages.
50 | if len(pages) == 0 { 51 | pages = createPageRange(pageCount) 52 | } 53 | 54 | selectedPages := map[int]bool{} 55 | for _, page := range pages { 56 | selectedPages[page] = true 57 | } 58 | 59 | c := unicreator.New() 60 | for i := 0; i < pageCount; i++ { 61 | numPage := i + 1 62 | 63 | page, err := r.GetPage(numPage) 64 | if err != nil { 65 | return "", err 66 | } 67 | 68 | if err = c.AddPage(page); err != nil { 69 | return "", err 70 | } 71 | 72 | rotate := selectedPages[numPage] 73 | if !rotate || angle == 0 { 74 | continue 75 | } 76 | 77 | if err = c.RotateDeg(int64(angle)); err != nil { 78 | return "", err 79 | } 80 | } 81 | 82 | // Add forms. 83 | if r.AcroForm != nil { 84 | c.SetForms(r.AcroForm) 85 | } 86 | 87 | // Write output file. 88 | safe := inputPath == outputPath 89 | return outputPath, writeCreatorPDF(outputPath, c, safe) 90 | } 91 | -------------------------------------------------------------------------------- /internal/cli/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "os" 10 | 11 | "github.com/spf13/cobra" 12 | "github.com/unidoc/unipdf-cli/pkg/pdf" 13 | unicommon "github.com/unidoc/unipdf/v4/common" 14 | ) 15 | 16 | const appName = "unipdf" 17 | const appVersion = "0.6.0" 18 | 19 | const rootCmdDesc = ` is a CLI application for working with PDF files. 20 | It supports the most common PDF operations. A full list of the supported 21 | operations can be found below. 22 | 23 | If you have a license for Unidoc, you can set it through the 24 | UNIDOC_LICENSE_FILE and UNIDOC_LICENSE_CUSTOMER environment variables. 
25 | 26 | export UNIDOC_LICENSE_FILE="PATH_TO_LICENSE_FILE" 27 | export UNIDOC_LICENSE_CUSTOMER="CUSTOMER_NAME" 28 | 29 | Or alternatively, you can set the Metered API Key license through the 30 | UNIDOC_LICENSE_API_KEY environment variable. 31 | 32 | export UNIDOC_LICENSE_API_KEY="YOUR_API_KEY_HERE" 33 | 34 | By default, the application only displays error messages on command execution 35 | failure. To change the verbosity of the output, set the UNIDOC_LOG_LEVEL 36 | environment variable. 37 | 38 | export UNIDOC_LOG_LEVEL="DEBUG" 39 | 40 | Supported log levels: trace, debug, info, notice, warning, error (default) 41 | ` 42 | 43 | var rootCmd = &cobra.Command{ 44 | Use: appName, 45 | Long: appName + rootCmdDesc, 46 | } 47 | 48 | // Execute represents the entry point of the application. 49 | // The method parses the command line arguments and executes the appropriate 50 | // action. 51 | func Execute() { 52 | readEnv() 53 | 54 | if err := rootCmd.Execute(); err != nil { 55 | printErr("%s\n", err) 56 | } 57 | } 58 | 59 | func readEnv() { 60 | // Set license key. 61 | licensePath := os.Getenv("UNIDOC_LICENSE_FILE") 62 | licenseCustomer := os.Getenv("UNIDOC_LICENSE_CUSTOMER") 63 | if licensePath != "" { 64 | pdf.SetLicense(licensePath, licenseCustomer) 65 | } 66 | 67 | // OR... alternatively... load a License API key. 68 | 69 | // Set license key using metered api key. 70 | licenseMeteredKey := os.Getenv("UNIDOC_LICENSE_API_KEY") 71 | if licenseMeteredKey != "" { 72 | pdf.SetMeteredKey(licenseMeteredKey) 73 | } 74 | 75 | // Set log level. 
76 | logLevel, err := parseLogLevel(os.Getenv("UNIDOC_LOG_LEVEL")) 77 | if err != nil { 78 | logLevel = unicommon.LogLevelError 79 | } 80 | 81 | pdf.SetLogLevel(logLevel) 82 | } 83 | -------------------------------------------------------------------------------- /internal/cli/replace.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const replaceCmdDesc = `Replace text in PDF files` 17 | 18 | var replaceCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n", 19 | fmt.Sprintf("%s replace input_file.pdf text_to_search", appName), 20 | fmt.Sprintf("%s replace -o output_file input_file.pdf text_to_search", appName), 21 | fmt.Sprintf("%s replace -o output_file -r new_text input_file.pdf text_to_search", appName), 22 | fmt.Sprintf("%s replace -o output_file -r new_text -p pass input_file.pdf text_to_search", appName), 23 | ) 24 | 25 | // replaceCmd represents the replace command. 26 | var replaceCmd = &cobra.Command{ 27 | Use: "replace [FLAG]... INPUT_FILE TEXT", 28 | Short: "Replace text in PDF files", 29 | Long: replaceCmdDesc, 30 | Example: replaceCmdExample, 31 | DisableFlagsInUseLine: true, 32 | Run: func(cmd *cobra.Command, args []string) { 33 | // Parse input parameters. 34 | inputPath := args[0] 35 | text := args[1] 36 | password, _ := cmd.Flags().GetString("password") 37 | 38 | // Parse output file. 39 | outputPath, _ := cmd.Flags().GetString("output-file") 40 | if outputPath == "" { 41 | outputPath = inputPath 42 | } 43 | 44 | // Parse replaceText. 45 | replaceText, _ := cmd.Flags().GetString("replace-text") 46 | if replaceText == "" { 47 | replaceText = text // no replacement given: fall back to the search text itself 48 | } 49 | 50 | // Replace text.
51 | err := pdf.Replace(inputPath, outputPath, text, replaceText, password) 52 | if err != nil { 53 | printErr("Could not replace the specified text: %s\n", err) 54 | } 55 | 56 | fmt.Printf("Successfully replaced text %s with %s\n", text, replaceText) 57 | fmt.Printf("Output file saved to %s\n", outputPath) 58 | }, 59 | Args: func(_ *cobra.Command, args []string) error { 60 | if len(args) < 2 { 61 | return errors.New("must provide a PDF file and the text to search") 62 | } 63 | 64 | return nil 65 | }, 66 | } 67 | 68 | func init() { 69 | rootCmd.AddCommand(replaceCmd) 70 | 71 | replaceCmd.Flags().StringP("output-file", "o", "", "output file") 72 | replaceCmd.Flags().StringP("replace-text", "r", "", "replacement text") 73 | replaceCmd.Flags().StringP("password", "p", "", "input file password") 74 | } 75 | -------------------------------------------------------------------------------- /internal/cli/info.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const infoCmdDesc = `Outputs information about the input file. 17 | Also provides basic validation. 18 | ` 19 | 20 | var infoCmdExample = fmt.Sprintf("%s\n%s\n", 21 | fmt.Sprintf("%s info input_file.pdf", appName), 22 | fmt.Sprintf("%s info -p pass input_file.pdf", appName), 23 | ) 24 | 25 | // infoCmd represents the info command. 26 | var infoCmd = &cobra.Command{ 27 | Use: "info [FLAG]... 
INPUT_FILE", 28 | Short: "Output PDF information", 29 | Long: infoCmdDesc, 30 | Example: infoCmdExample, 31 | DisableFlagsInUseLine: true, 32 | Run: func(cmd *cobra.Command, args []string) { 33 | inputFile := args[0] 34 | password, _ := cmd.Flags().GetString("password") 35 | 36 | info, err := pdf.Info(inputFile, password) 37 | if err != nil { 38 | printErr("Could not retrieve input file information: %s\n", err) 39 | } 40 | 41 | // Print basic PDF info 42 | fmt.Println("Info") 43 | fmt.Printf("Name: %s\n", inputFile) 44 | fmt.Printf("Size: %d bytes\n", info.Size) 45 | fmt.Printf("Pages: %d\n", info.Pages) 46 | fmt.Printf("PDF Version: %s\n", info.Version) 47 | 48 | if info.Encrypted { 49 | fmt.Printf("Encryption: encrypted with %s algorithm\n", info.EncryptionAlgo) 50 | } else { 51 | fmt.Println("Encryption: none") 52 | } 53 | 54 | // Print PDF objects 55 | fmt.Println("\nObjects") 56 | 57 | var malicious bool 58 | for key, val := range info.Objects { 59 | maliciousStr := "" 60 | if key == "JavaScript" || key == "Flash" || key == "Video" { 61 | maliciousStr = " (potentially malicious)" 62 | malicious = true 63 | } 64 | 65 | fmt.Printf("%s objects: %d%s\n", key, val, maliciousStr) 66 | } 67 | 68 | if malicious { 69 | fmt.Println("\nFile contains potentially malicious objects!") 70 | } else { 71 | fmt.Println("\nFile is safe") 72 | } 73 | }, 74 | Args: func(_ *cobra.Command, args []string) error { 75 | if len(args) < 1 { 76 | return errors.New("must provide the input file") 77 | } 78 | 79 | return nil 80 | }, 81 | } 82 | 83 | func init() { 84 | rootCmd.AddCommand(infoCmd) 85 | 86 | infoCmd.Flags().StringP("password", "p", "", "input file password") 87 | } 88 | -------------------------------------------------------------------------------- /internal/cli/form_export.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this 
source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | 13 | "github.com/spf13/cobra" 14 | "github.com/unidoc/unipdf-cli/pkg/pdf" 15 | ) 16 | 17 | const formExportCmdDesc = `Export JSON representation of form fields. 18 | 19 | By default, the resulting JSON content is printed to STDOUT. The output can be 20 | saved to a file by using the --output-file flag (see the Examples section). 21 | 22 | The exported JSON template can be used to fill PDF forms using the 23 | "form fill" command. 24 | ` 25 | 26 | var formExportCmdExample = fmt.Sprintf("%s\n%s\n%s\n", 27 | fmt.Sprintf("%s form export in_file.pdf", appName), 28 | fmt.Sprintf("%s form export in_file.pdf > out_file.json", appName), 29 | fmt.Sprintf("%s form export -o out_file.json in_file.pdf", appName), 30 | ) 31 | 32 | // formExportCmd represents the form export command. 33 | var formExportCmd = &cobra.Command{ 34 | Use: "export [FLAG]... INPUT_FILE", 35 | Short: "Export form fields as JSON", 36 | Long: formExportCmdDesc, 37 | Example: formExportCmdExample, 38 | DisableFlagsInUseLine: true, 39 | Run: func(cmd *cobra.Command, args []string) { 40 | // Parse input parameters. 41 | inputPath := args[0] 42 | outputPath, _ := cmd.Flags().GetString("output-file") 43 | 44 | // Export form fields. 45 | json, err := pdf.FormExport(inputPath) 46 | if err != nil { 47 | printErr("Could not export form fields: %s\n", err) 48 | return 49 | } 50 | if json == "" { 51 | fmt.Println("Could not find any form fields to export.") 52 | return 53 | } 54 | 55 | // Write exported data. 
56 | if outputPath == "" { 57 | fmt.Println(json) 58 | return 59 | } 60 | 61 | // #nosec G306 62 | err = os.WriteFile(outputPath, []byte(json), 0644) 63 | if err != nil { 64 | printErr("Could not export form fields: %s\n", err) 65 | } 66 | 67 | fmt.Printf("Form fields successfully exported from %s\n", inputPath) 68 | fmt.Printf("Output file saved to %s\n", outputPath) 69 | }, 70 | Args: func(_ *cobra.Command, args []string) error { 71 | if len(args) < 1 { 72 | return errors.New("must provide the input file") 73 | } 74 | 75 | return nil 76 | }, 77 | } 78 | 79 | func init() { 80 | formCmd.AddCommand(formExportCmd) 81 | 82 | formExportCmd.Flags().StringP("output-file", "o", "", "output file") 83 | } 84 | -------------------------------------------------------------------------------- /internal/cli/const.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "strings" 11 | 12 | unicommon "github.com/unidoc/unipdf/v4/common" 13 | unisecurity "github.com/unidoc/unipdf/v4/core/security" 14 | unipdf "github.com/unidoc/unipdf/v4/model" 15 | ) 16 | 17 | var encryptAlgoMap = map[string]unipdf.EncryptionAlgorithm{ 18 | "rc4": unipdf.RC4_128bit, 19 | "aes128": unipdf.AES_128bit, 20 | "aes256": unipdf.AES_256bit, 21 | } 22 | 23 | var logLevelMap = map[string]unicommon.LogLevel{ 24 | "trace": unicommon.LogLevelTrace, 25 | "debug": unicommon.LogLevelDebug, 26 | "info": unicommon.LogLevelInfo, 27 | "notice": unicommon.LogLevelNotice, 28 | "warning": unicommon.LogLevelWarning, 29 | "error": unicommon.LogLevelError, 30 | } 31 | 32 | var imageFormats = map[string]struct{}{ 33 | "jpeg": struct{}{}, 34 | "png": struct{}{}, 35 | } 36 | 37 | func parseEncryptionMode(mode string) (unipdf.EncryptionAlgorithm, error) { 38 | algo, ok := encryptAlgoMap[mode] 39 | if !ok { 40 | return 0, errors.New("invalid encryption mode") 41 | } 42 | 43 | return algo, nil 44 | } 45 | 46 | func parseLogLevel(levelStr string) (unicommon.LogLevel, error) { 47 | levelStr = strings.TrimSpace(levelStr) 48 | if levelStr == "" { 49 | return unicommon.LogLevelError, nil 50 | } 51 | 52 | level, ok := logLevelMap[levelStr] 53 | if !ok { 54 | return 0, errors.New("invalid log level") 55 | } 56 | 57 | return level, nil 58 | } 59 | 60 | func parsePermissionList(permStr string) (unisecurity.Permissions, error) { 61 | permStr = removeSpaces(permStr) 62 | if permStr == "" { 63 | return 0, nil 64 | } 65 | permList := strings.Split(permStr, ",") 66 | 67 | perms := unisecurity.Permissions(0) 68 | for _, perm := range permList { 69 | if perm == "" { 70 | continue 71 | } 72 | 73 | switch perm { 74 | case "all": 75 | perms = unisecurity.PermOwner 76 | case "none": 77 | perms = unisecurity.Permissions(0) 78 | case "print-low-res": 79 | perms |= unisecurity.PermPrinting 80 | case "print-high-res": 81 | perms |= 
unisecurity.PermFullPrintQuality 82 | case "modify": 83 | perms |= unisecurity.PermModify 84 | case "extract-graphics": 85 | perms |= unisecurity.PermExtractGraphics 86 | case "annotate": 87 | perms |= unisecurity.PermAnnotate 88 | case "fill-forms": 89 | perms |= unisecurity.PermFillForms 90 | case "rotate": 91 | perms |= unisecurity.PermRotateInsert 92 | 93 | default: 94 | return 0, errors.New("invalid permission") 95 | } 96 | } 97 | 98 | return perms, nil 99 | } 100 | -------------------------------------------------------------------------------- /internal/cli/grayscale.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const grayscaleCmdDesc = `Converts the input file to grayscale. 17 | 18 | The command can be configured to convert only the specified 19 | pages to grayscale using the --pages parameter. 20 | 21 | An example of the pages parameter: 1-3,4,6-7 22 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be converted to grayscale, while 23 | page number 5 is skipped. 24 | ` 25 | 26 | var grayscaleCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n", 27 | fmt.Sprintf("%s grayscale input_file.pdf", appName), 28 | fmt.Sprintf("%s grayscale -o output_file input_file.pdf", appName), 29 | fmt.Sprintf("%s grayscale -o output_file -P 1-3 input_file.pdf", appName), 30 | fmt.Sprintf("%s grayscale -o output_file -P 1-3 -p pass input_file.pdf", appName), 31 | ) 32 | 33 | // grayscaleCmd represents the grayscale command. 34 | var grayscaleCmd = &cobra.Command{ 35 | Use: "grayscale [FLAG]... 
INPUT_FILE", 36 | Short: "Convert PDF to grayscale", 37 | Long: grayscaleCmdDesc, 38 | Example: grayscaleCmdExample, 39 | DisableFlagsInUseLine: true, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | // Parse input parameters. 42 | inputPath := args[0] 43 | password, _ := cmd.Flags().GetString("password") 44 | 45 | // Parse output file. 46 | outputPath, _ := cmd.Flags().GetString("output-file") 47 | if outputPath == "" { 48 | outputPath = inputPath 49 | } 50 | 51 | // Parse page range. 52 | pageRange, _ := cmd.Flags().GetString("pages") 53 | 54 | pages, err := parsePageRange(pageRange) 55 | if err != nil { 56 | printUsageErr(cmd, "Invalid page range specified\n") 57 | } 58 | 59 | // Convert file to grayscale. 60 | err = pdf.Grayscale(inputPath, outputPath, password, pages) 61 | if err != nil { 62 | printErr("Could not convert input file to grayscale: %s\n", err) 63 | } 64 | 65 | fmt.Printf("Successfully converted %s to grayscale\n", inputPath) 66 | fmt.Printf("Output file saved to %s\n", outputPath) 67 | }, 68 | Args: func(_ *cobra.Command, args []string) error { 69 | if len(args) < 1 { 70 | return errors.New("must provide the input file") 71 | } 72 | 73 | return nil 74 | }, 75 | } 76 | 77 | func init() { 78 | rootCmd.AddCommand(grayscaleCmd) 79 | 80 | grayscaleCmd.Flags().StringP("output-file", "o", "", "output file") 81 | grayscaleCmd.Flags().StringP("password", "p", "", "input file password") 82 | grayscaleCmd.Flags().StringP("pages", "P", "", "pages to convert to grayscale") 83 | } 84 | -------------------------------------------------------------------------------- /internal/cli/explode.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const explodeCmdDesc = `Splits the input file into separate single page PDF files. 17 | 18 | The resulting PDF files are saved in a ZIP archive at the location specified 19 | by the --output-file parameter. If no output file is specified, the ZIP file 20 | is saved in the same directory as the input file. 21 | 22 | The command can be configured to extract only the specified pages using 23 | the --pages parameter. 24 | 25 | An example of the pages parameter: 1-3,4,6-7 26 | Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be extracted, while page 27 | number 5 is skipped. 28 | ` 29 | 30 | var explodeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n", 31 | fmt.Sprintf("%s explode input_file.pdf", appName), 32 | fmt.Sprintf("%s explode -o pages.zip input_file.pdf", appName), 33 | fmt.Sprintf("%s explode -o pages.zip -P 1-3 input_file.pdf", appName), 34 | fmt.Sprintf("%s explode -o pages.zip -P 1-3 -p pass input_file.pdf", appName), 35 | ) 36 | 37 | // explodeCmd represents the explode command. 38 | var explodeCmd = &cobra.Command{ 39 | Use: "explode [FLAG]... INPUT_FILE", 40 | Short: "Explodes the input file into separate single page PDF files", 41 | Long: explodeCmdDesc, 42 | Example: explodeCmdExample, 43 | DisableFlagsInUseLine: true, 44 | Run: func(cmd *cobra.Command, args []string) { 45 | // Parse input parameters. 46 | inputPath := args[0] 47 | password, _ := cmd.Flags().GetString("password") 48 | outputPath, _ := cmd.Flags().GetString("output-file") 49 | 50 | // Parse page range. 51 | pageRange, _ := cmd.Flags().GetString("pages") 52 | 53 | pages, err := parsePageRange(pageRange) 54 | if err != nil { 55 | printUsageErr(cmd, "Invalid page range specified\n") 56 | } 57 | 58 | // Explode file. 
59 | outputPath, err = pdf.Explode(inputPath, outputPath, password, pages) 60 | if err != nil { 61 | printErr("Could not explode input file: %s\n", err) 62 | return 63 | } 64 | 65 | fmt.Printf("File %s successfully exploded\n", inputPath) 66 | fmt.Printf("Output file saved to %s\n", outputPath) 67 | }, 68 | Args: func(_ *cobra.Command, args []string) error { 69 | if len(args) < 1 { 70 | return errors.New("must provide the input file") 71 | } 72 | 73 | return nil 74 | }, 75 | } 76 | 77 | func init() { 78 | rootCmd.AddCommand(explodeCmd) 79 | 80 | explodeCmd.Flags().StringP("password", "p", "", "input file password") 81 | explodeCmd.Flags().StringP("output-file", "o", "", "output file") 82 | explodeCmd.Flags().StringP("pages", "P", "", "pages to extract from the input file") 83 | } 84 | -------------------------------------------------------------------------------- /internal/cli/watermark.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const watermarkCmdDesc = `Add watermark to PDF files. 17 | 18 | The command can be configured to apply the watermark image only to the specified 19 | pages using the --pages parameter. 20 | 21 | An example of the pages parameter: 1-3,4,6-7 22 | Watermark will only be applied to pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page 23 | number 5 is skipped. 
24 | ` 25 | 26 | var watermarkCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n", 27 | fmt.Sprintf("%s watermark input_file.pdf watermark.png", appName), 28 | fmt.Sprintf("%s watermark -o output file.png input_file.pdf watermark.png", appName), 29 | fmt.Sprintf("%s watermark -o output file.png -P 1-3 input_file.pdf watermark.png", appName), 30 | fmt.Sprintf("%s watermark -o output file.png -P 1-3 -p pass input_file.pdf watermark.png", appName), 31 | ) 32 | 33 | // watermarkCmd represents the watermark command. 34 | var watermarkCmd = &cobra.Command{ 35 | Use: "watermark [FLAG]... INPUT_FILE WATERMARK_IMAGE", 36 | Short: "Add watermark to PDF files", 37 | Long: watermarkCmdDesc, 38 | Example: watermarkCmdExample, 39 | DisableFlagsInUseLine: true, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | // Parse input parameters. 42 | inputPath := args[0] 43 | watermarkPath := args[1] 44 | password, _ := cmd.Flags().GetString("password") 45 | 46 | // Parse output file. 47 | outputPath, _ := cmd.Flags().GetString("output-file") 48 | if outputPath == "" { 49 | outputPath = inputPath 50 | } 51 | 52 | // Parse page range. 53 | pageRange, _ := cmd.Flags().GetString("pages") 54 | 55 | pages, err := parsePageRange(pageRange) 56 | if err != nil { 57 | printUsageErr(cmd, "Invalid page range specified\n") 58 | } 59 | 60 | // Apply watermark. 
61 | err = pdf.Watermark(inputPath, outputPath, watermarkPath, password, pages) 62 | if err != nil { 63 | printErr("Could not apply watermark to the input file: %s\n", err) 64 | } 65 | 66 | fmt.Printf("Watermark successfully applied to %s\n", inputPath) 67 | fmt.Printf("Output file saved to %s\n", outputPath) 68 | }, 69 | Args: func(_ *cobra.Command, args []string) error { 70 | if len(args) < 2 { 71 | return errors.New("must provide the input file and the watermark image") 72 | } 73 | 74 | return nil 75 | }, 76 | } 77 | 78 | func init() { 79 | rootCmd.AddCommand(watermarkCmd) 80 | 81 | watermarkCmd.Flags().StringP("output-file", "o", "", "output file") 82 | watermarkCmd.Flags().StringP("password", "p", "", "input file password") 83 | watermarkCmd.Flags().StringP("pages", "P", "", "pages on which to add watermark") 84 | } 85 | -------------------------------------------------------------------------------- /internal/cli/rotate.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "strconv" 12 | 13 | "github.com/spf13/cobra" 14 | "github.com/unidoc/unipdf-cli/pkg/pdf" 15 | ) 16 | 17 | const rotateCmdDesc = `Rotate PDF file pages by a specified angle. 18 | The angle argument is specified in degrees and it must be a multiple of 90. 19 | 20 | The command can be configured to rotate only the specified pages 21 | using the --pages parameter. 22 | 23 | An example of the pages parameter: 1-3,4,6-7 24 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rotated, while 25 | page number 5 is skipped. 
26 | ` 27 | 28 | var rotateCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n", 29 | fmt.Sprintf("%s rotate input_file.pdf 90", appName), 30 | fmt.Sprintf("%s rotate -- input_file.pdf -270", appName), 31 | fmt.Sprintf("%s rotate -o output_file.pdf input_file.pdf 90", appName), 32 | fmt.Sprintf("%s rotate -o output_file.pdf -P 1-3 input_file.pdf 90", appName), 33 | fmt.Sprintf("%s rotate -o output_file.pdf -P 1-3 -p pass input_file.pdf 90", appName), 34 | ) 35 | 36 | // rotateCmd represents the rotate command. 37 | var rotateCmd = &cobra.Command{ 38 | Use: "rotate [FLAG]... INPUT_FILE ANGLE", 39 | Short: "Rotate PDF file pages", 40 | Long: rotateCmdDesc, 41 | Example: rotateCmdExample, 42 | DisableFlagsInUseLine: true, 43 | Run: func(cmd *cobra.Command, args []string) { 44 | // Parse input parameters. 45 | inputPath := args[0] 46 | password, _ := cmd.Flags().GetString("password") 47 | 48 | // Parse angle parameter. 49 | angle, err := strconv.Atoi(args[1]) 50 | if err != nil { 51 | printUsageErr(cmd, "Invalid rotation angle specified\n") 52 | } 53 | 54 | // Parse output file. 55 | outputPath, _ := cmd.Flags().GetString("output-file") 56 | if outputPath == "" { 57 | outputPath = inputPath 58 | } 59 | 60 | // Parse page range. 61 | pageRange, _ := cmd.Flags().GetString("pages") 62 | 63 | pages, err := parsePageRange(pageRange) 64 | if err != nil { 65 | printUsageErr(cmd, "Invalid page range specified\n") 66 | } 67 | 68 | // Rotate file. 
69 | outputPath, err = pdf.Rotate(inputPath, outputPath, angle, password, pages) 70 | if err != nil { 71 | printErr("Could not rotate input file pages: %s\n", err) 72 | } 73 | 74 | fmt.Printf("Successfully rotated %s\n", inputPath) 75 | fmt.Printf("Output file saved to %s\n", outputPath) 76 | }, 77 | Args: func(_ *cobra.Command, args []string) error { 78 | if len(args) < 2 { 79 | return errors.New("must provide the input file and the rotation angle") 80 | } 81 | 82 | return nil 83 | }, 84 | } 85 | 86 | func init() { 87 | rootCmd.AddCommand(rotateCmd) 88 | 89 | rotateCmd.Flags().StringP("pages", "P", "", "pages to rotate") 90 | rotateCmd.Flags().StringP("output-file", "o", "", "putput file") 91 | rotateCmd.Flags().StringP("password", "p", "", "input file password") 92 | } 93 | -------------------------------------------------------------------------------- /pkg/pdf/optimize.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "os" 10 | "time" 11 | 12 | unipdf "github.com/unidoc/unipdf/v4/model" 13 | unioptimize "github.com/unidoc/unipdf/v4/model/optimize" 14 | ) 15 | 16 | // OptimizeOpts represents the options used for optimizing PDF files. 17 | type OptimizeOpts struct { 18 | // ImageQuality specifies the quality of the optimized images. 19 | ImageQuality int 20 | 21 | // ImagePPI specifies the maximum pixels per inch of the optimized images. 22 | ImagePPI float64 23 | } 24 | 25 | // OptimizeResult contains information about the optimization process. 26 | type OptimizeResult struct { 27 | // Original contains information about the original file. 28 | Original FileStat 29 | 30 | // Optimized contains information about the optimized file. 31 | Optimized FileStat 32 | 33 | // Duration specifies the optimization processing time in nanoseconds. 
34 | Duration time.Duration 35 | } 36 | 37 | // Optimize optimizes the PDF file specified by the inputPath parameter, using 38 | // the provided options and saves the result at the location specified by the 39 | // outputPath parameter. A password can be specified for encrypted input files. 40 | func Optimize(inputPath, outputPath, password string, opts *OptimizeOpts) (*OptimizeResult, error) { 41 | // Initialize starting time. 42 | start := time.Now() 43 | 44 | // Get input file stat. 45 | inputFileInfo, err := os.Stat(inputPath) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | // Read input file. 51 | r, _, _, _, err := readPDF(inputPath, password) 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | // Copy input file contents to the output file. 57 | w := unipdf.NewPdfWriter() 58 | if err = readerToWriter(r, &w, nil); err != nil { 59 | return nil, err 60 | } 61 | 62 | // Add optimizer. 63 | if opts == nil { 64 | opts = &OptimizeOpts{ 65 | ImageQuality: 100, 66 | } 67 | } 68 | 69 | w.SetOptimizer(unioptimize.New(unioptimize.Options{ 70 | CombineDuplicateDirectObjects: true, 71 | CombineIdenticalIndirectObjects: true, 72 | CombineDuplicateStreams: true, 73 | CompressStreams: true, 74 | UseObjectStreams: true, 75 | ImageQuality: opts.ImageQuality, 76 | ImageUpperPPI: opts.ImagePPI, 77 | })) 78 | 79 | // Write output file. 80 | safe := inputPath == outputPath 81 | if err = writePDF(outputPath, &w, safe); err != nil { 82 | return nil, err 83 | } 84 | 85 | // Get output file stat. 
86 | outputFileInfo, err := os.Stat(outputPath) 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | return &OptimizeResult{ 92 | Original: FileStat{ 93 | Name: inputPath, 94 | Size: inputFileInfo.Size(), 95 | }, 96 | Optimized: FileStat{ 97 | Name: outputPath, 98 | Size: outputFileInfo.Size(), 99 | }, 100 | Duration: time.Since(start), 101 | }, nil 102 | } 103 | -------------------------------------------------------------------------------- /pkg/pdf/render.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "archive/zip" 10 | "fmt" 11 | "image" 12 | "image/jpeg" 13 | "image/png" 14 | "io" 15 | "os" 16 | "path/filepath" 17 | "strings" 18 | 19 | "github.com/unidoc/unipdf/v4/render" 20 | ) 21 | 22 | // RenderOpts represents the options used for rendering PDF pages to images. 23 | type RenderOpts struct { 24 | // ImageFormat specifies the file format of the rendered images. 25 | // Supported formats: jpeg, png. 26 | ImageFormat string 27 | 28 | // ImageQuality specifies the quality of the rendered images. 29 | // Only applies to rendered JPEG images. 30 | ImageQuality int 31 | } 32 | 33 | // Render renders the pages of the PDF file specified by the inputPath parameter 34 | // to image targets. The rendered images are saved as a ZIP archive at the 35 | // location specified by the outputPath parameter. 36 | // A password can be passed in, if the input file is encrypted. 37 | // If the pages parameter is nil or an empty slice, all pages are rendered. 38 | func Render(inputPath, outputPath, password string, pages []int, opts *RenderOpts) (string, error) { 39 | // Use input file directory if no output path is specified. 
40 | dir, inputFile := filepath.Split(inputPath) 41 | 42 | inputFile = strings.TrimSuffix(inputFile, filepath.Ext(inputFile)) 43 | if outputPath == "" { 44 | outputPath = filepath.Join(dir, inputFile+".zip") 45 | } 46 | 47 | // Read input file. 48 | r, pageCount, _, _, err := readPDF(inputPath, password) 49 | if err != nil { 50 | return "", err 51 | } 52 | 53 | // Extract pages. 54 | if len(pages) == 0 { 55 | pages = createPageRange(pageCount) 56 | } 57 | 58 | // Create render options, if none are specified. 59 | if opts == nil { 60 | opts = &RenderOpts{ImageFormat: "jpeg", ImageQuality: 100} 61 | } 62 | if opts.ImageQuality < 0 || opts.ImageQuality > 100 { 63 | opts.ImageQuality = 100 64 | } 65 | 66 | // Create image encode function. 67 | var encodeFunc func(w io.Writer, img image.Image) error 68 | imgExt := "jpg" 69 | 70 | switch opts.ImageFormat { 71 | case "jpeg": 72 | encodeFunc = func(w io.Writer, img image.Image) error { 73 | return jpeg.Encode(w, img, &jpeg.Options{Quality: opts.ImageQuality}) 74 | } 75 | case "png": 76 | imgExt = "png" 77 | encodeFunc = func(w io.Writer, img image.Image) error { 78 | return png.Encode(w, img) 79 | } 80 | default: 81 | return "", fmt.Errorf("unsupported image format: %s", opts.ImageFormat) 82 | } 83 | 84 | // Prepare output archive. 85 | outputFile, err := os.Create(outputPath) 86 | if err != nil { 87 | return "", err 88 | } 89 | defer outputFile.Close() 90 | 91 | zw := zip.NewWriter(outputFile) 92 | 93 | // Render pages. 94 | device := render.NewImageDevice() 95 | for _, numPage := range pages { 96 | // Get page. 97 | page, err := r.GetPage(numPage) 98 | if err != nil { 99 | return "", err 100 | } 101 | 102 | // Render page to image. 103 | img, err := device.Render(page) 104 | if err != nil { 105 | return "", err 106 | } 107 | 108 | // Add rendered image to zip file. 
109 | file, err := zw.Create(fmt.Sprintf("%s_%d.%s", inputFile, numPage, imgExt)) 110 | if err != nil { 111 | return "", err 112 | } 113 | if err := encodeFunc(file, img); err != nil { 114 | return "", err 115 | } 116 | } 117 | 118 | return outputPath, zw.Close() 119 | } 120 | -------------------------------------------------------------------------------- /internal/cli/extract_images.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | 15 | uniextractor "github.com/unidoc/unipdf/v4/extractor" 16 | ) 17 | 18 | const extractImagesCmdDesc = `Extracts PDF images. 19 | 20 | The images are extracted in a ZIP file and saved at the destination specified 21 | by the --output-file parameter. If no output file is specified, the ZIP 22 | archive is saved in the same directory as the input file. 23 | 24 | The command can be configured to extract images only from the specified 25 | pages using the --pages parameter. 26 | 27 | An example of the pages parameter: 1-3,4,6-7 28 | Images will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while page 29 | number 5 is skipped. 30 | ` 31 | 32 | var extractImagesCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n", 33 | fmt.Sprintf("%s extract images input_file.pdf", appName), 34 | fmt.Sprintf("%s extract images -o images.zip input_file.pdf", appName), 35 | fmt.Sprintf("%s extract images -P 1-3 -p pass -o images.zip input_file.pdf", appName), 36 | fmt.Sprintf("%s extract images -P 1-3 -p pass -o images.zip -S input_file.pdf", appName), 37 | ) 38 | 39 | // extractImagesCmd represents the extract images command. 40 | var extractImagesCmd = &cobra.Command{ 41 | Use: "images [FLAG]... 
INPUT_FILE", 42 | Short: "Extract PDF images", 43 | Long: extractImagesCmdDesc, 44 | Example: extractImagesCmdExample, 45 | DisableFlagsInUseLine: true, 46 | Run: func(cmd *cobra.Command, args []string) { 47 | // Parse input parameters. 48 | inputPath := args[0] 49 | password, _ := cmd.Flags().GetString("password") 50 | outputPath, _ := cmd.Flags().GetString("output-file") 51 | 52 | // Parse image extraction options. 53 | includeSM, _ := cmd.Flags().GetBool("include-inline-stencil-masks") 54 | 55 | extractOptions := &uniextractor.ImageExtractOptions{ 56 | IncludeInlineStencilMasks: includeSM, 57 | } 58 | 59 | // Parse page range. 60 | pageRange, _ := cmd.Flags().GetString("pages") 61 | 62 | pages, err := parsePageRange(pageRange) 63 | if err != nil { 64 | printUsageErr(cmd, "Invalid page range specified\n") 65 | } 66 | 67 | // Extract images. 68 | outputPath, count, err := pdf.ExtractImages( 69 | inputPath, 70 | outputPath, 71 | password, 72 | pages, 73 | extractOptions, 74 | ) 75 | if err != nil { 76 | printErr("Could not extract images: %s\n", err) 77 | return 78 | } 79 | 80 | if count == 0 { 81 | fmt.Printf("%s does not contain any images to extract\n", inputPath) 82 | } else { 83 | fmt.Printf("Images successfully extracted to %s\n", outputPath) 84 | } 85 | }, 86 | Args: func(_ *cobra.Command, args []string) error { 87 | if len(args) < 1 { 88 | return errors.New("must provide the input file") 89 | } 90 | 91 | return nil 92 | }, 93 | } 94 | 95 | func init() { 96 | extractCmd.AddCommand(extractImagesCmd) 97 | 98 | extractImagesCmd.Flags().StringP("password", "p", "", "input file password") 99 | extractImagesCmd.Flags().StringP("output-file", "o", "", "output file") 100 | extractImagesCmd.Flags().StringP("pages", "P", "", "pages to extract images from") 101 | extractImagesCmd.Flags().BoolP("include-inline-stencil-masks", "S", false, "include inline stencil masks") 102 | } 103 | -------------------------------------------------------------------------------- 
/internal/cli/encrypt.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const encryptCmdDesc = `Encrypts the input file using the specified owner password. 17 | 18 | The algorithm used for the file encryption is configurable. 19 | 20 | Supported encryption algorithms: 21 | - rc4 (default) 22 | - aes128 23 | - aes256 24 | 25 | A user password along with a set of permissions can also be specified. 26 | 27 | Supported user permissions: 28 | - all (default) 29 | - none 30 | - print-low-res 31 | - print-high-res 32 | - modify 33 | - extract 34 | - extract-graphics 35 | - annotate 36 | - fill-forms 37 | - rotate 38 | ` 39 | 40 | var encryptCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n", 41 | fmt.Sprintf("%s encrypt input_file.pdf owner_pass", appName), 42 | fmt.Sprintf("%s encrypt input_file.pdf owner_pass user_pass", appName), 43 | fmt.Sprintf("%s encrypt -o output_file.pdf -m aes256 input_file.pdf owner_pass user_pass", appName), 44 | fmt.Sprintf("%s encrypt -o output_file.pdf -P none -m aes256 input_file.pdf owner_pass user_pass", appName), 45 | fmt.Sprintf("%s encrypt -o output_file.pdf -P modify,annotate -m aes256 input_file.pdf owner_pass user_pass", appName), 46 | ) 47 | 48 | // encryptCmd represents the encrypt command. 49 | var encryptCmd = &cobra.Command{ 50 | Use: "encrypt [FLAG]... INPUT_FILE OWNER_PASSWORD [USER_PASSWORD]", 51 | Short: "Encrypt PDF files", 52 | Long: encryptCmdDesc, 53 | Example: encryptCmdExample, 54 | DisableFlagsInUseLine: true, 55 | Run: func(cmd *cobra.Command, args []string) { 56 | // Parse input parameters. 57 | inputPath := args[0] 58 | ownerPassword := args[1] 59 | 60 | // Parse user password. 
61 | var userPassword string 62 | if len(args) > 2 { 63 | userPassword = args[2] 64 | } 65 | 66 | // Parse output file. 67 | outputPath, _ := cmd.Flags().GetString("output-file") 68 | if outputPath == "" { 69 | outputPath = inputPath 70 | } 71 | 72 | // Parse encryption mode. 73 | mode, _ := cmd.Flags().GetString("mode") 74 | 75 | algorithm, err := parseEncryptionMode(mode) 76 | if err != nil { 77 | printUsageErr(cmd, "Invalid encryption mode\n") 78 | } 79 | 80 | // Parse user permissions. 81 | permList, _ := cmd.Flags().GetString("perms") 82 | 83 | perms, err := parsePermissionList(permList) 84 | if err != nil { 85 | printUsageErr(cmd, "Invalid user permission values\n") 86 | } 87 | 88 | opts := &pdf.EncryptOpts{ 89 | OwnerPassword: ownerPassword, 90 | UserPassword: userPassword, 91 | Algorithm: algorithm, 92 | Permissions: perms, 93 | } 94 | 95 | // Encrypt file. 96 | if err := pdf.Encrypt(inputPath, outputPath, opts); err != nil { 97 | printErr("Could not encrypt file: %s\n", err) 98 | } 99 | 100 | fmt.Printf("File %s successfully encrypted\n", inputPath) 101 | fmt.Printf("Output file saved to %s\n", outputPath) 102 | }, 103 | Args: func(_ *cobra.Command, args []string) error { 104 | if len(args) < 2 { 105 | return errors.New("must provide the input file and the owner password") 106 | } 107 | 108 | return nil 109 | }, 110 | } 111 | 112 | func init() { 113 | rootCmd.AddCommand(encryptCmd) 114 | 115 | encryptCmd.Flags().StringP("output-file", "o", "", "output file") 116 | encryptCmd.Flags().StringP("perms", "P", "all", "user permissions") 117 | encryptCmd.Flags().StringP("mode", "m", "rc4", "algorithm to use for encrypting the file") 118 | } 119 | -------------------------------------------------------------------------------- /internal/cli/render.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code 
package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | "github.com/spf13/cobra" 13 | "github.com/unidoc/unipdf-cli/pkg/pdf" 14 | ) 15 | 16 | const renderCmdDesc = `Renders the pages of the input file to image targets. 17 | 18 | The rendered image files are saved in a ZIP archive at the location specified 19 | by the --output-file parameter. If no output file is specified, the ZIP file 20 | is saved in the same directory as the input file. 21 | 22 | The command can be configured to render only the specified pages using 23 | the --pages parameter. 24 | 25 | An example of the pages parameter: 1-3,4,6-7 26 | Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rendered, while page 27 | number 5 is skipped. 28 | 29 | The format of the rendered image files can be specified using 30 | the --image-format flag (default jpeg). 31 | 32 | Supported image formats: 33 | - jpeg (default) 34 | - png 35 | 36 | The quality of the rendered image files can be configured through 37 | the --image-quality flag (default 100). Only applies to JPEG images. 38 | ` 39 | 40 | var renderCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n", 41 | fmt.Sprintf("%s render input_file.pdf", appName), 42 | fmt.Sprintf("%s render -o images.zip input_file.pdf", appName), 43 | fmt.Sprintf("%s render -o images.zip -P 1-3 input_file.pdf", appName), 44 | fmt.Sprintf("%s render -o images.zip -P 1-3 -p pass input_file.pdf", appName), 45 | fmt.Sprintf("%s render -o images.zip -P 1-3 -p pass -f jpeg -q 100 input_file.pdf", appName), 46 | ) 47 | 48 | // renderCmd represents the render command. 49 | var renderCmd = &cobra.Command{ 50 | Use: "render [FLAG]... INPUT_FILE", 51 | Short: "Render PDF pages to images", 52 | Long: renderCmdDesc, 53 | Example: renderCmdExample, 54 | DisableFlagsInUseLine: true, 55 | Run: func(cmd *cobra.Command, args []string) { 56 | // Parse input parameters. 
57 | inputPath := args[0] 58 | password, _ := cmd.Flags().GetString("password") 59 | outputPath, _ := cmd.Flags().GetString("output-file") 60 | 61 | // Parse page range. 62 | pageRange, _ := cmd.Flags().GetString("pages") 63 | 64 | pages, err := parsePageRange(pageRange) 65 | if err != nil { 66 | printUsageErr(cmd, "Invalid page range specified\n") 67 | } 68 | 69 | // Parse render options. 70 | imageFormat, _ := cmd.Flags().GetString("image-format") 71 | if _, ok := imageFormats[imageFormat]; !ok { 72 | imageFormat = "jpeg" 73 | } 74 | 75 | imageQuality, err := cmd.Flags().GetInt("image-quality") 76 | if err != nil { 77 | imageQuality = 100 78 | } 79 | 80 | opts := &pdf.RenderOpts{ 81 | ImageFormat: imageFormat, 82 | ImageQuality: imageQuality, 83 | } 84 | 85 | // Render file. 86 | outputPath, err = pdf.Render(inputPath, outputPath, password, pages, opts) 87 | if err != nil { 88 | printErr("Could not render input file: %s\n", err) 89 | return 90 | } 91 | 92 | fmt.Printf("File %s successfully rendered\n", inputPath) 93 | fmt.Printf("Output file saved to %s\n", outputPath) 94 | }, 95 | Args: func(_ *cobra.Command, args []string) error { 96 | if len(args) < 1 { 97 | return errors.New("must provide the input file") 98 | } 99 | 100 | return nil 101 | }, 102 | } 103 | 104 | func init() { 105 | rootCmd.AddCommand(renderCmd) 106 | 107 | renderCmd.Flags().StringP("password", "p", "", "input file password") 108 | renderCmd.Flags().StringP("output-file", "o", "", "output file") 109 | renderCmd.Flags().StringP("pages", "P", "", "pages to render from the input file") 110 | renderCmd.Flags().StringP("image-format", "f", "jpeg", "format of the output images") 111 | renderCmd.Flags().IntP("image-quality", "q", 100, "quality of the output images") 112 | } 113 | -------------------------------------------------------------------------------- /pkg/pdf/form.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the 
terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "github.com/unidoc/unipdf/v4/annotator" 10 | "github.com/unidoc/unipdf/v4/fdf" 11 | "github.com/unidoc/unipdf/v4/fjson" 12 | unipdf "github.com/unidoc/unipdf/v4/model" 13 | ) 14 | 15 | // FormExport exports all form field values from the PDF file specified 16 | // by the inputPath parameters, as JSON. 17 | func FormExport(inputPath string) (string, error) { 18 | fieldData, err := fjson.LoadFromPDFFile(inputPath) 19 | if err != nil { 20 | return "", err 21 | } 22 | if fieldData == nil { 23 | return "", nil 24 | } 25 | 26 | return fieldData.JSON() 27 | } 28 | 29 | // FormFillJSON fills the form field values from the PDF file specified by the 30 | // inputPath parameter, using the values from the JSON file specified by the 31 | // jsonPath parameter. The output PDF file is saved at the location specified 32 | // by the outputPath parameter. The output file form annotations can be 33 | // flattened by using the flatten parameter. 34 | // A password can be specified for encrypted input files. 35 | func FormFillJSON(inputPath, jsonPath, outputPath, password string, flatten bool) error { 36 | // Read JSON field data. 37 | fieldData, err := fjson.LoadFromJSONFile(jsonPath) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | return formFill(inputPath, fieldData, outputPath, password, flatten) 43 | } 44 | 45 | // FormFillFDF fills the form field values from the PDF file specified by the 46 | // inputPath parameter, using the values from the FDF file specified by the 47 | // fdfPath parameter. The output PDF file is saved at the location specified 48 | // by the outputPath parameter. The output file form annotations can be 49 | // flattened by using the flatten parameter. 50 | // A password can be specified for encrypted input files. 
51 | func FormFillFDF(inputPath, fdfPath, outputPath, password string, flatten bool) error { 52 | // Read field data. 53 | fieldData, err := fdf.LoadFromPath(fdfPath) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | return formFill(inputPath, fieldData, outputPath, password, flatten) 59 | } 60 | 61 | // FormFlatten flattens all the form annotation from the PDF file specified by 62 | // the inputPath parameter. The output PDF file is saved at the location 63 | // specified by the outputPath parameter. 64 | // A password can be specified for encrypted input files. 65 | func FormFlatten(inputPath, outputPath, password string) error { 66 | // Read input file. 67 | r, _, _, _, err := readPDF(inputPath, password) 68 | if err != nil { 69 | return err 70 | } 71 | 72 | // Flatten form. 73 | fieldAppearance := annotator.FieldAppearance{ 74 | OnlyIfMissing: true, 75 | } 76 | 77 | if err = r.FlattenFields(true, fieldAppearance); err != nil { 78 | return err 79 | } 80 | r.AcroForm = nil 81 | 82 | // Copy input file contents. 83 | w := unipdf.NewPdfWriter() 84 | if err := readerToWriter(r, &w, nil); err != nil { 85 | return err 86 | } 87 | 88 | // Save output file. 89 | safe := inputPath == outputPath 90 | return writePDF(outputPath, &w, safe) 91 | } 92 | 93 | func formFill(inputPath string, provider unipdf.FieldValueProvider, outputPath, password string, flatten bool) error { 94 | // Read input file. 95 | r, _, _, _, err := readPDF(inputPath, password) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | // Populate the form data. 101 | if err = r.AcroForm.Fill(provider); err != nil { 102 | return err 103 | } 104 | 105 | // Flatten form. 106 | if flatten { 107 | fieldAppearance := annotator.FieldAppearance{ 108 | OnlyIfMissing: true, 109 | RegenerateTextFields: true, 110 | } 111 | 112 | if err = r.FlattenFields(true, fieldAppearance); err != nil { 113 | return err 114 | } 115 | r.AcroForm = nil 116 | } 117 | 118 | // Copy input file contents. 
119 | w := unipdf.NewPdfWriter() 120 | if err := readerToWriter(r, &w, nil); err != nil { 121 | return err 122 | } 123 | 124 | // Save output file. 125 | safe := inputPath == outputPath 126 | return writePDF(outputPath, &w, safe) 127 | } 128 | -------------------------------------------------------------------------------- /internal/cli/form_flatten.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "strings" 13 | 14 | "github.com/spf13/cobra" 15 | "github.com/unidoc/unipdf-cli/pkg/pdf" 16 | ) 17 | 18 | const formFlattenCmdDesc = `Flatten PDF file form annotations. 19 | 20 | The flattening process makes the form fields of the output files read-only by 21 | appending the form field annotation XObject Form data to the page content 22 | stream, thus making it part of the page contents. 23 | 24 | The command can take multiple files and directories as input parameters. 25 | By default, each PDF file is saved in the same location as the original file, 26 | appending the "_flattened" suffix to the file name. Use the --overwrite flag 27 | to overwrite the original files. 28 | In addition, the flattened output files can be saved to a different directory 29 | by using the --target-dir flag. 30 | The command can search for PDF files inside the subdirectories of the 31 | specified input directories by using the --recursive flag. 
32 | ` 33 | 34 | var formFlattenCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n", 35 | fmt.Sprintf("%s form flatten file_1.pdf file_n.pdf", appName), 36 | fmt.Sprintf("%s form flatten -O file_1.pdf file_n.pdf", appName), 37 | fmt.Sprintf("%s form flatten -O -r file_1.pdf file_n.pdf dir_1 dir_n", appName), 38 | fmt.Sprintf("%s form flatten -t out_dir file_1.pdf file_n.pdf dir_1 dir_n", appName), 39 | fmt.Sprintf("%s form flatten -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n", appName), 40 | fmt.Sprintf("%s form flatten -t out_dir -r -p pass file_1.pdf file_n.pdf dir_1 dir_n", appName), 41 | ) 42 | 43 | // formFlattenCmd represents the form flatten command. 44 | var formFlattenCmd = &cobra.Command{ 45 | Use: "flatten [FLAG]... INPUT_FILES...", 46 | Short: "Flatten form annotations", 47 | Long: formFlattenCmdDesc, 48 | Example: formFlattenCmdExample, 49 | DisableFlagsInUseLine: true, 50 | Run: func(cmd *cobra.Command, args []string) { 51 | // Parse flags. 52 | outputDir, _ := cmd.Flags().GetString("target-dir") 53 | overwrite, _ := cmd.Flags().GetBool("overwrite") 54 | recursive, _ := cmd.Flags().GetBool("recursive") 55 | password, _ := cmd.Flags().GetString("password") 56 | 57 | // Parse input parameters. 58 | inputPaths, err := parseInputPaths(args, recursive, pdfMatcher) 59 | if err != nil { 60 | printErr("Could not parse input files: %s\n", err) 61 | } 62 | 63 | // Create output directory, if it does not exist. 64 | if outputDir != "" { 65 | if overwrite { 66 | printErr("The --target-dir and the --overwrite flags are mutually exclusive") 67 | } 68 | if err = os.MkdirAll(outputDir, os.ModePerm); err != nil { 69 | printErr("Could not create output directory: %s\n", err) 70 | } 71 | } 72 | 73 | // Flatten PDF files form annotations. 74 | for _, inputPath := range inputPaths { 75 | fmt.Printf("Flattening %s\n", inputPath) 76 | 77 | // Generate output path. 
78 | outputPath := generateOutputPath(inputPath, outputDir, "flattened", overwrite) 79 | 80 | // Flatten input file form fields. 81 | err := pdf.FormFlatten(inputPath, outputPath, password) 82 | if err != nil { 83 | printErr("Could not flatten input file form annotations: %s\n", err) 84 | } 85 | 86 | fmt.Printf("Original: %s\n", inputPath) 87 | fmt.Printf("Flattened: %s\n", outputPath) 88 | fmt.Println("Status: success") 89 | fmt.Println(strings.Repeat("-", 10)) 90 | } 91 | }, 92 | Args: func(_ *cobra.Command, args []string) error { 93 | if len(args) < 1 { 94 | return errors.New("must provide at least one input file or directory") 95 | } 96 | 97 | return nil 98 | }, 99 | } 100 | 101 | func init() { 102 | formCmd.AddCommand(formFlattenCmd) 103 | 104 | formFlattenCmd.Flags().StringP("target-dir", "t", "", "output directory") 105 | formFlattenCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files") 106 | formFlattenCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories") 107 | formFlattenCmd.Flags().StringP("password", "p", "", "input file password") 108 | } 109 | -------------------------------------------------------------------------------- /pkg/pdf/organize.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "github.com/unidoc/unipdf/v4/common" 10 | unipdf "github.com/unidoc/unipdf/v4/model" 11 | ) 12 | 13 | // Organize extracts the provided page list from PDF file specified by the 14 | // inputPath parameter then merges the individual pages and saves the 15 | // resulting file at the location specified by the outputPath parameter. 16 | // A password can be passed in for encrypted input files. 17 | func Organize(inputPath, outputPath, password string, pages []int) error { 18 | // Read input file.
19 | pdfReader, _, _, _, err := readPDF(inputPath, password) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | // Add selected pages to the writer. 25 | pdfWriter := unipdf.NewPdfWriter() 26 | 27 | for i := 0; i < len(pages); i++ { 28 | page, err := pdfReader.GetPage(pages[i]) 29 | if err != nil { 30 | return err 31 | } 32 | 33 | err = pdfWriter.AddPage(page) 34 | if err != nil { 35 | return err 36 | } 37 | } 38 | 39 | // Copy PDF version. 40 | version := pdfReader.PdfVersion() 41 | pdfWriter.SetVersion(version.Major, version.Minor) 42 | 43 | // Copy PDF info. 44 | info, err := pdfReader.GetPdfInfo() 45 | if err != nil { 46 | common.Log.Debug("ERROR: %v", err) 47 | } else { 48 | pdfWriter.SetDocInfo(info) 49 | } 50 | 51 | // Copy Catalog Metadata. 52 | if meta, ok := pdfReader.GetCatalogMetadata(); ok { 53 | if err := pdfWriter.SetCatalogMetadata(meta); err != nil { 54 | return err 55 | } 56 | } 57 | 58 | // Copy catalog mark information. 59 | if markInfo, ok := pdfReader.GetCatalogMarkInfo(); ok { 60 | if err := pdfWriter.SetCatalogMarkInfo(markInfo); err != nil { 61 | return err 62 | } 63 | } 64 | 65 | // Copy AcroForm. 66 | err = pdfWriter.SetForms(pdfReader.AcroForm) 67 | if err != nil { 68 | common.Log.Debug("ERROR: %v", err) 69 | return err 70 | } 71 | 72 | // Copy viewer preferences. 73 | if pref, ok := pdfReader.GetCatalogViewerPreferences(); ok { 74 | if err := pdfWriter.SetCatalogViewerPreferences(pref); err != nil { 75 | return err 76 | } 77 | } 78 | 79 | // Copy language preferences. 80 | if lang, ok := pdfReader.GetCatalogLanguage(); ok { 81 | if err := pdfWriter.SetCatalogLanguage(lang); err != nil { 82 | return err 83 | } 84 | } 85 | 86 | // Copy document outlines. 87 | pdfWriter.AddOutlineTree(pdfReader.GetOutlineTree()) 88 | 89 | // Copy OC Properties. 
90 | props, err := pdfReader.GetOCProperties() 91 | if err != nil { 92 | common.Log.Debug("ERROR: %v", err) 93 | } else { 94 | err = pdfWriter.SetOCProperties(props) 95 | if err != nil { 96 | common.Log.Debug("ERROR: %v", err) 97 | } 98 | } 99 | 100 | // Copy page labels. 101 | labelObj, err := pdfReader.GetPageLabels() 102 | if err != nil { 103 | common.Log.Debug("ERROR: %v", err) 104 | } else { 105 | err = pdfWriter.SetPageLabels(labelObj) 106 | if err != nil { 107 | common.Log.Debug("ERROR: %v", err) 108 | } 109 | } 110 | 111 | // Copy named destinations. 112 | namedDest, err := pdfReader.GetNamedDestinations() 113 | if err != nil { 114 | common.Log.Debug("ERROR: %v", err) 115 | } else { 116 | err = pdfWriter.SetNamedDestinations(namedDest) 117 | if err != nil { 118 | common.Log.Debug("ERROR: %v", err) 119 | } 120 | } 121 | 122 | // Copy name dictionary. 123 | nameDict, err := pdfReader.GetNameDictionary() 124 | if err != nil { 125 | common.Log.Debug("ERROR: %v", err) 126 | } else { 127 | err = pdfWriter.SetNameDictionary(nameDict) 128 | if err != nil { 129 | common.Log.Debug("ERROR: %v", err) 130 | } 131 | } 132 | 133 | // Copy StructTreeRoot dictionary. 134 | structTreeRoot, found := pdfReader.GetCatalogStructTreeRoot() 135 | if found { 136 | err := pdfWriter.SetCatalogStructTreeRoot(structTreeRoot) 137 | if err != nil { 138 | common.Log.Debug("ERROR: %v", err) 139 | } 140 | } 141 | 142 | // Copy global page rotation. 143 | if pdfReader.Rotate != nil { 144 | if err := pdfWriter.SetRotation(*pdfReader.Rotate); err != nil { 145 | common.Log.Debug("ERROR: %v", err) 146 | } 147 | } 148 | 149 | // Write output file. 
150 | safe := inputPath == outputPath 151 | return writePDF(outputPath, &pdfWriter, safe) 152 | } 153 | -------------------------------------------------------------------------------- /pkg/pdf/extract.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "archive/zip" 10 | "bytes" 11 | "fmt" 12 | "image/jpeg" 13 | "io" 14 | "os" 15 | "path/filepath" 16 | "strings" 17 | "time" 18 | 19 | uniextractor "github.com/unidoc/unipdf/v4/extractor" 20 | ) 21 | 22 | // ExtractText returns all text content from the PDF file specified by the 23 | // inputPath parameter. A password can be specified for encrypted PDF files. 24 | // Also, a list of pages from which to extract text can be passed in. 25 | // If the pages parameter is nil or an empty slice, the text is extracted from 26 | // all the pages of the file. 27 | func ExtractText(inputPath, password string, pages []int) (string, error) { 28 | // Read input file. 29 | r, pageCount, _, _, err := readPDF(inputPath, password) 30 | if err != nil { 31 | return "", err 32 | } 33 | 34 | // Extract text. 35 | if len(pages) == 0 { 36 | pages = createPageRange(pageCount) 37 | } 38 | // Accumulate in a builder so earlier pages are not re-copied on every append. 39 | var text strings.Builder 40 | for _, numPage := range pages { 41 | // Get page. 42 | page, err := r.GetPage(numPage) 43 | if err != nil { 44 | return "", err 45 | } 46 | 47 | // Extract page text. 48 | extractor, err := uniextractor.New(page) 49 | if err != nil { 50 | return "", err 51 | } 52 | 53 | pageText, err := extractor.ExtractText() 54 | if err != nil { 55 | return "", err 56 | } 57 | 58 | text.WriteString(pageText) 59 | } 60 | 61 | return text.String(), nil 62 | } 63 | 64 | // ExtractImages extracts all image content from the PDF file specified by the 65 | // inputPath parameter.
The extracted collection of images is saved as a ZIP 66 | // archive at the location specified by the outputPath parameter. 67 | // A password can be passed in, if the input file is encrypted. 68 | // Also, a list of pages from which to extract images can be passed in. 69 | // If the pages parameter is nil or an empty slice, the images are extracted 70 | // from all the pages of the file. 71 | // In addition, the image extraction process can be controlled by using the 72 | // options parameter. If the options parameter is nil, the default image 73 | // extraction options are used. 74 | func ExtractImages(inputPath, outputPath, password string, pages []int, 75 | options *uniextractor.ImageExtractOptions) (string, int, error) { 76 | // Use input file directory if no output path is specified. 77 | if outputPath == "" { 78 | dir, name := filepath.Split(inputPath) 79 | name = strings.TrimSuffix(name, filepath.Ext(name)) + ".zip" 80 | outputPath = filepath.Join(dir, name) 81 | } 82 | 83 | // Read input file. 84 | r, pageCount, _, _, err := readPDF(inputPath, password) 85 | if err != nil { 86 | return "", 0, err 87 | } 88 | 89 | // Extract images. 90 | if len(pages) == 0 { 91 | pages = createPageRange(pageCount) 92 | } 93 | 94 | // Create zip file. 95 | zipBuffer := bytes.NewBuffer(nil) 96 | w := zip.NewWriter(zipBuffer) 97 | now := time.Now() 98 | var countImages int 99 | 100 | for _, numPage := range pages { 101 | // Get page. 102 | page, err := r.GetPage(numPage) 103 | if err != nil { 104 | return "", 0, err 105 | } 106 | 107 | // Extract page images. 108 | extractor, err := uniextractor.New(page) 109 | if err != nil { 110 | return "", 0, err 111 | } 112 | 113 | pageImages, err := extractor.ExtractPageImages(options) 114 | if err != nil { 115 | return "", 0, err 116 | } 117 | 118 | // Add images to zip file. 
119 | images := pageImages.Images 120 | countImages += len(images) 121 | 122 | for i, pageImage := range images { 123 | img, err := pageImage.Image.ToGoImage() 124 | if err != nil { 125 | return "", 0, err 126 | } 127 | 128 | filename, err := w.CreateHeader(&zip.FileHeader{ 129 | Name: (fmt.Sprintf("p%d_%d.jpg", numPage, i)), 130 | Modified: now, 131 | }) 132 | if err != nil { 133 | return "", 0, err 134 | } 135 | 136 | err = jpeg.Encode(filename, img, &jpeg.Options{Quality: 100}) 137 | if err != nil { 138 | return "", 0, err 139 | } 140 | } 141 | } 142 | // Close writes the ZIP central directory; report a failure instead of hiding it. 143 | if err := w.Close(); err != nil { 144 | return "", 0, err 145 | } 146 | 147 | if countImages == 0 { 148 | return "", 0, nil 149 | } 150 | 151 | // Write output file. 152 | outputFile, err := os.Create(outputPath) 153 | if err != nil { 154 | return "", 0, err 155 | } 156 | defer outputFile.Close() 157 | 158 | if _, err := io.Copy(outputFile, zipBuffer); err != nil { 159 | return "", 0, err 160 | } 161 | 162 | return outputPath, countImages, nil 163 | } 164 | -------------------------------------------------------------------------------- /internal/cli/form_fdfmerge.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "strings" 13 | 14 | "github.com/spf13/cobra" 15 | "github.com/unidoc/unipdf-cli/pkg/pdf" 16 | ) 17 | 18 | const formFDFMergeCmdDesc = `Fill form fields from FDF file. 19 | 20 | The field values specified in the FDF file template are used to fill the form 21 | fields in the input PDF files. In addition, the output file form fields can be 22 | flattened by using the --flatten flag.
The flattening process makes the form 23 | fields of the output files read-only by appending the form field annotation 24 | XObject Form data to the page content stream, thus making it part of the page 25 | contents. 26 | 27 | The command can take multiple files and directories as input parameters. 28 | By default, each PDF file is saved in the same location as the original file, 29 | appending the "_filled" suffix to the file name. Use the --overwrite flag 30 | to overwrite the original files. 31 | In addition, the filled output files can be saved to a different directory 32 | by using the --target-dir flag. 33 | The command can search for PDF files inside the subdirectories of the 34 | specified input directories by using the --recursive flag. 35 | ` 36 | 37 | var formFDFMergeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n", 38 | fmt.Sprintf("%s form fdfmerge fields.fdf file_1.pdf file_n.pdf", appName), 39 | fmt.Sprintf("%s form fdfmerge -O fields.fdf file_1.pdf file_n.pdf", appName), 40 | fmt.Sprintf("%s form fdfmerge -O -r -f fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName), 41 | fmt.Sprintf("%s form fdfmerge -t out_dir fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName), 42 | fmt.Sprintf("%s form fdfmerge -t out_dir -r fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName), 43 | fmt.Sprintf("%s form fdfmerge -t out_dir -r -p pass fields.fdf file_1.pdf file_n.pdf dir_1 dir_n", appName), 44 | ) 45 | 46 | // formFDFMergeCmd represents the form fdfmerge command. 47 | var formFDFMergeCmd = &cobra.Command{ 48 | Use: "fdfmerge [FLAG]... FDF_FILE INPUT_FILES...", 49 | Short: "Fill form fields from FDF file", 50 | Long: formFDFMergeCmdDesc, 51 | Example: formFDFMergeCmdExample, 52 | DisableFlagsInUseLine: true, 53 | Run: func(cmd *cobra.Command, args []string) { 54 | // Parse input flags. 
55 | outputDir, _ := cmd.Flags().GetString("target-dir") 56 | overwrite, _ := cmd.Flags().GetBool("overwrite") 57 | recursive, _ := cmd.Flags().GetBool("recursive") 58 | password, _ := cmd.Flags().GetString("password") 59 | flatten, _ := cmd.Flags().GetBool("flatten") 60 | 61 | // Parse input parameters. 62 | fdfPath := args[0] 63 | 64 | inputPaths, err := parseInputPaths(args[1:], recursive, pdfMatcher) 65 | if err != nil { 66 | printErr("Could not parse input files: %s\n", err) 67 | } 68 | 69 | // Create output directory, if it does not exist. 70 | if outputDir != "" { 71 | if overwrite { 72 | printErr("The --target-dir and the --overwrite flags are mutually exclusive") 73 | } 74 | if err = os.MkdirAll(outputDir, os.ModePerm); err != nil { 75 | printErr("Could not create output directory: %s\n", err) 76 | } 77 | } 78 | 79 | // Fill form fields. 80 | for _, inputPath := range inputPaths { 81 | fmt.Printf("Filling form values for %s\n", inputPath) 82 | 83 | // Generate output path. 84 | outputPath := generateOutputPath(inputPath, outputDir, "filled", overwrite) 85 | 86 | // Fill input file form fields. 
87 | err := pdf.FormFillFDF(inputPath, fdfPath, outputPath, password, flatten) 88 | if err != nil { 89 | printErr("Could not fill form fields: %s\n", err) 90 | } 91 | 92 | fmt.Printf("Original: %s\n", inputPath) 93 | fmt.Printf("Filled: %s\n", outputPath) 94 | fmt.Println("Status: success") 95 | fmt.Println(strings.Repeat("-", 10)) 96 | } 97 | }, 98 | Args: func(_ *cobra.Command, args []string) error { 99 | if len(args) < 2 { 100 | return errors.New("must provide the FDF file and at least one input file") 101 | } 102 | 103 | return nil 104 | }, 105 | } 106 | 107 | func init() { 108 | formCmd.AddCommand(formFDFMergeCmd) 109 | 110 | formFDFMergeCmd.Flags().StringP("target-dir", "t", "", "output directory") 111 | formFDFMergeCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files") 112 | formFDFMergeCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories") 113 | formFDFMergeCmd.Flags().StringP("password", "p", "", "input file password") 114 | formFDFMergeCmd.Flags().BoolP("flatten", "f", false, "flatten form annotations") 115 | } 116 | -------------------------------------------------------------------------------- /internal/cli/form_fill.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "strings" 13 | 14 | "github.com/spf13/cobra" 15 | "github.com/unidoc/unipdf-cli/pkg/pdf" 16 | ) 17 | 18 | const formFillCmdDesc = `Fill form fields from JSON file. 19 | 20 | The field values specified in the JSON file template are used to fill the form 21 | fields in the input PDF files. In addition, the output file form fields can be 22 | flattened by using the --flatten flag. 
The flattening process makes the form 23 | fields of the output files read-only by appending the form field annotation 24 | XObject Form data to the page content stream, thus making it part of the page 25 | contents. 26 | 27 | The command can take multiple files and directories as input parameters. 28 | By default, each PDF file is saved in the same location as the original file, 29 | appending the "_filled" suffix to the file name. Use the --overwrite flag 30 | to overwrite the original files. 31 | In addition, the filled output files can be saved to a different directory 32 | by using the --target-dir flag. 33 | The command can search for PDF files inside the subdirectories of the 34 | specified input directories by using the --recursive flag. 35 | 36 | The "form export" command can be used to generate the JSON form fields template 37 | for a PDF file. 38 | ` 39 | 40 | var formFillCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n", 41 | fmt.Sprintf("%s form fill fields.json file_1.pdf file_n.pdf", appName), 42 | fmt.Sprintf("%s form fill -O fields.json file_1.pdf file_n.pdf", appName), 43 | fmt.Sprintf("%s form fill -O -r -f fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName), 44 | fmt.Sprintf("%s form fill -t out_dir fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName), 45 | fmt.Sprintf("%s form fill -t out_dir -r fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName), 46 | fmt.Sprintf("%s form fill -t out_dir -r -p pass fields.json file_1.pdf file_n.pdf dir_1 dir_n", appName), 47 | ) 48 | 49 | // formFillCmd represents the form fill command. 50 | var formFillCmd = &cobra.Command{ 51 | Use: "fill [FLAG]... JSON_FILE INPUT_FILES...", 52 | Short: "Fill form fields from JSON file", 53 | Long: formFillCmdDesc, 54 | Example: formFillCmdExample, 55 | DisableFlagsInUseLine: true, 56 | Run: func(cmd *cobra.Command, args []string) { 57 | // Parse input flags. 
58 | outputDir, _ := cmd.Flags().GetString("target-dir") 59 | overwrite, _ := cmd.Flags().GetBool("overwrite") 60 | recursive, _ := cmd.Flags().GetBool("recursive") 61 | password, _ := cmd.Flags().GetString("password") 62 | flatten, _ := cmd.Flags().GetBool("flatten") 63 | 64 | // Parse input parameters. 65 | jsonPath := args[0] 66 | 67 | inputPaths, err := parseInputPaths(args[1:], recursive, pdfMatcher) 68 | if err != nil { 69 | printErr("Could not parse input files: %s\n", err) 70 | } 71 | 72 | // Create output directory, if it does not exist. 73 | if outputDir != "" { 74 | if overwrite { 75 | printErr("The --target-dir and the --overwrite flags are mutually exclusive") 76 | } 77 | if err = os.MkdirAll(outputDir, os.ModePerm); err != nil { 78 | printErr("Could not create output directory: %s\n", err) 79 | } 80 | } 81 | 82 | // Fill form fields. 83 | for _, inputPath := range inputPaths { 84 | fmt.Printf("Filling form values for %s\n", inputPath) 85 | 86 | // Generate output path. 87 | outputPath := generateOutputPath(inputPath, outputDir, "filled", overwrite) 88 | 89 | // Fill input file form fields. 
90 | err := pdf.FormFillJSON(inputPath, jsonPath, outputPath, password, flatten) 91 | if err != nil { 92 | printErr("Could not fill form fields: %s\n", err) 93 | } 94 | 95 | fmt.Printf("Original: %s\n", inputPath) 96 | fmt.Printf("Filled: %s\n", outputPath) 97 | fmt.Println("Status: success") 98 | fmt.Println(strings.Repeat("-", 10)) 99 | } 100 | }, 101 | Args: func(_ *cobra.Command, args []string) error { 102 | if len(args) < 2 { 103 | return errors.New("must provide the JSON file and at least one input file") 104 | } 105 | 106 | return nil 107 | }, 108 | } 109 | 110 | func init() { 111 | formCmd.AddCommand(formFillCmd) 112 | 113 | formFillCmd.Flags().StringP("target-dir", "t", "", "output directory") 114 | formFillCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files") 115 | formFillCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories") 116 | formFillCmd.Flags().StringP("password", "p", "", "input file password") 117 | formFillCmd.Flags().BoolP("flatten", "f", false, "flatten form annotations") 118 | } 119 | -------------------------------------------------------------------------------- /internal/cli/optimize.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "strings" 13 | "time" 14 | 15 | "github.com/spf13/cobra" 16 | "github.com/unidoc/unipdf-cli/pkg/pdf" 17 | ) 18 | 19 | const optimizeCmdDesc = `Optimize PDF files by optimizing structure, compression and image quality. 20 | 21 | The command can take multiple files and directories as input parameters. 22 | By default, each PDF file is saved in the same location as the original file, 23 | appending the "_optimized" suffix to the file name. Use the --overwrite flag 24 | to overwrite the original files. 
25 | In addition, the optimized output files can be saved to a different directory 26 | by using the --target-dir flag. 27 | The command can search for PDF files inside the subdirectories of the 28 | specified input directories by using the --recursive flag. 29 | 30 | The quality of the images in the output files can be configured through 31 | the --image-quality flag (default 90). 32 | The resolution of the output images can be controlled using the --image-ppi flag. 33 | Common pixels per inch values are 100 (screen), 150-300 (print), 600 (art). If 34 | not specified, the PPI of the output images is 100. 35 | ` 36 | 37 | var optimizeCmdExample = fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n", 38 | fmt.Sprintf("%s optimize file_1.pdf file_n.pdf", appName), 39 | fmt.Sprintf("%s optimize -O file_1.pdf file_n.pdf", appName), 40 | fmt.Sprintf("%s optimize -O -r file_1.pdf file_n.pdf dir_1 dir_n", appName), 41 | fmt.Sprintf("%s optimize -t out_dir file_1.pdf file_n.pdf dir_1 dir_n", appName), 42 | fmt.Sprintf("%s optimize -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n", appName), 43 | fmt.Sprintf("%s optimize -t out_dir -r -q 75 file_1.pdf file_n.pdf dir_1 dir_n", appName), 44 | fmt.Sprintf("%s optimize -t out_dir -r -q 75 -P 100 file_1.pdf file_n.pdf dir_1 dir_n", appName), 45 | fmt.Sprintf("%s optimize -t out_dir -r -q 75 -P 100 -p pass file_1.pdf file_n.pdf dir_1 dir_n", appName), 46 | ) 47 | 48 | // optimizeCmd represents the optimize command. 49 | var optimizeCmd = &cobra.Command{ 50 | Use: "optimize [FLAG]... INPUT_FILES...", 51 | Short: "Optimize PDF files", 52 | Long: optimizeCmdDesc, 53 | Example: optimizeCmdExample, 54 | DisableFlagsInUseLine: true, 55 | Run: func(cmd *cobra.Command, args []string) { 56 | // Parse flags. 
57 | outputDir, _ := cmd.Flags().GetString("target-dir") 58 | overwrite, _ := cmd.Flags().GetBool("overwrite") 59 | recursive, _ := cmd.Flags().GetBool("recursive") 60 | password, _ := cmd.Flags().GetString("password") 61 | 62 | // Parse optimization parameters. 63 | imageQuality, err := cmd.Flags().GetInt("image-quality") 64 | if err != nil { 65 | imageQuality = 90 66 | } 67 | 68 | imagePPI, err := cmd.Flags().GetFloat64("image-ppi") 69 | if err != nil { 70 | imagePPI = 100 71 | } 72 | 73 | opts := &pdf.OptimizeOpts{ 74 | ImageQuality: clampInt(imageQuality, 10, 100), 75 | ImagePPI: imagePPI, 76 | } 77 | 78 | // Parse input parameters. 79 | inputPaths, err := parseInputPaths(args, recursive, pdfMatcher) 80 | if err != nil { 81 | printErr("Could not parse input files: %s\n", err) 82 | } 83 | 84 | // Create output directory, if it does not exist. 85 | if outputDir != "" { 86 | if overwrite { 87 | printErr("The --target-dir and the --overwrite flags are mutually exclusive") 88 | } 89 | 90 | if err = os.MkdirAll(outputDir, os.ModePerm); err != nil { 91 | printErr("Could not create output directory: %s\n", err) 92 | } 93 | } 94 | 95 | // Optimize PDF files. 96 | for _, inputPath := range inputPaths { 97 | fmt.Printf("Optimizing %s\n", inputPath) 98 | 99 | // Generate output path. 100 | outputPath := generateOutputPath(inputPath, outputDir, "optimized", overwrite) 101 | 102 | // Optimize input file. 
103 | res, err := pdf.Optimize(inputPath, outputPath, password, opts) 104 | if err != nil { 105 | printErr("Could not optimize input file: %s\n", err) 106 | } 107 | 108 | inSize := res.Original.Size 109 | outSize := res.Optimized.Size 110 | ratio := 100.0 - (float64(outSize) / float64(inSize) * 100.0) 111 | duration := float64(res.Duration) / float64(time.Millisecond) 112 | 113 | fmt.Printf("Original: %s\n", res.Original.Name) 114 | fmt.Printf("Original size: %d bytes\n", inSize) 115 | fmt.Printf("Optimized: %s\n", res.Optimized.Name) 116 | fmt.Printf("Optimized size: %d bytes\n", outSize) 117 | fmt.Printf("Compression ratio: %.2f%%\n", ratio) 118 | fmt.Printf("Processing time: %.2f ms\n", duration) 119 | fmt.Println("Status: success") 120 | fmt.Println(strings.Repeat("-", 10)) 121 | } 122 | }, 123 | Args: func(_ *cobra.Command, args []string) error { 124 | if len(args) < 1 { 125 | return errors.New("must provide at least one input file") 126 | } 127 | 128 | return nil 129 | }, 130 | } 131 | 132 | func init() { 133 | rootCmd.AddCommand(optimizeCmd) 134 | 135 | optimizeCmd.Flags().StringP("target-dir", "t", "", "output directory") 136 | optimizeCmd.Flags().BoolP("overwrite", "O", false, "overwrite input files") 137 | optimizeCmd.Flags().BoolP("recursive", "r", false, "search PDF files in subdirectories") 138 | optimizeCmd.Flags().StringP("password", "p", "", "file password") 139 | optimizeCmd.Flags().IntP("image-quality", "q", 90, "output JPEG image quality") 140 | optimizeCmd.Flags().Float64P("image-ppi", "P", 100, "output images pixels per inch") 141 | } 142 | -------------------------------------------------------------------------------- /pkg/pdf/utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 
4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "errors" 10 | "os" 11 | "path/filepath" 12 | 13 | unisecurity "github.com/unidoc/unipdf/v4/core/security" 14 | unicreator "github.com/unidoc/unipdf/v4/creator" 15 | unipdf "github.com/unidoc/unipdf/v4/model" 16 | ) 17 | 18 | func readPDF(filename, password string) (*unipdf.PdfReader, int, bool, unisecurity.Permissions, error) { 19 | // Open input file. 20 | f, err := os.Open(filename) 21 | if err != nil { 22 | return nil, 0, false, 0, err 23 | } 24 | defer f.Close() 25 | 26 | // Read input file. 27 | r, err := unipdf.NewPdfReader(f) 28 | if err != nil { 29 | return nil, 0, false, 0, err 30 | } 31 | 32 | // Check if file is encrypted. 33 | encrypted, err := r.IsEncrypted() 34 | if err != nil { 35 | return nil, 0, false, 0, err 36 | } 37 | 38 | // Decrypt using the specified password, if necessary. 39 | perms := unisecurity.PermOwner 40 | if encrypted { 41 | passwords := []string{password} 42 | if password != "" { 43 | passwords = append(passwords, "") 44 | } 45 | 46 | // Extract use permissions 47 | _, perms, err = r.CheckAccessRights([]byte(password)) 48 | if err != nil { 49 | perms = unisecurity.Permissions(0) 50 | } 51 | 52 | var decrypted bool 53 | for _, p := range passwords { 54 | if auth, err := r.Decrypt([]byte(p)); err != nil || !auth { 55 | continue 56 | } 57 | 58 | decrypted = true 59 | break 60 | } 61 | 62 | if !decrypted { 63 | return nil, 0, false, 0, errors.New("could not decrypt file with the provided password") 64 | } 65 | } 66 | 67 | // Get number of pages. 68 | pages, err := r.GetNumPages() 69 | if err != nil { 70 | return nil, 0, false, 0, err 71 | } 72 | 73 | return r, pages, encrypted, perms, nil 74 | } 75 | 76 | func writePDF(filename string, w *unipdf.PdfWriter, safe bool) error { 77 | var err error 78 | if safe { 79 | // Make a copy of the original file and restore it if 80 | // any error occurs while writing the new file. 
81 | if _, err = os.Stat(filename); !os.IsNotExist(err) { 82 | tempPath := filepath.Join(os.TempDir(), "unipdf_"+filepath.Base(filename)) 83 | if err = os.Rename(filename, tempPath); err != nil { 84 | return err 85 | } 86 | defer func() { 87 | if err != nil { 88 | // Writing failed: best-effort restore of the original file. 89 | _ = os.Rename(tempPath, filename) 90 | return 91 | } 92 | 93 | // Success: the backup copy is no longer needed. 94 | _ = os.Remove(tempPath) 95 | }() 96 | } 97 | } 98 | 99 | // Create output file. 100 | of, err := os.Create(filename) 101 | if err != nil { 102 | return err 103 | } 104 | defer of.Close() 105 | 106 | // Write output file. 107 | err = w.Write(of) 108 | if err != nil { 109 | return err 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func writeCreatorPDF(filename string, c *unicreator.Creator, safe bool) error { 116 | var err error 117 | if safe { 118 | // Make a copy of the original file and restore it if 119 | // any error occurs while writing the new file. 120 | if _, err = os.Stat(filename); !os.IsNotExist(err) { 121 | tempPath := filepath.Join(os.TempDir(), "unipdf_"+filepath.Base(filename)) 122 | if err = os.Rename(filename, tempPath); err != nil { 123 | return err 124 | } 125 | defer func() { 126 | if err != nil { 127 | // Writing failed: best-effort restore of the original file. 128 | _ = os.Rename(tempPath, filename) 129 | return 130 | } 131 | 132 | // Success: the backup copy is no longer needed. 133 | _ = os.Remove(tempPath) 134 | }() 135 | } 136 | } 137 | 138 | // Create output file. 139 | of, err := os.Create(filename) 140 | if err != nil { 141 | return err 142 | } 143 | defer of.Close() 144 | // Write output file through err so the deferred restore sees a failed write. 145 | err = c.Write(of) 146 | return err 147 | } 148 | 149 | func readerToWriter(r *unipdf.PdfReader, w *unipdf.PdfWriter, pages []int) error { 150 | if r == nil { 151 | return errors.New("source PDF cannot be null") 152 | } 153 | if w == nil { 154 | return errors.New("destination PDF cannot be null") 155 | } 156 | 157 | // Get number of pages.
158 | pageCount, err := r.GetNumPages() 159 | if err != nil { 160 | return err 161 | } 162 | 163 | // Add optional properties 164 | if ocProps, err := r.GetOCProperties(); err == nil { 165 | w.SetOCProperties(ocProps) 166 | } 167 | 168 | // Add pages. 169 | if len(pages) == 0 { 170 | pages = createPageRange(pageCount) 171 | } 172 | 173 | for _, numPage := range pages { 174 | if numPage < 1 || numPage > pageCount { 175 | continue 176 | } 177 | 178 | page, err := r.GetPage(numPage) 179 | if err != nil { 180 | return err 181 | } 182 | 183 | if err = w.AddPage(page); err != nil { 184 | return err 185 | } 186 | } 187 | 188 | // Add forms. 189 | if r.AcroForm != nil { 190 | w.SetForms(r.AcroForm) 191 | } 192 | 193 | return nil 194 | } 195 | 196 | func readerToCreator(r *unipdf.PdfReader, w *unicreator.Creator, pages []int, rotationAngle int) error { 197 | if r == nil { 198 | return errors.New("source PDF cannot be null") 199 | } 200 | if w == nil { 201 | return errors.New("destination PDF cannot be null") 202 | } 203 | 204 | // Get number of pages. 205 | pageCount, err := r.GetNumPages() 206 | if err != nil { 207 | return err 208 | } 209 | 210 | // Add pages. 211 | if len(pages) == 0 { 212 | pages = createPageRange(pageCount) 213 | } 214 | 215 | for _, numPage := range pages { 216 | if numPage < 1 || numPage > pageCount { 217 | continue 218 | } 219 | 220 | page, err := r.GetPage(numPage) 221 | if err != nil { 222 | return err 223 | } 224 | 225 | if err = w.AddPage(page); err != nil { 226 | return err 227 | } 228 | 229 | if rotationAngle != 0 { 230 | if err = w.RotateDeg(int64(rotationAngle)); err != nil { 231 | return err 232 | } 233 | } 234 | } 235 | 236 | // Add forms. 
// createPageRange returns the 1-based page numbers 1..count in ascending
// order. A count of zero or less yields an empty, non-nil slice.
func createPageRange(count int) []int {
	if count <= 0 {
		return []int{}
	}

	pages := make([]int, count)
	for i := range pages {
		pages[i] = i + 1
	}

	return pages
}
61 | return writePDF(outputPath, &w, false) 62 | } 63 | 64 | func mergeResources(r, r2 *unipdf.PdfPageResources) (*unipdf.PdfPageResources, error) { 65 | // Merge XObject resources. 66 | if r.XObject == nil { 67 | r.XObject = r2.XObject 68 | } else { 69 | xobjs := getDict(r.XObject) 70 | if r2.XObject != nil { 71 | xobjs2 := getDict(r2.XObject) 72 | for _, key := range xobjs2.Keys() { 73 | val := xobjs2.Get(key) 74 | xobjs.Set(key, val) 75 | } 76 | } 77 | } 78 | 79 | // Merge Colorspace resources. 80 | colorspaces, err := r.GetColorspaces() 81 | if err != nil { 82 | return nil, err 83 | } 84 | colorspaces2, err := r2.GetColorspaces() 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | if colorspaces == nil { 90 | r.SetColorSpace(colorspaces2) 91 | } else { 92 | if colorspaces2 != nil { 93 | for key, val := range colorspaces2.Colorspaces { 94 | // Add the r2 colorspaces to r. Overwrite if duplicate. 95 | // Ensure only present once in Names. 96 | if _, has := colorspaces.Colorspaces[key]; !has { 97 | colorspaces.Names = append(colorspaces.Names, key) 98 | } 99 | r.SetColorspaceByName(unicore.PdfObjectName(key), val) 100 | } 101 | } 102 | } 103 | 104 | // Merge ExtGState resources. 
105 | if r.ExtGState == nil { 106 | r.ExtGState = r2.ExtGState 107 | } else { 108 | extgstates := getDict(r.ExtGState) 109 | 110 | if r2.ExtGState != nil { 111 | extgstates2 := getDict(r2.ExtGState) 112 | for _, key := range extgstates2.Keys() { 113 | val := extgstates2.Get(key) 114 | extgstates.Set(key, val) 115 | } 116 | } 117 | } 118 | 119 | if r.Shading == nil { 120 | r.Shading = r2.Shading 121 | } else { 122 | shadings := getDict(r.Shading) 123 | if r2.Shading != nil { 124 | shadings2 := getDict(r2.Shading) 125 | for _, key := range shadings2.Keys() { 126 | val := shadings2.Get(key) 127 | shadings.Set(key, val) 128 | } 129 | } 130 | } 131 | 132 | if r.Pattern == nil { 133 | r.Pattern = r2.Pattern 134 | } else { 135 | shadings := getDict(r.Pattern) 136 | if r2.Pattern != nil { 137 | patterns2 := getDict(r2.Pattern) 138 | for _, key := range patterns2.Keys() { 139 | val := patterns2.Get(key) 140 | shadings.Set(key, val) 141 | } 142 | } 143 | } 144 | 145 | if r.Font == nil { 146 | r.Font = r2.Font 147 | } else { 148 | fonts := getDict(r.Font) 149 | if r2.Font != nil { 150 | fonts2 := getDict(r2.Font) 151 | for _, key := range fonts2.Keys() { 152 | val := fonts2.Get(key) 153 | fonts.Set(key, val) 154 | } 155 | } 156 | } 157 | 158 | if r.ProcSet == nil { 159 | r.ProcSet = r2.ProcSet 160 | } else { 161 | procsets := getDict(r.ProcSet) 162 | if r2.ProcSet != nil { 163 | procsets2 := getDict(r2.ProcSet) 164 | for _, key := range procsets2.Keys() { 165 | val := procsets2.Get(key) 166 | procsets.Set(key, val) 167 | } 168 | } 169 | } 170 | 171 | if r.Properties == nil { 172 | r.Properties = r2.Properties 173 | } else { 174 | props := getDict(r.Properties) 175 | if r2.Properties != nil { 176 | props2 := getDict(r2.Properties) 177 | for _, key := range props2.Keys() { 178 | val := props2.Get(key) 179 | props.Set(key, val) 180 | } 181 | } 182 | } 183 | 184 | return r, nil 185 | } 186 | 187 | // mergeForms merges two interactive forms. 
188 | func mergeForms(form, form2 *unipdf.PdfAcroForm, docNum int) (*unipdf.PdfAcroForm, error) { 189 | if form.NeedAppearances == nil { 190 | form.NeedAppearances = form2.NeedAppearances 191 | } 192 | 193 | if form.SigFlags == nil { 194 | form.SigFlags = form2.SigFlags 195 | } 196 | 197 | if form.CO == nil { 198 | form.CO = form2.CO 199 | } 200 | 201 | if form.DR == nil { 202 | form.DR = form2.DR 203 | } else if form2.DR != nil { 204 | dr, err := mergeResources(form.DR, form2.DR) 205 | if err != nil { 206 | return nil, err 207 | } 208 | form.DR = dr 209 | } 210 | 211 | if form.DA == nil { 212 | form.DA = form2.DA 213 | } 214 | 215 | if form.Q == nil { 216 | form.Q = form2.Q 217 | } 218 | 219 | if form.XFA == nil { 220 | form.XFA = form2.XFA 221 | } else { 222 | if form2.XFA != nil { 223 | unicommon.Log.Debug("TODO: Handle XFA merging - Currently just using first one that is encountered") 224 | } 225 | } 226 | 227 | // Fields. 228 | if form.Fields == nil { 229 | form.Fields = form2.Fields 230 | } else { 231 | field := unipdf.NewPdfField() 232 | field.T = unicore.MakeString(fmt.Sprintf("doc%d", docNum)) 233 | field.Kids = []*unipdf.PdfField{} 234 | if form2.Fields != nil { 235 | for _, subfield := range *form2.Fields { 236 | // Update parent. 
237 | subfield.Parent = field 238 | field.Kids = append(field.Kids, subfield) 239 | } 240 | 241 | } 242 | *form.Fields = append(*form.Fields, field) 243 | } 244 | 245 | return form, nil 246 | } 247 | 248 | func getDict(obj unicore.PdfObject) *unicore.PdfObjectDictionary { 249 | if obj == nil { 250 | return nil 251 | } 252 | 253 | obj = unicore.TraceToDirectObject(obj) 254 | dict, ok := obj.(*unicore.PdfObjectDictionary) 255 | if !ok { 256 | unicommon.Log.Debug("Error type check error (got %T)", obj) 257 | return nil 258 | } 259 | 260 | return dict 261 | } 262 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/adrg/strutil v0.2.2/go.mod h1:EF2fjOFlGTepljfI+FzgTG13oXthR7ZAil9/aginnNQ= 2 | github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4= 3 | github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA= 4 | github.com/adrg/sysfont v0.1.2 h1:MSU3KREM4RhsQ+7QgH7wPEPTgAgBIz0Hw6Nd4u7QgjE= 5 | github.com/adrg/sysfont v0.1.2/go.mod h1:6d3l7/BSjX9VaeXWJt9fcrftFaD/t7l11xgSywCPZGk= 6 | github.com/adrg/xdg v0.3.0/go.mod h1:7I2hH/IT30IsupOpKZ5ue7/qNi3CoKzD6tL3HwpaRMQ= 7 | github.com/adrg/xdg v0.5.3 h1:xRnxJXne7+oWDatRhR1JLnvuccuIeCoBu2rtuLqQB78= 8 | github.com/adrg/xdg v0.5.3/go.mod h1:nlTsY+NNiCBGCK2tpm09vRqfVzrc2fLmXGpBLF0zlTQ= 9 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 12 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46 h1:N+R2A3fGIr5GucoRMu2xpqyQWQlfY31orbofBCdjMz8= 14 | github.com/gorilla/i18n v0.0.0-20150820051429-8b358169da46/go.mod 
h1:2Yoiy15Cf7Q3NFwfaJquh7Mk1uGI09ytcD7CUhn8j7s= 15 | github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= 16 | github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= 17 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 18 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 19 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 20 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 21 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 22 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 23 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 24 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 25 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 26 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 27 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 28 | github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= 29 | github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= 30 | github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= 31 | github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 32 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 33 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 34 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 35 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 36 | github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 37 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 38 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 39 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 40 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 41 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 42 | github.com/unidoc/freetype v0.2.3 h1:uPqW+AY0vXN6K2tvtg8dMAtHTEvvHTN52b72XpZU+3I= 43 | github.com/unidoc/freetype v0.2.3/go.mod h1:mJ/Q7JnqEoWtajJVrV6S1InbRv0K/fJerPB5SQs32KI= 44 | github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11 h1:kExUKrbi429KdVVuAc85z4P+W/Rk4bjGWB5KzZLl/l8= 45 | github.com/unidoc/garabic v0.0.0-20220702200334-8c7cb25baa11/go.mod h1:SX63w9Ww4+Z7E96B01OuG59SleQUb+m+dmapZ8o1Jac= 46 | github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8= 47 | github.com/unidoc/pkcs7 v0.3.0 h1:+RCopNCR8UoZtlf4bu4Y88O3j1MbvrLcOuQj/tbPLoU= 48 | github.com/unidoc/pkcs7 v0.3.0/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8= 49 | github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl66m7MJ8OqBjq8jpWBXPK6/RKtqeTkc= 50 | github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw= 51 | github.com/unidoc/unichart v0.5.1 h1:qnYavwBV5sg9NUF59KbMOqJdh2kA454nVxdDTPPtSz8= 52 | github.com/unidoc/unichart v0.5.1/go.mod h1:/8yJsL49OqBOyG53JFVZOwwDXDquo/ZRMkfz9fNsVgc= 53 | github.com/unidoc/unipdf/v4 v4.3.0 h1:eA4zjRHTULtV5thy3MausfFYDP1i59qGdsfxe709oUY= 54 | github.com/unidoc/unipdf/v4 v4.3.0/go.mod h1:oR0EX7TmS7KaAuzFQPA9t9HjbU4f2NbWMvzXNqtXo70= 55 | github.com/unidoc/unitype v0.5.1 h1:UwTX15K6bktwKocWVvLoijIeu4JAVEAIeFqMOjvxqQs= 56 | github.com/unidoc/unitype v0.5.1/go.mod 
h1:3dxbRL+f1otNqFQIRHho8fxdg3CcUKrqS8w1SXTsqcI= 57 | golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= 58 | golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= 59 | golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= 60 | golang.org/x/image v0.30.0 h1:jD5RhkmVAnjqaCUXfbGBrn3lpxbknfN9w2UhHHU+5B4= 61 | golang.org/x/image v0.30.0/go.mod h1:SAEUTxCCMWSrJcCy/4HwavEsfZZJlYxeHLc6tTiAe/c= 62 | golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= 63 | golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= 64 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 65 | golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= 66 | golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 67 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 68 | golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= 69 | golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= 70 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 71 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= 72 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= 73 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 74 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 75 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 76 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 77 | gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 78 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 79 | -------------------------------------------------------------------------------- /pkg/pdf/replace.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "strings" 10 | 11 | "github.com/unidoc/unipdf/v4/common" 12 | "github.com/unidoc/unipdf/v4/contentstream" 13 | "github.com/unidoc/unipdf/v4/core" 14 | "github.com/unidoc/unipdf/v4/model" 15 | unipdf "github.com/unidoc/unipdf/v4/model" 16 | ) 17 | 18 | type textChunk struct { 19 | font *model.PdfFont 20 | strObj *core.PdfObjectString 21 | val string 22 | idx int 23 | } 24 | 25 | func (tc *textChunk) encode() { 26 | var encoded string 27 | if font := tc.font; font != nil { 28 | encodedBytes, numMisses := font.StringToCharcodeBytes(tc.val) 29 | if numMisses != 0 { 30 | common.Log.Debug("WARN: some runes could not be encoded.\n\t%s -> %v") 31 | } 32 | encoded = string(encodedBytes) 33 | } 34 | 35 | *tc.strObj = *core.MakeString(encoded) 36 | } 37 | 38 | type textChunks struct { 39 | text string 40 | chunks []*textChunk 41 | } 42 | 43 | func (tc *textChunks) replace(search, replacement string) { 44 | text := tc.text 45 | chunks := tc.chunks 46 | 47 | // Steps: 48 | // 1. Search for the first index of the search term in the text. 49 | // 2. Use the found index to match the text chunk which contains 50 | // (or partly contains) the search term. 51 | // 3. Replace the search term in the found text chunk. The search term 52 | // will not always start at the beginning of the text chunk. Also, 53 | // the search term could be split in multiple text chunks. 
If that's 54 | // the case, replace the portion of the search term in the found 55 | // chunk and continue removing characters from the following chunks 56 | // until the search term has been completely erased. 57 | // 4. Offset the text chunks slice to the last processed text chunk from 58 | // the previous step, if the text chunk was not completely erased, or 59 | // to the next one otherwise. This is necessary so that the visited 60 | // text chunks are skipped when searching for the next occurrence of the 61 | // search term. 62 | // 5. Discard the part of the text up to (and including) the index found 63 | // in step one. 64 | // 6. Move to step 1 in order to search for the search term in the remaining 65 | // text. 66 | var chunkOffset int 67 | matchIdx := strings.Index(text, search) 68 | for currMatchIdx := matchIdx; matchIdx != -1; { 69 | for i, chunk := range chunks[chunkOffset:] { 70 | idx, lenChunk := chunk.idx, len(chunk.val) 71 | if currMatchIdx < idx || currMatchIdx > idx+lenChunk-1 { 72 | continue 73 | } 74 | chunkOffset += i + 1 75 | 76 | start := currMatchIdx - idx 77 | remaining := len(search) - (lenChunk - start) 78 | 79 | replaceVal := chunk.val[:start] + replacement 80 | if remaining < 0 { 81 | replaceVal += chunk.val[lenChunk+remaining:] 82 | chunkOffset-- 83 | } 84 | 85 | chunk.val = replaceVal 86 | chunk.encode() 87 | 88 | for j := chunkOffset; remaining > 0; j++ { 89 | c := chunks[j] 90 | l := len(c.val) 91 | 92 | if l > remaining { 93 | c.val = c.val[remaining:] 94 | } else { 95 | c.val = "" 96 | chunkOffset++ 97 | } 98 | 99 | c.encode() 100 | remaining -= l 101 | } 102 | 103 | break 104 | } 105 | 106 | text = text[matchIdx+1:] 107 | matchIdx = strings.Index(text, search) 108 | currMatchIdx += matchIdx + 1 109 | } 110 | 111 | tc.text = strings.Replace(tc.text, search, replacement, -1) 112 | } 113 | 114 | // Replace searches the provided text in the PDF file specified by the inputPath 115 | // parameter and replaces it by the newText. 
A password can be passed in for encrypted input files. 116 | // The result is saved to outputPath. 117 | func Replace(inputPath, outputPath, text, replaceText, password string) error { 118 | // Read input file. 119 | r, pages, _, _, err := readPDF(inputPath, password) 120 | if err != nil { 121 | return err 122 | } 123 | 124 | w := unipdf.NewPdfWriter() 125 | 126 | // Search specified text. 127 | for i := 0; i < pages; i++ { 128 | // Get page. 129 | numPage := i + 1 130 | 131 | page, err := r.GetPage(numPage) 132 | if err != nil { 133 | return err 134 | } 135 | 136 | err = searchReplacePageText(page, text, replaceText) 137 | if err != nil { 138 | return err 139 | } 140 | 141 | err = w.AddPage(page) 142 | if err != nil { 143 | return err 144 | } 145 | } 146 | 147 | // Write output file. 148 | safe := inputPath == outputPath 149 | return writePDF(outputPath, &w, safe) 150 | } 151 | 152 | func searchReplacePageText(page *model.PdfPage, searchText, replaceText string) error { 153 | contents, err := page.GetAllContentStreams() 154 | if err != nil { 155 | return err 156 | } 157 | 158 | ops, err := contentstream.NewContentStreamParser(contents).Parse() 159 | if err != nil { 160 | return err 161 | } 162 | 163 | // Generate text chunks. 
164 | var currFont *model.PdfFont 165 | tc := textChunks{} 166 | 167 | textProcFunc := func(objptr *core.PdfObject) { 168 | strObj, ok := core.GetString(*objptr) 169 | if !ok { 170 | common.Log.Debug("Invalid parameter, skipping") 171 | return 172 | } 173 | 174 | str := strObj.String() 175 | if currFont != nil { 176 | decoded, _, numMisses := currFont.CharcodeBytesToUnicode(strObj.Bytes()) 177 | if numMisses != 0 { 178 | common.Log.Debug("WARN: some charcodes could not be decoded.\n\t%v -> %s", strObj.Bytes(), decoded) 179 | } 180 | str = decoded 181 | } 182 | 183 | tc.chunks = append(tc.chunks, &textChunk{ 184 | font: currFont, 185 | strObj: strObj, 186 | val: str, 187 | idx: len(tc.text), 188 | }) 189 | tc.text += str 190 | } 191 | 192 | processor := contentstream.NewContentStreamProcessor(*ops) 193 | processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "", 194 | func(op *contentstream.ContentStreamOperation, _ contentstream.GraphicsState, resources *model.PdfPageResources) error { 195 | switch op.Operand { 196 | case `Tj`, `'`: 197 | if len(op.Params) != 1 { 198 | common.Log.Debug("Invalid: Tj/' with invalid set of parameters - skip") 199 | return nil 200 | } 201 | textProcFunc(&op.Params[0]) 202 | case `''`: 203 | if len(op.Params) != 3 { 204 | common.Log.Debug("Invalid: '' with invalid set of parameters - skip") 205 | return nil 206 | } 207 | textProcFunc(&op.Params[3]) 208 | case `TJ`: 209 | if len(op.Params) != 1 { 210 | common.Log.Debug("Invalid: TJ with invalid set of parameters - skip") 211 | return nil 212 | } 213 | arr, _ := core.GetArray(op.Params[0]) 214 | for i := range arr.Elements() { 215 | obj := arr.Get(i) 216 | textProcFunc(&obj) 217 | arr.Set(i, obj) 218 | } 219 | case "Tf": 220 | if len(op.Params) != 2 { 221 | common.Log.Debug("Invalid: Tf with invalid set of parameters - skip") 222 | return nil 223 | } 224 | 225 | fname, ok := core.GetName(op.Params[0]) 226 | if !ok || fname == nil { 227 | common.Log.Debug("ERROR: could not get 
font name") 228 | return nil 229 | } 230 | 231 | fObj, has := resources.GetFontByName(*fname) 232 | if !has { 233 | common.Log.Debug("ERROR: font %s not found", fname.String()) 234 | return nil 235 | } 236 | 237 | pdfFont, err := model.NewPdfFontFromPdfObject(fObj) 238 | if err != nil { 239 | common.Log.Debug("ERROR: loading font") 240 | return nil 241 | } 242 | currFont = pdfFont 243 | } 244 | 245 | return nil 246 | }) 247 | 248 | if err = processor.Process(page.Resources); err != nil { 249 | return err 250 | } 251 | 252 | tc.replace(searchText, replaceText) 253 | return page.SetContentStreams([]string{ops.String()}, core.NewFlateEncoder()) 254 | } 255 | -------------------------------------------------------------------------------- /internal/cli/utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package cli 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "path/filepath" 13 | "sort" 14 | "strconv" 15 | "strings" 16 | "unicode" 17 | 18 | "github.com/spf13/cobra" 19 | ) 20 | 21 | type fileMatcher func(string) bool 22 | 23 | func pdfMatcher(inputPath string) bool { 24 | return strings.ToLower(filepath.Ext(inputPath)) == ".pdf" 25 | } 26 | 27 | // parsePageRange parses a string of page ranges separated by commas and 28 | // returns a slice of integer page numbers. 29 | // Example page range string: 1-3,4,6-7 30 | // The returned slice of pages contains pages 1,2,3 (1-3), 4 and 6,7 (6-7), 31 | // while page number 5 is skipped. 
32 | func parsePageRange(pageRange string) ([]int, error) { 33 | var pages []int 34 | 35 | rngs := strings.Split(removeSpaces(pageRange), ",") 36 | for _, rng := range rngs { 37 | if rng == "" { 38 | continue 39 | } 40 | 41 | indices := strings.Split(rng, "-") 42 | lenIndices := len(indices) 43 | if lenIndices > 2 { 44 | return nil, errors.New("invalid page range") 45 | } 46 | if lenIndices == 2 { 47 | start, err := strconv.Atoi(indices[0]) 48 | if err != nil { 49 | return nil, errors.New("invalid page number") 50 | } 51 | if start < 1 { 52 | return nil, errors.New("page range start must be greater than 0") 53 | } 54 | 55 | end, err := strconv.Atoi(indices[1]) 56 | if err != nil { 57 | return nil, errors.New("invalid page number") 58 | } 59 | if end < 1 { 60 | return nil, errors.New("page range end must be greater than 0") 61 | } 62 | 63 | if start > end { 64 | return nil, errors.New("page range end must be greater than the start") 65 | } 66 | 67 | for page := start; page <= end; page++ { 68 | pages = append(pages, page) 69 | } 70 | 71 | continue 72 | } 73 | 74 | page, err := strconv.Atoi(indices[0]) 75 | if err != nil { 76 | return nil, errors.New("invalid page number") 77 | } 78 | 79 | pages = append(pages, page) 80 | } 81 | 82 | pages = uniqueIntSlice(pages) 83 | sort.Ints(pages) 84 | 85 | return pages, nil 86 | } 87 | 88 | func parsePageRangeUnsorted(pageRange string) ([]int, error) { 89 | var pages []int 90 | 91 | rngs := strings.Split(removeSpaces(pageRange), ",") 92 | for _, rng := range rngs { 93 | if rng == "" { 94 | continue 95 | } 96 | 97 | indices := strings.Split(rng, "-") 98 | lenIndices := len(indices) 99 | if lenIndices > 2 { 100 | return nil, errors.New("invalid page range") 101 | } 102 | if lenIndices == 2 { 103 | start, err := strconv.Atoi(indices[0]) 104 | if err != nil { 105 | return nil, errors.New("invalid start page number") 106 | } 107 | if start < 1 { 108 | return nil, errors.New("page range start must be greater than 0") 109 | } 110 | 111 
| end, err := strconv.Atoi(indices[1]) 112 | if err != nil { 113 | return nil, errors.New("invalid end page number") 114 | } 115 | if end < 1 { 116 | return nil, errors.New("page range end must be greater than 0") 117 | } 118 | 119 | if start > end { 120 | return nil, errors.New("page range end must be greater than the start") 121 | } 122 | 123 | for page := start; page <= end; page++ { 124 | pages = append(pages, page) 125 | } 126 | 127 | continue 128 | } 129 | 130 | page, err := strconv.Atoi(indices[0]) 131 | if err != nil { 132 | return nil, errors.New("invalid page number") 133 | } 134 | 135 | pages = append(pages, page) 136 | } 137 | 138 | pages = uniqueIntSlice(pages) 139 | 140 | return pages, nil 141 | } 142 | 143 | func parseInputPaths(inputPaths []string, recursive bool, matcher fileMatcher) ([]string, error) { 144 | var err error 145 | var files []string 146 | acc := map[string]bool{} 147 | 148 | for _, inputPath := range inputPaths { 149 | // Convert relative paths to absolute ones. 150 | if !filepath.IsAbs(inputPath) { 151 | inputPath, err = filepath.Abs(inputPath) 152 | if err != nil { 153 | return nil, err 154 | } 155 | } 156 | 157 | // Add visited file to the accumulator. 158 | if _, ok := acc[inputPath]; ok { 159 | continue 160 | } 161 | acc[inputPath] = true 162 | 163 | // Get file info. 164 | inputFile, err := os.Lstat(inputPath) 165 | if err != nil { 166 | return nil, err 167 | } 168 | 169 | // Check file type. 170 | switch mode := inputFile.Mode(); { 171 | case mode.IsRegular(): 172 | if matcher == nil || matcher(inputPath) { 173 | files = append(files, inputPath) 174 | } 175 | case mode.IsDir(): 176 | dirFiles, err := parseInputDir(inputPath, recursive, acc, matcher) 177 | if err != nil { 178 | return nil, err 179 | } 180 | files = append(files, dirFiles...) 
181 | } 182 | } 183 | 184 | return files, nil 185 | } 186 | 187 | func parseInputDir(dir string, recursive bool, acc map[string]bool, matcher fileMatcher) ([]string, error) { 188 | inputPaths, err := dirFiles(dir) 189 | if err != nil { 190 | return nil, err 191 | } 192 | 193 | if acc == nil { 194 | acc = map[string]bool{} 195 | } 196 | 197 | var files []string 198 | for _, inputPath := range inputPaths { 199 | // Convert relative paths to absolute ones. 200 | inputPath = filepath.Join(dir, inputPath) 201 | if !filepath.IsAbs(inputPath) { 202 | inputPath, err = filepath.Abs(inputPath) 203 | if err != nil { 204 | return nil, err 205 | } 206 | } 207 | 208 | // Add visited file to the accumulator. 209 | if _, ok := acc[inputPath]; ok { 210 | continue 211 | } 212 | acc[inputPath] = true 213 | 214 | // Get file info. 215 | inputFile, err := os.Lstat(inputPath) 216 | if err != nil { 217 | return nil, err 218 | } 219 | 220 | // Check file type. 221 | switch mode := inputFile.Mode(); { 222 | case mode.IsRegular(): 223 | if matcher == nil || matcher(inputPath) { 224 | files = append(files, inputPath) 225 | } 226 | case mode.IsDir(): 227 | if !recursive { 228 | continue 229 | } 230 | 231 | subdirFiles, err := parseInputDir(inputPath, recursive, acc, matcher) 232 | if err != nil { 233 | return nil, err 234 | } 235 | files = append(files, subdirFiles...) 
// dirFiles returns the names (not the full paths) of all the entries of dir.
func dirFiles(dir string) ([]string, error) {
	f, err := os.Open(dir)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	return f.Readdirnames(-1)
}

// generateOutputPath returns the path an output file should be written to.
//
// If overwrite is true, inputPath itself is returned. Otherwise, if
// outputDir is set, the file keeps its name and is placed in outputDir. If
// neither applies, the file is placed next to the input file and named
// "<name>_<nameSuffix>.pdf", where <name> is the input name without its
// extension.
func generateOutputPath(inputPath, outputDir, nameSuffix string, overwrite bool) string {
	if overwrite {
		return inputPath
	}

	dir, name := filepath.Split(inputPath)
	if outputDir != "" {
		return filepath.Join(outputDir, name)
	}

	name = strings.TrimSuffix(name, filepath.Ext(name))
	return filepath.Join(dir, fmt.Sprintf("%s_%s.pdf", name, nameSuffix))
}

// clampInt limits val to the interval [minimum, maximum].
func clampInt(val, minimum, maximum int) int {
	if val < minimum {
		return minimum
	}
	if val > maximum {
		return maximum
	}

	return val
}

// removeSpaces returns s with every Unicode whitespace character removed,
// including whitespace in the middle of the string, so that an input such as
// "1 - 3, 4" becomes "1-3,4".
func removeSpaces(s string) string {
	return strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) {
			// A negative value drops the rune from the result.
			return -1
		}
		return r
	}, s)
}

// uniqueIntSlice returns the distinct values of items, in order of first
// appearance. The result is never nil.
func uniqueIntSlice(items []int) []int {
	uniq := make([]int, 0, len(items))
	seen := make(map[int]struct{}, len(items))

	for _, item := range items {
		if _, ok := seen[item]; ok {
			continue
		}

		seen[item] = struct{}{}
		uniq = append(uniq, item)
	}

	return uniq
}

// printErr prints the formatted message and terminates the program with
// exit status 1.
func printErr(format string, a ...interface{}) {
	fmt.Printf(format, a...)
	os.Exit(1)
}
308 | cmd.Help() 309 | os.Exit(1) 310 | } 311 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # unipdf-cli 2 | 3 | [![Build Status](https://travis-ci.org/unidoc/unipdf-cli.svg?branch=master)](https://travis-ci.org/unidoc/unipdf-cli) 4 | [![GoDoc](https://godoc.org/github.com/unidoc/unipdf-cli?status.svg)](https://godoc.org/github.com/unidoc/unipdf-cli) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/unidoc/unipdf-cli)](https://goreportcard.com/report/github.com/unidoc/unipdf-cli) 6 | 7 | unipdf-cli is a CLI tool which makes working with PDF files very easy. It supports 8 | the most common PDF operations. The application is written in Golang and is 9 | powered by the [UniPDF](https://github.com/unidoc/unipdf) PDF library. 10 | 11 | ## Features 12 | 13 | - [Merge PDF files](#merge) 14 | - [Split PDF files](#split) 15 | - [Explode PDF files](#explode) 16 | - [Encrypt PDF files](#encrypt) 17 | - [Decrypt PDF files](#decrypt) 18 | - [Change user/owner password](#passwd) 19 | - [Optimize PDF files](#optimize) 20 | - [Rotate PDF pages](#rotate) 21 | - [Add watermark images to PDF files](#watermark) 22 | - [Convert PDF files to grayscale](#grayscale) 23 | - [Validate and print PDF file information](#info) 24 | - [Extract text from PDF files](#extract-text) 25 | - [Extract images from PDF files](#extract-images) 26 | - [Search text in PDF files](#search) 27 | - [Replace text in PDF files](#replace) 28 | - [Export PDF form fields as JSON](#form-export) 29 | - [Fill PDF form fields from JSON file](#form-fill) 30 | - [Fill PDF form fields from FDF file](#fdf-merge) 31 | - [Flatten PDF form fields](#form-flatten) 32 | - [Render PDF pages to images](#render) 33 | 34 | ## Short demo 35 | 36 | [![asciicast](https://i.imgur.com/nQZq6T7.png)](https://asciinema.org/a/220314) 37 | 38 | ## Installation 39 | 40 | Minimum required Go 
version: 1.23. We officially support the 3 latest minor versions of Go. 41 | 42 | ``` 43 | git clone git@github.com:unidoc/unipdf-cli.git 44 | cd unipdf-cli/cmd/unipdf 45 | go build 46 | ``` 47 | 48 | ## Showcase 49 | 50 | #### Grayscale conversion 51 | 52 | ![grayscale example](https://i.imgur.com/9QgXWUc.png) 53 | 54 | #### Add watermark 55 | 56 | ![watermark example](https://i.imgur.com/GIRsTnT.png) 57 | 58 | ## Usage 59 | 60 | #### Merge 61 | 62 | Merge multiple PDF files into a single output file. 63 | 64 | ``` 65 | unipdf merge OUTPUT_FILE INPUT_FILE... 66 | 67 | Examples: 68 | unipdf merge output_file.pdf input_file1.pdf input_file2.pdf 69 | ``` 70 | 71 | #### Split 72 | 73 | Extract one or more page ranges from a PDF file and save the result as a 74 | single output file. 75 | 76 | ``` 77 | unipdf split [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES] 78 | 79 | Flags: 80 | -p, --password string PDF file password 81 | 82 | Examples: 83 | unipdf split input_file.pdf output_file.pdf 1-2 84 | unipdf split -p pass input_file.pdf output_file.pdf 1-2,4 85 | 86 | PAGES argument example: 1-3,4,6-7 87 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file, 88 | while page number 5 is skipped. 89 | ``` 90 | 91 | #### Explode 92 | 93 | Splits the input file into separate single page PDF files and saves the result 94 | as a ZIP archive. 95 | 96 | ``` 97 | Usage: 98 | unipdf explode [FLAG]... INPUT_FILE 99 | 100 | Flags: 101 | -o, --output-file string Output file 102 | -P, --pages string Pages to extract from the input file 103 | -p, --password string Input file password 104 | 105 | Examples: 106 | unipdf explode input_file.pdf 107 | unipdf explode -o pages.zip input_file.pdf 108 | unipdf explode -o pages.zip -P 1-3 input_file.pdf 109 | unipdf explode -o pages.zip -P 1-3 -p pass input_file.pdf 110 | 111 | Pages flag example: 1-3,4,6-7 112 | Pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be extracted, while page 113 | number 5 is skipped.
114 | ``` 115 | 116 | #### Encrypt 117 | 118 | Add password protection to PDF files. Owner and user passwords can be 119 | specified, along with a set of user permissions. The encryption algorithm 120 | used for protecting the file is configurable. 121 | 122 | ``` 123 | unipdf encrypt [FLAG]... INPUT_FILE OWNER_PASSWORD [USER_PASSWORD] 124 | 125 | Flags: 126 | -m, --mode string Algorithm to use for encrypting the file (default "rc4") 127 | -o, --output-file string Output file 128 | -P, --perms string User permissions (default "all") 129 | 130 | Examples: 131 | unipdf encrypt input_file.pdf owner_pass 132 | unipdf encrypt input_file.pdf owner_pass user_pass 133 | unipdf encrypt -o output_file.pdf -m aes256 input_file.pdf owner_pass user_pass 134 | unipdf encrypt -o output_file.pdf -P none -m aes256 input_file.pdf owner_pass user_pass 135 | unipdf encrypt -o output_file.pdf -P modify,annotate -m aes256 input_file.pdf owner_pass user 136 | 137 | Supported encryption algorithms: 138 | - rc4 (default) 139 | - aes128 140 | - aes256 141 | 142 | Supported user permissions: 143 | - all (default) 144 | - none 145 | - print-low-res 146 | - print-high-res 147 | - modify 148 | - extract 149 | - extract-graphics 150 | - annotate 151 | - fill-forms 152 | - rotate 153 | ``` 154 | 155 | #### Decrypt 156 | 157 | Remove password protection from PDF files. 158 | 159 | ``` 160 | unipdf decrypt [FLAG]... INPUT_FILE 161 | 162 | Flags: 163 | -o, --output-file string Output file 164 | -p, --password string PDF file password 165 | 166 | Examples: 167 | unipdf decrypt -p pass input_file.pdf 168 | unipdf decrypt -p pass -o output_file.pdf input_file.pdf 169 | ``` 170 | 171 | #### Passwd 172 | 173 | Change protected PDF user/owner password. 174 | 175 | ``` 176 | unipdf passwd [FLAG]... 
INPUT_FILE NEW_OWNER_PASSWORD [NEW_USER_PASSWORD] 177 | 178 | Flags: 179 | -o, --output-file string Output file 180 | -p, --password string PDF file password 181 | 182 | Examples: 183 | unipdf passwd -p pass input_file.pdf new_owner_pass 184 | unipdf passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass 185 | unipdf passwd -p pass -o output_file.pdf input_file.pdf new_owner_pass new_user_pass 186 | ``` 187 | 188 | #### Optimize 189 | 190 | Optimize PDF files by optimizing structure, compression and image quality. 191 | 192 | The command can take multiple files and directories as input parameters. 193 | By default, each PDF file is saved in the same location as the original file, 194 | appending the "_optimized" suffix to the file name. Use the --overwrite flag 195 | to overwrite the original files. 196 | In addition, the optimized output files can be saved to a different directory 197 | by using the --target-dir flag. 198 | The command can search for PDF files inside the subdirectories of the 199 | specified input directories by using the --recursive flag. 200 | 201 | The quality of the images in the output files can be configured through 202 | the --image-quality flag (default 90). 203 | The resolution of the output images can be controlled using the --image-ppi flag. 204 | Common pixels per inch values are 100 (screen), 150-300 (print), 600 (art). If 205 | not specified, the PPI of the output images is 100. 206 | 207 | ``` 208 | unipdf optimize [FLAG]... INPUT_FILES... 
209 | 210 | Flags: 211 | -P, --image-ppi float output images pixels per inch (default 100) 212 | -q, --image-quality int output JPEG image quality (default 90) 213 | -O, --overwrite overwrite input files 214 | -p, --password string file password 215 | -r, --recursive search PDF files in subdirectories 216 | -t, --target-dir string output directory 217 | 218 | Examples: 219 | unipdf optimize file_1.pdf file_n.pdf 220 | unipdf optimize -O file_1.pdf file_n.pdf 221 | unipdf optimize -O -r file_1.pdf file_n.pdf dir_1 dir_n 222 | unipdf optimize -t out_dir file_1.pdf file_n.pdf dir_1 dir_n 223 | unipdf optimize -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n 224 | unipdf optimize -t out_dir -r -q 75 file_1.pdf file_n.pdf dir_1 dir_n 225 | unipdf optimize -t out_dir -r -q 75 -P 100 file_1.pdf file_n.pdf dir_1 dir_n 226 | unipdf optimize -t out_dir -r -q 75 -P 100 -p pass file_1.pdf file_n.pdf dir_1 dir_n 227 | ``` 228 | 229 | #### Rotate 230 | 231 | Rotate PDF file pages by a specified angle. The angle argument is specified in 232 | degrees and it must be a multiple of 90. 233 | 234 | ``` 235 | unipdf rotate [FLAG]... INPUT_FILE ANGLE 236 | 237 | Flags: 238 | -o, --output-file string Output file 239 | -P, --pages string Pages to rotate 240 | -p, --password string PDF file password 241 | 242 | Examples: 243 | unipdf rotate input_file.pdf 90 244 | unipdf rotate -- input_file.pdf -270 245 | unipdf rotate -o output_file.pdf input_file.pdf 90 246 | unipdf rotate -o output_file.pdf -P 1-3 input_file.pdf 90 247 | unipdf rotate -o output_file.pdf -P 1-3 -p pass input_file.pdf 90 248 | 249 | Pages flag example: 1-3,4,6-7 250 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be rotated, while 251 | page number 5 is skipped. 252 | ``` 253 | 254 | #### Watermark 255 | 256 | Add watermark images to PDF files. 257 | 258 | ``` 259 | unipdf watermark [FLAG]... 
INPUT_FILE WATERMARK_IMAGE 260 | 261 | Flags: 262 | -o, --output-file string Output file 263 | -P, --pages string Pages on which to add watermark 264 | -p, --password string PDF file password 265 | 266 | Examples: 267 | unipdf watermark input_file.pdf watermark.png 268 | unipdf watermark -o output_file.pdf input_file.pdf watermark.png 269 | unipdf watermark -o output_file.pdf -P 1-3 input_file.pdf watermark.png 270 | unipdf watermark -o output_file.pdf -P 1-3 -p pass input_file.pdf watermark.png 271 | 272 | Pages flag example: 1-3,4,6-7 273 | Watermark will only be applied to pages 1,2,3 (1-3), 4 and 6,7 (6-7), while 274 | page number 5 is skipped. 275 | ``` 276 | 277 | #### Grayscale 278 | 279 | Convert PDF files to grayscale. 280 | 281 | ``` 282 | unipdf grayscale [FLAG]... INPUT_FILE 283 | 284 | Flags: 285 | -o, --output-file string Output file 286 | -P, --pages string Pages to convert to grayscale 287 | -p, --password string PDF file password 288 | 289 | Examples: 290 | unipdf grayscale input_file.pdf 291 | unipdf grayscale -o output_file input_file.pdf 292 | unipdf grayscale -o output_file -P 1-3 input_file.pdf 293 | unipdf grayscale -o output_file -P 1-3 -p pass input_file.pdf 294 | 295 | Pages flag example: 1-3,4,6-7 296 | Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be converted to grayscale, while 297 | page number 5 is skipped. 298 | ``` 299 | 300 | #### Info 301 | 302 | Outputs file information. Also does some basic validation. 303 | 304 | ``` 305 | unipdf info [FLAG]... INPUT_FILE 306 | 307 | Flags: 308 | -p, --password string PDF file password 309 | 310 | Examples: 311 | unipdf info input_file.pdf 312 | unipdf info -p pass input_file.pdf 313 | ``` 314 | 315 | #### Extract text 316 | 317 | Extracts PDF text. The extracted text is always printed to STDOUT. 318 | 319 | ``` 320 | unipdf extract text [FLAG]... 
INPUT_FILE 321 | 322 | Flags: 323 | -P, --pages string Pages to extract text from 324 | -p, --user-password string Input file password 325 | 326 | Examples: 327 | unipdf extract text input_file.pdf 328 | unipdf extract text -P 1-3 input_file.pdf 329 | unipdf extract text -P 1-3 -p pass input_file.pdf 330 | 331 | Pages flag example: 1-3,4,6-7 332 | Text will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while 333 | page number 5 is skipped. 334 | ``` 335 | 336 | #### Extract images 337 | 338 | Extracts PDF images. The images are extracted in a ZIP file and saved at the 339 | destination specified by the --output-file parameter. If no output file is 340 | specified, the ZIP archive is saved in the same directory as the input file. 341 | 342 | ``` 343 | unipdf extract images [FLAG]... INPUT_FILE 344 | 345 | Flags: 346 | -S, --include-inline-stencil-masks Include inline stencil masks 347 | -o, --output-file string Output file 348 | -P, --pages string Pages to extract images from 349 | -p, --password string Input file password 350 | 351 | Examples: 352 | unipdf extract images input_file.pdf 353 | unipdf extract images -o images.zip input_file.pdf 354 | unipdf extract images -P 1-3 -p pass -o images.zip input_file.pdf 355 | 356 | Pages flag example: 1-3,4,6-7 357 | Images will only be extracted from pages 1,2,3 (1-3), 4 and 6,7 (6-7), while 358 | page number 5 is skipped. 359 | ``` 360 | 361 | #### Search 362 | 363 | Search text in PDF files. 364 | 365 | ``` 366 | unipdf search [FLAG]... INPUT_FILE TEXT 367 | 368 | Flags: 369 | -p, --password string PDF file password 370 | 371 | Examples: 372 | unipdf search input_file.pdf text_to_search 373 | unipdf search -p pass input_file.pdf text_to_search 374 | ``` 375 | 376 | #### Replace 377 | 378 | Replace text in PDF files. 379 | 380 | ``` 381 | unipdf replace [FLAG]... 
INPUT_FILE TEXT 382 | 383 | Flags: 384 | -o, --output-file string output file 385 | -r, --replace-text string replacement text 386 | -p, --password string PDF file password 387 | 388 | Examples: 389 | unipdf replace input_file.pdf text_to_search 390 | unipdf replace -o output_file.pdf input_file.pdf text_to_search 391 | unipdf replace -o output_file.pdf -r replacement_text input_file.pdf text_to_search 392 | unipdf replace -o output_file.pdf -r replacement_text -p pass input_file.pdf text_to_search 393 | ``` 394 | 395 | 396 | #### Form Export 397 | 398 | Export JSON representation of form fields. 399 | 400 | By default, the resulting JSON content is printed to STDOUT. The output can be 401 | saved to a file by using the --output-file flag. 402 | 403 | ``` 404 | unipdf form export [FLAG]... INPUT_FILE 405 | 406 | Flags: 407 | -o, --output-file string output file 408 | 409 | Examples: 410 | unipdf form export in_file.pdf 411 | unipdf form export in_file.pdf > out_file.json 412 | unipdf form export -o out_file.json in_file.pdf 413 | ``` 414 | 415 | #### Form Fill 416 | 417 | Fill form fields from JSON file. 418 | 419 | The field values specified in the JSON file template are used to fill the form 420 | fields in the input PDF files. In addition, the output file form fields can be 421 | flattened by using the --flatten flag. The flattening process makes the form 422 | fields of the output files read-only by appending the form field annotation 423 | XObject Form data to the page content stream, thus making it part of the page 424 | contents. 425 | 426 | The command can take multiple files and directories as input parameters. 427 | By default, each PDF file is saved in the same location as the original file, 428 | appending the "_filled" suffix to the file name. Use the --overwrite flag 429 | to overwrite the original files. 430 | In addition, the filled output files can be saved to a different directory 431 | by using the --target-dir flag. 
432 | The command can search for PDF files inside the subdirectories of the 433 | specified input directories by using the --recursive flag. 434 | 435 | ``` 436 | unipdf form fill [FLAG]... JSON_FILE INPUT_FILES... 437 | 438 | Flags: 439 | -f, --flatten flatten form annotations 440 | -O, --overwrite overwrite input files 441 | -p, --password string input file password 442 | -r, --recursive search PDF files in subdirectories 443 | -t, --target-dir string output directory 444 | 445 | Examples: 446 | unipdf form fill fields.json file_1.pdf file_n.pdf 447 | unipdf form fill -O fields.json file_1.pdf file_n.pdf 448 | unipdf form fill -O -r -f fields.json file_1.pdf file_n.pdf dir_1 dir_n 449 | unipdf form fill -t out_dir fields.json file_1.pdf file_n.pdf dir_1 dir_n 450 | unipdf form fill -t out_dir -r fields.json file_1.pdf file_n.pdf dir_1 dir_n 451 | unipdf form fill -t out_dir -r -p pass fields.json file_1.pdf file_n.pdf dir_1 dir_n 452 | ``` 453 | #### FDF Merge 454 | 455 | Fill form fields from FDF file. 456 | 457 | The field values specified in the FDF file template are used to fill the form 458 | fields in the input PDF files. In addition, the output file form fields can be 459 | flattened by using the --flatten flag. The flattening process makes the form 460 | fields of the output files read-only by appending the form field annotation 461 | XObject Form data to the page content stream, thus making it part of the page 462 | contents. 463 | 464 | The command can take multiple files and directories as input parameters. 465 | By default, each PDF file is saved in the same location as the original file, 466 | appending the "_filled" suffix to the file name. Use the --overwrite flag 467 | to overwrite the original files. 468 | In addition, the filled output files can be saved to a different directory 469 | by using the --target-dir flag. 
470 | The command can search for PDF files inside the subdirectories of the 471 | specified input directories by using the --recursive flag. 472 | 473 | ``` 474 | Usage: 475 | unipdf form fdfmerge [FLAG]... FDF_FILE INPUT_FILES... 476 | 477 | Flags: 478 | -f, --flatten flatten form annotations 479 | -O, --overwrite overwrite input files 480 | -p, --password string input file password 481 | -r, --recursive search PDF files in subdirectories 482 | -t, --target-dir string output directory 483 | 484 | Examples: 485 | unipdf form fdfmerge fields.fdf file_1.pdf file_n.pdf 486 | unipdf form fdfmerge -O fields.fdf file_1.pdf file_n.pdf 487 | unipdf form fdfmerge -O -r -f fields.fdf file_1.pdf file_n.pdf dir_1 dir_n 488 | unipdf form fdfmerge -t out_dir fields.fdf file_1.pdf file_n.pdf dir_1 dir_n 489 | unipdf form fdfmerge -t out_dir -r fields.fdf file_1.pdf file_n.pdf dir_1 dir_n 490 | unipdf form fdfmerge -t out_dir -r -p pass fields.fdf file_1.pdf file_n.pdf dir_1 dir_n 491 | ``` 492 | 493 | #### Form Flatten 494 | 495 | Flatten PDF file form annotations. 496 | 497 | The flattening process makes the form fields of the output files read-only by 498 | appending the form field annotation XObject Form data to the page content 499 | stream, thus making it part of the page contents. 500 | 501 | The command can take multiple files and directories as input parameters. 502 | By default, each PDF file is saved in the same location as the original file, 503 | appending the "_flattened" suffix to the file name. Use the --overwrite flag 504 | to overwrite the original files. 505 | In addition, the flattened output files can be saved to a different directory 506 | by using the --target-dir flag. 507 | The command can search for PDF files inside the subdirectories of the 508 | specified input directories by using the --recursive flag. 509 | 510 | ``` 511 | unipdf form flatten [FLAG]... INPUT_FILES... 
512 | 513 | Flags: 514 | -O, --overwrite overwrite input files 515 | -p, --password string input file password 516 | -r, --recursive search PDF files in subdirectories 517 | -t, --target-dir string output directory 518 | 519 | Examples: 520 | unipdf form flatten file_1.pdf file_n.pdf 521 | unipdf form flatten -O file_1.pdf file_n.pdf 522 | unipdf form flatten -O -r file_1.pdf file_n.pdf dir_1 dir_n 523 | unipdf form flatten -t out_dir file_1.pdf file_n.pdf dir_1 dir_n 524 | unipdf form flatten -t out_dir -r file_1.pdf file_n.pdf dir_1 dir_n 525 | unipdf form flatten -t out_dir -r -p pass file_1.pdf file_n.pdf dir_1 dir_n 526 | ``` 527 | 528 | #### Render 529 | 530 | Render PDF pages to image targets. 531 | 532 | The rendered image files are saved in a ZIP file, at the location specified 533 | by the --output-file parameter. If no output file is specified, the ZIP file 534 | is saved in the same directory as the input file. 535 | 536 | The format of the rendered image files can be specified using 537 | the --image-format flag (default jpeg). The quality of the image files can be 538 | configured through the --image-quality flag (default 100, only applies to 539 | JPEG images). 540 | ``` 541 | unipdf render [FLAG]... 
INPUT_FILE 542 | 543 | Flags: 544 | -f, --image-format string format of the output images (default "jpeg") 545 | -q, --image-quality int quality of the output images (default 100) 546 | -o, --output-file string output file 547 | -P, --pages string pages to render from the input file 548 | -p, --password string input file password 549 | 550 | Examples: 551 | unipdf render in_file.pdf 552 | unipdf render -o images.zip in_file.pdf 553 | unipdf render -o images.zip -P 1-3 in_file.pdf 554 | unipdf render -o images.zip -P 1-3 -p pass in_file.pdf 555 | unipdf render -o images.zip -P 1-3 -p pass -f jpeg -q 100 in_file.pdf 556 | 557 | Pages flag example: 1-3,4,6-7 558 | Images will only be rendered for pages 1,2,3 (1-3), 4 and 6,7 (6-7), while 559 | page number 5 is skipped. 560 | 561 | Supported image formats: 562 | - jpeg (default) 563 | - png 564 | ``` 565 | 566 | #### License Info 567 | 568 | Get information about the license key that is being loaded by unipdf-cli. 569 | 570 | ``` 571 | Example: 572 | unipdf license_info 573 | ``` 574 | 575 | ## License 576 | 577 | unipdf-cli requires license codes to operate. There are two options: 578 | - Metered License API keys: Free ones can be obtained at https://cloud.unidoc.io 579 | - Offline Perpetual codes: Can be purchased at https://unidoc.io/pricing 580 | 581 | ## Offline License 582 | Offline licenses are cryptography-based and contain full signed information that is verified based on signatures without making any outbound connections, 583 | hence the name "offline". This kind of license is suitable for users deploying OEM products to their customers or where there are strict restrictions 584 | on outbound connections due to firewalls and/or compliance requirements. 585 | 586 | If you have a license for [UniPDF](https://github.com/unidoc/unipdf), you can 587 | set it through the UNIDOC_LICENSE_FILE and UNIDOC_LICENSE_CUSTOMER environment 588 | variables. 
589 | 590 | ``` 591 | export UNIDOC_LICENSE_FILE="PATH_TO_LICENSE_FILE" 592 | export UNIDOC_LICENSE_CUSTOMER="CUSTOMER_NAME" 593 | ``` 594 | 595 | ## Metered License (API keys) 596 | The metered license is the most convenient way to get started with UniDoc products and the Free tier enables a powerful way to get started for free. 597 | Anyone can get a free metered API key by signing up on http://cloud.unidoc.io/ 598 | 599 | If you have a metered license (API keys), you can set it through the UNIDOC_LICENSE_API_KEY environment variable. 600 | 601 | ``` 602 | export UNIDOC_LICENSE_API_KEY="unidoc_metered_api_key" 603 | ``` 604 | -------------------------------------------------------------------------------- /pkg/pdf/grayscale.go: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is subject to the terms and conditions defined in 3 | * file 'LICENSE.md', which is part of this source code package. 4 | */ 5 | 6 | package pdf 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | 12 | unicommon "github.com/unidoc/unipdf/v4/common" 13 | unicontent "github.com/unidoc/unipdf/v4/contentstream" 14 | unicore "github.com/unidoc/unipdf/v4/core" 15 | unipdf "github.com/unidoc/unipdf/v4/model" 16 | "github.com/unidoc/unipdf/v4/ps" 17 | ) 18 | 19 | // Grayscale converts the pages of the PDF file specified by the inputPath 20 | // parameter to grayscale. A password can be specified for encrypted PDF files. 21 | // A list of pages to convert to grayscale can be passed in. Every page that 22 | // is not included in the pages slice is left intact. 23 | // If the pages parameter is nil or an empty slice, all the pages of the input 24 | // file are converted to grayscale. 25 | func Grayscale(inputPath, outputPath, password string, pages []int) error { 26 | // Read input file. 27 | r, pageCount, _, _, err := readPDF(inputPath, password) 28 | if err != nil { 29 | return err 30 | } 31 | 32 | // Add pages. 
33 | if len(pages) == 0 { 34 | pages = createPageRange(pageCount) 35 | } 36 | 37 | w := unipdf.NewPdfWriter() 38 | for i := 0; i < pageCount; i++ { 39 | numPage := i + 1 40 | 41 | page, err := r.GetPage(numPage) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | var convert bool 47 | for _, page := range pages { 48 | if page == numPage { 49 | convert = true 50 | break 51 | } 52 | } 53 | 54 | if convert { 55 | if err = convertPageToGrayscale(page); err != nil { 56 | return err 57 | } 58 | } 59 | 60 | if err = w.AddPage(page); err != nil { 61 | return err 62 | } 63 | } 64 | 65 | // Write output file. 66 | safe := inputPath == outputPath 67 | return writePDF(outputPath, &w, safe) 68 | } 69 | 70 | // convertPageToGrayscale replaces color objects on the page with grayscale 71 | // ones. Also references XObject Images and Forms to convert those to grayscale. 72 | func convertPageToGrayscale(page *unipdf.PdfPage) error { 73 | // Fetch the page content streams as a single string. 74 | contents, err := page.GetAllContentStreams() 75 | if err != nil { 76 | return err 77 | } 78 | 79 | grayContent, err := transformContentStreamToGrayscale(contents, page.Resources) 80 | if err != nil { 81 | return err 82 | } 83 | 84 | // Store the converted stream as the page contents. The error returned by 85 | // SetContentStreams used to be discarded; return it so that a failed 86 | // update is not reported as success. 87 | return page.SetContentStreams([]string{string(grayContent)}, unicore.NewFlateEncoder()) 88 | } 89 | 90 | // Check if colorspace represents a Pattern colorspace. 
91 | func isPatternCS(cs unipdf.PdfColorspace) bool { 92 | _, isPattern := cs.(*unipdf.PdfColorspaceSpecialPattern) 93 | return isPattern 94 | } 95 | 96 | func transformContentStreamToGrayscale(contents string, resources *unipdf.PdfPageResources) ([]byte, error) { 97 | cstreamParser := unicontent.NewContentStreamParser(contents) 98 | operations, err := cstreamParser.Parse() 99 | if err != nil { 100 | return nil, err 101 | } 102 | processedOperations := &unicontent.ContentStreamOperations{} 103 | 104 | transformedPatterns := map[unicore.PdfObjectName]bool{} // List of already transformed patterns. Avoid multiple conversions. 105 | transformedShadings := map[unicore.PdfObjectName]bool{} // List of already transformed shadings. Avoid multiple conversions. 106 | 107 | // The content stream processor keeps track of the graphics state and we can make our own handlers to process certain commands, 108 | // using the AddHandler method. In this case, we hook up to color related operands, and for image and form handling. 109 | processor := unicontent.NewContentStreamProcessor(*operations) 110 | // Add handlers for colorspace related functionality. 111 | processor.AddHandler(unicontent.HandlerConditionEnumAllOperands, "", 112 | func(op *unicontent.ContentStreamOperation, gs unicontent.GraphicsState, resources *unipdf.PdfPageResources) error { 113 | operand := op.Operand 114 | switch operand { 115 | case "CS": // Set colorspace operands (stroking). 116 | if isPatternCS(gs.ColorspaceStroking) { 117 | // If referring to a pattern colorspace with an external definition, need to update the definition. 118 | // If has an underlying colorspace, then go and change it to DeviceGray. 119 | // Needs to be specified externally in the colorspace resources. 120 | 121 | csname := op.Params[0].(*unicore.PdfObjectName) 122 | if *csname != "Pattern" { 123 | // Update if referring to an external colorspace in resources. 
124 | cs, ok := resources.GetColorspaceByName(*csname) 125 | if !ok { 126 | unicommon.Log.Debug("Undefined colorspace for pattern (%s)", csname) 127 | return errors.New("colorspace not defined") 128 | } 129 | 130 | patternCS, ok := cs.(*unipdf.PdfColorspaceSpecialPattern) 131 | if !ok { 132 | return errors.New("type error") 133 | } 134 | 135 | if patternCS.UnderlyingCS != nil { 136 | // Swap out for a gray colorspace. 137 | patternCS.UnderlyingCS = unipdf.NewPdfColorspaceDeviceGray() 138 | } 139 | 140 | resources.SetColorspaceByName(*csname, patternCS) 141 | } 142 | *processedOperations = append(*processedOperations, op) 143 | return nil 144 | } 145 | 146 | op := unicontent.ContentStreamOperation{} 147 | op.Operand = operand 148 | op.Params = []unicore.PdfObject{unicore.MakeName("DeviceGray")} 149 | *processedOperations = append(*processedOperations, &op) 150 | return nil 151 | case "cs": // Set colorspace operands (non-stroking). 152 | if isPatternCS(gs.ColorspaceNonStroking) { 153 | // If referring to a pattern colorspace with an external definition, need to update the definition. 154 | // If has an underlying colorspace, then go and change it to DeviceGray. 155 | // Needs to be specified externally in the colorspace resources. 156 | 157 | csname := op.Params[0].(*unicore.PdfObjectName) 158 | if *csname != "Pattern" { 159 | // Update if referring to an external colorspace in resources. 160 | cs, ok := resources.GetColorspaceByName(*csname) 161 | if !ok { 162 | unicommon.Log.Debug("Undefined colorspace for pattern (%s)", csname) 163 | return errors.New("colorspace not defined") 164 | } 165 | 166 | patternCS, ok := cs.(*unipdf.PdfColorspaceSpecialPattern) 167 | if !ok { 168 | return errors.New("type error") 169 | } 170 | 171 | if patternCS.UnderlyingCS != nil { 172 | // Swap out for a gray colorspace. 
173 | patternCS.UnderlyingCS = unipdf.NewPdfColorspaceDeviceGray() 174 | } 175 | 176 | resources.SetColorspaceByName(*csname, patternCS) 177 | } 178 | *processedOperations = append(*processedOperations, op) 179 | return nil 180 | } 181 | 182 | op := unicontent.ContentStreamOperation{} 183 | op.Operand = operand 184 | op.Params = []unicore.PdfObject{unicore.MakeName("DeviceGray")} 185 | *processedOperations = append(*processedOperations, &op) 186 | return nil 187 | 188 | case "SC", "SCN": // Set stroking color. Includes pattern colors. 189 | if isPatternCS(gs.ColorspaceStroking) { 190 | op := unicontent.ContentStreamOperation{} 191 | op.Operand = operand 192 | op.Params = []unicore.PdfObject{} 193 | 194 | patternColor, ok := gs.ColorStroking.(*unipdf.PdfColorPattern) 195 | if !ok { 196 | return errors.New("invalid stroking color type") 197 | } 198 | 199 | if patternColor.Color != nil { 200 | color, err := gs.ColorspaceStroking.ColorToRGB(patternColor.Color) 201 | if err != nil { 202 | fmt.Printf("Error: %v\n", err) 203 | return err 204 | } 205 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 206 | grayColor := rgbColor.ToGray() 207 | 208 | op.Params = append(op.Params, unicore.MakeFloat(grayColor.Val())) 209 | } 210 | 211 | if _, has := transformedPatterns[patternColor.PatternName]; has { 212 | // Already processed, need not change anything, except underlying color if used. 213 | op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName))) 214 | *processedOperations = append(*processedOperations, &op) 215 | return nil 216 | } 217 | transformedPatterns[patternColor.PatternName] = true 218 | 219 | // Look up the pattern name and convert it. 
220 | pattern, found := resources.GetPatternByName(patternColor.PatternName) 221 | if !found { 222 | return errors.New("undefined pattern name") 223 | } 224 | 225 | grayPattern, err := convertPatternToGray(pattern) 226 | if err != nil { 227 | unicommon.Log.Debug("Unable to convert pattern to grayscale: %v", err) 228 | return err 229 | } 230 | resources.SetPatternByName(patternColor.PatternName, grayPattern.ToPdfObject()) 231 | 232 | op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName))) 233 | *processedOperations = append(*processedOperations, &op) 234 | } else { 235 | color, err := gs.ColorspaceStroking.ColorToRGB(gs.ColorStroking) 236 | if err != nil { 237 | fmt.Printf("Error with ColorToRGB: %v\n", err) 238 | return err 239 | } 240 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 241 | grayColor := rgbColor.ToGray() 242 | 243 | op := unicontent.ContentStreamOperation{} 244 | op.Operand = operand 245 | op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())} 246 | *processedOperations = append(*processedOperations, &op) 247 | } 248 | 249 | return nil 250 | case "sc", "scn": // Set nonstroking color. 
251 | if isPatternCS(gs.ColorspaceNonStroking) { 252 | op := unicontent.ContentStreamOperation{} 253 | op.Operand = operand 254 | op.Params = []unicore.PdfObject{} 255 | 256 | patternColor, ok := gs.ColorNonStroking.(*unipdf.PdfColorPattern) 257 | if !ok { 258 | return errors.New("invalid stroking color type") 259 | } 260 | 261 | if patternColor.Color != nil { 262 | color, err := gs.ColorspaceNonStroking.ColorToRGB(patternColor.Color) 263 | if err != nil { 264 | fmt.Printf("Error : %v\n", err) 265 | return err 266 | } 267 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 268 | grayColor := rgbColor.ToGray() 269 | 270 | op.Params = append(op.Params, unicore.MakeFloat(grayColor.Val())) 271 | } 272 | 273 | if _, has := transformedPatterns[patternColor.PatternName]; has { 274 | // Already processed, need not change anything, except underlying color if used. 275 | op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName))) 276 | *processedOperations = append(*processedOperations, &op) 277 | return nil 278 | } 279 | transformedPatterns[patternColor.PatternName] = true 280 | 281 | // Look up the pattern name and convert it. 
282 | pattern, found := resources.GetPatternByName(patternColor.PatternName) 283 | if !found { 284 | return errors.New("undefined pattern name") 285 | } 286 | 287 | grayPattern, err := convertPatternToGray(pattern) 288 | if err != nil { 289 | unicommon.Log.Debug("Unable to convert pattern to grayscale: %v", err) 290 | return err 291 | } 292 | resources.SetPatternByName(patternColor.PatternName, grayPattern.ToPdfObject()) 293 | 294 | op.Params = append(op.Params, unicore.MakeName(string(patternColor.PatternName))) 295 | *processedOperations = append(*processedOperations, &op) 296 | } else { 297 | color, err := gs.ColorspaceNonStroking.ColorToRGB(gs.ColorNonStroking) 298 | if err != nil { 299 | fmt.Printf("Error: %v\n", err) 300 | return err 301 | } 302 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 303 | grayColor := rgbColor.ToGray() 304 | 305 | op := unicontent.ContentStreamOperation{} 306 | op.Operand = operand 307 | op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())} 308 | 309 | *processedOperations = append(*processedOperations, &op) 310 | } 311 | return nil 312 | case "RG", "K": // Set RGB or CMYK stroking color. 313 | color, err := gs.ColorspaceStroking.ColorToRGB(gs.ColorStroking) 314 | if err != nil { 315 | fmt.Printf("Error: %v\n", err) 316 | return err 317 | } 318 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 319 | grayColor := rgbColor.ToGray() 320 | 321 | op := unicontent.ContentStreamOperation{} 322 | op.Operand = "G" 323 | op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())} 324 | 325 | *processedOperations = append(*processedOperations, &op) 326 | return nil 327 | case "rg", "k": // Set RGB or CMYK as nonstroking color. 
328 | color, err := gs.ColorspaceNonStroking.ColorToRGB(gs.ColorNonStroking) 329 | if err != nil { 330 | fmt.Printf("Error: %v\n", err) 331 | return err 332 | } 333 | rgbColor := color.(*unipdf.PdfColorDeviceRGB) 334 | grayColor := rgbColor.ToGray() 335 | 336 | op := unicontent.ContentStreamOperation{} 337 | op.Operand = "g" 338 | op.Params = []unicore.PdfObject{unicore.MakeFloat(grayColor.Val())} 339 | 340 | *processedOperations = append(*processedOperations, &op) 341 | return nil 342 | case "sh": // Paints the shape and color defined by shading dict. 343 | if len(op.Params) != 1 { 344 | return errors.New("params to sh operator should be 1") 345 | } 346 | shname, ok := op.Params[0].(*unicore.PdfObjectName) 347 | if !ok { 348 | return errors.New("sh parameter should be a name") 349 | } 350 | if _, has := transformedShadings[*shname]; has { 351 | // Already processed, no need to do anything. 352 | *processedOperations = append(*processedOperations, op) 353 | return nil 354 | } 355 | transformedShadings[*shname] = true 356 | 357 | shading, found := resources.GetShadingByName(*shname) 358 | if !found { 359 | return errors.New("shading not defined in resources") 360 | } 361 | 362 | grayShading, err := convertShadingToGray(shading) 363 | if err != nil { 364 | return err 365 | } 366 | 367 | resources.SetShadingByName(*shname, grayShading.GetContext().ToPdfObject()) 368 | } 369 | *processedOperations = append(*processedOperations, op) 370 | 371 | return nil 372 | }) 373 | // Add handler for image related handling. Note that inline images are completely stored with a ContentStreamInlineImage 374 | // object as the parameter for BI. 
375 | processor.AddHandler(unicontent.HandlerConditionEnumOperand, "BI", 376 | func(op *unicontent.ContentStreamOperation, _ unicontent.GraphicsState, resources *unipdf.PdfPageResources) error { 377 | if len(op.Params) != 1 { 378 | fmt.Printf("BI Error invalid number of params\n") 379 | return errors.New("invalid number of parameters") 380 | } 381 | // Inline image. 382 | iimg, ok := op.Params[0].(*unicontent.ContentStreamInlineImage) 383 | if !ok { 384 | fmt.Printf("Error: Invalid handling for inline image\n") 385 | return errors.New("invalid inline image parameter") 386 | } 387 | 388 | img, err := iimg.ToImage(resources) 389 | if err != nil { 390 | fmt.Printf("Error converting inline image to image: %v\n", err) 391 | return err 392 | } 393 | 394 | cs, err := iimg.GetColorSpace(resources) 395 | if err != nil { 396 | fmt.Printf("Error getting color space for inline image: %v\n", err) 397 | return err 398 | } 399 | rgbImg, err := cs.ImageToRGB(*img) 400 | if err != nil { 401 | fmt.Printf("Error converting image to rgb: %v\n", err) 402 | return err 403 | } 404 | rgbColorSpace := unipdf.NewPdfColorspaceDeviceRGB() 405 | grayImage, err := rgbColorSpace.ImageToGray(rgbImg) 406 | if err != nil { 407 | fmt.Printf("Error converting img to gray: %v\n", err) 408 | return err 409 | } 410 | 411 | // Update the XObject image. 412 | // Use same encoder as input data. Make sure for DCT filter it is updated to 1 color component. 413 | encoder, err := iimg.GetEncoder() 414 | if err != nil { 415 | fmt.Printf("Error getting encoder for inline image: %v\n", err) 416 | return err 417 | } 418 | if dctEncoder, is := encoder.(*unicore.DCTEncoder); is { 419 | dctEncoder.ColorComponents = 1 420 | } 421 | 422 | grayInlineImg, err := unicontent.NewInlineImageFromImage(grayImage, encoder) 423 | if err != nil { 424 | if err == unicore.ErrUnsupportedEncodingParameters { 425 | // Unsupported encoding parameters, revert to a basic flate encoder without predictor. 
426 | encoder = unicore.NewFlateEncoder() 427 | } 428 | // Try again, fail on error. 429 | grayInlineImg, err = unicontent.NewInlineImageFromImage(grayImage, encoder) 430 | if err != nil { 431 | fmt.Printf("Error making a new inline image object: %v\n", err) 432 | return err 433 | } 434 | } 435 | 436 | // Replace inline image data with the gray image. 437 | pOp := unicontent.ContentStreamOperation{} 438 | pOp.Operand = "BI" 439 | pOp.Params = []unicore.PdfObject{grayInlineImg} 440 | *processedOperations = append(*processedOperations, &pOp) 441 | 442 | return nil 443 | }) 444 | 445 | // Handler for XObject Image and Forms. 446 | processedXObjects := map[string]bool{} // Keep track of processed XObjects to avoid repetition. 447 | 448 | processor.AddHandler(unicontent.HandlerConditionEnumOperand, "Do", 449 | func(op *unicontent.ContentStreamOperation, _ unicontent.GraphicsState, resources *unipdf.PdfPageResources) error { 450 | if len(op.Params) < 1 { 451 | fmt.Printf("ERROR: Invalid number of params for Do object.\n") 452 | return errors.New("range check") 453 | } 454 | 455 | // XObject. 456 | name := op.Params[0].(*unicore.PdfObjectName) 457 | 458 | // Only process each one once. 
459 | _, has := processedXObjects[string(*name)] 460 | if has { 461 | return nil 462 | } 463 | processedXObjects[string(*name)] = true 464 | 465 | _, xtype := resources.GetXObjectByName(*name) 466 | if xtype == unipdf.XObjectTypeImage { 467 | // fmt.Printf(" XObject Image: %s\n", *name) 468 | 469 | ximg, err := resources.GetXObjectImageByName(*name) 470 | if err != nil { 471 | fmt.Printf("Error w/GetXObjectImageByName : %v\n", err) 472 | return err 473 | } 474 | 475 | img, err := ximg.ToImage() 476 | if err != nil { 477 | fmt.Printf("Error w/ToImage: %v\n", err) 478 | return err 479 | } 480 | 481 | rgbImg, err := ximg.ColorSpace.ImageToRGB(*img) 482 | if err != nil { 483 | fmt.Printf("Error ImageToRGB: %v\n", err) 484 | return err 485 | } 486 | 487 | rgbColorSpace := unipdf.NewPdfColorspaceDeviceRGB() 488 | grayImage, err := rgbColorSpace.ImageToGray(rgbImg) 489 | if err != nil { 490 | fmt.Printf("Error ImageToGray: %v\n", err) 491 | return err 492 | } 493 | 494 | // Update the XObject image. 495 | // Use same encoder as input data. Make sure for DCT filter it is updated to 1 color component. 496 | encoder := ximg.Filter 497 | if dctEncoder, is := encoder.(*unicore.DCTEncoder); is { 498 | dctEncoder.ColorComponents = 1 499 | } 500 | 501 | ximgGray, err := unipdf.NewXObjectImageFromImage(&grayImage, nil, encoder) 502 | if err != nil { 503 | if err == unicore.ErrUnsupportedEncodingParameters { 504 | // Unsupported encoding parameters, revert to a basic flate encoder without predictor. 505 | encoder = unicore.NewFlateEncoder() 506 | } 507 | 508 | // Try again, fail if error. 509 | ximgGray, err = unipdf.NewXObjectImageFromImage(&grayImage, nil, encoder) 510 | if err != nil { 511 | fmt.Printf("Error creating image: %v\n", err) 512 | return err 513 | } 514 | } 515 | 516 | // Update the entry. 
517 | err = resources.SetXObjectImageByName(*name, ximgGray) 518 | if err != nil { 519 | fmt.Printf("Failed setting x object: %v (%s)\n", err, string(*name)) 520 | return err 521 | } 522 | } else if xtype == unipdf.XObjectTypeForm { 523 | // fmt.Printf(" XObject Form: %s\n", *name) 524 | 525 | // Go through the XObject Form content stream. 526 | xform, err := resources.GetXObjectFormByName(*name) 527 | if err != nil { 528 | fmt.Printf("Error: %v\n", err) 529 | return err 530 | } 531 | 532 | formContent, err := xform.GetContentStream() 533 | if err != nil { 534 | fmt.Printf("Error: %v\n", err) 535 | return err 536 | } 537 | 538 | // Process the content stream in the Form object too: 539 | // XXX/TODO/Consider: Use either form resources (priority) and fall back to page resources alternatively if not found. 540 | // Have not come into cases where needed yet. 541 | formResources := xform.Resources 542 | if formResources == nil { 543 | formResources = resources 544 | } 545 | 546 | // Process the content stream in the Form object too: 547 | grayContent, err := transformContentStreamToGrayscale(string(formContent), formResources) 548 | if err != nil { 549 | fmt.Printf("Error: %v\n", err) 550 | return err 551 | } 552 | 553 | xform.SetContentStream(grayContent, nil) 554 | 555 | // Update the resource entry. 556 | resources.SetXObjectFormByName(*name, xform) 557 | } 558 | 559 | return nil 560 | }) 561 | 562 | err = processor.Process(resources) 563 | if err != nil { 564 | fmt.Printf("Error processing: %v\n", err) 565 | return nil, err 566 | } 567 | 568 | // For debug purposes: (high level logging). 569 | // 570 | // fmt.Printf("=== Unprocessed - Full list\n") 571 | // for idx, op := range operations { 572 | // fmt.Printf("U. Operation %d: %s - Params: %v\n", idx+1, op.Operand, op.Params) 573 | // } 574 | // fmt.Printf("=== Processed - Full list\n") 575 | // for idx, op := range *processedOperations { 576 | // fmt.Printf("P. 
Operation %d: %s - Params: %v\n", idx+1, op.Operand, op.Params)
	// }

	return processedOperations.Bytes(), nil
}

// convertPatternToGray converts a pattern (tiling or shading) to grayscale.
// The pattern is modified in place and the same pointer is returned on success.
//
//   - Tiling patterns for which IsColored reports true have their content stream
//     passed through transformContentStreamToGrayscale (using the pattern's own
//     Resources) and written back. Other tiling patterns are left untouched.
//   - Shading patterns have their Shading replaced by the result of
//     convertShadingToGray.
//   - Patterns that are neither tiling nor shading are returned unchanged.
//
// Any error from reading, transforming or writing back the content stream, or
// from converting the shading, is returned together with a nil pattern.
func convertPatternToGray(pattern *unipdf.PdfPattern) (*unipdf.PdfPattern, error) {
	switch {
	case pattern.IsTiling():
		// Case 1: Colored tiling patterns. Need to process the content stream and replace.
		tilingPattern := pattern.GetAsTilingPattern()
		if !tilingPattern.IsColored() {
			// Only colored tiling patterns are rewritten here.
			break
		}

		// A colored tiling pattern can use color operators in its stream, need to process the stream.
		content, err := tilingPattern.GetContentStream()
		if err != nil {
			return nil, err
		}

		grayContents, err := transformContentStreamToGrayscale(string(content), tilingPattern.Resources)
		if err != nil {
			return nil, err
		}

		// Do not drop the error: if the stream could not be replaced the pattern
		// still holds its original (colored) content.
		if err = tilingPattern.SetContentStream(grayContents, nil); err != nil {
			return nil, err
		}

		// Update in-memory pdf objects.
		_ = tilingPattern.ToPdfObject()
	case pattern.IsShading():
		// Case 2: Shading patterns. Need to create a new colorspace that can map from N=3,4 colorspaces to grayscale.
		shadingPattern := pattern.GetAsShadingPattern()

		grayShading, err := convertShadingToGray(shadingPattern.Shading)
		if err != nil {
			return nil, err
		}
		shadingPattern.Shading = grayShading

		// Update in-memory pdf objects.
		_ = shadingPattern.ToPdfObject()
	}

	return pattern, nil
}

// Convert shading to grayscale.
// This one is slightly involved as a shading defines a color as function of position, i.e. color(x,y) = F(x,y).
// Since the function can be challenging to change, we define new DeviceN colorspace with a color conversion
// function.
627 | func convertShadingToGray(shading *unipdf.PdfShading) (*unipdf.PdfShading, error) { 628 | cs := shading.ColorSpace 629 | 630 | if cs.GetNumComponents() == 1 { 631 | // Already grayscale, should be fine. No action taken. 632 | return shading, nil 633 | } else if cs.GetNumComponents() == 3 { 634 | // Create a new DeviceN colorspace that converts R,G,B -> Grayscale 635 | // Use: gray := 0.3*R + 0.59G + 0.11B 636 | // PS program: { 0.11 mul exch 0.59 mul add exch 0.3 mul add }. 637 | transformFunc := &unipdf.PdfFunctionType4{} 638 | transformFunc.Domain = []float64{0, 1, 0, 1, 0, 1} 639 | transformFunc.Range = []float64{0, 1} 640 | rgbToGrayPsProgram := ps.NewPSProgram() 641 | rgbToGrayPsProgram.Append(ps.MakeReal(0.11)) 642 | rgbToGrayPsProgram.Append(ps.MakeOperand("mul")) 643 | rgbToGrayPsProgram.Append(ps.MakeOperand("exch")) 644 | rgbToGrayPsProgram.Append(ps.MakeReal(0.59)) 645 | rgbToGrayPsProgram.Append(ps.MakeOperand("mul")) 646 | rgbToGrayPsProgram.Append(ps.MakeOperand("add")) 647 | rgbToGrayPsProgram.Append(ps.MakeOperand("exch")) 648 | rgbToGrayPsProgram.Append(ps.MakeReal(0.3)) 649 | rgbToGrayPsProgram.Append(ps.MakeOperand("mul")) 650 | rgbToGrayPsProgram.Append(ps.MakeOperand("add")) 651 | transformFunc.Program = rgbToGrayPsProgram 652 | 653 | // Define the DeviceN colorspace that performs the R,G,B -> Gray conversion for us. 654 | transformcs := unipdf.NewPdfColorspaceDeviceN() 655 | transformcs.AlternateSpace = unipdf.NewPdfColorspaceDeviceGray() 656 | transformcs.ColorantNames = unicore.MakeArray(unicore.MakeName("R"), unicore.MakeName("G"), unicore.MakeName("B")) 657 | transformcs.TintTransform = transformFunc 658 | 659 | // Replace the old colorspace with the new. 660 | shading.ColorSpace = transformcs 661 | 662 | return shading, nil 663 | } else if cs.GetNumComponents() == 4 { 664 | // Create a new DeviceN colorspace that converts C,M,Y,K -> Grayscale. 665 | // Use: gray = 1.0 - min(1.0, 0.3*C + 0.59*M + 0.11*Y + K) ; where BG(k) = k simply. 
666 | // PS program: {exch 0.11 mul add exch 0.59 mul add exch 0.3 mul add dup 1.0 ge { pop 1.0 } if} 667 | transformFunc := &unipdf.PdfFunctionType4{} 668 | transformFunc.Domain = []float64{0, 1, 0, 1, 0, 1, 0, 1} 669 | transformFunc.Range = []float64{0, 1} 670 | 671 | cmykToGrayPsProgram := ps.NewPSProgram() 672 | cmykToGrayPsProgram.Append(ps.MakeOperand("exch")) 673 | cmykToGrayPsProgram.Append(ps.MakeReal(0.11)) 674 | cmykToGrayPsProgram.Append(ps.MakeOperand("mul")) 675 | cmykToGrayPsProgram.Append(ps.MakeOperand("add")) 676 | cmykToGrayPsProgram.Append(ps.MakeOperand("exch")) 677 | cmykToGrayPsProgram.Append(ps.MakeReal(0.59)) 678 | cmykToGrayPsProgram.Append(ps.MakeOperand("mul")) 679 | cmykToGrayPsProgram.Append(ps.MakeOperand("add")) 680 | cmykToGrayPsProgram.Append(ps.MakeOperand("exch")) 681 | cmykToGrayPsProgram.Append(ps.MakeReal(0.30)) 682 | cmykToGrayPsProgram.Append(ps.MakeOperand("mul")) 683 | cmykToGrayPsProgram.Append(ps.MakeOperand("add")) 684 | cmykToGrayPsProgram.Append(ps.MakeOperand("dup")) 685 | cmykToGrayPsProgram.Append(ps.MakeReal(1.0)) 686 | cmykToGrayPsProgram.Append(ps.MakeOperand("ge")) 687 | 688 | // Add sub procedure. 689 | subProc := ps.NewPSProgram() 690 | subProc.Append(ps.MakeOperand("pop")) 691 | subProc.Append(ps.MakeReal(1.0)) 692 | cmykToGrayPsProgram.Append(subProc) 693 | cmykToGrayPsProgram.Append(ps.MakeOperand("if")) 694 | transformFunc.Program = cmykToGrayPsProgram 695 | 696 | // Define the DeviceN colorspace that performs the R,G,B -> Gray conversion for us. 697 | transformcs := unipdf.NewPdfColorspaceDeviceN() 698 | transformcs.AlternateSpace = unipdf.NewPdfColorspaceDeviceGray() 699 | transformcs.ColorantNames = unicore.MakeArray(unicore.MakeName("C"), unicore.MakeName("M"), unicore.MakeName("Y"), unicore.MakeName("K")) 700 | transformcs.TintTransform = transformFunc 701 | 702 | // Replace the old colorspace with the new. 
703 | shading.ColorSpace = transformcs 704 | 705 | return shading, nil 706 | } 707 | 708 | unicommon.Log.Debug("Cannot convert to shading pattern grayscale, color space N = %d", cs.GetNumComponents()) 709 | return nil, errors.New("unsupported pattern colorspace for grayscale conversion") 710 | } 711 | --------------------------------------------------------------------------------