├── .gitmodules
├── .gitsecret
├── keys
│ ├── crls.d
│ │ └── DIR.txt
│ ├── pubring.kbx
│ └── trustdb.gpg
└── paths
│ └── mapping.cfg
├── extras
└── Gather to nvUltra.shortcut
├── gather.entitlements
├── .gitignore
├── Makefile
├── scripts
├── update_formula.rb
├── fixreadme.rb
├── update_downloads.rb
├── bump.rb
└── package.sh
├── Rakefile
├── Formula
└── gather-cli.rb
├── LICENSE.md
├── Package.resolved
├── Package.swift
├── Tests
├── snippet.html
└── gather-cliTests
│ └── gather_cliTests.swift
├── .github
└── workflows
│ └── release.yml
├── CHANGELOG.md
├── README.md
├── src
└── _README.md
└── Sources
└── gather
└── gather.swift
/.gitmodules:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitsecret/keys/crls.d/DIR.txt:
--------------------------------------------------------------------------------
1 | v:1:
2 |
--------------------------------------------------------------------------------
/.gitsecret/keys/pubring.kbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ttscoff/gather-cli/main/.gitsecret/keys/pubring.kbx
--------------------------------------------------------------------------------
/.gitsecret/keys/trustdb.gpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ttscoff/gather-cli/main/.gitsecret/keys/trustdb.gpg
--------------------------------------------------------------------------------
/.gitsecret/paths/mapping.cfg:
--------------------------------------------------------------------------------
1 | buildnotes.md:5cc23a769ac033f097cd36898a616de8fa6e8648146265d1afa666a7a392cd1f
2 |
--------------------------------------------------------------------------------
/extras/Gather to nvUltra.shortcut:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ttscoff/gather-cli/main/extras/Gather to nvUltra.shortcut
--------------------------------------------------------------------------------
/gather.entitlements:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | com.apple.security.automation.apple-events
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | xcuserdata/
6 | DerivedData/
7 | .swiftpm/config/registries.json
8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
9 | .netrc
10 | build
11 | *.pkg
12 | package
13 | .gitsecret/keys/random_seed
14 | !*.secret
15 | gather
16 | tag_message.txt
17 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Install locations. Override on the command line, e.g.
#   make install prefix=/opt/local
prefix ?= /usr/local
bindir = $(prefix)/bin
libdir = $(prefix)/lib

# Compile the release binary into .build/release/.
build:
	swift build -c release --disable-sandbox

# Create the target directories and copy the binary into $(bindir).
# DESTDIR is empty unless the caller sets it, so a plain `make install`
# writes to the same paths as before; packagers can set it to stage the
# install under a temporary root.
install: build
	install -d "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)"
	install ".build/release/gather" "$(DESTDIR)$(bindir)"

# Remove the installed binary (honours the same prefix/DESTDIR settings).
uninstall:
	rm -rf "$(DESTDIR)$(bindir)/gather"

# Discard all SwiftPM build output.
clean:
	rm -rf .build

.PHONY: build install uninstall clean
19 |
--------------------------------------------------------------------------------
/scripts/update_formula.rb:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

# Point the Homebrew formula in the local homebrew-thelab checkout at the most
# recent git tag of this repository, then commit and push that checkout.
#
# The formula path defaults to the author's local checkout and can be
# overridden with the GATHER_FORMULA environment variable.

require 'open3'

DEFAULT_FORMULA = '/Users/ttscoff/Desktop/Code/homebrew-thelab/Formula/gather-cli.rb'

# Run a command without going through a shell and return its trimmed stdout.
# Aborts the script when the command exits non-zero, so a failed git call
# cannot be mistaken for success.
def run!(*cmd)
  out, status = Open3.capture2(*cmd)
  abort "Command failed: #{cmd.join(' ')}" unless status.success?
  out.strip
end

last_tag = run!('git', 'describe', '--tags', '--abbrev=0')
last_hash = run!('git', 'rev-parse', last_tag)

formula = ENV.fetch('GATHER_FORMULA', DEFAULT_FORMULA)
content = IO.read(formula)

# sub! returns nil when nothing matched. Without this check the file would be
# rewritten unchanged and the script would still print "Formula updated".
replaced = content.sub!(/tag: ".*?", revision: ".*?"/, %(tag: "#{last_tag}", revision: "#{last_hash}"))
abort "No tag/revision entry found in #{formula}" unless replaced

File.open(formula, 'w') { |f| f.puts content }

# The remaining git commands operate on the tap checkout, not this repository.
Dir.chdir(File.dirname(formula))
run!('git', 'commit', '-a', '-m', "Formula update #{last_tag}")
run!('git', 'pull')
run!('git', 'push')

puts "Formula updated"
17 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
# Location of the changelog whose top heading carries the current version.
CHANGELOG_FILE = File.join(File.dirname(__FILE__), 'CHANGELOG.md')

# Swift source expected to hold the `var VERSION = "x.y.z"` declaration.
# NOTE(review): path taken from the Sources/gather/gather.swift entry in the
# repository tree and the pattern scripts/bump.rb rewrites -- confirm.
VERSION_SOURCE_FILE = File.join(File.dirname(__FILE__), 'Sources', 'gather', 'gather.swift')

# Return the first version number found in a changelog heading
# (e.g. "## 2.1.12"), or nil when no heading carries one.
def changelog_version(text)
  text[/^#+ (\d+\.\d+\.\d+(\w+)?)/, 1]
end

# Return the version assigned to VERSION in the given source text, or nil
# when no such assignment is present.
def source_version(text)
  text[/VERSION *= *['"](\d+\.\d+\.\d+(\w+)?)/, 1]
end

desc "Development version check"
task :ver do
  # `git ver` is not a built-in git command; presumably a local alias.
  gver = `git ver`.strip
  cver = changelog_version(IO.read(CHANGELOG_FILE))
  # A missing file or a failed match is reported as "unknown" rather than
  # raising NoMethodError on nil.
  version = File.exist?(VERSION_SOURCE_FILE) ? source_version(IO.read(VERSION_SOURCE_FILE)) : nil
  puts "git tag: #{gver}"
  puts "source: #{version || 'unknown'}"
  puts "changelog: #{cver || 'unknown'}"
end

desc "Changelog version check"
task :cver do
  puts changelog_version(IO.read(CHANGELOG_FILE))
end
16 |
--------------------------------------------------------------------------------
/scripts/fixreadme.rb:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

# Regenerate README.md from src/_README.md, substituting the version number
# given as the first command line argument.
#
# NOTE(review): the regex literals below look damaged. `(?<=\)` escapes its
# own closing parenthesis and one pattern is empty; presumably delimiter text
# inside them was lost when this listing was produced. Compare against the
# file in the repository before relying on this copy.

current_ver = ARGV[0]
src = 'src/_README.md'
dest = 'README.md'

# Read the template as raw bytes, then transcode to UTF-8, replacing invalid
# or undefined sequences with '?'.
readme = IO.read(src).force_encoding('ASCII-8BIT').encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')

# Keep only the matched portion of the template.
content = readme.match(/(?<=\)(.*?)(?=\)/m)[0]

# Prepend the top-level heading, then apply the three substitutions.
content = "# Gather CLI\n\n#{content}"
content.gsub!(/(.*?)/, current_ver)
content.gsub!(/(.*?)/m, '\1')
content.gsub!(//m, '')

File.open(dest, 'w') { |f| f.puts(content) }

Process.exit 0
20 |
--------------------------------------------------------------------------------
/scripts/update_downloads.rb:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

# Update one entry (id 54) in the local downloads.csv with a new download
# filename and version number.
#
# Usage: update_downloads.rb FILENAME NEW_VERSION
#
# A copy of the CSV is saved beside it with a .bak extension before anything
# is written. Prints "OK" on success.

require 'fileutils'
require 'csv'

# Value of the first column that identifies the row to update.
DOWNLOAD_ID = '54'

filename = ARGV[0]
new_version = ARGV[1]
# Refuse to run without both arguments instead of writing empty values.
abort 'Usage: update_downloads.rb FILENAME NEW_VERSION' unless filename && new_version

csvfile = File.expand_path('~/Sites/dev/bt/downloads.csv')
FileUtils.cp(csvfile, "#{csvfile}.bak")
downloads = CSV.read(csvfile)
updated = Time.now.strftime('%a %b %d %H:%M:%S %z %Y')

downloads.each do |row|
  next unless row[0] == DOWNLOAD_ID

  row[2] = "/downloads/#{filename}"
  row[3] = new_version
  row[7] = updated
end

# The block form closes and flushes the file even if a write raises.
File.open(csvfile, 'wb') do |f|
  downloads.each { |row| f.puts row.to_csv }
end

print 'OK'
27 |
--------------------------------------------------------------------------------
/scripts/bump.rb:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

# Write a new version number into the main source file, then regenerate the
# README from its template.
#
# Arguments: MAINFILE NEW_VERSION SRC DEST
#
# NOTE(review): the README regex literals below look damaged. `(?<=\)`
# escapes its own closing parenthesis and one pattern is empty; presumably
# delimiter text inside them was lost when this listing was produced.
# Compare against the file in the repository before relying on this copy.

# NOTE(review): nothing below references CSV; this require looks unused.
require 'csv'

mainfile = ARGV[0]
new_version = ARGV[1]
src = ARGV[2] # 'src/_README.md'
dest = ARGV[3] # 'README.md'

content = IO.read(mainfile)
# Replace the text between the quotes of the first `var VERSION = "..."`.
content.sub!(/(?mi)(?<=var VERSION = ")(.*?)(?=")/, new_version)
File.open(mainfile, 'w') { |f| f.puts content }

# Read the template as raw bytes, then transcode to UTF-8, replacing invalid
# or undefined sequences with '?'.
readme = IO.read(src).force_encoding('ASCII-8BIT').encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')

# Keep only the matched portion of the template, prepend the top-level
# heading, then apply the three substitutions.
content = readme.match(/(?<=\)(.*?)(?=\)/m)[0]
content = "# Gather CLI\n\n#{content}"
content.gsub!(/(.*?)/, new_version)
content.gsub!(/(.*?)/m, '\1')
content.gsub!(//m, '')

File.open(dest, 'w') { |f| f.puts(content) }

print "OK"
26 |
--------------------------------------------------------------------------------
/Formula/gather-cli.rb:
--------------------------------------------------------------------------------
# Homebrew formula for gather-cli
# To use this formula, create a tap:
# brew tap ttscoff/thelab https://github.com/ttscoff/homebrew-thelab
# Then install:
# brew install gather-cli

# Installs the `gather` executable from the release archive whose name is
# built from `version` below.
class GatherCli < Formula
  desc "Readability and Markdown utility for saving web urls and HTML text"
  homepage "https://brettterpstra.com/projects/gather-cli/"
  # Interpolated into the download URL inside `on_macos`.
  version "2.1.10"
  license "MIT"

  # A download URL is declared for macOS only.
  on_macos do
    url "https://github.com/ttscoff/gather-cli/releases/download/v#{version}/gather-cli-#{version}-macos-universal.tar.gz"
    sha256 "55fe5e2243bfb0166a04f8e3234c02d012e510fe7776e7fc770231cee2884119"
  end

  # Install the file named "gather" into the formula's bin directory.
  def install
    bin.install "gather"
  end

  # Run the installed binary with a web page URL as its only argument.
  test do
    system "#{bin}/gather", "https://brettterpstra.com/2022/08/30/popclip-webmarkdown-fix-and-other-codesigning-adventures/"
  end
end
26 |
27 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License Copyright (c) 2022 Brett Terpstra
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is furnished
8 | to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice (including the next
11 | paragraph) shall be included in all copies or substantial portions of the
12 | Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
17 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 | OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/Package.resolved:
--------------------------------------------------------------------------------
1 | {
2 | "pins" : [
3 | {
4 | "identity" : "html2text-swift",
5 | "kind" : "remoteSourceControl",
6 | "location" : "https://github.com/ttscoff/html2text-swift.git",
7 | "state" : {
8 | "branch" : "master",
9 | "revision" : "947f07f9a11a3439a8534ba9b604cfbd22c1b2be"
10 | }
11 | },
12 | {
13 | "identity" : "read-swift",
14 | "kind" : "remoteSourceControl",
15 | "location" : "https://github.com/ttscoff/read-swift.git",
16 | "state" : {
17 | "branch" : "master",
18 | "revision" : "7b4bb5771cf57a8b4dc92de80ba63c9442935241"
19 | }
20 | },
21 | {
22 | "identity" : "swift-argument-parser",
23 | "kind" : "remoteSourceControl",
24 | "location" : "https://github.com/apple/swift-argument-parser",
25 | "state" : {
26 | "revision" : "cdd0ef3755280949551dc26dee5de9ddeda89f54",
27 | "version" : "1.6.2"
28 | }
29 | },
30 | {
31 | "identity" : "swiftsoup",
32 | "kind" : "remoteSourceControl",
33 | "location" : "https://github.com/ttscoff/SwiftSoup.git",
34 | "state" : {
35 | "revision" : "6778575285177365cbad3e5b8a72f2a20583cfec",
36 | "version" : "2.4.3"
37 | }
38 | },
39 | {
40 | "identity" : "yams",
41 | "kind" : "remoteSourceControl",
42 | "location" : "https://github.com/jpsim/Yams.git",
43 | "state" : {
44 | "revision" : "3d6871d5b4a5cd519adf233fbb576e0a2af71c17",
45 | "version" : "5.4.0"
46 | }
47 | }
48 | ],
49 | "version" : 2
50 | }
51 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
// swift-tools-version: 5.6
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

// Manifest for the `gather` command line tool: one executable target plus
// the five packages it links against.
let package = Package(
    name: "Gather",
    dependencies: [
        .package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
        // These two follow the `master` branch rather than a version range.
        .package(url: "https://github.com/ttscoff/read-swift.git", branch: "master"),
        .package(url: "https://github.com/ttscoff/html2text-swift.git", branch: "master"),
        // Local-path alternatives for the two packages above (disabled).
        // .package(path: "./read-swift"),
        // .package(path: "./html2text-swift"),
        .package(url: "https://github.com/ttscoff/SwiftSoup.git", from: "2.0.0"),
        .package(url: "https://github.com/jpsim/Yams.git", from: "5.0.4"),
    ],
    targets: [
        // Targets are the basic building blocks of a package. A target can define a module or a test suite.
        // Targets can depend on other targets in this package, and on products in packages this package depends on.
        .executableTarget(
            name: "gather",
            dependencies: [
                .product(name: "ArgumentParser", package: "swift-argument-parser"),
                .product(name: "SwiftSoup", package: "SwiftSoup"),
                .product(name: "Readability", package: "read-swift"),
                .product(name: "HTML2Text", package: "html2text-swift"),
                .product(name: "Yams", package: "Yams"),
            ]
        ),
        // The test target is currently disabled.
        // .testTarget(
        //     name: "gather-cliTests",
        //     dependencies: ["gather"]
        // ),
    ]
)
36 |
--------------------------------------------------------------------------------
/Tests/snippet.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
Bullseye has been updated with a couple of tweaks.
5 |
6 |
First, pressing escape after running the bookmarklet will now cancel it and you can resume browsing without refreshing the page.
7 |
8 |
Next, if you want to skip the Marky preview frame with copy button, you can make a quick edit to the bookmarklet at the very beginning, adding window.bullseyeShowFrame=0; after the javascript: and before the function:
9 |
10 |
javascript:window.bullseyeShowFrame=0;(function()...
Copy
11 |
12 |
I also made a couple of tweaks to the path builder to help with getting the raw source when it’s available.
13 |
14 |
If you have the bookmarklet installed, it’s already updated. If you want to switch to getting raw Markdown text back, just make the above edit. If you haven’t installed it yet, just drag the link below to your bookmarks bar.
15 |
16 |
19 |
20 |
If you run into issues on a particular page, I’m happy to look at test cases. Just shoot me an email.
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Release

# Build and publish release assets whenever a v* tag is pushed.
on:
  push:
    tags:
      - "v*"

permissions:
  contents: write

jobs:
  build-macos:
    runs-on: macos-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Strip the refs/tags/v prefix to get the bare version number and expose
      # it both as an environment variable and as a step output.
      - name: Set up version
        id: version
        run: |
          VERSION=${GITHUB_REF#refs/tags/v}
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # Publish the tag's message as the multiline step output `body`, which
      # the upload step uses as the release description.
      - name: Extract tag message
        id: tag_message
        run: |
          TAG_NAME=${GITHUB_REF#refs/tags/}
          TAG_MESSAGE=$(git tag -l --format='%(contents)' "$TAG_NAME")
          TAG_MESSAGE=$(echo "$TAG_MESSAGE" | sed -e :a -e '/^\n*$/{$d;N;ba' -e '}' -e 's/^[[:space:]]*//;s/[[:space:]]*$//')
          # Multiline outputs use the name<<DELIMITER ... DELIMITER form. The
          # delimiter must not appear on a line of its own in the message.
          {
            echo 'body<<GATHER_TAG_MESSAGE_EOF'
            echo "$TAG_MESSAGE"
            echo 'GATHER_TAG_MESSAGE_EOF'
          } >> "$GITHUB_OUTPUT"

      - name: Build macOS universal binary
        env:
          VERSION: ${{ env.VERSION }}
        run: |
          set -euo pipefail
          xcrun swift --version
          xcrun swift build -c release --arch arm64 --arch x86_64
          BINDIR=$(xcrun swift build -c release --arch arm64 --arch x86_64 --show-bin-path)
          mkdir -p "release/gather-cli-${VERSION}-macos-universal"
          cp "${BINDIR}/gather" "release/gather-cli-${VERSION}-macos-universal/gather"

      - name: Create archive and checksum
        env:
          VERSION: ${{ env.VERSION }}
        run: |
          set -euo pipefail
          cd release
          tar -czf "gather-cli-${VERSION}-macos-universal.tar.gz" "gather-cli-${VERSION}-macos-universal"
          shasum -a 256 "gather-cli-${VERSION}-macos-universal.tar.gz" > "gather-cli-${VERSION}-macos-universal.tar.gz.sha256"
          cat "gather-cli-${VERSION}-macos-universal.tar.gz.sha256"

      - name: Upload release assets
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: v${{ env.VERSION }}
          body: ${{ steps.tag_message.outputs.body }}
          generate_release_notes: false
          files: |
            release/gather-cli-${{ env.VERSION }}-macos-universal.tar.gz
            release/gather-cli-${{ env.VERSION }}-macos-universal.tar.gz.sha256
69 |
--------------------------------------------------------------------------------
/Tests/gather-cliTests/gather_cliTests.swift:
--------------------------------------------------------------------------------
1 | import class Foundation.Bundle
2 | import XCTest
3 |
/// Runs the built `gather` executable and returns what it wrote to stdout.
///
/// - Parameters:
///   - args: Command line arguments passed to `gather`.
///   - stdin: Text to write to the process's standard input, or `nil` to
///     leave standard input unset.
/// - Returns: The process's standard output decoded as UTF-8; `nil` when the
///   required APIs are unavailable or the output is not valid UTF-8.
func gatherWith(args: [String], stdin: String?) -> String? {
    // Some of the APIs that we use below are available in macOS 10.13 and above.
    guard #available(macOS 10.13, *) else {
        return nil
    }

    // Mac Catalyst won't have `Process`, but it is supported for executables.
    #if !targetEnvironment(macCatalyst)
    do {
        let gatherBinary = productsDirectory.appendingPathComponent("gather")

        let process = Process()
        process.executableURL = gatherBinary

        // Write exactly the text the caller supplied, then close the write
        // end so the child sees end-of-file.
        if let input = stdin {
            let inpipe = Pipe()
            inpipe.fileHandleForWriting.write(Data(input.utf8))
            inpipe.fileHandleForWriting.closeFile()
            process.standardInput = inpipe
        }

        let pipe = Pipe()
        process.standardOutput = pipe

        process.arguments = args
        try process.run()
        process.waitUntilExit()

        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        return String(data: data, encoding: .utf8)
    } catch {
        fatalError("Error running Gather")
    }
    #else
    // No `Process` here, so there is nothing to run; every path must still
    // return a value.
    return nil
    #endif
}
40 |
/// Directory that holds the build products: on macOS the folder containing
/// the loaded `.xctest` bundle, elsewhere the main bundle's own URL.
var productsDirectory: URL {
    #if os(macOS)
    guard let testBundle = Bundle.allBundles.first(where: { $0.bundlePath.hasSuffix(".xctest") }) else {
        fatalError("couldn't find the products directory")
    }
    return testBundle.bundleURL.deletingLastPathComponent()
    #else
    return Bundle.main.bundleURL
    #endif
}
52 |
/// End-to-end tests that launch the built `gather` binary through
/// `gatherWith(args:stdin:)` and check its standard output.
final class gather_cliTests: XCTestCase {
    /// `--title-only` with a GitHub repository URL should print just the page title.
    func testTitleOnly() throws {
        let args = ["--title-only", "https://github.com/vimtaai/critic-markup"]
        let output = gatherWith(args: args, stdin: nil)
        XCTAssertNotNil(output)
        XCTAssertEqual(output!, "GitHub - vimtaai/critic-markup: CriticMarkup in JavaScript\n")
    }

    /// `--metadata` output should contain a `source:` line holding the URL
    /// without its query string and fragment.
    func testMetadata() throws {
        let args = ["--metadata", "https://github.com/vimtaai/critic-markup?query=value&something=else#nowhere"]
        let output = gatherWith(args: args, stdin: nil)
        XCTAssertNotNil(output)
        XCTAssertNotNil(output!.range(of: "source: https://github.com/vimtaai/critic-markup\n"))
    }

    /// Input supplied on standard input with `--stdin --html --no-readability`
    /// should come back as the plain text "Testing gather".
    func testStdin() throws {
        let args = ["--stdin", "--html", "--no-readability"]
        // NOTE(review): this literal spans two physical lines, which a plain
        // Swift string does not allow; presumably markup inside it was lost
        // when this listing was produced. Check the repository copy.
        let testString = "Testing gather
"
        let output = gatherWith(args: args, stdin: testString)
        XCTAssertNotNil(output)
        XCTAssertEqual(output, "Testing gather\n")
    }
}
76 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 2.1.12
2 |
3 | ## 2.1.11
4 |
5 | #### FIXED
6 |
7 | - PKG signing
8 |
9 | ## 2.1.10
10 |
11 | ## 2.1.9
12 |
13 | ## 2.1.8
14 |
15 | ## 2.1.7
16 |
17 | #### NEW
18 |
19 | - Homebrew formula installs binaries without requiring Xcode
20 |
21 | ## 2.1.6
22 |
23 | ## 2.1.5
24 |
25 | ## 2.1.4
26 |
27 | ## 2.1.3
28 |
29 | ## 2.1.2
30 |
31 | #### IMPROVED
32 |
33 | - Implement cleaned-up code from #16
34 |
35 | ## 2.1.1
36 |
37 | ## 2.0.48
38 |
39 | #### NEW
40 |
41 | - Use --save TITLE to save a set of command line flags for future use
42 | - Use --config TITLE to restore a set of saved flags
43 |
44 | ## 2.0.47
45 |
46 | #### IMPROVED
47 |
48 | - Don't attempt to remove extraneous bits from SEO titles, leave as is
49 |
50 | ## 2.0.46
51 |
52 | #### FIXED
53 |
54 | - Incorrect handling of links with no protocol (`//github.com...`)
55 | - Incorrect formatting of links with emphasis (`_[text_][5]`)
56 |
57 | ## 2.0.45
58 |
59 | #### FIXED
60 |
61 | - Incorrect handling of links with no protocol (`//github.com...`)
62 | - Incorrect formatting of links with emphasis (`_[text_][5]`)
63 |
64 | ## 2.0.44
65 |
66 | #### NEW
67 |
68 | - --metadata-yaml option to include source info as YAML front matter
69 |
70 | ## 2.0.43
71 |
72 | #### FIXED
73 |
74 | - Missing h1 title with --no-include-source
75 |
76 | ## 2.0.42
77 |
78 | #### FIXED
79 |
80 | - Missing title if a rel=canonical url not found
81 |
82 | ## 2.0.40
83 |
84 | #### FIXED
85 |
86 | - Using --inline-links without --no-paragraph links would throw an error, which is just stupid. What was he thinking?
87 | - Documentation refers to `--inline` instead of `--inline-links` (Thanks rand)
88 |
89 | ## 2.0.34
90 |
91 | #### IMPROVED
92 |
93 | - Documentation update
94 |
95 | #### FIXED
96 |
97 | - Double h1 headlines being inserted
98 | - Respect --no-include-title better
99 |
100 | ## 2.0.33
101 |
102 | #### NEW
103 |
104 | - Use %date, %slugdate, %title, and %slug in --file paths
105 | - %slug for url scheme templates
106 |
107 | #### IMPROVED
108 |
109 | - Sanitize filenames where needed
110 |
111 | ## 2.0.32
112 |
113 | #### IMPROVED
114 |
115 | - Rely more heavily on title tags instead of trying to parse h1/h2 effectively
116 |
117 | ## 2.0.31
118 |
119 | #### IMPROVED
120 |
121 | - Add %filename placeholder to url templates, which inserts a sanitized version of the page title
122 |
123 | ## 2.0.30
124 |
125 | #### NEW
126 |
127 | - Generate any url scheme using templates (`--url-template "handler://method?txt=%text&title=%title"`)
128 | - Open generated urls automatically with `--url-open`
129 | - Define a fallback title for various outputs in case one isn't found (usually in HTML snippets) `--fallback-title`
130 |
131 | #### IMPROVED
132 |
133 | - Better title detection for HTML snippets
134 | - If a title can be detected from an HTML snippet, include it if requested
135 | - If a canonical link can be found for the page, use it as the "source url" in output
136 |
137 | #### FIXED
138 |
139 | - Crash when HTML snippet is missing head
140 | - URL encode ALL characters to avoid malformed url handlers
141 | - Images wrapped in links missing opening bracket
142 |
143 | ## 2.0.29
144 |
145 | #### CHANGED
146 |
147 | - Moved the new --nv-add/url options to --nvu-add/url
148 |
149 | #### NEW
150 |
151 | - Generate nvALT urls and notes with --nv-url and --nv-add
152 |
153 | ## 2.0.28
154 |
155 | #### NEW
156 |
157 | - --nv-url to output results as an nvUltra url handler url
158 | - --nv-add to immediately create a new note in nvUltra
159 | - --nv-notebook to specify a path to the notebook folder you wish to use
160 | - --title-only to output only the title of the page
161 |
162 | ## 2.0.27
163 |
164 | #### NEW
165 |
166 | - --[no-]include-title to enable/disable the inclusion of an h1 with the page title
167 | - --[no-]include-source option to enable/disable the source link
168 | - --metadata option to include MultiMarkdown metadata with title, source, and current date
169 |
170 | #### IMPROVED
171 |
172 | - Sort options in --help
173 |
174 | ## 2.0.23
175 |
176 | #### FIXED
177 |
178 | - Missing space around links in text
179 | - Include h1 title, can be disabled with --no-include-title
180 |
181 | ## 2.0.20
182 |
183 | #### FIXED
184 |
185 | - Versioning script inserting variable names
186 |
187 | ## 2.0.19
188 |
189 | #### IMPROVED
190 |
191 | - Revamping build process to use signed packages
192 |
193 | ## 2.0.13
194 |
195 | #### IMPROVED
196 |
197 | - Improved build/deploy automation
198 |
199 | ## 2.0.10
200 |
201 | #### IMPROVED
202 |
203 | - Added notes about macOS quarantine to README
204 |
205 | ## 2.0.9
206 |
207 | #### FIXED
208 |
209 | - Attempting to codesign the binary to see if I can avoid macOS warnings
210 |
211 | ## 2.0.8
212 |
213 | #### NEW
214 |
215 | - Special handling for StackExchange. Formatting is automatically cleaned up, accepted answers moved to the top, and there are options for filtering by minimum upvotes, including or excluding comments, and including only accepted answers
216 |
217 | ## 2.0.5
218 |
219 | #### IMPROVED
220 |
221 | - Documentation updates
222 |
223 | ## 2.0.2
224 |
225 | #### NEW
226 |
227 | - Initial release of the successor to read2text. A Swift-based version with more options and better parsing
228 |
229 | #### IMPROVED
230 |
231 | - Documentation updates
232 |
233 | ## 2.0.0
234 |
235 | #### NEW
236 |
237 | - Initial commit
238 |
--------------------------------------------------------------------------------
/scripts/package.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Package and notarize a command line tool
3 | # package.sh PRODUCT_NAME BUNDLE_ID EXECUTABLE_NAME VERSION
4 |
5 | productname=$1
6 | identifier=$2
7 | executable=$3
8 | version=$4
9 |
10 | # the email address of your developer account
11 | dev_account="me@brettterpstra.com"
12 |
13 | # the name of your Developer ID installer certificate (for PKG signing)
14 | # Leave empty or set to "-" to build unsigned PKG
15 | INSTALLER_SIGNATURE="${INSTALLER_SIGNATURE:-Developer ID Installer: Brett Terpstra (47TRS7H4BH)}"
16 |
17 | # the 10-digit team id
18 | dev_team="47TRS7H4BH"
19 |
20 | # the label of the keychain item which contains an app-specific password
21 | dev_keychain_label="Developer-notarytool"
22 |
23 | projectdir='.'
24 |
25 | builddir="$projectdir"
26 | pkgroot="$builddir/package"
27 |
# Print the status reported by `notarytool info` for a submission.
# $1: request UUID (required, must be non-empty)
requeststatus() {
  local request_uuid req_status
  # `:?` aborts with the message when $1 is unset OR empty; the previous
  # `?:` spelling only caught the unset case.
  request_uuid=${1:?"need a request UUID"}
  req_status=$(xcrun notarytool info \
    --keychain-profile "$dev_keychain_label" \
    "$request_uuid" 2>&1 |
    awk -F ': ' '/status:/ { print $2; }')
  echo "$req_status"
}
36 |
# Submit a file for notarization, wait for the result and report it.
# $1: path to the file to notarize
# $2: identifier (required; validated here but not otherwise used)
# Returns 0 when the reported status is "Accepted", 1 otherwise.
notarizefile() {
  # Keep all working variables local. The script has a top-level
  # `identifier` of its own, which a plain assignment here would overwrite.
  local filepath identifier submit_output request_uuid status_output request_status
  filepath=${1:?"need a filepath"}
  identifier=${2:?"need an identifier"}

  # upload file and wait for completion
  echo "## uploading $filepath for notarization"
  submit_output=$(xcrun notarytool submit --wait \
    --keychain-profile "$dev_keychain_label" \
    "$filepath" 2>&1)

  echo "$submit_output"

  # Extract the request UUID: last field of the last line containing " id:"
  request_uuid=$(echo "$submit_output" | awk '/ id:/ { print $NF; }' | tail -n 1)

  if [[ $request_uuid == "" ]]; then
    echo "## Error: could not get notarization request UUID"
    return 1
  fi

  echo "Notarization RequestUUID: $request_uuid"

  # Get detailed status
  echo "## Checking notarization status..."
  status_output=$(xcrun notarytool info \
    --keychain-profile "$dev_keychain_label" \
    "$request_uuid" 2>&1)

  echo "$status_output"

  # Extract status: first "status:" line, with spaces removed
  request_status=$(echo "$status_output" | awk -F ': ' '/status:/ { print $2; }' | head -1 | tr -d ' ')

  if [[ "$request_status" == "Accepted" ]]; then
    echo "## ✓ Notarization succeeded!"
    return 0
  else
    echo "## ✗ Notarization failed with status: $request_status"
    echo "## Getting notarization logs..."
    # Only the first 50 lines of the log are shown.
    xcrun notarytool log \
      --keychain-profile "$dev_keychain_label" \
      "$request_uuid" 2>&1 | head -50
    return 1
  fi
}
82 |
# Build the binary. Stop here if the build fails: every later step needs the
# product, and signing a stale or missing file would only hide the failure.
xcrun swift build -c release --arch arm64 --arch x86_64 || exit 1
bindir=$(xcrun swift build -c release --arch arm64 --arch x86_64 --show-bin-path)

# Sign the built binary ad hoc and verify the result. Used whenever a real
# signing identity is unavailable or fails.
adhoc_sign_binary() {
  codesign --force --verbose --sign "-" "$bindir/$executable"
  codesign --verify -vvvv "$bindir/$executable"
}

# Determine signing identity for binary (allow override via environment variable)
# Try Developer ID Application first (for notarization), fall back to 3rd Party Mac Developer Application
if [ -z "$APP_SIGNING_IDENTITY" ]; then
  # -F matches the identity as a fixed string instead of a regular expression.
  if security find-identity -v -p codesigning | grep -qF "Developer ID Application: Brett Terpstra (47TRS7H4BH)"; then
    APP_SIGNING_IDENTITY="Developer ID Application: Brett Terpstra (47TRS7H4BH)"
  else
    # Fall back to 3rd Party Mac Developer Application
    APP_SIGNING_IDENTITY="3rd Party Mac Developer Application: Brett Terpstra (47TRS7H4BH)"
  fi
fi

# Initialize notarization flag - will be set based on PKG signing success
SKIP_NOTARIZATION=true

# Check if the identity exists, if not use ad-hoc signing
if security find-identity -v -p codesigning | grep -qF "$APP_SIGNING_IDENTITY"; then
  echo "## Signing binary with: $APP_SIGNING_IDENTITY"
  # Paths are quoted so a build directory containing spaces still works.
  if codesign --force --verbose --sign "$APP_SIGNING_IDENTITY" -o runtime --timestamp "$bindir/$executable" 2>&1; then
    codesign --verify -vvvv "$bindir/$executable"
    # Check if we're using Developer ID Application (required for notarization)
    if [[ "$APP_SIGNING_IDENTITY" == *"Developer ID Application"* ]]; then
      echo "## ✓ Binary signed with Developer ID Application (ready for notarization)"
    else
      echo "## Note: Using 3rd Party certificate - notarization will be skipped"
    fi
  else
    echo "## Warning: Failed to sign with '$APP_SIGNING_IDENTITY', trying ad-hoc signing"
    adhoc_sign_binary
  fi
else
  echo "## Warning: Identity '$APP_SIGNING_IDENTITY' not found, using ad-hoc signing for binary"
  adhoc_sign_binary
fi
123 |
# Stage the binary in a fresh package root laid out like the install target.
# $pkgroot is the script's own name for ./package; paths are quoted so
# directories containing spaces still work.
rm -rf "$pkgroot"
mkdir -p "$pkgroot/usr/local/bin"
cp "$bindir/$executable" "$pkgroot/usr/local/bin/"

pkgpath="$builddir/$productname-$version.pkg"

echo "## building pkg: $pkgpath"

# Build the package without a signature. An unsigned package is never
# submitted for notarization.
build_unsigned_pkg() {
  pkgbuild --root "$pkgroot" \
    --version "$version" \
    --identifier "$identifier" \
    "$pkgpath"
  SKIP_NOTARIZATION=true
}

# Check if installer signing identity is provided
if [ -n "$INSTALLER_SIGNATURE" ] && [ "$INSTALLER_SIGNATURE" != "-" ]; then
  # Try to use the certificate directly - pkgbuild can use installer certs
  # even if they don't show up in security find-identity
  echo "## Attempting to sign PKG with: $INSTALLER_SIGNATURE"
  if pkgbuild --root "$pkgroot" \
    --version "$version" \
    --identifier "$identifier" \
    --sign "$INSTALLER_SIGNATURE" \
    "$pkgpath" 2>&1; then
    echo "## ✓ PKG signed successfully"
    # PKG is signed with Developer ID Installer, can be notarized
    # BUT: Binary must also be signed with Developer ID Application for notarization to succeed
    if [[ "$APP_SIGNING_IDENTITY" == *"Developer ID Application"* ]]; then
      SKIP_NOTARIZATION=false
    else
      echo "## Note: Binary not signed with Developer ID Application - notarization will be skipped"
      SKIP_NOTARIZATION=true
    fi
  else
    # $? still holds the exit status of the failed pkgbuild here.
    PKG_BUILD_EXIT=$?
    echo "## Warning: Failed to sign with '$INSTALLER_SIGNATURE' (exit code: $PKG_BUILD_EXIT), building unsigned pkg"
    # Remove the failed signed package and build unsigned
    rm -f "$pkgpath"
    build_unsigned_pkg
  fi
else
  echo "## Building unsigned PKG (installer signing not configured)"
  build_unsigned_pkg
fi
170 |
# Notarize and staple only when both the binary and the package were signed
# with Developer ID identities; otherwise just say so and finish.
if [ "$SKIP_NOTARIZATION" != "false" ]; then
  echo "## Skipping notarization (ad-hoc or unsigned build)"
else
  # upload for notarization
  echo "Path: $pkgpath"
  echo "Identifier $identifier"
  if ! notarizefile "$pkgpath" "$identifier"; then
    echo "## Error: Notarization failed, skipping stapling"
    echo "## The PKG is signed but not notarized. You may need to fix signing issues."
    exit 1
  fi

  # Notarization succeeded, so the ticket can be stapled. A stapling failure
  # is reported but does not fail the script.
  echo "## Stapling $pkgpath"
  if xcrun stapler staple "$pkgpath"; then
    echo "## ✓ Package stapled successfully"
  else
    echo "## Warning: Stapling failed"
  fi
fi

# Every path that reaches this point ended on a successful echo.
exit 0
195 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Gather CLI
2 |
3 | 
4 |
5 |
6 | Current version: 2.1.12
7 |
8 | This project is the successor to read2text, which was a Python based tool that used Arc90 Readability and html2text to convert web URLs to Markdown documents, ready to store in your notes. It takes its name from another of my similar projects that I've since retired. It was this, but with a GUI, and this is infinitely more scriptable and is designed to nestle into your favorite tools and projects.
9 |
10 | This version is Swift-based and compiled as a binary that doesn't require Python or any other processor to run. It has more options, better parsing, and should be an all-around useful tool, easy to incorporate into almost any project.
11 |
12 | The code is available [on GitHub](https://github.com/ttscoff/gather-cli). It's built as a Swift Package and can be compiled using the `swift` command line tool. I'm just learning Swift, so I guarantee there's a lot of stupidity in the code. If you dig in, feel free to kindly point out my errors.
13 |
14 | ### Installation
15 |
16 | #### Via Homebrew
17 |
18 | The easiest way to install Gather is with [Homebrew](https://brew.sh). Building gather from source via Homebrew requires installing Xcode, so if you'd rather not deal with the hassle, see the download option below. If you use a lot of command line utilities or want a package manager for all your non-MAS apps, I highly recommend getting Homebrew [set up](https://brew.sh).
19 |
20 | If you have Homebrew and Xcode installed, just run:
21 |
22 | ```console
23 | brew tap ttscoff/thelab
24 | brew install gather-cli
25 | ```
26 |
27 | If you get errors, the most common solution is to run `sudo xcode-select -s /Applications/Xcode.app/Contents/Developer`. Seems to fix just about every issue I've had reported.
28 |
29 | #### Manual Install
30 |
31 | You can build your own binary by downloading the source code and running the swift compiler:
32 |
33 | ```console
34 | git clone https://github.com/ttscoff/gather-cli
35 | cd gather-cli
36 | swift build -c release
37 | ```
38 |
39 | The gather binary will be located in `.build/release/gather`. Copy it wherever you keep your binaries in your PATH.
40 |
41 | Or... just run `make install`, which will build the release version and copy it to `/usr/local/bin`.
42 |
43 | #### Downloading
44 |
45 |
46 | [Download the latest PKG installer](https://github.com/ttscoff/gather-cli/releases/latest)
47 |
48 |
49 | Double click to run the installer. This will install gather to /usr/local/bin with root permissions.
50 |
51 | ### Usage
52 |
53 | ```
54 | USAGE: gather [<options>] [<url>]
55 |
56 | ARGUMENTS:
57 | <url> The URL to parse
58 |
59 | OPTIONS:
60 | -c, --copy Copy output to clipboard
61 | --env <env> Get input from an environment variable
62 | -f, --file <file> Save output to file path. Accepts %date, %slugdate, %title, and %slug
63 | --html Expect raw HTML instead of a URL
64 | --include-source/--no-include-source
65 | Include source link to original URL (default: true)
66 | --include-title/--no-include-title
67 | Include page title as h1 (default: true)
68 | --inline-links Use inline links
69 | --metadata Include page title, date, source url as MultiMarkdown metadata
70 | --metadata-yaml Include page title, date, source url as YAML front matter
71 | -p, --paste Get input from clipboard
72 | --paragraph-links/--no-paragraph-links
73 | Insert link references after each paragraph (default: true)
74 | --readability/--no-readability
75 | Use readability (default: true)
76 | -s, --stdin Get input from STDIN
77 | -t, --title-only Output only page title
78 | --unicode/--no-unicode Use Unicode characters instead of ascii replacements (default: true)
79 | --accepted-only Only save accepted answer from StackExchange question pages
80 | --include-comments Include comments on StackExchange question pages
81 | --min-upvotes <min-upvotes>
82 | Only save answers from StackExchange page with minimum number of upvotes (default: 0)
83 | --nv-url Output as a Notational Velocity/nvALT URL
84 | --nv-add Add output to Notational Velocity/nvALT immediately
85 | --nvu-url Output as an nvUltra URL
86 | --nvu-add Add output to nvUltra immediately
87 | --nvu-notebook <nvu-notebook>
88 | Specify an nvUltra notebook for the 'make' URL
89 | --url-template <url-template>
90 | Create a URL scheme from a template using %title, %text, %notebook, %source, %date, %filename, and %slug
91 | --fallback-title <fallback-title>
92 | Fallback title to use if no title is found, accepts %date
93 | --url-open Open URL created from template
94 | -v, --version Display current version number
95 | -h, --help Show help information.
96 | ```
97 |
98 | In its simplest form, Gather expects a URL. Just `gather https://brettterpstra.com` to perform a readability extraction of the main content and a conversion to Markdown, output to STDOUT.
99 |
100 | #### Input Options
101 |
102 | In addition to passing a URL as an argument, you can use `--stdin` to pass the URL via a pipe, e.g. `echo https://brettterpstra.com | gather --stdin`.
103 |
104 | You can have the URL pulled from your clipboard automatically with `--paste`. Just copy the URL from your browser and run `gather -p`. This is ideal for use in macOS Services or Shortcuts.
105 |
106 | You can also pass raw HTML to Gather and have it perform its magic on the source code. Just add `--html` to the command and it will parse it directly rather than trying to extract a URL. Depending on what's in your clipboard, Readability parsing can cause errors. If you run into trouble, run it without Readability using `--no-readability`. HTML can be passed via `--stdin` or `--paste`, e.g. `cat myfile.html | gather --html --stdin`.
107 |
108 | If you specify the `--html` and `--paste` flags, Gather will first check your HTML pasteboard for content. This means that if you've copied by selecting text on a web page or any web view, Gather can operate on that "rich text" version. If you've copied plain text source, that pasteboard will be empty and Gather will fall back to using the plain text pasteboard.
109 |
110 | You can also pull a URL or HTML from an environment variable using `--env VARIABLE`. This is mainly for incorporation into things like PopClip, which passes HTML via the $POPCLIP_HTML variable.
111 |
112 | #### Output Options
113 |
114 | By default the formatted Markdown is output to STDOUT (your terminal screen), where it can be piped to a file or a clipboard utility. There are some built-in options for those things as well.
115 |
116 | If you add `--copy` to the command, the output will be placed on the system clipboard.
117 |
118 | If you add `--file PATH` to the command, the results will be saved to the path you specify. Any existing files at that path will be overwritten. If you want to append output, you're better off using shell redirection, e.g. `gather myurl.com >> compilation.md`
119 |
120 | #### Formatting Options
121 |
122 | You can control the formatting of the output in a couple of ways.
123 |
124 | By default Gather will use reference-style links, and will place the references directly after the paragraph where they occur. You can switch to inline links using `--inline-links`, and you can suppress the per-paragraph linking and collect them all at the end of the document using `--no-paragraph-links`.
125 |
126 | By default Gather will maintain Unicode characters in the output. If you'd prefer to have an ASCII equivalent substituted, you can use `--no-unicode`. This feature may not be working properly yet.
127 |
128 | `--include-source` will add a `[Source](PAGE_URL)` link to the top of the document. You can disable this link with `--no-include-source`. You can also include MultiMarkdown or YAML metadata with source URL, capture date, and page title using `--metadata` or `--metadata-yaml`.
129 |
130 | `--include-title` will attempt to insert an H1 title if the output doesn't have one. If a title can be determined and a matching h1 doesn't exist, it will be added at the top of the document. This is handy when the page has its header (and headline) outside of the content area that Readability chooses as the main block, and the option defaults to true. `--no-include-title` will disable this, but it will not remove an existing h1 from the document.
131 |
132 | If you just want to get the title of a URL, use `--title-only` to output a plain text title with no decoration.
133 |
134 | #### Stack Exchange Options
135 |
136 | Gather has some features specifically for saving answers from StackExchange sites like StackOverflow and AskDifferent. I love saving answers I find on StackOverflow to my notes for later where I can have them tagged, indexed, searchable, and curated. I wanted to make Gather a perfect tool for quickly making those notes.
137 |
138 | You don't have to do anything to trigger Gather's special handling of StackExchange sites. If the page you're trying to save has a body class of "question-page", it will kick in automatically. By default it will save all answers without comments. If there's a selected answer it will be moved to the top of the list.
139 |
140 | To save only the accepted answer (if there is one) for a question, use `--accepted-only`.
141 |
142 | Comments can often be fruitful (and important) to an answer, but they also get messy on popular posts, so they're ignored by default. To include comments when saving a StackExchange page, just add `--include-comments`.
143 |
144 | Lastly, sometimes there's more than one good answer worth saving, but a bunch of zero-vote errors in judgement you don't need in your notes. Use `--min-upvotes X` to filter answers by a minimum number of upvotes. For example, `--min-upvotes 60` would easily weed out the less-desirable answers on an older question. Filtering by upvotes does not affect the accepted answer; if one exists, it's included no matter how many upvotes it has (or doesn't have).
145 |
146 | #### nvUltra/nvALT Options
147 |
148 | If you're running nvUltra, you can output clipped web pages directly to a notebook.
149 |
150 | `--nvu-url` will generate an x-nvultra://make URL that, when opened, will add the Markdown version of the web page as a note, titled with the page title. This flag simply outputs the URL (or copies it with `--copy`) and can be used as part of another script that handles the link.
151 |
152 | `--nvu-add` will immediately open the url and add your note to nvUltra.
153 |
154 | You can include a `--nvu-notebook PATH` option to specify which notebook the note gets added to. If this is left out, the note will be added to the frontmost open notebook in nvUltra.
155 |
156 | [Here's a Shortcut](https://github.com/ttscoff/gather-cli/raw/main/extras/Gather%20to%20nvUltra.shortcut) that accepts text or URLs and runs `gather --nvu-add` on them. I trigger it with LaunchBar to send the current page from my browser straight to nvUltra.
157 |
158 | The `url` and `add` options work with just `--nv` instead of `--nvu` to generate an `nv://` url that will work with Notational Velocity or nvALT.
159 |
160 | #### Other URL handlers
161 |
162 | You can generate any kind of url scheme you want using `--url-template`. This is a string that can contain the following placeholders (all URL encoded):
163 |
164 | - %title: The title of the page
165 | - %text: The markdown text of the page
166 | - %notebook: The contents of the `--nvu-notebook` option, can be used for additional meta in another key
167 | - %source: The canonical URL of the captured page, if available
168 | - %date: Today's date and time in the format YYYY-mm-dd HH:MM
169 | - %filename: The title of the page sanitized for use as a file name
170 | - %slug: The title of the page lowercased, all punctuation and spaces replaced with dashes (`using-gather-as-a-web-clipper`)
171 |
172 | You can include a fallback title using `--fallback-title "TITLE"`. If a page title can't be determined (common when running on snippets of HTML), this variable will be inserted. You can include the "%date" placeholder, which will be replaced with an ISO datetime.
173 |
174 | To reproduce nvUltra's URL scheme with a template:
175 |
176 | --url-template "x-nvultra://make/?txt=%text&title=%filename&notebook=%notebook"
177 |
178 | Add the `--url-open` flag to have the URL automatically executed instead of being returned.
179 |
180 | As an example, here's what Rand Anderson uses for Obsidian:
181 |
182 | The `--url-template`:
183 |
184 | obsidian://new?vault=myvault&name=%filename&content=%text
185 |
186 | These are the other options he uses along with it:
187 |
188 | --url-open --inline-links --no-paragraph-links --fallback-title 'webclip %date'
189 |
190 | ### Troubleshooting
191 |
192 | #### No content returned, only title
193 |
194 | When a url returns only a title via Gather, it usually means the markup was unparseable. In many cases, this is because the page itself is populated by JavaScript after page load, so what Gather retrieves is nothing more than a `