├── .github ├── dependabot.yml └── workflows │ ├── codeql-analysis.yml │ └── go.yml ├── .gitignore ├── LICENSE ├── README.md ├── actor-.go ├── actor-WebFinger.go ├── actor-activityStreams.go ├── actor-feed-.go ├── actor-feed-JSON.go ├── actor-feed-RSS.go ├── actor-feed-icon.go ├── actor-feed-links.go ├── actor-feed-microFormats.go ├── authorized-fetch.go ├── client-.go ├── client-applyLinks.go ├── client-clientOption.go ├── client-loadOption.go ├── constants.go ├── document-.go ├── document-activityStream.go ├── document-html-.go ├── document-html-jsonld-.go ├── document-html-jsonld-embedded.go ├── document-html-jsonld-linked.go ├── document-html-microformats.go ├── document-html-oembed.go ├── document-html-opengraph.go ├── document-html-opengraph_test.go ├── document-html-wordpress.go ├── go.mod ├── go.sum ├── htmlparser ├── htmlparser.go ├── opengraph.go └── opengraph_test.go ├── meta └── The_Adventure_of_Silver_Blaze.jpg ├── sherlock-extras.go ├── sherlock.go ├── sherlock_local_actor_test.go ├── sherlock_local_document_test.go ├── sherlock_remote_actor_test.go ├── sherlock_remote_document_test.go ├── test-files ├── actor-atom-1.xml ├── actor-json-1.json ├── actor-microformats-1.html ├── actor-microformats-3.html ├── actor-rss-1.html ├── actor-rss-1.xml ├── actor-rss-2.xml ├── document-ap-mastodon.html ├── document-ap-mastodon.json ├── document-microformats-1.html ├── document-microformats-2.html ├── document-microformats-3.html ├── document-microformats-4.html └── document-opengraph-1.html ├── utils.go └── utils_test.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | target-branch: "main" 11 | schedule: 12 | interval: "daily" 13 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '27 14 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'go' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | 43 | # Initializes the CodeQL tools for scanning. 
44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v3 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v3 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v3 71 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v5 18 | with: 19 | go-version: '1.23' 20 | 21 | - name: Test 22 | run: go test -race -coverprofile=coverage.txt -covermode=atomic -v ./... 
23 | 24 | - name: Report Code Coverage 25 | uses: codecov/codecov-action@v5 26 | with: 27 | fail_ci_if_error: true 28 | flags: unittests 29 | token: ${{ secrets.CODECOV_TOKEN }} 30 | verbose: true 31 | 32 | - name: GolangCI-Lint 33 | uses: golangci/golangci-lint-action@v6 34 | with: 35 | # Require: The version of golangci-lint to use. 36 | # When `install-mode` is `binary` (default) the value can be v1.2 or v1.2.3 or `latest` to use the latest version. 37 | # When `install-mode` is `goinstall` the value can be v1.2.3, `latest`, or the hash of a commit. 38 | version: latest 39 | skip-cache: true 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | .DS_Store 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sherlock 2 | 3 | Illustration of Sherlock Holmes and Watson in a train car, by Sidney Paget. 
From Arthur Conan Doyle's 1892 book 'The Adventure of Silver Blaze' 4 | 5 | [![GoDoc](https://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://pkg.go.dev/github.com/benpate/sherlock) 6 | [![Version](https://img.shields.io/github/v/release/benpate/sherlock?include_prereleases&style=flat-square&color=brightgreen)](https://github.com/benpate/sherlock/releases) 7 | [![Build Status](https://img.shields.io/github/actions/workflow/status/benpate/sherlock/go.yml?style=flat-square)](https://github.com/benpate/sherlock/actions/workflows/go.yml) 8 | [![Go Report Card](https://goreportcard.com/badge/github.com/benpate/sherlock?style=flat-square)](https://goreportcard.com/report/github.com/benpate/sherlock) 9 | [![Codecov](https://img.shields.io/codecov/c/github/benpate/sherlock.svg?style=flat-square)](https://codecov.io/gh/benpate/sherlock) 10 | 11 | ## Relentless Metadata Inspector 12 | 13 | Sherlock is a Go library that inspects a URL for any and all available metadata, pulling from whatever metadata formats are available, and returning it as an [ActivityStreams 2.0](https://www.w3.org/TR/activitystreams-core/) document. 14 | 15 | The goal is to have a standard interface into all web content, regardless of competing data standards. 
16 | 17 | ### Supported Formats 18 | 19 | ✅ [ActivityPub](https://www.w3.org/TR/activitypub/)/[ActivityStreams](https://www.w3.org/TR/activitystreams-core/) 20 | 21 | ✅ [MicroFormats](https://microformats.org) 22 | 23 | ✅ [Open Graph](https://ogp.me) 24 | 25 | ### In Progress 26 | 27 | 🚧 [WebFinger](https://webfinger.net) 28 | 29 | 🚧 [JSON-LD (Linked)](https://json-ld.org/) 30 | 31 | 🚧 [Twitter Metadata](https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/abouts-cards) 32 | 33 | 🚧 [Microdata](https://html.spec.whatwg.org/multipage/microdata.html#microdata) 34 | 35 | 🚧 [RDFa](https://rdfa.info) 36 | 37 | 🚧 [oEmbed data provider](https://oembed.com) 38 | 39 | 40 | ### Using Sherlock 41 | 42 | ```go 43 | client := sherlock.NewClient() 44 | 45 | // If you only have a URL, then pass it in to .Load() 46 | result, err := client.Load("https://my-url-here") 47 | 48 | // If you have already downloaded a file, then pass it to .Parse() 49 | result, err := sherlock.ParseHTML("https://original-url", &bytes.Buffer) 50 | 51 | ``` 52 | 53 | ### Using Sherlock with Hannibal 54 | 55 | Sherlock can also be used as an http client for [Hannibal](https://github.com/benpate/hannibal), the ActivityPub library for Go. This allows many other online resources to *look like* they're ActivityPub-enabled. 56 | -------------------------------------------------------------------------------- /actor-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/derp" 5 | "github.com/benpate/hannibal/streams" 6 | "github.com/rs/zerolog/log" 7 | ) 8 | 9 | // loadActor returns an ActivityPub Actor representation of the provided URL. 10 | // If an ActivityPub Actor cannot be found, it attempts to create a fake one 11 | // using RSS/Atom feeds, and MicroFormats instead. 
12 | func (client Client) loadActor(identifier string, config *LoadConfig) (streams.Document, error) { 13 | 14 | const location = "sherlock.Client.Actor" 15 | 16 | // RULE: Prevent too many redirects 17 | if config.MaximumRedirects < 0 { 18 | return streams.NilDocument(), derp.InternalError(location, "Maximum redirects exceeded", identifier) 19 | } 20 | 21 | // Validate the identifier 22 | idType := identifierType(identifier) 23 | 24 | if idType == IdentifierTypeNone { 25 | return streams.NilDocument(), derp.BadRequestError(location, "Invalid identifier", identifier) 26 | } 27 | 28 | log.Trace().Str("loc", location).Str("type", idType).Msg("searching for: " + identifier) 29 | 30 | // 1. If this looks like a username, then try WebFinger 31 | if idType == IdentifierTypeUsername { 32 | 33 | if actor := client.loadActor_WebFinger(identifier, config); actor.NotNil() { 34 | log.Trace().Str("loc", location).Msg("Found via WebFinger") 35 | return actor, nil 36 | } 37 | 38 | // If we can't look up the user via WebFinger, then stop here 39 | return streams.NilDocument(), derp.NotFoundError(location, "Unable to load actor by username", identifier) 40 | } 41 | 42 | // RULE: identifier must begin with a valid protocol 43 | identifier = defaultHTTPS(identifier) 44 | 45 | // 2. Try ActivityStreams 46 | if actor := client.loadActor_ActivityStreams(identifier); actor.NotNil() { 47 | log.Trace().Str("loc", location).Msg("Found via ActivityStream") 48 | return actor, nil 49 | } 50 | 51 | // 3. Try RSS/Atom/JSONFeed/MicroFormats 52 | if actor := client.loadActor_Feed(identifier, config); actor.NotNil() { 53 | log.Trace().Str("loc", location).Msg("Found via Feed") 54 | return actor, nil 55 | } 56 | 57 | // 4. Abject failure. Your mother would be ashamed. 
58 | return streams.NilDocument(), derp.NotFoundError(location, "Unable to load actor", identifier) 59 | } 60 | -------------------------------------------------------------------------------- /actor-WebFinger.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/benpate/digit" 7 | "github.com/benpate/hannibal" 8 | "github.com/benpate/hannibal/streams" 9 | "github.com/rs/zerolog/log" 10 | ) 11 | 12 | func (client *Client) loadActor_WebFinger(uri string, config *LoadConfig) streams.Document { 13 | 14 | const location = "sherlock.Client.loadActor_WebFinger" 15 | 16 | // If the ID doesn't look like an email/username then skip this step 17 | if !strings.Contains(uri, "@") { 18 | log.Trace().Str("location", location).Msg("Skipping because uri doesn't look like an email address") 19 | return streams.NilDocument() 20 | } 21 | 22 | // Try to load the Actor via WebFinger 23 | response, err := digit.Lookup(uri, client.RemoteOptions...) 
24 | 25 | // If we don't have a valid response, then return nil (skip this step) 26 | if err != nil { 27 | log.Error().Err(err).Msg("loadActor_WebFinger: skipping because of error") 28 | return streams.NilDocument() 29 | } 30 | 31 | log.Trace().Str("location", location).Interface("response", response).Msg("Found WebFinger response") 32 | 33 | // Search for ActivityPub endpoints 34 | for _, link := range response.Links { 35 | if (link.RelationType == digit.RelationTypeSelf) && (hannibal.IsActivityPubContentType(link.MediaType)) { 36 | if result := client.loadActor_ActivityStreams(link.Href); result.NotNil() { 37 | config.MaximumRedirects-- 38 | return result 39 | } 40 | } 41 | } 42 | 43 | // Search for Profile pages (as a backup) 44 | for _, link := range response.Links { 45 | if link.RelationType == digit.RelationTypeProfile { 46 | if result := client.loadActor_Feed(link.Href, config); result.NotNil() { 47 | config.MaximumRedirects-- 48 | return result 49 | } 50 | } 51 | } 52 | 53 | // Fall through means we couldn't find any relevant links in the WebFinger response 54 | return streams.NilDocument() 55 | } 56 | -------------------------------------------------------------------------------- /actor-activityStreams.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/streams" 5 | "github.com/benpate/remote" 6 | "github.com/benpate/remote/options" 7 | "github.com/benpate/rosetta/mapof" 8 | "github.com/rs/zerolog/log" 9 | ) 10 | 11 | // loadActor_ActivityStreams attempts to load an ActivityStream directly from 12 | // a uri. If the retrieved document is not an ActivityStream, then 13 | // this method returns a NilDocument. 14 | func (client Client) loadActor_ActivityStreams(uri string) streams.Document { 15 | 16 | const location = "sherlock.Client.loadActor_ActivityStreams" 17 | 18 | // Set up the transaction 19 | data := mapof.NewAny() 20 | txn := remote.Get(uri). 
21 | UserAgent(client.UserAgent). 22 | Accept(ContentTypeActivityPub). 23 | With(client.RemoteOptions...). 24 | Result(&data) 25 | 26 | if canTrace() { 27 | txn.With(options.Debug()) 28 | } 29 | 30 | // Try to load the data from the remote server 31 | if err := txn.Send(); err != nil { 32 | log.Trace().Str("location", location).Msg("Error loading URI: " + uri) 33 | return streams.NilDocument() 34 | } 35 | 36 | // If the response is not an ActivityPub document, then exit 37 | if !isActivityStream(txn.ResponseContentType()) { 38 | if canTrace() { 39 | log.Trace().Str("location", location).Msg("Response is not an ActivityStream: " + txn.ResponseContentType()) 40 | } 41 | return streams.NilDocument() 42 | } 43 | 44 | if canTrace() { 45 | log.Trace().Str("location", location).Str("objectId", uri).Msg("Found ActivityStreams document") 46 | } 47 | 48 | // Otherwise, return the Actor with expected metadata 49 | result := streams.NewDocument( 50 | data, 51 | streams.WithClient(client), 52 | streams.WithHTTPHeader(txn.ResponseHeader()), 53 | ) 54 | 55 | return result 56 | } 57 | -------------------------------------------------------------------------------- /actor-feed-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/streams" 5 | "github.com/benpate/remote" 6 | ) 7 | 8 | func (client *Client) loadActor_Feed(url string, config *LoadConfig) streams.Document { 9 | 10 | // Retrieve the URL 11 | txn := remote.Get(url). 12 | UserAgent(client.UserAgent). 13 | With(client.RemoteOptions...) 14 | 15 | if err := txn.Send(); err != nil { 16 | return streams.NilDocument() 17 | } 18 | 19 | // Find and follow links in the response. 20 | if document := client.loadActor_Links(txn, config); document.NotNil() { 21 | return document 22 | } 23 | 24 | // 1. 
Try to generate an Actor from a JSON Feed 25 | if document := client.loadActor_Feed_JSON(txn, config); document.NotNil() { 26 | return document 27 | } 28 | 29 | // 2. Try to generate an Actor from an RSS/Atom Feed 30 | if document := client.loadActor_Feed_RSS(txn, config); document.NotNil() { 31 | return document 32 | } 33 | 34 | // 3. Try to generate an Actor from HTML MicroFormats 35 | if document := client.loadActor_Feed_MicroFormats(txn); document.NotNil() { 36 | return document 37 | } 38 | 39 | // 4. Failure. 40 | return streams.NilDocument() 41 | } 42 | -------------------------------------------------------------------------------- /actor-feed-JSON.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "encoding/json" 5 | "net/url" 6 | 7 | "github.com/benpate/hannibal/streams" 8 | "github.com/benpate/hannibal/vocab" 9 | "github.com/benpate/remote" 10 | "github.com/benpate/rosetta/first" 11 | "github.com/benpate/rosetta/html" 12 | "github.com/benpate/rosetta/mapof" 13 | "github.com/benpate/rosetta/slice" 14 | "github.com/kr/jsonfeed" 15 | ) 16 | 17 | func (client Client) loadActor_Feed_JSON(txn *remote.Transaction, config *LoadConfig) streams.Document { 18 | 19 | // JSONFeed content only 20 | if !isJSONFeedContentType(txn.ResponseContentType()) { 21 | return streams.NilDocument() 22 | } 23 | 24 | var feed jsonfeed.Feed 25 | 26 | body, err := txn.ResponseBody() 27 | if err != nil { 28 | return streams.NilDocument() 29 | } 30 | 31 | // Parse the JSON feed 32 | if err := json.Unmarshal(body, &feed); err != nil { 33 | return streams.NilDocument() 34 | } 35 | 36 | actorID := first.String(feed.FeedURL, txn.RequestURL()) 37 | username := first.String(feed.HomePageURL, txn.RequestURL()) 38 | baseURL, _ := url.Parse(actorID) 39 | 40 | // Create an ActivityStream document 41 | result := config.DefaultValue 42 | result[vocab.AtContext] = vocab.ContextTypeActivityStreams 43 | 
result[vocab.PropertyID] = actorID 44 | result[vocab.PropertyType] = vocab.ActorTypeApplication 45 | result[vocab.PropertyName] = feed.Title 46 | result[vocab.PropertyIcon] = feed.Icon 47 | result[vocab.PropertySummary] = feed.Description 48 | result[vocab.PropertyURL] = username 49 | result[vocab.PropertyOutbox] = mapof.Any{ 50 | vocab.PropertyType: vocab.CoreTypeOrderedCollection, 51 | vocab.PropertyTotalItems: len(feed.Items), 52 | vocab.PropertyOrderedItems: slice.Map(feed.Items, func(item jsonfeed.Item) mapof.Any { 53 | 54 | itemURL, _ := baseURL.Parse(item.URL) 55 | 56 | return mapof.Any{ 57 | vocab.PropertyType: vocab.ObjectTypePage, 58 | vocab.PropertyID: itemURL, 59 | vocab.PropertyActor: feed.FeedURL, 60 | vocab.PropertyName: item.Title, 61 | vocab.PropertySummary: item.Summary, 62 | vocab.PropertyImage: item.Image, 63 | vocab.PropertyContent: jsonFeedToContentHTML(item), 64 | vocab.PropertyPublished: item.DatePublished.Unix(), 65 | vocab.PropertyAttributedTo: jsonFeedToAuthor(feed, item), 66 | } 67 | }), 68 | } 69 | 70 | // Search for WebSub hubs. 
71 | for _, hub := range feed.Hubs { 72 | if hub.Type == "WebSub" { 73 | result[vocab.PropertyEndpoints] = mapof.Any{ 74 | "hub": hub.URL, 75 | } 76 | break 77 | } 78 | } 79 | 80 | // Apply links found in the response headers 81 | client.applyLinks(txn, result) 82 | 83 | // Patch icon into the feed (if necessary) 84 | client.loadActor_Feed_FindHomePageIcon(result) 85 | 86 | // Find/Manufacture the icon for the feed 87 | // client.loadActor_Feed_Icon(txn, result) 88 | 89 | return streams.NewDocument( 90 | result, 91 | streams.WithClient(client), 92 | streams.WithHTTPHeader(txn.ResponseHeader()), 93 | ) 94 | } 95 | 96 | // isJSONFeedContentType returns TRUE if the contentType matches ContentTypeJSONFeed or ContentTypeJSON 97 | func isJSONFeedContentType(contentType string) bool { 98 | 99 | switch contentType { 100 | 101 | case ContentTypeJSONFeed: 102 | return true 103 | 104 | case ContentTypeJSON: 105 | return true 106 | 107 | default: 108 | return false 109 | } 110 | } 111 | 112 | func jsonFeedToAuthor(feed jsonfeed.Feed, item jsonfeed.Item) mapof.Any { 113 | 114 | if item.Author != nil { 115 | return mapof.Any{ 116 | vocab.PropertyID: item.Author.URL, 117 | vocab.PropertyName: item.Author.Name, 118 | vocab.PropertyImage: item.Author.Avatar, 119 | } 120 | } 121 | 122 | if feed.Author != nil { 123 | return mapof.Any{ 124 | vocab.PropertyID: feed.Author.URL, 125 | vocab.PropertyName: feed.Author.Name, 126 | vocab.PropertyImage: feed.Author.Avatar, 127 | } 128 | } 129 | 130 | return mapof.Any{ 131 | vocab.PropertyID: feed.FeedURL, 132 | } 133 | } 134 | 135 | func jsonFeedToContentHTML(item jsonfeed.Item) string { 136 | 137 | var result string 138 | 139 | if item.ContentHTML != "" { 140 | result = item.ContentHTML 141 | } else if item.ContentText != "" { 142 | result = html.FromText(item.ContentText) 143 | } 144 | 145 | return sanitizeHTML(result) 146 | } 147 | -------------------------------------------------------------------------------- /actor-feed-RSS.go: 
-------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "net/url" 5 | "sort" 6 | "time" 7 | 8 | "github.com/benpate/hannibal/streams" 9 | "github.com/benpate/hannibal/vocab" 10 | "github.com/benpate/remote" 11 | "github.com/benpate/rosetta/convert" 12 | "github.com/benpate/rosetta/first" 13 | "github.com/benpate/rosetta/html" 14 | "github.com/benpate/rosetta/list" 15 | "github.com/benpate/rosetta/mapof" 16 | "github.com/benpate/rosetta/slice" 17 | "github.com/mmcdole/gofeed" 18 | ) 19 | 20 | // loadActor_Feed_RSS tries generate an Actor from an RSS or Atom feed 21 | func (client Client) loadActor_Feed_RSS(txn *remote.Transaction, config *LoadConfig) streams.Document { 22 | 23 | // Try to find the RSS feed associated with this link 24 | feed, err := gofeed.NewParser().Parse(txn.ResponseBodyReader()) 25 | 26 | if err != nil { 27 | return streams.NilDocument() 28 | } 29 | 30 | // Sort the feed items (oldest first) 31 | sort.Slice(feed.Items, func(i, j int) bool { 32 | if firstPublishDate := feed.Items[i].PublishedParsed; firstPublishDate != nil { 33 | if secondPublishDate := feed.Items[j].PublishedParsed; secondPublishDate != nil { 34 | return firstPublishDate.Before(*secondPublishDate) 35 | } 36 | return false 37 | } 38 | return false 39 | }) 40 | 41 | actorID := first.String(feed.FeedLink, feed.Link, txn.RequestURL()) 42 | 43 | // Create JSON-LD for the Actor 44 | result := config.DefaultValue 45 | result[vocab.AtContext] = vocab.ContextTypeActivityStreams 46 | result[vocab.PropertyType] = vocab.ActorTypeApplication 47 | result[vocab.PropertyID] = actorID 48 | result[vocab.PropertyName] = feed.Title 49 | result[vocab.PropertySummary] = feed.Description 50 | result[vocab.PropertyURL] = txn.RequestURL() 51 | result[vocab.PropertyOutbox] = mapof.Any{ 52 | vocab.PropertyType: vocab.CoreTypeOrderedCollection, 53 | vocab.PropertyTotalItems: len(feed.Items), 54 | vocab.PropertyOrderedItems: 
slice.Map(feed.Items, feedActivity(actorID, feed)), 55 | } 56 | 57 | // Apply links found in the response headers 58 | client.applyLinks(txn, result) 59 | 60 | // Patch icon into the feed (if necessary) 61 | client.loadActor_Feed_FindHomePageIcon(result) 62 | 63 | // Return the result as a streams.Document 64 | return streams.NewDocument( 65 | result, 66 | streams.WithClient(client), 67 | streams.WithHTTPHeader(txn.ResponseHeader()), 68 | ) 69 | } 70 | 71 | // feedActivity populates an Activity object from a gofeed.Feed and gofeed.Item 72 | func feedActivity(actorID string, feed *gofeed.Feed) func(*gofeed.Item) any { 73 | 74 | baseURL, _ := url.Parse(actorID) 75 | 76 | return func(item *gofeed.Item) any { 77 | 78 | // Resolve relative URLs 79 | linkURL, _ := baseURL.Parse(item.Link) 80 | 81 | result := mapof.Any{ 82 | vocab.PropertyType: vocab.ObjectTypePage, 83 | vocab.PropertyID: linkURL.String(), 84 | vocab.PropertyName: html.ToText(item.Title), 85 | vocab.PropertyActor: feed.FeedLink, 86 | } 87 | 88 | if item.PublishedParsed != nil { 89 | result[vocab.PropertyPublished] = item.PublishedParsed.Unix() 90 | } else { 91 | result[vocab.PropertyPublished] = time.Now().Unix() 92 | } 93 | 94 | if image := feedImage(item); image != nil { 95 | result[vocab.PropertyImage] = image 96 | } 97 | 98 | if summary := feedSummary(item); summary != "" { 99 | result[vocab.PropertySummary] = summary 100 | } 101 | 102 | if contentHTML := feedContent(item); contentHTML != "" { 103 | result[vocab.PropertyContent] = contentHTML 104 | } 105 | 106 | if attributedTo := feedAuthor(actorID, feed, item); attributedTo != nil { 107 | result[vocab.PropertyAttributedTo] = attributedTo 108 | } 109 | 110 | return result 111 | } 112 | } 113 | 114 | func feedAuthor(actorID string, feed *gofeed.Feed, item *gofeed.Item) mapof.Any { 115 | 116 | // Set up default values to override (if we find something better) 117 | result := mapof.Any{ 118 | vocab.PropertyID: actorID, 119 | vocab.PropertyName: 
feed.Title, 120 | vocab.PropertySummary: feed.Description, 121 | } 122 | 123 | // Try to find the image from the feed. It's weird, but easier this way. 124 | if feed.Image != nil { 125 | result[vocab.PropertyImage] = feed.Image.URL 126 | 127 | } else if webfeeds, ok := feed.Extensions["webfeeds"]; ok { 128 | if icon, ok := webfeeds["icon"]; ok { 129 | for _, element := range icon { 130 | if element.Name == "icon" { 131 | result[vocab.PropertyImage] = element.Value 132 | break 133 | } 134 | } 135 | } 136 | } 137 | 138 | // Try to find the author from various sources in the item 139 | if item.Author != nil { 140 | result[vocab.PropertyName] = html.ToText(item.Author.Name) 141 | result[vocab.PropertySummary] = item.Author.Email 142 | return result 143 | } 144 | 145 | if len(item.Authors) > 0 { 146 | if itemAuthor := item.Authors[0]; itemAuthor != nil { 147 | result[vocab.PropertyName] = itemAuthor.Name 148 | result[vocab.PropertySummary] = itemAuthor.Email 149 | return result 150 | } 151 | } 152 | 153 | // Try to find the author from various sources in the feed 154 | if feed.Author != nil { 155 | result[vocab.PropertyName] = html.ToText(feed.Author.Name) 156 | result[vocab.PropertySummary] = feed.Author.Email 157 | return result 158 | } 159 | 160 | if len(feed.Authors) > 0 { 161 | if feedAuthor := feed.Authors[0]; feedAuthor != nil { 162 | result[vocab.PropertyName] = feedAuthor.Name 163 | result[vocab.PropertySummary] = feedAuthor.Email 164 | return result 165 | } 166 | } 167 | 168 | return result 169 | } 170 | 171 | // feedSummary returns a summary of the item in plain text format 172 | func feedSummary(item *gofeed.Item) string { 173 | return sanitizeText(item.Description) 174 | } 175 | 176 | // feedContent returns a sanitized version of the HTML content for this feed 177 | func feedContent(item *gofeed.Item) string { 178 | return sanitizeHTML(item.Content) 179 | } 180 | 181 | // rssImage returns the URL of the first image in the item's enclosure list. 
182 | func feedImage(item *gofeed.Item) map[string]any { 183 | 184 | if item == nil { 185 | return nil 186 | } 187 | 188 | if item.Image != nil { 189 | return map[string]any{ 190 | vocab.PropertyType: vocab.ObjectTypeImage, 191 | vocab.PropertyHref: item.Image.URL, 192 | vocab.PropertySummary: item.Image.Title, 193 | } 194 | } 195 | 196 | // Search for an image in the enclosures 197 | for _, enclosure := range item.Enclosures { 198 | if list.Slash(enclosure.Type).First() == "image" { 199 | return map[string]any{ 200 | vocab.PropertyType: vocab.ObjectTypeImage, 201 | vocab.PropertyHref: enclosure.URL, 202 | } 203 | } 204 | } 205 | 206 | // Search for media extensions (YouTube uses this) 207 | if media, ok := item.Extensions["media"]; ok { 208 | for _, group := range media { 209 | for _, extension := range group { 210 | if medium := extension.Attrs["medium"]; medium == "image" { 211 | return map[string]any{ 212 | vocab.PropertyType: vocab.ObjectTypeImage, 213 | vocab.PropertyHref: extension.Attrs["url"], 214 | vocab.PropertyWidth: convert.Int(extension.Attrs["width"]), 215 | vocab.PropertyHeight: convert.Int(extension.Attrs["height"]), 216 | } 217 | } 218 | } 219 | } 220 | } 221 | 222 | return nil 223 | } 224 | -------------------------------------------------------------------------------- /actor-feed-icon.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "slices" 5 | "strings" 6 | 7 | "github.com/benpate/digit" 8 | "github.com/benpate/hannibal/vocab" 9 | "github.com/benpate/remote" 10 | "github.com/benpate/rosetta/convert" 11 | "github.com/benpate/rosetta/slice" 12 | "github.com/rs/zerolog/log" 13 | ) 14 | 15 | // loadActor_Feed_FindRootLevelIcon searches for an icon from the website homepage 16 | // and adds it to the document if found. 
17 | func (client *Client) loadActor_Feed_FindHomePageIcon(document map[string]any) { 18 | 19 | // if the document already has an icon, then NOOP 20 | if icon := convert.String(document[vocab.PropertyIcon]); icon != "" { 21 | return 22 | } 23 | 24 | // Get the document ID from the document 25 | documentID := convert.String(document[vocab.PropertyID]) 26 | documentID = hostOnly(documentID) 27 | 28 | // Get the root-level document from the server 29 | txn := remote.Get(documentID) 30 | 31 | if err := txn.Send(); err != nil { 32 | log.Error().Err(err).Str("documentID", documentID).Msg("Error sending request") 33 | return 34 | } 35 | 36 | // Find Icons and apply them to the document 37 | client.loadActor_Feed_FindIcon(txn, document) 38 | } 39 | 40 | // loadActor_Feed_FindIcon searches for an icon from the remote transaction and 41 | // adds it into the document if found. 42 | func (client *Client) loadActor_Feed_FindIcon(txn *remote.Transaction, document map[string]any) { 43 | 44 | // if the document already has an icon, then NOOP 45 | if icon := convert.String(document[vocab.PropertyIcon]); icon != "" { 46 | return 47 | } 48 | 49 | // Find all links in the root-level document 50 | links := client.loadActor_DiscoverLinks(txn) 51 | 52 | // Choose the best icon and add it to the result 53 | if icon := client.loadActor_Feed_FindIconLink(links); icon != "" { 54 | document[vocab.PropertyIcon] = icon 55 | } 56 | } 57 | 58 | // Search for a sitewide Favicon, and add it to the default document if found 59 | func (client *Client) loadActor_Feed_FindIconLink(links digit.LinkSet) string { 60 | 61 | // Find all icon links 62 | icons := slice.Filter(links, func(link digit.Link) bool { 63 | return strings.Contains(link.RelationType, "icon") 64 | }) 65 | 66 | // Empty results are empty 67 | if len(icons) == 0 { 68 | return "" 69 | } 70 | 71 | // Find the "best" icon, and set it as the default value 72 | slices.SortFunc(icons, sortImageLinks) 73 | return icons[0].Href 74 | 75 | // Are 
there other kinds of icons we can search for? 76 | } 77 | -------------------------------------------------------------------------------- /actor-feed-links.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | 7 | "github.com/PuerkitoBio/goquery" 8 | "github.com/benpate/digit" 9 | "github.com/benpate/hannibal/streams" 10 | "github.com/benpate/remote" 11 | "github.com/tomnomnom/linkheader" 12 | "golang.org/x/net/html" 13 | ) 14 | 15 | // loadActor_Links finds and follows all relevant links for an http.Response. 16 | // If it finds a link to an ActivityStream, RSS Feed, or similar, then it returns 17 | // the corresponding Actor document. 18 | // Otherwise, it returns an empty streams.Document that includes metadata for 19 | func (client *Client) loadActor_Links(txn *remote.Transaction, config *LoadConfig) streams.Document { 20 | 21 | // Extranct all Links from the HTTP Header and HTML Document 22 | links := client.loadActor_DiscoverLinks(txn) 23 | 24 | // If links point directly to something we can use (ActivityPub, RSS, etc) then use it 25 | if document := client.loadActor_FollowLinks(txn, links, config); document.NotNil() { 26 | return document 27 | } 28 | 29 | // Otherwise, populate additional links (such as Hubs, Icons, etc) 30 | // and return an empty streams.Document 31 | // TODO: https://trello.com/c/t51YFiA2/234-sherlock-restore-websub-links 32 | return streams.NilDocument() 33 | } 34 | 35 | // loadActor_DiscoverLinks finds all links in a transaction, from both the 36 | // http header and in the HTML document. 
37 | func (client *Client) loadActor_DiscoverLinks(txn *remote.Transaction) digit.LinkSet { 38 | 39 | // Retrieve Links in HTTP Header 40 | headerValue := txn.ResponseHeader().Get(HTTPHeaderLink) 41 | links := linkheader.Parse(headerValue) 42 | result := make(digit.LinkSet, 0, len(links)) 43 | requestURL := txn.RequestURL() 44 | 45 | for _, link := range links { 46 | result = append(result, digit.Link{ 47 | RelationType: link.Rel, 48 | MediaType: link.Param("type"), 49 | Href: getRelativeURL(requestURL, link.URL), 50 | }) 51 | } 52 | 53 | // Retrieve Links in HTML Document 54 | if htmlDocument, err := goquery.NewDocumentFromReader(txn.ResponseBodyReader()); err == nil { 55 | 56 | // Get "relevant" links from the document 57 | selection := htmlDocument.Find("[rel=alternate],[rel=self],[rel=feed],[rel=hub],[rel=icon],[rel=apple-touch-icon],[rel=apple-touch-icon-precomposed],[rel=mask-icon]") 58 | 59 | // Add links to the accumulator 60 | for _, link := range selection.Nodes { 61 | result = append(result, digit.Link{ 62 | RelationType: nodeAttribute(link, "rel"), 63 | MediaType: nodeAttribute(link, "type"), 64 | Href: getRelativeURL(requestURL, nodeAttribute(link, "href")), 65 | Properties: map[string]string{ 66 | "sizes": nodeAttribute(link, "sizes"), 67 | }, 68 | }) 69 | } 70 | } 71 | 72 | return result 73 | } 74 | 75 | // actor_ScanHTMLForWebMentions tries to load/use any linked feeds 76 | func (client *Client) loadActor_FollowLinks(txn *remote.Transaction, links digit.LinkSet, config *LoadConfig) streams.Document { 77 | 78 | // If the client is not allowed to follow redirects (or has used all of them already), 79 | // then there is nothing to do here. Return an empty document instead. 80 | if config.MaximumRedirects < 1 { 81 | return streams.NilDocument() 82 | } 83 | 84 | // If we have one or more links, then search them in order... 
85 | if len(links) > 0 { 86 | 87 | for _, mediaType := range []string{ContentTypeActivityPub, ContentTypeJSONFeed, ContentTypeJSON, ContentTypeAtom, ContentTypeRSS} { 88 | 89 | link := findSelfOrAlternateLink(links, mediaType) 90 | 91 | if link.IsEmpty() { 92 | continue 93 | } 94 | 95 | // If the link points to the same URL as the original request, then we're 96 | // already at the right place. So don't traverse the link. 97 | if link.Href == txn.RequestURL() { 98 | return streams.NilDocument() 99 | } 100 | 101 | if document, err := client.loadActor(link.Href, config); err == nil { 102 | if document.NotNil() { 103 | config.MaximumRedirects-- 104 | return document 105 | } 106 | } 107 | } 108 | } 109 | 110 | return streams.NilDocument() 111 | } 112 | 113 | /****************************************** 114 | * Helper Functions 115 | ******************************************/ 116 | 117 | // nodeAttribute searches for a specific attribute in a node and returns its value 118 | func nodeAttribute(node *html.Node, name string) string { 119 | 120 | if node == nil { 121 | return "" 122 | } 123 | 124 | for _, attr := range node.Attr { 125 | if attr.Key == name { 126 | return attr.Val 127 | } 128 | } 129 | 130 | return "" 131 | } 132 | 133 | // TODO: HIGH: Scan all references and perhaps use https://pkg.go.dev/net/url#URL.ResolveReference instead? 
// getRelativeURL resolves relativeURL against baseURL and returns the
// resulting absolute URL as a string.
//
//   - URLs that already start with "http://" or "https://" are returned unchanged.
//   - Scheme-relative URLs ("//host/path") are assumed to be HTTPS.
//   - Everything else is resolved the way a browser would (RFC 3986), via
//     url.URL.ResolveReference: root-relative paths replace the base path and
//     query, and path-relative references are resolved against the base
//     document's directory.
//
// If either URL cannot be parsed, relativeURL is returned unchanged rather
// than panicking.
func getRelativeURL(baseURL string, relativeURL string) string {

	// If the relative URL is already absolute, then just return it
	if strings.HasPrefix(relativeURL, "http://") || strings.HasPrefix(relativeURL, "https://") {
		return relativeURL
	}

	// If the relative URL is scheme-relative, then assume HTTPS
	if strings.HasPrefix(relativeURL, "//") {
		return "https:" + relativeURL
	}

	// Parse both sides so that we can do URL-math on them
	base, err := url.Parse(baseURL)

	if err != nil {
		return relativeURL
	}

	reference, err := url.Parse(relativeURL)

	if err != nil {
		return relativeURL
	}

	// ResolveReference keeps the reference's own query string and drops the
	// base's, so "/feed?format=rss" is not mangled into an escaped path.
	return base.ResolveReference(reference).String()
}
streams.Document { 17 | 18 | // Parse the document URL 19 | parsedURL, err := url.Parse(txn.RequestURL()) 20 | 21 | if err != nil { 22 | return streams.NilDocument() 23 | } 24 | 25 | // Parse the HTML document 26 | data := microformats.Parse(txn.ResponseBodyReader(), parsedURL) 27 | 28 | // Search Microformats for an h-feed 29 | for _, feed := range data.Items { 30 | 31 | if slice.Contains(feed.Type, "h-feed") { 32 | 33 | items := make([]mapof.Any, 0, len(feed.Children)) 34 | 35 | for _, child := range feed.Children { 36 | if slice.Contains(child.Type, "h-entry") { 37 | items = append(items, microformat_Item(feed, child)) 38 | } 39 | } 40 | 41 | if len(items) > 0 { 42 | 43 | data := mapof.Any{ 44 | vocab.PropertyID: parsedURL.String(), 45 | vocab.PropertyType: vocab.ActorTypeApplication, 46 | vocab.PropertyName: microformat_Property(feed, "name"), 47 | vocab.PropertyImage: microformat_Property(feed, "photo"), 48 | vocab.PropertyAttributedTo: microformat_Property(feed, "author"), 49 | vocab.PropertyOutbox: microformat_Outbox(items), 50 | } 51 | 52 | // Apply links found in the response headers 53 | client.applyLinks(txn, data) 54 | 55 | // Patch icon into the feed (if necessary) 56 | client.loadActor_Feed_FindHomePageIcon(data) 57 | 58 | // Return the (successfully?) parsed document to the caller. 
59 | return streams.NewDocument( 60 | data, 61 | streams.WithClient(client), 62 | streams.WithHTTPHeader(txn.ResponseHeader()), 63 | ) 64 | } 65 | } 66 | } 67 | 68 | return streams.NilDocument() 69 | } 70 | 71 | // microformat_Outbox wraps a slice of items in an ActivityStreams OrderedCollection 72 | func microformat_Outbox(items []mapof.Any) mapof.Any { 73 | 74 | return mapof.Any{ 75 | vocab.PropertyType: vocab.CoreTypeOrderedCollection, 76 | vocab.PropertyTotalItems: len(items), 77 | vocab.PropertyOrderedItems: items, 78 | } 79 | } 80 | 81 | // microformat_Item converts a Microformat entry into an ActivityStreams document 82 | func microformat_Item(feed *microformats.Microformat, entry *microformats.Microformat) mapof.Any { 83 | 84 | result := mapof.Any{ 85 | vocab.PropertyID: microformat_Property(entry, "url"), 86 | vocab.PropertyName: microformat_Property(entry, "name"), 87 | vocab.PropertySummary: microformat_Property(entry, "summary"), 88 | } 89 | 90 | // Get properties from entry 91 | 92 | // Get photo from entry, then feed 93 | if photoURL := microformat_Property(entry, "photo"); photoURL != "" { 94 | result[vocab.PropertyImage] = photoURL 95 | } else if photoURL := microformat_Property(feed, "photo"); photoURL != "" { 96 | result[vocab.PropertyImage] = photoURL 97 | } 98 | 99 | // Get author from entry, then feed 100 | if author := microformat_First(entry.Properties["author"]); author != nil { 101 | result[vocab.PropertyAttributedTo] = microformat_Author(author) 102 | } else if author := microformat_First(feed.Properties["author"]); author != nil { 103 | result[vocab.PropertyAttributedTo] = microformat_Author(author) 104 | } 105 | 106 | // Get the publish date from the entry 107 | if published := microformat_Property(entry, "published"); published != "" { 108 | if publishDate, err := time.Parse(time.RFC3339, published); err == nil { 109 | result[vocab.PropertyPublished] = publishDate.Unix() 110 | } 111 | } 112 | 113 | // Default PublishDate just in case 
114 | if result[vocab.PropertyPublished] == 0 { 115 | result[vocab.PropertyPublished] = time.Now().Unix() 116 | } 117 | 118 | return result 119 | } 120 | 121 | // microformat_Author converts a Microformat entry into an ActivityStreams document 122 | func microformat_Author(entry *microformats.Microformat) mapof.Any { 123 | 124 | if entry == nil { 125 | return mapof.NewAny() 126 | } 127 | 128 | return mapof.Any{ 129 | vocab.PropertyID: microformat_Property(entry, "url"), 130 | vocab.PropertyName: microformat_Property(entry, "name"), 131 | vocab.PropertyImage: microformat_Property(entry, "photo", "logo"), 132 | } 133 | } 134 | 135 | // microformat_First returns the first item in a slice of items 136 | func microformat_First(value any) *microformats.Microformat { 137 | 138 | switch o := value.(type) { 139 | case []any: 140 | if len(o) > 0 { 141 | return microformat_First(o[0]) 142 | } 143 | 144 | case *microformats.Microformat: 145 | return o 146 | } 147 | 148 | return nil 149 | } 150 | 151 | // microformat_Property returns the first value of a property 152 | func microformat_Property(entry *microformats.Microformat, names ...string) string { 153 | 154 | if entry == nil { 155 | return "" 156 | } 157 | 158 | for _, name := range names { 159 | 160 | if value, ok := entry.Properties[name]; ok { 161 | 162 | for _, item := range value { 163 | switch o := item.(type) { 164 | case string: 165 | return o 166 | 167 | case *microformats.Microformat: 168 | return o.Value 169 | } 170 | } 171 | } 172 | } 173 | 174 | return "" 175 | } 176 | -------------------------------------------------------------------------------- /authorized-fetch.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "crypto" 5 | "net/http" 6 | 7 | "github.com/benpate/derp" 8 | "github.com/benpate/hannibal/sigs" 9 | "github.com/benpate/remote" 10 | "github.com/rs/zerolog/log" 11 | ) 12 | 13 | // AuthorizedFetch is a remote.Option that 
signs all outbound requests according to the 14 | // ActivityPub "Authorized Fetch" convention: https://funfedi.dev/testing_tools/http_signatures/ 15 | func AuthorizedFetch(publicKeyID string, privateKey crypto.PrivateKey) remote.Option { 16 | 17 | if publicKeyID == "" { 18 | log.Info().Msg("AuthorizedFetch: No publicKeyID provided") 19 | return remote.Option{} 20 | } 21 | 22 | if privateKey == nil { 23 | log.Info().Msg("AuthorizedFetch: No privateKey provided") 24 | return remote.Option{} 25 | } 26 | 27 | return remote.Option{ 28 | 29 | // ModifyRequest is called after an http.Request has been generated, but before it is sent to the 30 | // remote server. It can be used to modify the request, or to replace it entirely. 31 | // If it returns a non-nil http.Response, then that is used INSTEAD OF calling the remote server. 32 | // If it returns a nil http.Response, then the request is sent to the remote server as normal. 33 | ModifyRequest: func(t *remote.Transaction, request *http.Request) *http.Response { 34 | 35 | signer := sigs.NewSigner( 36 | publicKeyID, 37 | privateKey, 38 | sigs.SignerFields("(request-target)", "host", "date"), 39 | ) 40 | 41 | if err := signer.Sign(request); err != nil { 42 | derp.Report(derp.Wrap(err, "sherlock.AuthorizedFetch", "Error signing request")) 43 | } 44 | 45 | return nil 46 | }, 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /client-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/streams" 5 | "github.com/benpate/remote" 6 | ) 7 | 8 | // Client implements the hannibal/streams.Client interface, and is used to load JSON-LD documents from remote servers. 9 | // The sherlock client maps additional meta-data into a standard ActivityStreams document. 
10 | type Client struct { 11 | UserAgent string // User-Agent string to send with every request 12 | RemoteOptions []remote.Option // Additional options to pass to the remote library 13 | } 14 | 15 | // NewClient returns a fully initialized Client object 16 | func NewClient(options ...ClientOption) Client { 17 | 18 | // Create a default Client 19 | result := Client{ 20 | UserAgent: "Sherlock: github.com/benpate/sherlock", 21 | RemoteOptions: make([]remote.Option, 0), 22 | } 23 | 24 | // Apply options 25 | result.WithOptions(options...) 26 | 27 | // Success 28 | return result 29 | } 30 | 31 | // Load retrieves a document from a remote server and returns it as a streams.Document 32 | // It uses either the "Actor" or "Document" methods of generating it ActivityStreams 33 | // result. 34 | // "Document" treats the URL as a single ActivityStreams document, translating 35 | // OpenGraph, MicroFormats, and JSON-LD into an ActivityStreams equivalent. 36 | // "Actor" treats the URL as an Actor, translating RSS, Atom, JSON, and 37 | // MicroFormats feeds into an ActivityStream equivalent. 38 | func (client Client) Load(url string, options ...any) (streams.Document, error) { 39 | 40 | config := NewLoadConfig(options...) 
41 | 42 | // If "Actor" is requested, then use that discovery method 43 | if config.DocumentType == LoadDocumentTypeActor { 44 | return client.loadActor(url, &config) 45 | } 46 | 47 | // Otherwise, use "Document" discovery method 48 | return client.loadDocument(url, config) 49 | } 50 | 51 | // WithOptions applies one or more ClientOption functions to the client 52 | func (client *Client) WithOptions(options ...ClientOption) { 53 | for _, option := range options { 54 | option(client) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /client-applyLinks.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/vocab" 5 | "github.com/benpate/remote" 6 | "github.com/benpate/rosetta/mapof" 7 | "github.com/tomnomnom/linkheader" 8 | ) 9 | 10 | // applyLinks searches for common link headers in the response, and applies them to the data map 11 | func (client *Client) applyLinks(txn *remote.Transaction, data mapof.Any) { 12 | 13 | links := linkheader.ParseMultiple(txn.Response().Header["Link"]) 14 | 15 | for _, link := range links { 16 | switch link.Rel { 17 | 18 | case LinkRelationIcon: 19 | 20 | // Add an icon if it doesn't already exist 21 | if _, ok := data[vocab.PropertyIcon]; !ok { 22 | data[vocab.PropertyIcon] = link.URL 23 | } 24 | 25 | case LinkRelationHub: 26 | 27 | // Guarantee that the `endpoints` value exists 28 | if _, ok := data[vocab.PropertyEndpoints]; !ok { 29 | data[vocab.PropertyEndpoints] = make(map[string]any) 30 | } 31 | 32 | // Set the `endpoints.websub` value 33 | if endpoints, ok := data[vocab.PropertyEndpoints].(map[string]any); ok { 34 | endpoints["websub"] = link.URL 35 | } 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /client-clientOption.go: -------------------------------------------------------------------------------- 1 | package 
// LoadDocumentTypeUnknown is the default document type, used when the caller
// has not requested a specific discovery method.
const LoadDocumentTypeUnknown = 0

// LoadDocumentTypeActor requests that the URL be loaded as an Actor.
const LoadDocumentTypeActor = 1

// LoadDocumentTypeCollection requests that the URL be loaded as a Collection.
const LoadDocumentTypeCollection = 2

// LoadDocumentTypeDocument requests that the URL be loaded as a Document.
const LoadDocumentTypeDocument = 3

// LoadConfig collects the settings that control a single Load call.
type LoadConfig struct {
	DocumentType     int            // One of the LoadDocumentType* constants
	MaximumRedirects int            // Remaining number of links/redirects that may be followed
	DefaultValue     map[string]any // Values to start from when building the result document
}

// LoadOption is a functional option that modifies a LoadConfig.
type LoadOption func(*LoadConfig)

// NewLoadConfig returns a LoadConfig populated with defaults (six redirects,
// unknown document type, empty default value) and then modified by options.
//
// Options may be LoadOption values or plain func(*LoadConfig) functions.
// Nil options and values of any other type are ignored. The returned
// DefaultValue is never nil, so callers can always write into it.
func NewLoadConfig(options ...any) LoadConfig {

	result := LoadConfig{
		MaximumRedirects: 6,
		DocumentType:     LoadDocumentTypeUnknown,
		DefaultValue:     make(map[string]any),
	}

	for _, option := range options {
		switch typed := option.(type) {

		case LoadOption:
			if typed != nil {
				typed(&result)
			}

		case func(*LoadConfig):
			if typed != nil {
				typed(&result)
			}
		}
	}

	// A custom option may have cleared the map; restore the invariant.
	if result.DefaultValue == nil {
		result.DefaultValue = make(map[string]any)
	}

	return result
}

// AsActor is a LoadOption that requests the "Actor" discovery method.
func AsActor() LoadOption {
	return asDocumentType(LoadDocumentTypeActor)
}

// AsDocument is a LoadOption that requests the "Document" discovery method.
func AsDocument() LoadOption {
	return asDocumentType(LoadDocumentTypeDocument)
}

// AsCollection is a LoadOption that sets the document type to Collection.
func AsCollection() LoadOption {
	return asDocumentType(LoadDocumentTypeCollection)
}

// asDocumentType returns a LoadOption that sets the DocumentType field.
func asDocumentType(documentType int) LoadOption {
	return func(config *LoadConfig) {
		config.DocumentType = documentType
	}
}

// WithMaximumRedirects is a LoadOption that sets how many links/redirects
// may be followed while loading.
func WithMaximumRedirects(maximumRedirects int) LoadOption {
	return func(config *LoadConfig) {
		config.MaximumRedirects = maximumRedirects
	}
}

// WithDefaultValue is a LoadOption that sets the values to start from when
// building the result document. A nil map is ignored, which leaves the
// existing (non-nil) default in place.
func WithDefaultValue(defaultValue map[string]any) LoadOption {
	return func(config *LoadConfig) {
		if defaultValue == nil {
			return
		}
		config.DefaultValue = defaultValue
	}
}
standard MIME Type for JSON content 23 | const ContentTypeJSON = "application/json" 24 | 25 | // ContentTypeJSONFeed is the standard MIME Type for JSON Feed content 26 | // https://en.wikipedia.org/wiki/JSON_Feed 27 | const ContentTypeJSONFeed = "application/feed+json" 28 | 29 | // ContentTypeJSONLD is the standard MIME Type for JSON-LD content 30 | // https://en.wikipedia.org/wiki/JSON-LD 31 | const ContentTypeJSONLD = "application/ld+json" 32 | 33 | // ContentTypeJSONResourceDescriptor is the standard MIME Type for JSON Resource Descriptor content 34 | // which is used by WebFinger: https://datatracker.ietf.org/doc/html/rfc7033#section-10.2 35 | const ContentTypeJSONResourceDescriptor = "application/jrd+json" 36 | 37 | // ContentTypePlain is the default plaintext MIME type 38 | const ContentTypePlain = "text/plain" 39 | 40 | // ContentTypeRSS is the standard MIME Type for RSS Feeds 41 | const ContentTypeRSS = "application/rss+xml" 42 | 43 | // ContentTypeXML is the standard MIME Type for XML content 44 | const ContentTypeXML = "application/xml" 45 | 46 | /****************************************** 47 | * Document Formats 48 | ******************************************/ 49 | 50 | const FormatActivityStream = "ACTIVITYSTREAM" 51 | 52 | const FormatRSS = "RSS" 53 | 54 | const FormatJSONFeed = "JSONFEED" 55 | 56 | const FormatMicroFormats = "MICROFORMATS" 57 | 58 | /****************************************** 59 | * HTTP Headers 60 | ******************************************/ 61 | 62 | // HTTPHeaderAccept is the string used in the HTTP header to request a response be encoded as a MIME type 63 | const HTTPHeaderAccept = "Accept" 64 | 65 | const HTTPHeaderCacheControl = "Cache-Control" 66 | 67 | const HTTPHeaderLink = "Link" 68 | 69 | /****************************************** 70 | * Link Relations 71 | ******************************************/ 72 | 73 | const LinkRelationAlternate = "alternate" 74 | 75 | const LinkRelationFeed = "feed" 76 | 77 | const 
LinkRelationIcon = "icon" 78 | 79 | const LinkRelationHub = "hub" 80 | 81 | const LinkRelationSelf = "self" 82 | 83 | /****************************************** 84 | * Identifier Types 85 | ******************************************/ 86 | 87 | const IdentifierTypeUsername = "USERNAME" 88 | 89 | const IdentifierTypeURL = "URL" 90 | 91 | const IdentifierTypeNone = "NONE" 92 | -------------------------------------------------------------------------------- /document-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/derp" 5 | "github.com/benpate/hannibal/streams" 6 | ) 7 | 8 | // LoadDocument tries to retrieve a URL from the internet, then return it into a streams.Document. 9 | // If the remote resource is not already an ActivityStreams document, it will attempt to convert from 10 | // RSS, Atom, JSONFeed, and HTML MicroFormats. 11 | func (client Client) loadDocument(url string, config LoadConfig) (streams.Document, error) { 12 | 13 | const location = "sherlock.Client.loadDocument" 14 | 15 | // RULE: url must not be empty 16 | if url == "" { 17 | return streams.NilDocument(), derp.BadRequestError(location, "Empty URI") 18 | } 19 | 20 | // RULE: Prevent too many redirects 21 | if config.MaximumRedirects < 0 { 22 | return streams.NilDocument(), derp.InternalError(location, "Maximum redirects exceeded", url) 23 | } 24 | 25 | // RULE: url must begin with a valid protocol 26 | url = defaultHTTPS(url) 27 | 28 | // 1. If we can load the document as an ActivityStream, then there you go. 29 | if document := client.loadDocument_ActivityStream(url); document.NotNil() { 30 | return document, nil 31 | } 32 | 33 | // 2. If we can load the document as HTML, then that will do. 34 | if document := client.loadDocument_HTML(url, config.DefaultValue); document.NotNil() { 35 | return document, nil 36 | } 37 | 38 | // 3. If the default value is good enough, then use that. 
39 | // This may happen when RSS feeds have *some* information, but a website CAPTCHA 40 | // block us from loading more details. 41 | if len(config.DefaultValue) > 0 { 42 | return streams.NewDocument(config.DefaultValue, streams.WithClient(client)), nil 43 | } 44 | 45 | // 4. Abject failure. 46 | return streams.NilDocument(), derp.BadRequestError(location, "Unable to load document", url, config) 47 | } 48 | -------------------------------------------------------------------------------- /document-activityStream.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/streams" 5 | "github.com/benpate/hannibal/vocab" 6 | "github.com/benpate/remote" 7 | "github.com/benpate/rosetta/mapof" 8 | ) 9 | 10 | // loadDocument_ActivityStream tries to load a remote document as an ActivityStream 11 | // If successful, it will return a streams.Document with the appropriate metadata. 12 | // Otherwise, it returns a nil document. 13 | func (client *Client) loadDocument_ActivityStream(uri string) streams.Document { 14 | 15 | data := mapof.NewAny() 16 | 17 | txn := remote.Get(uri). 18 | UserAgent(client.UserAgent). 19 | Accept(vocab.ContentTypeActivityPub). 20 | With(client.RemoteOptions...). 
21 | Result(&data) 22 | 23 | if err := txn.Send(); err != nil { 24 | return streams.NilDocument() 25 | } 26 | 27 | if !isActivityStream(txn.ResponseContentType()) { 28 | return streams.NilDocument() 29 | } 30 | 31 | return streams.NewDocument( 32 | data, 33 | streams.WithClient(client), 34 | streams.WithHTTPHeader(txn.ResponseHeader()), 35 | ) 36 | } 37 | -------------------------------------------------------------------------------- /document-html-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "github.com/benpate/hannibal/streams" 5 | "github.com/benpate/hannibal/vocab" 6 | "github.com/benpate/remote" 7 | "github.com/benpate/rosetta/mapof" 8 | ) 9 | 10 | // loadDocument_HTML tries to mimic an ActivityPub document by parsing meta-data on 11 | // a remote HTML page. The `data` argument is a map that may already contain some 12 | // data, and will be updated with any new data that is discovered. 13 | func (client *Client) loadDocument_HTML(uri string, data mapof.Any) streams.Document { 14 | 15 | // Retrieve the HTML document 16 | txn := remote.Get(uri). 17 | UserAgent(client.UserAgent). 18 | With(client.RemoteOptions...) 
19 | 20 | if err := txn.Send(); err != nil { 21 | return streams.NilDocument() 22 | } 23 | 24 | // Read the response body 25 | body, err := txn.ResponseBody() 26 | 27 | if err != nil { 28 | return streams.NilDocument() 29 | } 30 | 31 | // Default values for Web Pages 32 | data[vocab.PropertyID] = uri 33 | data[vocab.PropertyURL] = uri 34 | data[vocab.PropertyType] = vocab.ObjectTypePage 35 | 36 | // Apply links found in the response headers 37 | client.applyLinks(txn, data) 38 | 39 | // Add JSON-LD data to the data 40 | client.loadDocument_JSONLD(body, data) 41 | 42 | // Add OpenGraph (via HTMLInfo) data to the data 43 | client.loadDocument_OpenGraph(uri, body, data) 44 | 45 | // Add Microformats2 data to the data 46 | client.loadDocument_MicroFormats(uri, body, data) 47 | 48 | // Return success! 49 | return streams.NewDocument(data, 50 | streams.WithClient(client), 51 | streams.WithHTTPHeader(txn.ResponseHeader()), 52 | ) 53 | } 54 | -------------------------------------------------------------------------------- /document-html-jsonld-.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/PuerkitoBio/goquery" 7 | ) 8 | 9 | func (client *Client) loadDocument_JSONLD(body []byte, result map[string]any) { 10 | 11 | // Search the returned HTML for JSON-LD 12 | if gqDoc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)); err == nil { 13 | 14 | if client.loadDocument_JSONLD_Embedded(gqDoc, result) { 15 | withContext(result) 16 | return 17 | } 18 | 19 | if client.loadDocument_JSONLD_Linked(gqDoc, result) { 20 | withContext(result) 21 | return 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /document-html-jsonld-embedded.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/PuerkitoBio/goquery" 7 | 
"github.com/benpate/rosetta/mapof" 8 | ) 9 | 10 | // loadDocument_JSONLD_Embedded searches the GoQuery document for links to ActivityPub-like documents. 11 | func (client *Client) loadDocument_JSONLD_Embedded(document *goquery.Document, result mapof.Any) bool { 12 | // TODO: LOW: Add support for JSON-LD metadata embedded in a 50 | 51 | 52 | 53 | 54 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 66 | 101 | 102 | 103 | 104 |
105 | 106 | Skip to main content 107 | 118 | 124 | 163 |
164 |
165 |
166 |

Fun with Words

167 |

Published on 168 | 169 | under the Fun with Words category.

170 | 171 | 172 |
173 | 186 | 187 |
188 |

Responses

189 |
190 |

Comment on this post

191 |

Respond to this post by sending a Webmention.

192 |

Have a comment? Email me at readers@jamesg.blog.

193 |
194 |
195 |
196 |

Go Back to the Top

197 | 221 | 222 | 223 | 224 | 225 | 228 | 229 | 230 |
231 | 303 | 304 | 305 | 339 | 340 | 341 | 342 | 343 | 344 | -------------------------------------------------------------------------------- /test-files/document-microformats-2.html: -------------------------------------------------------------------------------- 1 | HTTP/1.1 200 OK 2 | Accept-Ranges: bytes 3 | Age: 0 4 | Cache-Control: public,max-age=0,must-revalidate 5 | Content-Length: 10839 6 | Content-Type: text/html; charset=UTF-8 7 | Date: Sat, 23 Sep 2023 18:25:35 GMT 8 | Etag: "791cd7288b52769fe197a54a6cead5ac-ssl" 9 | Server: Netlify 10 | Strict-Transport-Security: max-age=31536000 11 | X-Nf-Request-Id: 01HB1MMDMKHZG5Q92HPNVW5398 12 | Connection: close 13 | 14 | 15 | 16 | 17 | 18 | 19 | Never 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
33 | 34 |
35 | 55 |
56 |
57 | 58 | 63 | 64 |
65 | 66 | 67 | 68 |
69 | 70 |

Never

71 | 72 | Published by 73 | 74 |
75 |

This is not a bucket list. These are just a handful of things that haven’t happened for me yet, and perhaps some things I will never get around to. Never say never, though.

76 |

Never been on a cruise ship #

77 |

When I was nineteen or twenty I read the David Foster Wallace essay, later republished as A Supposedly Fun Thing I Will Never Do Again. My grandparents were not cruise ship people, my parents were not cruise ship people, and I am not a cruise ship person. I know I’d be one of the constantly seasick ones. The idea of having to eat at an assigned dinner table with the same random strangers for a whole week turns me off. I like spontaneous fun, not organized fun. Any time I think I do entertain the idea, I Google 'cruise ship disasters' for good reminders to keep my feet on dry land.

78 |

Never been to a Disney Park #

79 |

Along the same lines, my parents were not Disney People. At least, I think a Disney vacation was out of their price range when my sister and I were kids. I can name all the other theme parks (Great Escape, Six Flags, Canada’s Wonderland, Enchanted Forest/Water Safari) in upstate New York and Canada we visited during our summer vacations. I have heard that visiting a Disney Park is the ultimate designed experience, though, so maybe I should plan to go as an adult. You know, for UX research purposes.

80 |

Never been to Florida (since airports don’t count) #

81 |

Continuing a theme from the previous list item, I have never visited the sunshine state. My wife has though, as a resident at Florida State’s Facitlity for Arts Research a few years ago. I would love to see Miami, so this is one list item I might cross off in my lifetime. At the same time, I have familiarized myself with the Florida Man meme. Residing in Tennessee, I might be as geographically close to Florida and the Florida Men as I’m comfortable with.

82 |

Never used Uber or Lyft #

83 |

I’m not a fan of these kind of companies and their labor practices. Again, there are horror stories, but my primary concern has always been that your rideshare driver might not be carrying the right kind of insurance coverage before you get in their car. I have an airport taxi service I use in Knoxville, and it’s OK, and probably comes out to a similar price. At the same time I have stayed at Airbnbs a couple times, so you can call me a huge hypocrite.

84 |

Never invested in cryptocurrencies #

85 |

And I probably won’t. I’m a somewhat technical guy, and the blockchain technology just doesn’t interest me. Nor do the economics make sense to me. I do watch from the sidelines though, reading Molly White’s blog web3 is going just great. I’ll stick with my get rich slow scheme (graphic design).

86 |

Never read a Malcolm Gladwell book #

87 |

This one seems kind of braggy, or at best trivial. I remember Malcolm Gladwell being all the rage ten or twelve years ago, and I still have not picked up one of his bestselling nonfiction books. I don’t have anything personally against the man, but I’m familiar with the criticism of his work. I got burned by a similar writer, Jonah Lehrer, who probably committed worse crimes against popular nonfiction than Gladwell.

88 |

Never bought a television #

89 |

I grew up with a Commodore 64 in my bedroom, using an 11-inch television for a monitor. This meant I grew up with a television in my bedroom. It gave me an appreciation for 1990s pop culture and probably helped me relate to my peers. In college and beyond I always relied on my roommates to supply the TV. I had a microwave oven and a vacuum cleaner. When I started living on my own, I never prioritized having a television. Today I have a laptop and access to a couple streaming services, so I do watch TV. There are times when I think it would be nice to watch something on a larger screen, but I’m also glad our living room is not dominated by one. Maybe someday we’ll invest in a projector.

90 | 91 |
92 |
93 | 94 |
95 |   96 |
97 | 98 | Reply via email → 99 | 100 | 105 |
106 | 107 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /test-files/document-microformats-3.html: -------------------------------------------------------------------------------- 1 | HTTP/1.1 200 OK 2 | Accept-Ranges: bytes 3 | Content-Type: application/xml 4 | Date: Sat, 23 Sep 2023 17:12:39 GMT 5 | Connection: close 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Happy 13th to microformats.org! – Microformats 16 | 17 | 18 | 19 | 78 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 |
113 | 143 | 144 |
145 | 146 |
147 | 148 |
149 |

Happy 13th to microformats.org!

150 |
151 |

152 | With more use of 153 | microformats2 154 | , especially among the growing 155 | indieweb 156 | network of websites, we’ve iterated 157 | key 158 | specs 159 | for real-world needs and are seeing more active community members. More updates & posts coming up! 160 |

161 |

162 | Originally posted on 163 | tantek.com 164 | . 165 |

166 |
167 | 168 | 180 | 181 | 199 |
200 | 201 | 202 | 203 | 204 |

Comments are closed.

205 | 206 |
207 | 208 |
209 | 210 | 280 | 281 | 282 |
283 | 284 | 296 | 297 |
298 | 299 | 305 | 306 | 307 | 308 | -------------------------------------------------------------------------------- /test-files/document-opengraph-1.html: -------------------------------------------------------------------------------- 1 | HTTP/1.1 200 OK 2 | Date: Sat, 23 Sep 2023 22:12:19 GMT 3 | Server: I'm sorry Dave, I'm afraid I can't do that. 4 | X-Content-Type-Options: nosniff 5 | X-Frame-Options: deny 6 | Content-Security-Policy: default-src 'self'; style-src 'self' 'unsafe-inline' https://cdn.wolfsoftware.com/assets/ https://fonts.googleapis.com/; img-src 'self' * data: https://cdn.wolfsoftware.com/assets/; script-src 'self' https://cdn.wolfsoftware.com/assets/ https://*.fontawesome.com/; font-src 'self' https://cdn.wolfsoftware.com/assets/ https://fonts.googleapis.com/ https://fonts.gstatic.com/ https://*.fontawesome.com/; connect-src 'self' https://*.fontawesome.com/; upgrade-insecure-requests; frame-ancestors 'none'; 7 | X-XSS-Protection: 1; mode=block 8 | Referrer-Policy: same-origin 9 | Permissions-Policy: accelerometer=(),autoplay=(),camera=(),encrypted-media=(),fullscreen=*,geolocation=*,gyroscope=(),interest-cohort=*,magnetometer=(),microphone=(),midi=(),payment=(),sync-xhr=*,usb=(),xr-spatial-tracking=() 10 | Strict-Transport-Security: max-age=31536000; includeSubDomains; preload 11 | Upgrade: h2,h2c 12 | Connection: Upgrade, close 13 | Set-Cookie: PHPSESSID=gtusj55vidm8nejb4png918ncu; path=/ 14 | Expires: Thu, 19 Nov 1981 08:52:00 GMT 15 | Cache-Control: no-store, no-cache, must-revalidate 16 | Pragma: no-cache 17 | Set-Cookie: PHPSESSID=u86m9gjenhacqk2daf3sin0tt8; path=/ 18 | Vary: Accept-Encoding 19 | Content-Type: text/html; charset=UTF-8 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | Open Graph Tester by Wolf Software Limited 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 
73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 93 | 94 | 95 | 96 |
97 | 113 | 114 |
115 |
116 |
117 | 120 |

121 | If you simply want to preview how your site appears on social media, visit the Preview Your Tags page. However, if you're interested in learning more about Open Graph and Twitter Card tags, including some historical context and simple examples for implementation, please continue reading. 122 |

123 |
124 |
125 | 126 |
127 |
128 | 131 | 132 | 133 |

134 | In 2010, Facebook introduced the Open Graph protocol, which enables integration between Facebook and other websites by turning them into rich "graph" objects with similar features as Facebook objects. The Open Graph was initially developed by Facebook and was influenced by several technologies, including Dublin Core, link-rel canonical, Microformats, and RDFa. 135 |

136 | 137 |

138 | Other social media platforms are also leveraging social meta tags. Most of the major platforms, including Twitter, LinkedIn, and Discord, support Open Graph tags, and they are also recognized by messaging platforms like Slack, Teams, WhatsApp, and others. 139 |

140 |

141 | Twitter has its own set of meta tags known as Twitter Cards, but if the Twitter robots can't find these, they resort to using Open Graph tags instead. 142 |

143 |
144 |
145 |
146 |
147 | 150 | 151 |

152 | All Open Graph META tags begin with the prefix "og:", followed by the specific property to be defined. The content related to the defined property is placed within the "content" attribute. 153 |

154 | 155 |
156 |
<meta property="og:{tagName}" content="{tagValue}">
157 | 
158 |
159 |

160 | Let's examine a practical example of a basic set of Open Graph tags commonly used by many websites. For this demonstration, we'll use the tags that are currently implemented on this particular site. 161 |

162 | 163 |
164 |
<meta property="og:url" content="http://www.opengraphtester.com/" />
165 | <meta property="og:type" content="website">
166 | <meta property="og:title" content="Open Graph Tester" />
167 | <meta property="og:description" content="Open Graph Tester - This website is a simple helper site for allowing web developers, designers and marketing professionals to maximise their websites and posts before publishing them to social media." />
168 | <meta property="og:image" content="http://www.opengraphtester.com/assets/images/logos/og-image.png" />
169 | 
170 |
171 | 172 |

173 | The example above shows the minimum set of tags that we recommend including on your site. If you'd like to view how these tags appear on various social media platforms, visit the Preview Your Tags page. 174 |

175 | 176 |

177 | Open Graph supports numerous additional tags, including those for video, music, and more. For more information on the available Open Graph tags, please refer to the Open Graph Protocol page. 178 |

179 |
180 |
181 |
182 |
183 | 186 | 187 |

188 | Twitter has created its own version, called Twitter Cards, which operates similarly to Open Graph but instead uses "twitter:" tags instead of "og:" tags. 189 |

190 | 191 |
192 |
<meta name="twitter:{tagName}" content="{tagValue}">
193 | <meta property="twitter:{tagName}" content="{tagValue}">
194 | 
195 |
196 | 197 |

198 | As you can see from the example, Twitter employs both the "property" and "name" attributes. A basic illustration of Twitter Card tags would appear as follows. 199 |

200 | 201 |
202 |
<meta name="twitter:card" content="summary_large_image" />
203 | <meta property="twitter:domain" content="opengraphtester.com" />
204 | <meta property="twitter:url" content="https://opengraphtester.com/preview/" />
205 | <meta name="twitter:title" content="Open Graph Tester" />
206 | <meta name="twitter:description" content="Open Graph Tester - This website is a simple helper site for allowing web developers, designers and marketing professionals to maximise their websites and posts before publishing them to social media." />
207 | <meta name="twitter:image" content="https://opengraphtester.com/assets/images/logos/og-image.png" />
208 | 
209 |
210 | 211 |

212 | If your site lacks Twitter Card tags, don't worry as it won't prevent your links from being shared on Twitter. In the absence of Twitter Card tags, Twitter will fall back to using Open Graph tags. 213 |

214 | 215 |
216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 |
239 |
240 |

241 | In addition to the above, if Open Graph is unable to find the necessary tags, it will also utilize other sources. 242 |

243 | 244 |
245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 |
Open GraphHTML Element
og:title<title></title>
og:description<meta name="description" content="">
263 |
264 |
265 |
266 |
267 |
268 |
269 | 272 | 273 |

274 | If you're uncertain about which tags to set, visit the Preview Your Tags page and enter your URL. We'll display the tags we found and demonstrate what the tags should look like to maximize the benefits of Open Graph and Twitter Cards. 275 |

276 |
277 |
278 | 279 |
280 | 281 |
282 |
283 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "mime" 5 | "net/mail" 6 | "net/url" 7 | "slices" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/benpate/derp" 12 | "github.com/benpate/digit" 13 | "github.com/benpate/hannibal/vocab" 14 | "github.com/benpate/rosetta/compare" 15 | "github.com/benpate/rosetta/mapof" 16 | "github.com/microcosm-cc/bluemonday" 17 | "github.com/rs/zerolog" 18 | ) 19 | 20 | func sanitizeHTML(value string) string { 21 | return bluemonday.UGCPolicy().Sanitize(value) 22 | } 23 | 24 | func sanitizeText(value string) string { 25 | return bluemonday.StrictPolicy().Sanitize(value) 26 | } 27 | 28 | // isActivityStream returns TRUE if the MIME type is either activity+json or ld+json 29 | func isActivityStream(value string) bool { 30 | 31 | // ActivityStreams have their own MIME type, but we have to check some alternates, too. 32 | if mediaType, _, err := mime.ParseMediaType(value); err == nil { 33 | switch mediaType { 34 | case "application/activity+json", "application/ld+json": 35 | return true 36 | } 37 | } 38 | 39 | return false 40 | } 41 | 42 | // defaultHTTPS appends `https://` to the uri if it doesn't already have a valid protocol. 
43 | func defaultHTTPS(uri string) string { 44 | 45 | if strings.HasPrefix(uri, "http://") { 46 | return uri 47 | } 48 | 49 | if strings.HasPrefix(uri, "https://") { 50 | return uri 51 | } 52 | 53 | return "https://" + uri 54 | } 55 | 56 | // canInfo returns TRUE if zerolog is configured to allow Info logs 57 | // nolint:unused 58 | func canInfo() bool { 59 | return canLog(zerolog.InfoLevel) 60 | } 61 | 62 | // canDebug returns TRUE if zerolog is configured to allow Debug logs 63 | // nolint:unused 64 | func canDebug() bool { 65 | return canLog(zerolog.DebugLevel) 66 | } 67 | 68 | // canTrace returns TRUE if zerolog is configured to allow Trace logs 69 | // nolint:unused 70 | func canTrace() bool { 71 | return canLog(zerolog.TraceLevel) 72 | } 73 | 74 | // canLog is a silly zerolog helper that returns TRUE 75 | // if the provided log level would be allowed 76 | // (based on the global log level). 77 | // This makes it easier to execute expensive code conditionally, 78 | // for instance: marshalling a JSON object for logging. 79 | func canLog(level zerolog.Level) bool { 80 | return zerolog.GlobalLevel() <= level 81 | } 82 | 83 | // withContext adds the standard ActivityStream @context to the JSON-LD document. 84 | // If we're doing this, it's because we're assembling a "fake" JSON-LD document out of 85 | // other metadata (like OpenGraph, MicroFormats, oEmbed, etc). 86 | func withContext(value mapof.Any) { 87 | if _, ok := value[vocab.AtContext]; !ok { 88 | value[vocab.AtContext] = vocab.ContextTypeActivityStreams 89 | } 90 | } 91 | 92 | // sortImageLinks is a slices.SortFunc function that ranks digit.Links by their size and type. 
93 | func sortImageLinks(a, b digit.Link) int { 94 | 95 | // First, prefer larger images 96 | aSize := iconSizesAsInt(a.Properties["sizes"]) 97 | bSize := iconSizesAsInt(b.Properties["sizes"]) 98 | 99 | if result := compare.Int(aSize, bSize); result != 0 { 100 | return result 101 | } 102 | 103 | // Next, prefer images by type 104 | return compare.Int(iconMediaTypeAsInt(a.MediaType), iconMediaTypeAsInt(b.MediaType)) 105 | } 106 | 107 | // iconSizeAsInt converts an image size string (in the form of "128x128") to the maximum 108 | // integer value of the two dimensions. This is useful for sorting images by size. 109 | func iconSizesAsInt(value string) int { 110 | 111 | // Empty values are empty 112 | if value == "" { 113 | return 0 114 | } 115 | 116 | // Convert to lowercase, and split into parts 117 | value = strings.ToLower(value) 118 | parts := strings.Split(value, " ") 119 | results := make([]int, 0, len(parts)) 120 | 121 | // Scan each part for the first number in the dimension 122 | for _, part := range parts { 123 | 124 | part, _, _ = strings.Cut(part, "x") 125 | 126 | // If we have a number, then add that to the potential result 127 | if result, err := strconv.ParseInt(part, 10, 64); err == nil { 128 | results = append(results, int(result)) 129 | } 130 | } 131 | 132 | // If we found no results, then return 0 133 | if len(results) == 0 { 134 | return 0 135 | } 136 | 137 | // Return the largest number found 138 | return slices.Max(results) 139 | } 140 | 141 | // iconMediaTypeAsInt converts an image type string (in the form of "image/png") to a numeric value 142 | // that cam be used to sort images by type. 
143 | func iconMediaTypeAsInt(value string) int { 144 | 145 | switch value { 146 | case "image/webp": 147 | return 256 148 | case "image/png": 149 | return 255 150 | case "image/jpg": 151 | return 254 152 | case "image/jpeg": 153 | return 253 154 | case "image/svg": 155 | return 252 156 | case "image/svg+xml": 157 | return 251 158 | case "image/gif": 159 | return 250 160 | case "image/bmp": 161 | return 248 162 | case "image/tiff": 163 | return 247 164 | case "image/tiff+xml": 165 | return 246 166 | case "image/x-icon": 167 | return 245 168 | case "image/vnd.microsoft.icon": 169 | return 244 170 | default: 171 | return 0 172 | } 173 | } 174 | 175 | // hostOnly returns the protocol and hostname of a URL, without the path or query string 176 | func hostOnly(value string) string { 177 | 178 | parsedURL, err := url.Parse(value) 179 | 180 | if err != nil { 181 | derp.Report(derp.Wrap(err, "sherlock.hostOnly", "Error parsing URL", value)) 182 | return value 183 | } 184 | 185 | // Strip path and query string (use root URL only) 186 | parsedURL.Path = "" 187 | parsedURL.RawQuery = "" 188 | 189 | // Rewrite the value without the path and query string 190 | return parsedURL.String() 191 | } 192 | 193 | // identifierType detects Username and URL identifiers 194 | func identifierType(identifier string) string { 195 | 196 | // Try to detect URLs first because we can use the standard library 197 | if strings.HasPrefix(identifier, "http://") || strings.HasPrefix(identifier, "https://") { 198 | if _, err := url.Parse(identifier); err == nil { 199 | return IdentifierTypeURL 200 | } 201 | } 202 | 203 | // Try to detect username/email by disregarding the leading "@" 204 | identifier = strings.TrimPrefix(identifier, "@") 205 | if strings.Contains(identifier, "@") { 206 | if _, err := mail.ParseAddress(identifier); err == nil { 207 | return IdentifierTypeUsername 208 | } 209 | } 210 | 211 | // Cannot determine identifier type 212 | return IdentifierTypeNone 213 | } 214 | 
-------------------------------------------------------------------------------- /utils_test.go: -------------------------------------------------------------------------------- 1 | package sherlock 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/rs/zerolog" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestCanTrace(t *testing.T) { 11 | zerolog.SetGlobalLevel(zerolog.TraceLevel) 12 | require.True(t, canTrace()) 13 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 14 | require.False(t, canTrace()) 15 | } 16 | 17 | func TestCanDebug(t *testing.T) { 18 | zerolog.SetGlobalLevel(zerolog.TraceLevel) 19 | require.True(t, canDebug()) 20 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 21 | require.True(t, canDebug()) 22 | zerolog.SetGlobalLevel(zerolog.InfoLevel) 23 | require.False(t, canDebug()) 24 | } 25 | 26 | func TestCanInfo(t *testing.T) { 27 | zerolog.SetGlobalLevel(zerolog.TraceLevel) 28 | require.True(t, canDebug()) 29 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 30 | require.True(t, canDebug()) 31 | zerolog.SetGlobalLevel(zerolog.InfoLevel) 32 | require.True(t, canInfo()) 33 | zerolog.SetGlobalLevel(zerolog.WarnLevel) 34 | require.False(t, canInfo()) 35 | } 36 | 37 | func TestHostOnly(t *testing.T) { 38 | require.Equal(t, "https://example.com", hostOnly("https://example.com")) 39 | require.Equal(t, "https://example.com:8080", hostOnly("https://example.com:8080")) 40 | require.Equal(t, "https://example.com", hostOnly("https://example.com/")) 41 | require.Equal(t, "https://example.com", hostOnly("https://example.com/some/path/here")) 42 | require.Equal(t, "https://example.com", hostOnly("https://example.com?query=string")) 43 | require.Equal(t, "https://example.com", hostOnly("https://example.com/some/path?and=querystring")) 44 | 45 | require.Equal(t, "", hostOnly("example.com")) 46 | } 47 | 48 | func TestIdentifierType(t *testing.T) { 49 | 50 | require.Equal(t, "USERNAME", identifierType("@benpate@climatejustice.social")) 51 | require.Equal(t, "URL", 
identifierType("https://climatejustice.social/@benpate")) 52 | } 53 | --------------------------------------------------------------------------------