├── .build-and-release.sh ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── alfred-workflow-release.yml │ ├── markdownlint.yml │ ├── pr-title.yml │ └── stale-bot.yml ├── .gitignore ├── .markdownlint.yaml ├── .rsync-exclude ├── CITATION.cff ├── Justfile ├── LICENSE ├── README.md ├── cheatsheet.webloc ├── icon.png ├── info.plist ├── notificator └── scripts ├── get-pdf-path.applescript ├── process_annotations.js └── run-extraction.sh /.build-and-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | #─────────────────────────────────────────────────────────────────────────────── 3 | 4 | # goto git root 5 | cd "$(git rev-parse --show-toplevel)" || return 1 6 | 7 | # Prompt for next version number (current version = the `<string>` value of the `version` key in `info.plist`) 8 | current_version=$(plutil -extract version xml1 -o - info.plist | sed -n 's/.*<string>\(.*\)<\/string>.*/\1/p') 9 | echo "current version: $current_version" 10 | echo -n " next version: " 11 | read -r next_version 12 | echo "────────────────────────" 13 | 14 | # GUARD 15 | if [[ -z "$next_version" || "$next_version" == "$current_version" ]]; then 16 | print "\033[1;31mInvalid version number.\033[0m" 17 | return 1 18 | fi 19 | 20 | # update version number in THE REPO'S `info.plist` 21 | plutil -replace version -string "$next_version" info.plist 22 | 23 | #─────────────────────────────────────────────────────────────────────────────── 24 | # INFO this assumes the local folder is named the same as the github repo 25 | # 1. update version number in LOCAL `info.plist` 26 | # 2. 
convenience: copy download link for current version 27 | 28 | # update version number in LOCAL `info.plist` 29 | prefs_location=$(defaults read com.runningwithcrayons.Alfred-Preferences syncfolder | sed "s|^~|$HOME|") 30 | workflow_uid="$(basename "$PWD")" 31 | local_info_plist="$prefs_location/Alfred.alfredpreferences/workflows/$workflow_uid/info.plist" 32 | if [[ -f "$local_info_plist" ]] ; then 33 | plutil -replace version -string "$next_version" "$local_info_plist" 34 | else 35 | print "\033[1;33mCould not increment version, local \`info.plist\` not found: '$local_info_plist'\033[0m" 36 | return 1 37 | fi 38 | 39 | # copy download link for current version (remote may be SSH `github.com:user/…` or HTTPS `github.com/user/…`) 40 | msg="Available in the Alfred Gallery in 1-2 days, or directly by downloading the latest release here:" 41 | github_user=$(git remote --verbose | head -n1 | sed -E 's/.*github.com[:\/](.*)\/.*/\1/') 42 | url="https://github.com/$github_user/$workflow_uid/releases/download/$next_version/${workflow_uid}.alfredworkflow" 43 | echo -n "$msg $url" | pbcopy 44 | 45 | #─────────────────────────────────────────────────────────────────────────────── 46 | 47 | # commit and push 48 | git add --all && 49 | git commit -m "release: $next_version" && 50 | git pull --no-progress && 51 | git push --no-progress && 52 | git tag "$next_version" && # pushing a tag triggers the github release action 53 | git push --no-progress origin --tags 54 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/github/administering-a-repository/managing-repository-settings/displaying-a-sponsor-button-in-your-repository 2 | 3 | custom: https://www.paypal.me/ChrisGrieser 4 | ko_fi: pseudometa 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: 
-------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | body: 6 | - type: textarea 7 | id: bug-description 8 | attributes: 9 | label: Bug Description 10 | description: A clear and concise description of the bug. 11 | validations: 12 | required: true 13 | - type: textarea 14 | id: screenshot 15 | attributes: 16 | label: Relevant Screenshot 17 | description: If applicable, add screenshots or a screen recording to help explain your problem. 18 | - type: textarea 19 | id: reproduction-steps 20 | attributes: 21 | label: To Reproduce 22 | description: Steps to reproduce the problem 23 | placeholder: | 24 | For example: 25 | 1. Go to '...' 26 | 2. Click on '...' 27 | 3. Scroll down to '...' 28 | - type: textarea 29 | id: debugging-log 30 | attributes: 31 | label: Debugging Log 32 | description: "You can get a debugging log by opening the workflow in Alfred preferences and pressing `⌘ + D`. A small window will open up which will log everything happening during the execution of the Workflow. Use the malfunctioning part of the workflow once more, copy the content of the log window, and paste it here. If the debugging log is long, please attach it as file instead of pasting everything in here." 33 | render: Text 34 | validations: 35 | required: true 36 | - type: textarea 37 | id: workflow-configuration 38 | attributes: 39 | label: Workflow Configuration 40 | description: "Please add a screenshot of your [workflow configuration](https://www.alfredapp.com/help/workflows/user-configuration/)." 41 | validations: 42 | required: true 43 | - type: checkboxes 44 | id: checklist 45 | attributes: 46 | label: Checklist 47 | options: 48 | - label: I have [updated to the latest version](https://github.com/chrisgrieser/pdf-annotation-extractor-alfred/releases/latest) of this workflow. 
49 | required: true 50 | - label: "If the extracted text is all jumbled up (special characters, spaces missing, etc.) then the issue is not with this workflow, but the underlying extraction engine, pdfannots2json. Upgrade it to the latest version with `brew upgrade pdfannots2json` and try again. If the issue persists, [file a bug report at pdfannots2json.](https://github.com/mgmeyers/pdfannots2json/issues)" 51 | required: true 52 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea 3 | title: "Feature Request: " 4 | labels: ["enhancement"] 5 | body: 6 | - type: textarea 7 | id: feature-requested 8 | attributes: 9 | label: Feature Requested 10 | description: A clear and concise description of the feature. 11 | validations: 12 | required: true 13 | - type: textarea 14 | id: screenshot 15 | attributes: 16 | label: Relevant Screenshot 17 | description: If applicable, add screenshots or a screen recording to help explain the request. 18 | - type: checkboxes 19 | id: checklist 20 | attributes: 21 | label: Checklist 22 | options: 23 | - label: The feature would be useful to more users than just me. 
24 | required: true 25 | 26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | commit-message: 8 | prefix: "chore(dependabot): " 9 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## What problem does this PR solve? 2 | 3 | ## How does the PR solve it? 4 | 5 | ## Checklist 6 | - [ ] Used only `camelCase` variable names. 7 | - [ ] If functionality is added or modified, also made respective changes to the 8 | `README.md` and the internal workflow documentation. 9 | -------------------------------------------------------------------------------- /.github/workflows/alfred-workflow-release.yml: -------------------------------------------------------------------------------- 1 | name: Alfred Workflow Release 2 | 3 | on: 4 | push: 5 | tags: ["*"] 6 | 7 | env: 8 | WORKFLOW_NAME: ${{ github.event.repository.name }} 9 | 10 | #─────────────────────────────────────────────────────────────────────────────── 11 | 12 | jobs: 13 | build: 14 | runs-on: macos-latest 15 | permissions: { contents: write } 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@v4 19 | 20 | - name: Build .alfredworkflow 21 | run: | 22 | zip --recurse-paths --symlinks "${{ env.WORKFLOW_NAME }}.alfredworkflow" . 
\ 23 | --exclude "README.md" ".git*" "Justfile" ".build-and-release.sh" \ 24 | ".rsync-exclude" ".editorconfig" ".typos.toml" ".markdownlint.*" 25 | 26 | - name: Create release notes 27 | id: release_notes 28 | uses: mikepenz/release-changelog-builder-action@v5 29 | env: 30 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 31 | with: 32 | mode: "COMMIT" 33 | configurationJson: | 34 | { 35 | "label_extractor": [{ 36 | "pattern": "^(\\w+)(\\([\\w\\-\\.]+\\))?(!)?: .+", 37 | "on_property": "title", 38 | "target": "$1" 39 | }], 40 | "categories": [ 41 | { "title": "## ⚠️ Breaking changes", "labels": ["break"] }, 42 | { "title": "## 🚀 New features", "labels": ["feat", "improv"] }, 43 | { "title": "## 🛠️ Fixes", "labels": ["fix", "perf", "chore"] }, 44 | { "title": "## 👾 Other", "labels": [] } 45 | ], 46 | "ignore_labels": ["release", "bump"] 47 | } 48 | 49 | - name: Release 50 | uses: softprops/action-gh-release@v2 51 | with: 52 | token: ${{ secrets.GITHUB_TOKEN }} 53 | body: ${{ steps.release_notes.outputs.changelog }} 54 | files: ${{ env.WORKFLOW_NAME }}.alfredworkflow 55 | -------------------------------------------------------------------------------- /.github/workflows/markdownlint.yml: -------------------------------------------------------------------------------- 1 | name: Markdownlint check 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "**.md" 8 | - ".github/workflows/markdownlint.yml" 9 | - ".markdownlint.*" # markdownlint config files 10 | pull_request: 11 | paths: 12 | - "**.md" 13 | 14 | jobs: 15 | markdownlint: 16 | name: Markdownlint 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: DavidAnson/markdownlint-cli2-action@v20 21 | with: 22 | globs: "**/*.md" 23 | -------------------------------------------------------------------------------- /.github/workflows/pr-title.yml: -------------------------------------------------------------------------------- 1 | name: PR title 2 | 3 | on: 4 | pull_request_target: 5 | 
types: 6 | - opened 7 | - edited 8 | - synchronize 9 | - reopened 10 | - ready_for_review 11 | 12 | permissions: 13 | pull-requests: read 14 | 15 | jobs: 16 | semantic-pull-request: 17 | name: Check PR title 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: amannn/action-semantic-pull-request@v5 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | requireScope: false 25 | subjectPattern: ^(?![A-Z]).+$ # disallow title starting with capital 26 | types: | # add `improv` to the list of allowed types 27 | improv 28 | fix 29 | feat 30 | refactor 31 | build 32 | ci 33 | style 34 | test 35 | chore 36 | perf 37 | docs 38 | break 39 | revert 40 | -------------------------------------------------------------------------------- /.github/workflows/stale-bot.yml: -------------------------------------------------------------------------------- 1 | name: Stale bot 2 | on: 3 | schedule: 4 | - cron: "18 04 * * 3" 5 | 6 | permissions: 7 | issues: write 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Close stale issues 15 | uses: actions/stale@v9 16 | with: 17 | repo-token: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | # DOCS https://github.com/actions/stale#all-options 20 | days-before-stale: 180 21 | days-before-close: 7 22 | stale-issue-label: "Stale" 23 | stale-issue-message: | 24 | This issue has been automatically marked as stale. 25 | **If this issue is still affecting you, please leave any comment**, for example "bump", and it will be kept open. 26 | close-issue-message: | 27 | This issue has been closed due to inactivity, and will not be monitored. 
28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac 2 | .DS_Store 3 | 4 | # Alfred 5 | prefs.plist 6 | *.alfredworkflow 7 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | # Defaults https://github.com/DavidAnson/markdownlint/blob/main/schema/.markdownlint.yaml 2 | # DOCS https://github.com/markdownlint/markdownlint/blob/main/docs/RULES.md 3 | #─────────────────────────────────────────────────────────────────────────────── 4 | 5 | # MODIFIED SETTINGS 6 | blanks-around-headings: 7 | lines_below: 0 # space waster 8 | ul-style: { style: sublist } 9 | 10 | # not autofixable 11 | ol-prefix: { style: ordered } 12 | line-length: 13 | tables: false 14 | code_blocks: false 15 | no-inline-html: 16 | allowed_elements: [img, details, summary, kbd, a, br] 17 | 18 | #───────────────────────────────────────────────────────────────────────────── 19 | # DISABLED 20 | ul-indent: false # not compatible with using tabs 21 | no-hard-tabs: false # taken care of by editorconfig 22 | blanks-around-lists: false # space waster 23 | first-line-heading: false # e.g., ignore-comments 24 | no-emphasis-as-heading: false # sometimes useful 25 | -------------------------------------------------------------------------------- /.rsync-exclude: -------------------------------------------------------------------------------- 1 | # vim: ft=gitignore 2 | #─────────────────────────────────────────────────────────────────────────────── 3 | 4 | # git 5 | .git/ 6 | .gitignore 7 | 8 | # Alfred 9 | prefs.plist 10 | .rsync-exclude 11 | 12 | # docs 13 | docs/ 14 | LICENSE 15 | # INFO leading `/` -> ignore only the README in the root, not in subfolders 16 | /README.md 17 | CITATION.cff 18 | 19 | # build 20 | Justfile 21 | .github/ 22 | 
.build-and-release.sh 23 | 24 | # linter & types 25 | .typos.toml 26 | .editorconfig 27 | .markdownlint.yaml 28 | jxa-globals.d.ts 29 | jsconfig.json 30 | alfred.d.ts 31 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # vim: filetype=yaml 2 | # yaml-language-server: $schema=https://raw.githubusercontent.com/citation-file-format/citation-file-format/main/schema.json 3 | # DOCS https://github.com/citation-file-format/citation-file-format/blob/main/schema-guide.md 4 | #─────────────────────────────────────────────────────────────────────────────── 5 | 6 | message: If you use this software, please cite it using these metadata. 7 | 8 | title: PDF Annotation Extractor 9 | abstract: Alfred Workflow to extract annotations from PDF files. 10 | type: software 11 | authors: 12 | - family-names: Grieser 13 | given-names: Christopher 14 | orcid: "https://orcid.org/0000-0002-0767-9496" 15 | version: "8.12.2" 16 | date-released: "2023-12-04" 17 | repository-code: "https://github.com/chrisgrieser/pdf-annotation-extractor-alfred" 18 | keywords: 19 | - pdf annotations 20 | - pdfs 21 | - data extraction 22 | cff-version: 1.2.0 23 | license: MIT 24 | -------------------------------------------------------------------------------- /Justfile: -------------------------------------------------------------------------------- 1 | set quiet := true 2 | 3 | # REQUIRED local workflow uses same folder name 4 | 5 | workflow_uid := `basename "$PWD"` 6 | prefs_location := `defaults read com.runningwithcrayons.Alfred-Preferences syncfolder | sed "s|^~|$HOME|"` 7 | local_workflow := prefs_location / "Alfred.alfredpreferences/workflows" / workflow_uid 8 | 9 | #─────────────────────────────────────────────────────────────────────────────── 10 | 11 | transfer-changes-FROM-local: 12 | #!/usr/bin/env zsh 13 | rsync --archive --delete --exclude-from="$PWD/.rsync-exclude" 
"{{ local_workflow }}/" "$PWD" 14 | git status --short 15 | 16 | transfer-changes-TO-local: 17 | #!/usr/bin/env zsh 18 | rsync --archive --delete --exclude-from="$PWD/.rsync-exclude" "$PWD/" "{{ local_workflow }}" 19 | cd "{{ local_workflow }}" 20 | print "\e[1;34mChanges at the local workflow:\e[0m" 21 | git status --short . 22 | 23 | [macos] 24 | open-local-workflow-in-alfred: 25 | #!/usr/bin/env zsh 26 | # using JXA and URI for redundancy, as both are not 100 % reliable https://www.alfredforum.com/topic/18390-get-currently-edited-workflow-uri/ 27 | open "alfredpreferences://navigateto/workflows>workflow>{{ workflow_uid }}" 28 | osascript -e 'tell application id "com.runningwithcrayons.Alfred" to reveal workflow "{{ workflow_uid }}"' 29 | 30 | release: 31 | ./.build-and-release.sh 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Christopher Grieser 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDF Annotation Extractor 2 | ![Download count](https://img.shields.io/github/downloads/chrisgrieser/pdf-annotation-extractor-alfred/total?label=Total%20Downloads&style=plastic) 3 | ![Last release](https://img.shields.io/github/v/release/chrisgrieser/pdf-annotation-extractor-alfred?label=Latest%20Release&style=plastic) 4 | 5 | A [Workflow for Alfred](https://www.alfredapp.com/) to extract annotations as 6 | Markdown file. Primarily for scientific papers, but can also be used for 7 | non-academic PDF files. 8 | 9 | Automatically determines correct page numbers, inserts them as Pandoc citations, 10 | merges highlights across page breaks, prepends a YAML header with bibliographic 11 | information, and more. 
12 | 13 | ## Table of Contents 14 | 15 | 16 | 17 | - [Installation](#installation) 18 | - [Requirements for the PDF](#requirements-for-the-pdf) 19 | * [Automatic citekey identification](#automatic-citekey-identification) 20 | - [Usage](#usage) 21 | * [Basics](#basics) 22 | * [Automatic Page Number Identification](#automatic-page-number-identification) 23 | * [Annotation Codes](#annotation-codes) 24 | * [Extracting Images](#extracting-images) 25 | - [Troubleshooting](#troubleshooting) 26 | - [Cite this software project](#cite-this-software-project) 27 | - [Credits](#credits) 28 | - [About the developer](#about-the-developer) 29 | 30 | 31 | 32 | ## Installation 33 | - Requirement: [Alfred 5](https://www.alfredapp.com/) with Powerpack 34 | - Install [Homebrew](https://brew.sh/) 35 | - Install `pdfannots2json` by running the following command into your terminal: 36 | `brew install mgmeyers/pdfannots2json/pdfannots2json` 37 | - Download the [latest release](https://github.com/chrisgrieser/pdf-annotation-extractor-alfred/releases/latest/). 38 | - Set the hotkey by double-clicking the sky-blue field at the top left. 39 | - Set up the workflow configuration inside the app. 40 | 41 | ## Requirements for the PDF 42 | `PDF Annotation Extractor` works on any PDF that has valid annotations 43 | saved *in the PDF file*. Some PDF readers like **Skim** or **Zotero 6** do not 44 | store annotations in the PDF itself by default. 45 | 46 | This workflow automatically determines the citekey based on the filename of 47 | your PDF file. 48 | - If the citekey is found, the `PDF Annotation Extractor` 49 | prepends a yaml header to the annotations and [automatically 50 | inserts the citekey](#automatic-page-number-identification) with the correct 51 | page numbers using the [Pandoc citations 52 | syntax](https://pandoc.org/MANUAL.html#citation-syntax). 
53 | - If your filename does not contain a citekey that can be found in 54 | your library, the `PDF Annotation Extractor` extracts the annotations without 55 | a yaml header and uses the PDF numbers as page numbers. 56 | 57 | ### Automatic citekey identification 58 | - The filename of the PDF file MUST begin with the citekey (without `@`). 59 | - The citekey MUST NOT contain any underscores (`_`). 60 | - The name of the file MAY be followed by an underscore and some 61 | text, such as `{citekey}_{title}.pdf`. It MUST NOT be followed by anything 62 | else, since then the citekey would not be found. 63 | - Example: With the filename, `Grieser2023_Interdependent Technologies.pdf`, the 64 | identified citekey is `Grieser2023`. 65 | 66 | > [!TIP] 67 | > You can achieve such a filename pattern with automatic renaming rules of most 68 | > reference managers, for example with the [ZotFile plugin for 69 | > Zotero](http://zotfile.com/#renaming-rules) or the [AutoFile feature of 70 | > BibDesk](https://bibdesk.sourceforge.io/manual/BibDeskHelp_77.html#SEC140). 71 | 72 | ## Usage 73 | 74 | ### Basics 75 | Use the [hotkey](https://www.alfredapp.com/help/workflows/triggers/hotkey/) to 76 | trigger the Annotation Extraction on the PDF file currently selected in Finder. 77 | The hotkey also works when triggered from [PDF Expert](https://pdfexpert.com/) 78 | or [Highlights](https://highlightsapp.net/). Alternatively, use the 79 | `anno` keyword to search for PDFs and select one. 
80 | 81 | **Annotation Types extracted** 82 | - Highlight ➡️ bullet point, quoting text and prepending the comment as bold text 83 | - Free Comment ➡️ blockquote of the comment text 84 | - Strikethrough ➡️ Markdown strikethrough 85 | - Rectangle ➡️ [extracts image and inserts Markdown image link at the respective 86 | place](#extracting-images) 87 | - Underlines ➡️ sent to `Reminders.app` as a task due today in the default list 88 | 89 | ### Automatic Page Number Identification 90 | Instead of the PDF page numbers, this workflow retrieves information about the 91 | *real* page numbers from the BibTeX library and inserts them. If there is no 92 | page data in the BibTeX entry (for example, monographs), you are prompted to 93 | enter the page number manually. 94 | - In that case, enter the **real page number** of your **first PDF page**. 95 | - In case there is content before the actual text (for example, a foreword or 96 | Table of Contents), the real page number `1` often occurs later in the PDF. If 97 | that is the case, you must enter a **negative page number**, reflecting the 98 | true page number the first PDF page would have. *Example: Your PDF is a book, which 99 | has a foreword, and uses roman numbers for it; real page number 1 is PDF page 100 | number 12. If you continued the numbering backwards, the first PDF page would 101 | have page number `-10`, you enter the value `-10` when prompted for a page 102 | number.* 103 | 104 | ### Annotation Codes 105 | Insert the following codes at the **beginning** of an annotation to invoke 106 | special actions on that annotation. Annotation codes do not apply to 107 | strikethroughs. 108 | 109 | - `+`: Merge this highlight with the previous highlight or underline. Works for 110 | annotations on the same PDF-page (= skipping text in between) and for 111 | annotations across two pages. 112 | * `? foo` **(free comments)**: Turns "foo" into a Question 113 | Callout (`> ![QUESTION]`) and move up. 
(Callouts are [Obsidian-specific 114 | Syntax](https://help.obsidian.md/How+to/Use+callouts).) 115 | - `##`: Turns highlighted text into a **heading** that is added at that 116 | location. The number of `#` determines the heading level. If the annotation is 117 | a free comment, the text following the `#` is used as heading instead. (The 118 | space after the `#` is required). 119 | - `=`: Adds highlighted text as **tags** to the YAML frontmatter. If the 120 | annotation is a free comment, uses the text 121 | after the `=`. In both cases, the annotation is removed afterward. 122 | - `_`: A copy of the annotation is sent to `Reminders.app` as a task due today 123 | (default list). 124 | 125 | > [!TIP] 126 | > You can run the Alfred command `acode` to display a cheat sheet of all 127 | > annotation codes. 128 | 129 | ### Extracting Images 130 | - The respective images are saved in the `attachments` sub-folder of the output 131 | folder, and named `{citekey}_image{n}.png`. 132 | - The images are embedded in the markdown file with the `![[ ]]` syntax, for 133 | example `![[filename.png|foobar]]`. 134 | 135 | - Any `rectangle` type annotation in the PDF is extracted as image. 136 | 137 | - If the rectangle annotation has any comment, it is used as the alt-text for 138 | the image. (Note that some PDF readers like PDF Expert do not allow you to add 139 | a comment to rectangular annotations.) 140 | 141 | ## Troubleshooting 142 | - Update to the latest version of `pdfannots2json` by running 143 | `brew upgrade pdfannots2json` in your terminal. 144 | - This workflow does not work with annotations that are not actually saved in 145 | the PDF file. Some PDF Readers like **Skim** or **Zotero 6** do this, but you 146 | can [tell those PDF readers to save the notes in the actual 147 | PDF](https://skim-app.sourceforge.io/manual/SkimHelp_45.html). 
148 | 149 | > [!NOTE] 150 | > As a fallback, you can use `pdfannots` as extraction engine, as a different 151 | > PDF engine sometimes fixes issues. This requires installing 152 | > [pdfannots](https://github.com/mgmeyers/pdfannots2json/issues/11) via `pip3 153 | > install pdfannots`, and switching the fallback engine in the settings. Note 154 | > that `pdfannots` does not support image extraction and the extraction quality 155 | > is slightly worse, so generally you want to use `pdfannots2json`. 156 | 157 | ## Cite this software project 158 | If you want to mention this software project in an academic publication, please 159 | cite it as: 160 | 161 | ```txt 162 | Grieser, C. (2023). PDF Annotation Extractor [Computer software]. 163 | https://github.com/chrisgrieser/pdf-annotation-extractor-alfred 164 | ``` 165 | 166 | For other citation styles, use the following metadata: [Citation File 167 | Format](./CITATION.cff). 168 | 169 | 170 | ## Credits 171 | - To [Andrew Baumann for pdfannots](https://github.com/0xabu/pdfannots), which 172 | caused me to develop this workflow (even though it does not use `pdfannots` 173 | anymore). 174 | - Also, many thanks to [@mgmeyers for 175 | pdfannots2json](https://github.com/mgmeyers/pdfannots2json/), which enabled 176 | many improvements to this workflow. 177 | - I also thank [@StPag](https://github.com/stefanopagliari/) for his ideas on 178 | annotation codes. 179 | - [Icons created by Freepik/Flaticon](https://www.flaticon.com/authors/freepik) 180 | 181 | ## About the developer 182 | In my day job, I am a sociologist studying the social mechanisms underlying the 183 | digital economy. For my PhD project, I investigate the governance of the app 184 | economy and how software ecosystems manage the tension between innovation and 185 | compatibility. If you are interested in this subject, feel free to get in touch. 
186 | 187 | - [Academic Website](https://chris-grieser.de/) 188 | - [Mastodon](https://pkm.social/@pseudometa) 189 | - [ResearchGate](https://www.researchgate.net/profile/Christopher-Grieser) 190 | - [LinkedIn](https://www.linkedin.com/in/christopher-grieser-ba693b17a/) 191 | 192 | 193 | Buy Me a Coffee at ko-fi.com 200 | -------------------------------------------------------------------------------- /cheatsheet.webloc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | URL 6 | https://github.com/chrisgrieser/pdf-annotation-extractor-alfred/blob/main/README.md#usage 7 | 8 | 9 | -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisgrieser/pdf-annotation-extractor-alfred/0bc042a8429f5f2503916f55de13ae76b5b27ce0/icon.png -------------------------------------------------------------------------------- /info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bundleid 6 | de.chris-grieser.pdf-annotation-extraction 7 | category 8 | ⭐️ 9 | connections 10 | 11 | 0C315F47-D751-4D59-8DFA-2CFC7BF581A2 12 | 13 | 14 | destinationuid 15 | 0F49DAE6-6AB0-41CD-A791-496FD227B380 16 | modifiers 17 | 0 18 | modifiersubtext 19 | 20 | vitoclose 21 | 22 | 23 | 24 | 65783839-9CCB-48D0-A740-1F7BF96926D1 25 | 26 | 27 | destinationuid 28 | 0C315F47-D751-4D59-8DFA-2CFC7BF581A2 29 | modifiers 30 | 0 31 | modifiersubtext 32 | 33 | vitoclose 34 | 35 | 36 | 37 | 77F6FCC3-FA68-477B-BBB0-40F8C4911955 38 | 39 | 40 | destinationuid 41 | 65783839-9CCB-48D0-A740-1F7BF96926D1 42 | modifiers 43 | 0 44 | modifiersubtext 45 | 46 | vitoclose 47 | 48 | 49 | 50 | AADD9ECB-F4AB-488C-8C2C-B7736DD7B815 51 | 52 | 53 | destinationuid 54 | 65783839-9CCB-48D0-A740-1F7BF96926D1 55 | modifiers 56 | 0 57 | modifiersubtext 58 | 59 | vitoclose 60 | 61 | 62 | 
63 | CD293D43-2356-49DB-AD4F-DBA5FDE026C5 64 | 65 | 66 | destinationuid 67 | 3069BB02-5E48-40CF-9DD8-337C5FA9F054 68 | modifiers 69 | 0 70 | modifiersubtext 71 | 72 | vitoclose 73 | 74 | 75 | 76 | D512D46E-8C0B-49AB-808B-051BDA488A65 77 | 78 | 79 | destinationuid 80 | 65783839-9CCB-48D0-A740-1F7BF96926D1 81 | modifiers 82 | 0 83 | modifiersubtext 84 | 85 | vitoclose 86 | 87 | 88 | 89 | 90 | createdby 91 | Chris Grieser 92 | description 93 | Extract Annotations as Markdown, insert Pandoc Citations with correct page numbers and more 94 | disabled 95 | 96 | name 97 | PDF Annotation Extractor 98 | objects 99 | 100 | 101 | config 102 | 103 | action 104 | 0 105 | argument 106 | 0 107 | focusedappvariable 108 | 109 | focusedappvariablename 110 | 111 | hotkey 112 | 0 113 | hotmod 114 | 524288 115 | hotstring 116 | A 117 | leftcursor 118 | 119 | modsmode 120 | 0 121 | relatedApps 122 | 123 | net.highlightsapp.universal 124 | com.readdle.PDFExpert-Mac 125 | com.apple.Preview 126 | com.apple.finder 127 | 128 | relatedAppsMode 129 | 1 130 | 131 | type 132 | alfred.workflow.trigger.hotkey 133 | uid 134 | 77F6FCC3-FA68-477B-BBB0-40F8C4911955 135 | version 136 | 2 137 | 138 | 139 | config 140 | 141 | concurrently 142 | 143 | escaping 144 | 0 145 | script 146 | 147 | scriptargtype 148 | 1 149 | scriptfile 150 | ./scripts/run-extraction.sh 151 | type 152 | 8 153 | 154 | type 155 | alfred.workflow.action.script 156 | uid 157 | 65783839-9CCB-48D0-A740-1F7BF96926D1 158 | version 159 | 2 160 | 161 | 162 | config 163 | 164 | concurrently 165 | 166 | escaping 167 | 0 168 | script 169 | # THESE VARIABLES MUST BE SET. SEE THE ONEUPDATER README FOR AN EXPLANATION OF EACH. 
170 | readonly remote_info_plist='https://raw.githubusercontent.com/chrisgrieser/pdf-annotation-extractor-alfred/main/info.plist' 171 | readonly workflow_url='chrisgrieser/pdf-annotation-extractor-alfred' 172 | readonly download_type='github_release' 173 | readonly frequency_check='1' 174 | 175 | # FROM HERE ON, CODE SHOULD BE LEFT UNTOUCHED! 176 | function abort { 177 | echo "${1}" >&2 178 | exit 1 179 | } 180 | 181 | function url_exists { 182 | curl --silent --location --output /dev/null --fail --range 0-0 "${1}" 183 | } 184 | 185 | function notification { 186 | local -r notificator="$(find . -type f -name 'notificator')" 187 | 188 | if [[ -f "${notificator}" && "$(/usr/bin/file --brief --mime-type "${notificator}")" == 'text/x-shellscript' ]]; then 189 | "${notificator}" --message "${1}" --title "${alfred_workflow_name}" --subtitle 'A new version is available' 190 | return 191 | fi 192 | 193 | osascript -e "display notification \"${1}\" with title \"${alfred_workflow_name}\" subtitle \"A new version is available\"" 194 | } 195 | 196 | # Local sanity checks 197 | readonly local_info_plist='info.plist' 198 | readonly local_version="$(/usr/libexec/PlistBuddy -c 'print version' "${local_info_plist}")" 199 | 200 | [[ -n "${local_version}" ]] || abort 'You need to set a workflow version in the configuration sheet.' 201 | [[ "${download_type}" =~ ^(direct|page|github_release)$ ]] || abort "'download_type' (${download_type}) needs to be one of 'direct', 'page', or 'github_release'." 202 | [[ "${frequency_check}" =~ ^[0-9]+$ ]] || abort "'frequency_check' (${frequency_check}) needs to be a number." 203 | 204 | # Check for updates 205 | if [[ $(find "${local_info_plist}" -mtime +"${frequency_check}"d) ]]; then 206 | # Remote sanity check 207 | if ! url_exists "${remote_info_plist}"; then 208 | abort "'remote_info_plist' (${remote_info_plist}) appears to not be reachable." 
209 | fi 210 | 211 | readonly tmp_file="$(mktemp)" 212 | curl --silent --location --output "${tmp_file}" "${remote_info_plist}" 213 | readonly remote_version="$(/usr/libexec/PlistBuddy -c 'print version' "${tmp_file}")" 214 | rm "${tmp_file}" 215 | 216 | if [[ "${local_version}" == "${remote_version}" ]]; then 217 | touch "${local_info_plist}" # Reset timer by touching local file 218 | exit 0 219 | fi 220 | 221 | if [[ "${download_type}" == 'page' ]]; then 222 | notification 'Opening download page…' 223 | open "${workflow_url}" 224 | exit 0 225 | fi 226 | 227 | readonly download_url="$( 228 | if [[ "${download_type}" == 'github_release' ]]; then 229 | osascript -l JavaScript -e 'function run(argv) { return JSON.parse(argv[0])["assets"].find(asset => asset["browser_download_url"].endsWith(".alfredworkflow"))["browser_download_url"] }' "$(curl --silent "https://api.github.com/repos/${workflow_url}/releases/latest")" 230 | else 231 | echo "${workflow_url}" 232 | fi 233 | )" 234 | 235 | if url_exists "${download_url}"; then 236 | notification 'Downloading and installing…' 237 | readonly download_name="$(basename "${download_url}")" 238 | curl --silent --location --output "${HOME}/Downloads/${download_name}" "${download_url}" 239 | open "${HOME}/Downloads/${download_name}" 240 | else 241 | abort "'workflow_url' (${download_url}) appears to not be reachable." 
242 | fi 243 | fi 244 | scriptargtype 245 | 1 246 | scriptfile 247 | 248 | type 249 | 0 250 | 251 | type 252 | alfred.workflow.action.script 253 | uid 254 | 0F49DAE6-6AB0-41CD-A791-496FD227B380 255 | version 256 | 2 257 | 258 | 259 | config 260 | 261 | lastpathcomponent 262 | 263 | onlyshowifquerypopulated 264 | 265 | removeextension 266 | 267 | text 268 | 269 | title 270 | {query} 271 | 272 | type 273 | alfred.workflow.output.notification 274 | uid 275 | 0C315F47-D751-4D59-8DFA-2CFC7BF581A2 276 | version 277 | 1 278 | 279 | 280 | config 281 | 282 | anchorfields 283 | 284 | argumenttrimmode 285 | 0 286 | argumenttype 287 | 0 288 | daterange 289 | 0 290 | fields 291 | 292 | 293 | field 294 | kMDItemDisplayName 295 | not 296 | 297 | split 298 | 299 | value 300 | {query} 301 | words 302 | 303 | 304 | 305 | field 306 | kMDItemAlternateNames 307 | not 308 | 309 | split 310 | 311 | value 312 | {query} 313 | words 314 | 315 | 316 | 317 | field 318 | kMDItemFinderComment 319 | not 320 | 321 | split 322 | 323 | value 324 | {query} 325 | words 326 | 327 | 328 | 329 | includesystem 330 | 331 | keyword 332 | anno 333 | limit 334 | 0 335 | runningsubtext 336 | 337 | scopes 338 | 339 | sortmode 340 | 2 341 | subtext 342 | 343 | title 344 | Extract Annotations from PDF 345 | types 346 | 347 | com.adobe.pdf 348 | 349 | withspace 350 | 351 | 352 | type 353 | alfred.workflow.input.filefilter 354 | uid 355 | AADD9ECB-F4AB-488C-8C2C-B7736DD7B815 356 | version 357 | 2 358 | 359 | 360 | config 361 | 362 | acceptsmulti 363 | 0 364 | filetypes 365 | 366 | com.adobe.pdf 367 | 368 | name 369 | Extract PDF Annotations 370 | 371 | type 372 | alfred.workflow.trigger.action 373 | uid 374 | D512D46E-8C0B-49AB-808B-051BDA488A65 375 | version 376 | 1 377 | 378 | 379 | config 380 | 381 | concurrently 382 | 383 | escaping 384 | 102 385 | script 386 | # using a webloc file that points to the readme reduces maintenance work of keeping 387 | # the cheatsheet and the README both up to date.
388 | qlmanage -p "./cheatsheet.webloc" 389 | scriptargtype 390 | 1 391 | scriptfile 392 | 393 | type 394 | 5 395 | 396 | type 397 | alfred.workflow.action.script 398 | uid 399 | 3069BB02-5E48-40CF-9DD8-337C5FA9F054 400 | version 401 | 2 402 | 403 | 404 | config 405 | 406 | argumenttype 407 | 2 408 | keyword 409 | acodes 410 | subtext 411 | for the PDF Annotation Extractor 412 | text 413 | Cheatsheet 414 | withspace 415 | 416 | 417 | type 418 | alfred.workflow.input.keyword 419 | uid 420 | CD293D43-2356-49DB-AD4F-DBA5FDE026C5 421 | version 422 | 1 423 | 424 | 425 | readme 426 | # PDF Annotation Extractor 427 | Extract Annotations as Markdown, insert Pandoc Citations with correct page numbers, and more. 428 | 429 | ## Setup 430 | - Install [Homebrew](https://brew.sh/). 431 | - Install `pdfannots2json` by pasting the following into your terminal: `brew install mgmeyers/pdfannots2json/pdfannots2json` 432 | - Set the hotkey by double-clicking the sky-blue field at the top left. 433 | 434 | ## Usage 435 | - [Documentation](https://github.com/chrisgrieser/pdf-annotation-extractor-alfred/#usage) 436 | 437 | --- 438 | 439 | ## Cite this software project 440 | 441 | If you want to mention this software project in an academic publication, please cite it as: 442 | 443 | `Grieser, C. (2023). PDF Annotation Extractor [Computer software]. https://github.com/chrisgrieser/pdf-annotation-extractor-alfred` 444 | 445 | For other citation styles, use the following metadata: [Citation File Format](https://github.com/chrisgrieser/pdf-annotation-extractor-alfred/blob/main/CITATION.cff). 
446 | 447 | 448 | ## Created by 449 | [Chris Grieser](https://chris-grieser.de/) 450 | uidata 451 | 452 | 0C315F47-D751-4D59-8DFA-2CFC7BF581A2 453 | 454 | colorindex 455 | 9 456 | xpos 457 | 350 458 | ypos 459 | 75 460 | 461 | 0F49DAE6-6AB0-41CD-A791-496FD227B380 462 | 463 | colorindex 464 | 11 465 | note 466 | OneUpdater 467 | xpos 468 | 830 469 | ypos 470 | 75 471 | 472 | 3069BB02-5E48-40CF-9DD8-337C5FA9F054 473 | 474 | colorindex 475 | 3 476 | xpos 477 | 195 478 | ypos 479 | 480 480 | 481 | 65783839-9CCB-48D0-A740-1F7BF96926D1 482 | 483 | colorindex 484 | 9 485 | note 486 | run extraction 487 | xpos 488 | 205 489 | ypos 490 | 75 491 | 492 | 77F6FCC3-FA68-477B-BBB0-40F8C4911955 493 | 494 | colorindex 495 | 7 496 | note 497 | DOUBLE CLICK THIS 498 | 499 | to set the hotkey for the annotation extraction 500 | xpos 501 | 30 502 | ypos 503 | 20 504 | 505 | AADD9ECB-F4AB-488C-8C2C-B7736DD7B815 506 | 507 | colorindex 508 | 9 509 | note 510 | select PDF for extraction 511 | xpos 512 | 30 513 | ypos 514 | 215 515 | 516 | CD293D43-2356-49DB-AD4F-DBA5FDE026C5 517 | 518 | colorindex 519 | 3 520 | note 521 | cheatsheet 522 | xpos 523 | 30 524 | ypos 525 | 480 526 | 527 | D512D46E-8C0B-49AB-808B-051BDA488A65 528 | 529 | colorindex 530 | 9 531 | xpos 532 | 30 533 | ypos 534 | 350 535 | 536 | 537 | userconfigurationconfig 538 | 539 | 540 | config 541 | 542 | default 543 | 544 | filtermode 545 | 2 546 | placeholder 547 | 548 | required 549 | 550 | 551 | description 552 | The .bib file containing your library. A library file is required for automatic page number identification and for prepending a YAML header with bibliographic information. 
553 | label 554 | BibTeX Library Path 555 | type 556 | filepicker 557 | variable 558 | bibtex_library_path 559 | 560 | 561 | config 562 | 563 | default 564 | ~/Documents 565 | filtermode 566 | 1 567 | placeholder 568 | ~/Documents 569 | required 570 | 571 | 572 | description 573 | If a citekey can be found in the library, this is the location where the extracted annotations are saved. If left empty or if extracting without citekey, the annotations are saved in the same folder as the PDF file. If the output folder is inside an Obsidian vault, will also open the file in Obsidian after extraction. 574 | label 575 | Output Path 576 | type 577 | filepicker 578 | variable 579 | output_path 580 | 581 | 582 | config 583 | 584 | default 585 | pdfannots2json 586 | pairs 587 | 588 | 589 | pdfannots2json 590 | pdfannots2json 591 | 592 | 593 | pdfannots 594 | pdfannots 595 | 596 | 597 | 598 | description 599 | Advanced users only. Normally, this should stay "pdfannots2json". (`pdfannots` requires the respective pip package.) 600 | label 601 | Extraction Engine 602 | type 603 | popupbutton 604 | variable 605 | extraction_engine 606 | 607 | 608 | config 609 | 610 | default 611 | 612 | filtermode 613 | 1 614 | placeholder 615 | ~/PDFs 616 | required 617 | 618 | 619 | description 620 | Only for Highlights.app users: The folder containing all PDFs. Required to be able to trigger extraction with Highlights being the frontmost app.
# Build an `.icns` file from the image given as first argument and print the
# path of the resulting file on stdout.
#
# The iconset is assembled in a fresh temporary directory: for every base size
# a 1x and a 2x (double resolution) PNG is rendered with `sips`, then
# `iconutil` converts the iconset folder into `icon.icns`. Only the iconset
# folder is deleted afterwards; the `.icns` stays in the temporary directory so
# the caller can pick it up from the printed path.
function make_icns {
  local -r source_image="${1}"
  local -r work_dir="$(/usr/bin/mktemp -d)"
  local -r iconset_dir="${work_dir}/icon.iconset"
  local -r icns_file="${work_dir}/icon.icns"
  local retina_size

  /bin/mkdir "${iconset_dir}"

  # One regular and one @2x rendition per base size; sips output is discarded
  for size in 16 32 64 128 256 512
  do
    retina_size="$((size * 2))"
    /usr/bin/sips --resampleHeightWidth "${size}" "${size}" "${source_image}" --out "${iconset_dir}/icon_${size}x${size}.png" &> /dev/null
    /usr/bin/sips --resampleHeightWidth "${retina_size}" "${retina_size}" "${source_image}" --out "${iconset_dir}/icon_${size}x${size}@2x.png" &> /dev/null
  done

  /usr/bin/iconutil --convert icns "${iconset_dir}" --output "${icns_file}"

  # The iconset is only an intermediate artifact
  /bin/rm -rf "${iconset_dir}"
  echo "${icns_file}"
}
name (from /System/Library/Sounds) 51 | -h, --help Show this help 52 | " | sed -E 's/^ {4}//' 53 | } 54 | 55 | # Options 56 | args=() 57 | while [[ "${1}" ]] 58 | do 59 | case "${1}" in 60 | -h | --help) 61 | usage 62 | exit 0 63 | ;; 64 | -m | --message) 65 | readonly notificator_message="${2}" 66 | shift 67 | ;; 68 | -t | --title) 69 | readonly notificator_title="${2}" 70 | shift 71 | ;; 72 | -s | --subtitle) 73 | readonly notificator_subtitle="${2}" 74 | shift 75 | ;; 76 | -p | --sound) 77 | readonly notificator_sound="${2}" 78 | shift 79 | ;; 80 | --) 81 | shift 82 | args+=("${@}") 83 | break 84 | ;; 85 | -*) 86 | echo "Unrecognised option: ${1}" 87 | exit 1 88 | ;; 89 | *) 90 | args+=("${1}") 91 | ;; 92 | esac 93 | shift 94 | done 95 | set -- "${args[@]}" 96 | 97 | # Check for required arguments 98 | if [[ -z "${notificator_message}" ]] 99 | then 100 | echo 'A message is mandatory! Aborting…' >&2 101 | exit 1 102 | fi 103 | 104 | readonly bundle_id="$(/usr/bin/tr -cd '[:alnum:]._-' <<< "${alfred_workflow_bundleid}")" 105 | readonly name="$(/usr/bin/tr -cd '[:alnum:]._- ' <<< "${alfred_workflow_name}")" 106 | readonly icon="${alfred_preferences}/workflows/${alfred_workflow_uid}/icon.png" 107 | readonly app="${alfred_workflow_cache}/Notificator for ${name}.app" 108 | readonly plist="${app}/Contents/Info.plist" 109 | 110 | # Exit early if Notificator exists and was modified fewer than 30 days ago 111 | if [[ -e "${app}" && "$(/bin/date -r "${app}" +%s)" -gt "$(/bin/date -v -30d +%s)" ]]; then 112 | show_notification 113 | exit 0 114 | fi 115 | 116 | # Pre-build checks 117 | if [[ -z "${bundle_id}" ]] 118 | then 119 | echo "Workflow is missing the bundle identifier! Aborting…" >&2 120 | exit 1 121 | fi 122 | 123 | if [[ -z "${name}" ]] 124 | then 125 | echo "Workflow is missing the name! Aborting…" >&2 126 | exit 1 127 | fi 128 | 129 | if [[ ! -f "${icon}" ]] 130 | then 131 | echo "Workflow is missing the icon! 
Aborting…" >&2 132 | exit 1 133 | fi 134 | 135 | # Build Notificator 136 | readonly jxa_script=' 137 | // Build argv/argc in a way that can be used from the applet inside the app bundle 138 | ObjC.import("Foundation") 139 | const args = $.NSProcessInfo.processInfo.arguments 140 | const argv = [] 141 | const argc = args.count 142 | for (let i = 0; i < argc; i++) { argv.push(args.objectAtIndex(i).js) } 143 | 144 | // Notification script 145 | const app = Application.currentApplication() 146 | app.includeStandardAdditions = true 147 | 148 | if (argv.length < 2) { // We use "2" because the script will always see at least one argument: the applet itself 149 | argv[1] = "Opening usage instructions…" 150 | argv[2] = "Notificator is a command-line app" 151 | argv[4] = "Funk" 152 | 153 | app.openLocation("https://github.com/vitorgalvao/notificator#usage") 154 | } 155 | 156 | const message = argv[1] 157 | const title = argv[2] 158 | const subtitle = argv[3] 159 | const sound = argv[4] 160 | 161 | const options = {} 162 | if (title) options.withTitle = title 163 | if (subtitle) options.subtitle = subtitle 164 | if (sound) options.soundName = sound 165 | 166 | app.displayNotification(message, options) 167 | ' 168 | 169 | [[ -d "${app}" ]] && /bin/rm -r "${app}" 170 | /bin/mkdir -p "${alfred_workflow_cache}" 171 | /usr/bin/osacompile -l JavaScript -o "${app}" -e "${jxa_script}" 2> /dev/null 172 | 173 | # Modify Notificator 174 | /usr/libexec/PlistBuddy -c "add :CFBundleIdentifier string ${bundle_id}.notificator" "${plist}" 175 | /usr/libexec/PlistBuddy -c 'add :LSUIElement string 1' "${plist}" 176 | /bin/mv "$(make_icns "${icon}")" "${app}/Contents/Resources/applet.icns" 177 | 178 | # Redo signature 179 | /usr/bin/codesign --remove-signature "${app}" 180 | /usr/bin/codesign --sign - "${app}" 181 | 182 | show_notification 183 | -------------------------------------------------------------------------------- /scripts/get-pdf-path.applescript: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env osascript 2 | 3 | on run() 4 | tell application "System Events" to set frontApp to (name of first process where it is frontmost) 5 | 6 | # PDF EXPERT 7 | # opens Finder, so the subsequent Finder block can be used 8 | if (frontApp is "PDF Expert") then 9 | tell application "System Events" 10 | tell process "PDF Expert" 11 | set frontmost to true 12 | click menu item "Save" of menu "File" of menu bar 1 13 | click menu item "Show in Finder" of menu "File" of menu bar 1 14 | end tell 15 | end tell 16 | delay 0.5 17 | end if 18 | 19 | # FINDER 20 | if (frontApp is "Finder" or frontApp is "PDF Expert") then 21 | tell application "Finder" to set sel to selection 22 | if ((count sel) = 0) then 23 | set current_file to "no-file" 24 | else if ((count sel) = 1) then 25 | set current_file to POSIX path of (sel as text) 26 | else 27 | set current_file to "more-than-one-file" 28 | end if 29 | end if 30 | 31 | # HIGHLIGHTS 32 | # HACK to identify filepath via a PDF folder & the window title 33 | if (frontApp is "Highlights") then 34 | # get file name 35 | tell application "System Events" 36 | tell process "Highlights" 37 | set frontmost to true 38 | click menu item "Save" of menu "File" of menu bar 1 39 | if (count of windows) > 0 then set frontWindow to name of front window 40 | end tell 41 | end tell 42 | set AppleScript's text item delimiters to " – " 43 | set filename to text item 1 of frontWindow 44 | 45 | # ensure ".pdf" is appended to the file name, if the user has hidden extensions 46 | set filename to do shell script ("filename=" & (quoted form of filename) & "; echo \"${filename%.pdf}.pdf\"") 47 | 48 | # find PDF in folder 49 | set pdfFolder to (system attribute "pdf_folder") 50 | set current_file to do shell script ("find " & (quoted form of pdfFolder) & " -type f -name " & (quoted form of filename)) 51 | 52 | if current_file = "" then return "not-in-pdf-folder" 53 | end 
/**
 * Convert a string to title case.
 *
 * Every word is lowercased except for its first letter, with these exceptions:
 * - small connector words (and, the, with, …) are lowercased entirely
 * - the pronoun "i" always becomes "I"
 * - other words shorter than three characters are lowercased entirely
 *
 * The very first character of the result is always uppercased, so a leading
 * small word ("the end") still starts with a capital ("The End").
 *
 * @param {string} str
 * @returns {string}
 */
function toTitleCase(str) {
	const smallWords =
		/\b(and|because|but|for|neither|nor|only|over|per|some|that|than|the|upon|vs?\.?|versus|via|when|with(out)?|yet)\b/i;
	const titleCased = str.replace(/\w\S*/g, (word) => {
		if (smallWords.test(word)) return word.toLowerCase();
		if (word.toLowerCase() === "i") return "I";
		if (word.length < 3) return word.toLowerCase();
		return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
	});
	// Only touch the first character here: lowercasing the remainder would undo
	// the per-word capitalization (and the "I" handling) done above.
	return titleCased.charAt(0).toUpperCase() + titleCased.slice(1);
}
/** Adapter bringing the output of `pdfannots` into the annotation format used
 * by this script: `text` is exposed as `quote`, `contents` as `comment`, the
 * lowercase type name is mapped to its capitalized counterpart, and the page
 * is coerced to a number. All original properties are kept on the result.
 * @param {PdfannotsOutput[]} rawAnnos
 * @returns {Annotation[]}
 */
function pdfAnnotsAdapter(rawAnnos) {
	/** @type {Record<string, Annotation["type"]>} */
	const annotationTypeFor = {
		highlight: "Highlight",
		underline: "Underline",
		strike: "Strikethrough",
		text: "Free Comment",
		image: "Image",
	};

	// page labels like "image 1" are reduced to their first number; labels
	// without any digits become 0
	const toPageNumber = (/** @type {number|string} */ rawPage) => {
		if (typeof rawPage !== "string") return rawPage;
		const firstNumber = rawPage.match(/\d+/)?.[0];
		return Number.parseInt(firstNumber || "0");
	};

	return rawAnnos.map((raw) => ({
		...raw,
		quote: raw.text,
		comment: raw.contents,
		type: annotationTypeFor[raw.type],
		page: toPageNumber(raw.page),
	}));
}
", "…") // ellipsis 143 | .replaceAll("\\u00AD", "") // remove invisible character 144 | .replaceAll("\\u0026", "&") // resolve &-symbol 145 | .replace(/’’|‘‘|["„“”«»’]/g, "'") // quotation marks 146 | .replace(/(\D[.,])\d/g, "$1") // remove footnotes from quote 147 | .replace(/(\w)-\s(\w)/gm, "$1$2") // remove leftover hyphens 148 | .trim(); 149 | return a; 150 | }); 151 | } 152 | 153 | /** 154 | * @param {Annotation[]} annotations 155 | * @param {number} pageNo 156 | * @returns {Annotation[]} 157 | */ 158 | function insertPageNumber(annotations, pageNo) { 159 | return annotations.map((a) => { 160 | // add first page number to pdf page number 161 | a.page = a.page + pageNo - 1; 162 | a.pageStr = a.page.toString(); 163 | return a; 164 | }); 165 | } 166 | 167 | /** code: "_" or annotation type "Underline" -> split off and send to Reminders.app 168 | * when tots is not installed, Underlines are ignored and annotations with 169 | * leading "_" are still extracted (though the "_" is removed) 170 | * @param {Annotation[]} annotations 171 | * @param {string} filename 172 | * @param {string=} citekey - only to be passed to jsonToMd of the underlines 173 | * @returns {Annotation[]} 174 | */ 175 | function processUnderlines(annotations, filename, citekey) { 176 | let totInstalled; 177 | 178 | // Annotations with leading "_": collected & removal of the "_" 179 | const underscoreAnnos = []; 180 | for (const anno of annotations) { 181 | if (anno.comment?.startsWith("_")) { 182 | anno.comment = anno.comment.slice(1).trim(); 183 | underscoreAnnos.push(anno); 184 | } 185 | } 186 | 187 | // Underline annotations 188 | if (totInstalled) { 189 | const underlineAnnos = annotations.filter((a) => a.type === "Underline"); 190 | 191 | const annosToSplitOff = [...underlineAnnos, ...underscoreAnnos]; 192 | if (annosToSplitOff.length > 0) { 193 | const text = jsonToMd(annosToSplitOff, citekey); 194 | 195 | // create new reminder due today 196 | const rem = Application("Reminders"); 197 | 
/** Render annotations as markdown, one entry per annotation, joined by line
 * breaks.
 *
 * - Highlight / Underline: bullet point with the quote; a comment is put in
 *   bold in front of it. A comment starting with a number ("1." or "1)")
 *   turns the bullet into an ordered list item.
 * - Free Comment: block quote
 * - Heading: the comment as-is, preceded by a blank line unless it is the
 *   first item
 * - Question Callout: `[!QUESTION]` callout
 * - Image: wikilink-style image embed
 * - any other type: empty line
 *
 * A leading `#tag` in the comment is split off and placed at the beginning of
 * the entry. The page reference uses `pageStr` when it is set, so that page
 * ranges created by `mergeQuotes` are shown, and falls back to `page`.
 *
 * @param {Annotation[]} annotations
 * @param {string=} citekey - if given, references are Pandoc citations
 * @returns {string}
 */
function jsonToMd(annotations, citekey) {
	let firstItem = true;
	const formattedAnnos = annotations.map((a) => {
		let comment;
		let output;
		let annotationTag = "";

		// uncommented highlights or underlines
		if (a.comment) comment = a.comment.trim();
		else comment = "";

		// separate out leading annotation tags
		if (/^#\w/.test(comment)) {
			if (comment.includes(" ")) {
				const tempArr = comment.split(" ");
				annotationTag = tempArr.shift() + " ";
				comment = tempArr.join(" ");
			} else {
				annotationTag = comment + " ";
				comment = "";
			}
		}

		// Pandoc Citation if citekey, otherwise just page number.
		// `pageStr` may hold a page range, `page` only ever the first page.
		const pageRef = a.pageStr ?? a.page;
		const reference = citekey ? `[@${citekey}, p. ${pageRef}]` : `(p. ${pageRef})`;

		// type specific output
		switch (a.type) {
			case "Highlight":
			case "Underline": {
				// highlights/underlines = bullet points
				if (comment) {
					// ordered list, if comments starts with numbering
					const numberRegex = /^\d+[.)] ?/;
					const commentNumbered = comment.match(numberRegex);
					if (commentNumbered) {
						output = commentNumbered[0].replace(/[.)] ?/, ". "); // turn consistently into "."
						comment = comment.replace(numberRegex, "");
					} else {
						output = "- ";
					}
					output += `${annotationTag}**${comment}** "${a.quote}" ${reference}`;
				} else {
					output = `- ${annotationTag}"${a.quote}" ${reference}`;
				}
				break;
			}
			case "Free Comment": {
				// free comments = block quote (my comments)
				comment = comment.replaceAll("\n", "\n> ");
				output = `> ${annotationTag}${comment} ${reference}`;
				break;
			}
			case "Heading": {
				// ensure no leading line break when heading is first item
				if (firstItem) output = comment;
				else output = "\n" + comment;
				break;
			}
			case "Question Callout": {
				// blockquoted comment
				comment = comment.replaceAll("\n", "\n> ");
				output = `> [!QUESTION]\n> ${comment}\n`;
				break;
			}
			case "Image": {
				output = `\n![[${a.image}]]\n`;
				break;
			}
			default:
			// other types produce no text; `join` renders them as an empty line
		}
		firstItem = false;
		return output;
	});

	return formattedAnnos.join("\n");
}

/** code: "+"
 * Merge every non-free-comment annotation whose comment is exactly "+" into
 * the annotation preceding it and remove it from the array. When the merged
 * quote is on a different page than the end of the preceding quote, the quotes
 * are joined with " (…) " and the page is appended to the predecessor's
 * `pageStr` ("5" becomes "5–6").
 * @param {Annotation[]} annos
 * @returns {Annotation[]} the same array, modified in place
 */
function mergeQuotes(annos) {
	// The annotation that received the latest merge and the page its quote
	// now ends on. Needed for consecutive "+": the predecessor's own `page`
	// only tells where its quote *starts*.
	let mergeTarget;
	let mergeTargetEndPage;

	// start at one, since the first element can't be merged to a predecessor
	for (let i = 1; i < annos.length; i++) {
		const current = annos[i];
		if (current.type === "Free Comment" || current.comment !== "+") continue;
		const target = annos[i - 1];

		const targetEndPage = target === mergeTarget ? mergeTargetEndPage : target.page;
		let connector = "";

		// merge page numbers, if across pages
		if (targetEndPage !== current.page) {
			// `pageStr` is optional, so fall back to the numeric page
			const pagesSoFar = target.pageStr ?? String(target.page);
			target.pageStr = pagesSoFar + "–" + current.page.toString();
			connector = " (…) ";
		}
		// merge quotes (either side may lack a quote, e.g. a free comment)
		target.quote = (target.quote ?? "") + connector + (current.quote ?? "");
		mergeTarget = target;
		mergeTargetEndPage = current.page;

		annos.splice(i, 1); // remove current element
		i--; // move index back, so merging of consecutive "+" works
	}
	return annos;
}
@returns {Annotation[]} 323 | */ 324 | function transformHeadings(annotations) { 325 | return annotations.map((a) => { 326 | if (!a.comment) return a; 327 | const hLevel = a.comment.match(/^#+(?!\w)/); 328 | if (!hLevel) return a; 329 | 330 | if (a.type === "Highlight" || a.type === "Underline") { 331 | if (!a.quote) return a; 332 | let headingText = a.quote; 333 | if (headingText === headingText.toUpperCase()) headingText = toTitleCase(headingText); 334 | a.comment = hLevel[0] + " " + headingText; 335 | a.quote = undefined; 336 | } 337 | a.type = "Heading"; 338 | return a; 339 | }); 340 | } 341 | 342 | /** code: "?" 343 | * @param {Annotation[]} annotations 344 | * @returns {Annotation[]} 345 | */ 346 | function questionCallout(annotations) { 347 | let annoArr = annotations.map((a) => { 348 | if (!a.comment) return a; 349 | if (a.type === "Free Comment" && a.comment.startsWith("?")) { 350 | a.type = "Question Callout"; 351 | a.comment = a.comment.slice(1).trim(); 352 | } 353 | return a; 354 | }); 355 | const callouts = annoArr.filter((a) => a.type === "Question Callout"); 356 | annoArr = annoArr.filter((a) => a.type !== "Question Callout"); 357 | return [...callouts, ...annoArr]; 358 | } 359 | 360 | /** images / rectangle annotations (pdfannots2json only) 361 | * @param {Annotation[]} annotations 362 | * @param {string} filename 363 | * @returns {Annotation[]} 364 | */ 365 | function insertImage4pdfannots2json(annotations, filename) { 366 | let i = 1; 367 | return annotations.map((a) => { 368 | if (a.type !== "Image") return a; 369 | a.image = `${filename}_image${i}.png`; 370 | if (a.comment) a.image += "|" + a.comment; // add alias 371 | i++; 372 | return a; 373 | }); 374 | } 375 | 376 | /** code: "=" 377 | * @param {Annotation[]} annotations 378 | * @param {string} keywords 379 | * @returns {{filteredArray: Annotation[]; tagsForYaml: string}} 380 | */ 381 | function transformTag4yaml(annotations, keywords) { 382 | let newKeywords = []; 383 | let tagsForYaml = 
""; 384 | 385 | // existing tags (from BibTeX library) 386 | if (keywords) { 387 | for (const tag of keywords.split(",")) { 388 | newKeywords.push(tag); 389 | } 390 | } 391 | 392 | // additional tags (from annotations) 393 | const arr = annotations.map((a) => { 394 | // check for "=" as starting symbol, do not trigger on `==` for highlight syntax 395 | if (a.comment?.startsWith("=") && !a.comment?.startsWith("==")) { 396 | let tags = a.comment.slice(1); // remove the "=" 397 | if (a.type === "Highlight" || a.type === "Underline") tags += " " + a.quote; 398 | for (const tag of tags.split(",")) { 399 | newKeywords.push(tag); 400 | } 401 | a.type = "remove"; 402 | } 403 | return a; 404 | }); 405 | 406 | // Merge & Save both 407 | if (newKeywords.length > 0) { 408 | newKeywords = [...new Set(newKeywords)].map((keyword) => keyword.trim().replaceAll(" ", "-")); 409 | tagsForYaml = newKeywords.map((keyword) => `"${keyword}"`).join(", "); 410 | } 411 | 412 | // return annotation array without tags 413 | return { 414 | filteredArray: arr.filter((a) => a.type !== "remove"), 415 | tagsForYaml: tagsForYaml, 416 | }; 417 | } 418 | 419 | /** 420 | * @param {string} citekey 421 | * @param {string} rawEntry 422 | * @returns {EntryMetadata|undefined} 423 | */ 424 | // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: 425 | function extractMetadata(citekey, rawEntry) { 426 | let bibtexEntry = "@" + rawEntry.split("@")[1]; // cut following citekeys 427 | 428 | // Decode Bibtex 429 | // biome-ignore format: more compact 430 | const germanChars = ['{\\"u};ü', '{\\"a};ä', '{\\"o};ö', '{\\"U};Ü', '{\\"A};Ä', '{\\"O};Ö', '\\"u;ü', '\\"a;ä', '\\"o;ö', '\\"U;Ü', '\\"A;Ä', '\\"O;Ö', "\\ss;ß", "{\\ss};ß"]; 431 | // biome-ignore format: more compact 432 | const otherChars = ["{\\~n};n", "{\\'a};a", "{\\'e};e", "{\\v c};c", "\\c{c};c", "\\o{};ø", "\\^{i};i", '\\"{i};i', '\\"{i};i', "{\\'c};c", '\\"e;e']; 433 | const specialChars = ["\\&;&", '``;"', "`;'", "\\textendash{};—", "---;—", 
"--;—"]; 434 | for (const pair of [...germanChars, ...otherChars, ...specialChars]) { 435 | const half = pair.split(";"); 436 | bibtexEntry = bibtexEntry.replaceAll(half[0], half[1]); 437 | } 438 | 439 | // extracts content of a BibTeX-field 440 | /** @param {string} str */ 441 | function extract(str) { 442 | const prop = str.split("=")[1].trim(); 443 | return prop.replace(/[{}]|,$/g, ""); // remove TeX-syntax & trailing comma 444 | } 445 | 446 | // parse BibTeX entry 447 | /* @type {EntryMetadata} */ 448 | const data = { 449 | title: "", 450 | ptype: "", 451 | firstPage: -999, 452 | author: "", 453 | year: 0, 454 | keywords: "", 455 | url: "", 456 | doi: "", 457 | citekey: citekey, 458 | tagsForYaml: "", 459 | }; 460 | 461 | for (const property of bibtexEntry.split("\n")) { 462 | if (property.match(/title *=/)) { 463 | data.title = extract(property) 464 | .replaceAll('"', "'") // avoid invalid yaml, since title is wrapped in "'" 465 | .replaceAll(":", "."); // avoid invalid yaml 466 | } else if (property.includes("@")) { 467 | data.ptype = property.replace(/@(.*)\{.*/, "$1"); 468 | } else if (property.match(/pages *=/)) { 469 | const pages = property.match(/\d+/g); 470 | if (pages) data.firstPage = Number.parseInt(pages[0]); 471 | } else if (property.match(/year *=/)) { 472 | const year = property.match(/\d{4}/g); 473 | if (year) data.year = Number.parseInt(year[0]); 474 | } else if (property.match(/date *=/)) { 475 | const year = property.match(/\d{4}/g); 476 | if (year) data.year = Number.parseInt(year[0]); 477 | } else if (property.match(/author *=/)) { 478 | data.author = extract(property); 479 | } else if (property.match(/keywords *=/)) { 480 | data.keywords = extract(property).replaceAll(", ", ",").replaceAll(" ", "-"); // no spaces allowed in tags 481 | } else if (property.match(/doi *=/)) { 482 | data.url = "https://doi.org/" + extract(property); 483 | data.doi = extract(property); 484 | } else if (property.match(/url *=/)) data.url = extract(property); 
/** if in Obsidian, open there, otherwise reveal in Finder
 *
 * A file counts as being in a vault when its path lies below one of the vault
 * paths listed in Obsidian's `obsidian.json` (compared case-insensitively).
 *
 * @param {string} filep path of the file to open
 */
function openFile(filep) {
	// determine if file is in Obsidian vault
	let isInObsidianVault = false;
	const obsidianJson =
		app.pathTo("home folder") + "/Library/Application Support/obsidian/obsidian.json";
	const fileExists = Application("Finder").exists(Path(obsidianJson));
	if (fileExists) {
		// tolerate a config without `vaults` key instead of throwing
		const vaults = Object.values(JSON.parse(app.read(obsidianJson)).vaults ?? {});
		const target = filep.toLowerCase();
		isInObsidianVault = vaults.some((v) => {
			if (!v.path) return false;
			// compare against the vault folder INCLUDING a trailing slash, so that
			// `/x/vault` does not match files in a sibling folder `/x/vault2`
			const vaultRoot = v.path.toLowerCase().replace(/\/+$/, "") + "/";
			return target.startsWith(vaultRoot);
		});
	}

	// open in Obsidian or reveal in Finder
	if (isInObsidianVault) {
		delay(0.1); // delay to ensure writing took place
		app.openLocation("obsidian://open?path=" + encodeURIComponent(filep));
	} else {
		// single-quote the path for the shell (`'` becomes `'\''`), so that quotes,
		// `$` or backticks in the path cannot break or inject into the command
		const quotedPath = "'" + filep.replaceAll("'", "'\\''") + "'";
		app.doShellScript(`open -R ${quotedPath}`); // reveal in Finder
	}
}
/**
 * Write the markdown note to `<outputPath>/<filename>.md`.
 * Without metadata only the annotations are written; with metadata a yaml
 * frontmatter is prepended and the note is opened afterwards.
 *
 * @param {string} annos annotations, already rendered as markdown
 * @param {EntryMetadata|undefined} metad
 * @param {string} outputPath
 * @param {string} filename
 */
function writeNote(annos, metad, outputPath, filename) {
	const notePath = `${outputPath}/${filename}.md`;

	// GUARD no citekey -> skip yaml
	if (!metad) {
		writeToFile(notePath, annos);
		return;
	}

	// format authors for yaml: "Last, First" becomes "First Last", each name quoted
	const quotedAuthors = [];
	for (let name of metad.author.split(" and ")) {
		if (name.includes(",")) {
			const parts = name.split(/, ?/);
			name = parts[1] + " " + parts[0];
		}
		quotedAuthors.push(`"${name}"`);
	}
	const authorStr = quotedAuthors.join(", ");

	// yaml frontmatter; `url` and `doi` only when present
	const lines = [
		"---",
		`aliases: "${metad.title}"`,
		`cdate: "${new Date().toISOString().slice(0, 10)}"`,
		`tags: [${metad.tagsForYaml}]`,
		'cssclasses: "pdf-annotations"',
		`citekey: "${metad.citekey}"`,
		`author: [${authorStr}]`, // already quoted above
		`year: ${metad.year.toString()}`,
		`publicationType: "${metad.ptype}"`,
	];
	if (metad.url) lines.push(`url: "${metad.url}"`);
	if (metad.doi) lines.push(`doi: "${metad.doi}"`);
	lines.push("---", "", "");
	const frontmatter = lines.join("\n");

	// write note
	writeToFile(notePath, frontmatter + annos);
	openFile(notePath);
}

//──────────────────────────────────────────────────────────────────────────────

/**
 * Entry point. Arguments in order: note filename (also used as citekey when a
 * library entry exists), annotations as JSON string, raw BibTeX entry (empty
 * string if none), output folder, extraction engine.
 */
/** @type {AlfredRun} */
// biome-ignore lint/correctness/noUnusedVariables: AlfredRun
function run(argv) {
	const [filename, rawAnnotations, entry, outPath, engine] = argv;
	const usePdfannots = engine === "pdfannots";

	let metadata;
	let citekey;
	if (entry !== "") {
		citekey = filename;
		metadata = extractMetadata(citekey, entry);
		if (!metadata) return; // cancellation of the page-number-dialog by the user
	}

	// process input
	const parsed = JSON.parse(rawAnnotations);
	let annos = usePdfannots ? pdfAnnotsAdapter(parsed) : pdfAnnots2JsonAdapter(parsed);
	annos = insertPageNumber(annos, metadata?.firstPage || 1);
	annos = cleanQuoteKey(annos);

	// process annotation codes & images
	annos = mergeQuotes(annos);
	annos = transformHeadings(annos);
	annos = questionCallout(annos);
	const tagResult = transformTag4yaml(annos, metadata?.keywords || "");
	annos = tagResult.filteredArray;
	if (metadata) metadata.tagsForYaml = tagResult.tagsForYaml;
	if (!usePdfannots) annos = insertImage4pdfannots2json(annos, filename);

	// finish up
	annos = processUnderlines(annos, filename, citekey);
	const markdown = jsonToMd(annos, citekey);

	writeNote(markdown, metadata, outPath, filename);
	return;
}
#!/bin/zsh
# shellcheck disable=2154
#
# Extracts the annotations of a PDF and hands them to `process_annotations.js`.
# Reads `$bibtex_library_path`, `$output_path` and `$extraction_engine`, which
# are not assigned in this script and are expected from the environment.

# INPUT
# PDF path from the arguments, otherwise determined via the AppleScript helper
pdf_path="$*"
[[ -z "$pdf_path" ]] && pdf_path=$(osascript "./scripts/get-pdf-path.applescript")

function notify {
	./notificator --title "PDF Annotation Extraction" --message "$1"
}

#───────────────────────────────────────────────────────────────────────────────

# GUARD
# ("no-file", "more-than-one-file", "not-in-pdf-folder" are sentinel values
# checked for in place of a path)
if [[ -n "$bibtex_library_path" && ! -f "$bibtex_library_path" ]]; then
	notify "⚠️ Library file path not valid."
	exit 1
elif [[ -n "$output_path" && ! -d "$output_path" ]]; then
	notify "⚠️ Output path not valid."
	exit 1
elif [[ "$pdf_path" == "no-file" ]]; then
	notify "⚠️ No file selected."
	exit 1
elif [[ "$pdf_path" == "more-than-one-file" ]]; then
	notify "⚠️ More than one file selected."
	exit 1
elif [[ "$pdf_path" == "not-in-pdf-folder" ]]; then
	notify "⚠️ When using Highlights, the PDF must be located in the PDF folder."
	exit 1
elif [[ "$pdf_path" != *.pdf ]]; then
	notify "⚠️ Not a .pdf file."
	exit 1
elif [[ "$extraction_engine" == "pdfannots" ]] && ! command -v pdfannots &> /dev/null; then
	notify "⚠️ pdfannots not installed."
	exit 1
elif [[ "$extraction_engine" == "pdfannots2json" ]] && ! command -v pdfannots2json &> /dev/null; then
	notify "⚠️ pdfannots2json not installed."
	exit 1
fi

#───────────────────────────────────────────────────────────────────────────────
# DETERMINE CITEKEY & OUTPUT NAME

# citekey = file name without extension, cut at the first underscore
citekey=$(basename "$pdf_path" .pdf | sed -E 's/_.*//')

# look up the entry in the library; `--fixed-strings` so that characters like
# "." or "+" in the citekey are matched literally instead of as a regex
entry=""
if [[ -n "$bibtex_library_path" ]]; then
	entry=$(grep --fixed-strings --after-context=20 --max-count=1 --ignore-case \
		"{$citekey," "$bibtex_library_path")
fi

# with citekey
if [[ -n "$entry" && -n "$bibtex_library_path" ]]; then
	notify "⏳ Running Extraction for $citekey…"
	filename="$citekey"
	[[ -z "$output_path" ]] && output_path="$(dirname "$pdf_path")"

# without citekey
else
	notify "⏳ Running Extraction…"
	output_path="$(dirname "$pdf_path")"
	filename="$(basename "$pdf_path" .pdf)_annos"
fi

#───────────────────────────────────────────────────────────────────────────────
# EXTRACTION

if [[ "$extraction_engine" == "pdfannots" ]]; then
	annotations=$(pdfannots --no-group --format=json "$pdf_path")
else
	prevDir="$PWD"
	IMAGE_FOLDER="$output_path/attachments/image_temp"
	mkdir -p "$IMAGE_FOLDER" && cd "$IMAGE_FOLDER" || exit 1

	annotations=$(pdfannots2json "$pdf_path" --image-output-path=./ --image-format="png")

	# IMAGE EXTRACTION
	# shellcheck disable=SC2012
	NUMBER_OF_IMAGES=$(ls | wc -l | tr -d " ")
	if [[ $NUMBER_OF_IMAGES -gt 0 ]]; then
		# HACK: fix zero-padding for low page numbers by giving all images 4 digits
		# see https://github.com/mgmeyers/pdfannots2json/issues/16
		for image in *; do
			leftPadded=$(echo "$image" | sed -E 's/-([[:digit:]])-/-000\1-/' | sed -E 's/-([[:digit:]][[:digit:]])-/-00\1-/' | sed -E 's/-([[:digit:]][[:digit:]][[:digit:]])-/-0\1-/')
			# skip names that need no padding; `mv` onto itself is an error
			[[ "$image" != "$leftPadded" ]] && mv "$image" "$leftPadded"
		done

		# rename images: numbered in glob order, moved up into the attachments folder
		i=1
		for image in *; do
			mv -f "$image" ../"${filename}_image${i}.png"
			i=$((i + 1))
		done
	fi

	# leave the temp folder BEFORE removing it, so that a relative `$IMAGE_FOLDER`
	# still resolves and the working directory is never a deleted folder
	cd "$prevDir" || exit 1

	# remove temp folder
	rmdir "$IMAGE_FOLDER"
	# remove attachment folder, if no images are extracted (rmdir fails if folder not empty)
	rmdir "$output_path/attachments" &> /dev/null
fi

#───────────────────────────────────────────────────────────────────────────────

# PROCESS ANNOTATIONS
osascript -l JavaScript "./scripts/process_annotations.js" \
	"$filename" "$annotations" "$entry" "$output_path" "$extraction_engine"