├── .appveyor.yml
├── .github
└── workflows
│ └── manubot.yaml
├── .gitignore
├── .travis.yml
├── LICENSE-CC0.md
├── LICENSE.md
├── README.md
├── SETUP.md
├── USAGE.md
├── build
├── README.md
├── assets
│ ├── custom-dictionary.txt
│ └── style.csl
├── autobuild.sh
├── build.sh
├── environment.yml
├── pandoc
│ └── defaults
│ │ ├── common.yaml
│ │ ├── docx.yaml
│ │ ├── html.yaml
│ │ └── pdf-weasyprint.yaml
├── plugins
│ ├── accordion.html
│ ├── analytics.html
│ ├── anchors.html
│ ├── attributes.html
│ ├── hypothesis.html
│ ├── jump-to-first.html
│ ├── lightbox.html
│ ├── link-highlight.html
│ ├── math.html
│ ├── table-of-contents.html
│ └── tooltips.html
└── themes
│ ├── default.docx
│ └── default.html
├── ci
├── .gitignore
├── README.md
├── deploy.sh
├── install-spellcheck.sh
└── install.sh
├── content
├── 00.front-matter.md
├── 01.abstract.md
├── 02.introduction.md
├── 03.text-mining.md
├── 04.network_applications.md
├── 05.conclusion.md
├── 90.back-matter.md
├── images
│ ├── figures
│ │ ├── constituency_parse_tree_example.png
│ │ ├── dependency_parse_example.png
│ │ ├── hetionet_metagraph_subgraph.svg
│ │ ├── unifying_applications_overview.png
│ │ ├── unifying_applications_overview.svg
│ │ ├── unifying_techniques_overview.png
│ │ └── unifying_techniques_overview.svg
│ ├── github.svg
│ ├── orcid.svg
│ └── twitter.svg
├── manual-references.json
├── metadata.yaml
└── response-to-reviewers.md
├── output
└── README.md
└── webpage
├── README.md
├── images
├── index.html
└── manuscript.pdf
/.appveyor.yml:
--------------------------------------------------------------------------------
1 | # See https://www.appveyor.com/docs/getting-started-with-appveyor-for-linux/
2 | # Don't build branches with a PR, since their build will be created with the PR itself.
3 | # Otherwise there would be two builds -- one for the PR and one for the branch.
4 | # If you're having issues with getting your PR to build, make sure there are no merge conflicts.
5 | skip_branch_with_pr: true
6 |
7 | # Enable 'Do not build on "Push" events' in the AppVeyor project settings
8 | # to only build commits from pull requests
9 | branches:
10 | only:
11 | - master
12 |
13 | # Only run AppVeyor on commits that modify at least one of the following files
14 | # Delete these lines to run AppVeyor on all master branch commits
15 | only_commits:
16 | files:
17 | - .appveyor.yml
18 | - build/
19 | - ci/install.sh
20 | - content/
21 |
22 | image: ubuntu
23 | services:
24 | - docker
25 |
26 | # Set SPELLCHECK to true to enable Pandoc spellchecking
27 | environment:
28 | SPELLCHECK: true
29 |
30 | install:
31 | # Create the message with the triggering commit before install so it is
32 | # available if the build fails
33 |   - TRIGGERING_COMMIT=${APPVEYOR_PULL_REQUEST_HEAD_COMMIT:-$APPVEYOR_REPO_COMMIT}
34 | - JOB_MESSAGE=" for commit $TRIGGERING_COMMIT "
35 | - source ci/install.sh
36 |
37 | test_script:
38 | - bash build/build.sh
39 | - MANUSCRIPT_FILENAME=manuscript-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}
40 | - cp output/manuscript.html $MANUSCRIPT_FILENAME.html
41 | - cp output/manuscript.pdf $MANUSCRIPT_FILENAME.pdf
42 | - appveyor PushArtifact $MANUSCRIPT_FILENAME.html
43 | - appveyor PushArtifact $MANUSCRIPT_FILENAME.pdf
44 | - |
45 | if [ "${SPELLCHECK:-}" = "true" ]; then
46 | SPELLING_ERRORS_FILENAME=spelling-errors-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}.txt
47 | cp output/spelling-errors.txt $SPELLING_ERRORS_FILENAME
48 | appveyor PushArtifact $SPELLING_ERRORS_FILENAME
49 | SPELLING_ERROR_LOCATIONS_FILENAME=spelling-error-locations-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}.txt
50 | cp output/spelling-error-locations.txt $SPELLING_ERROR_LOCATIONS_FILENAME
51 | appveyor PushArtifact $SPELLING_ERROR_LOCATIONS_FILENAME
52 | fi
53 |
54 | build: off
55 |
56 | cache:
57 | - ci/cache
58 |
59 | on_success:
60 | - echo "Artifacts available from $APPVEYOR_URL/project/$APPVEYOR_ACCOUNT_NAME/$APPVEYOR_PROJECT_SLUG/builds/$APPVEYOR_BUILD_ID/artifacts"
61 | - echo "Updated PDF available from $APPVEYOR_URL/api/buildjobs/$APPVEYOR_JOB_ID/artifacts/$MANUSCRIPT_FILENAME.pdf"
62 | - appveyor AddMessage "$JOB_MESSAGE is now complete."
63 | - |
64 | if [ "${SPELLCHECK:-}" = "true" ]; then
65 | SPELLING_ERROR_COUNT=($(wc -l $SPELLING_ERROR_LOCATIONS_FILENAME))
66 | appveyor AddMessage " Found $SPELLING_ERROR_COUNT potential spelling error(s). Preview:$(head -n 100 $SPELLING_ERROR_LOCATIONS_FILENAME)"
67 | appveyor AddMessage "... "
68 | fi
69 |
70 | on_failure:
71 | - appveyor AddMessage "$JOB_MESSAGE failed."
72 |
73 | # The following lines can be safely deleted, which will disable AppVeyorBot
74 | # notifications in GitHub pull requests
75 | # Notifications use Mustache templates http://mustache.github.io/mustache.5.html
76 | # See https://www.appveyor.com/docs/notifications/#customizing-message-template
77 | # for available variables
78 | notifications:
79 | - provider: GitHubPullRequest
80 | template: "AppVeyor [build {{buildVersion}}]({{buildUrl}})
81 | {{#jobs}}{{#messages}}{{{message}}}{{/messages}}{{/jobs}}
82 | {{#passed}}The rendered manuscript from this build is temporarily available for download at:\n\n
83 | {{#jobs}}{{#artifacts}}- [`{{fileName}}`]({{permalink}})\n{{/artifacts}}{{/jobs}}{{/passed}}"
84 |
--------------------------------------------------------------------------------
/.github/workflows/manubot.yaml:
--------------------------------------------------------------------------------
1 | name: Manubot
2 | on:
3 | push:
4 | branches:
5 | - master
6 | pull_request:
7 | branches:
8 | - master
9 | jobs:
10 | manubot:
11 | name: Manubot
12 | runs-on: ubuntu-latest
13 | env:
14 | GITHUB_PULL_REQUEST_SHA: ${{ github.event.pull_request.head.sha }}
15 | SPELLCHECK: true
16 | steps:
17 | - name: Set Environment Variables
18 | run: |
19 | TRIGGERING_SHA=${GITHUB_PULL_REQUEST_SHA:-$GITHUB_SHA}
20 | TRIGGERING_SHA_7=${TRIGGERING_SHA::7}
21 |         echo "TRIGGERING_SHA_7=$TRIGGERING_SHA_7" >> $GITHUB_ENV
22 | echo "TRIGGERING_SHA: $TRIGGERING_SHA"
23 | - name: Checkout Repository
24 | uses: actions/checkout@v2
25 | with:
26 | # fetch entire commit history to support get_rootstock_commit
27 | fetch-depth: 0
28 | - name: Cache
29 | uses: actions/cache@v1
30 | with:
31 | path: ci/cache
32 | key: ci-cache-${{ github.ref }}
33 | restore-keys: |
34 | ci-cache-refs/heads/master
35 | - name: Install Environment
36 | uses: goanpeca/setup-miniconda@v1
37 | with:
38 | activate-environment: manubot
39 | environment-file: build/environment.yml
40 | auto-activate-base: false
41 | miniconda-version: 'latest'
42 | - name: Install Spellcheck
43 | shell: bash --login {0}
44 | run: |
45 | if [ "${SPELLCHECK:-}" = "true" ]; then
46 | bash ci/install-spellcheck.sh
47 | fi
48 | - name: Build Manuscript
49 | shell: bash --login {0}
50 | run: bash build/build.sh
51 | - name: Upload Artifacts
52 | uses: actions/upload-artifact@v1
53 | with:
54 | name: manuscript-${{ github.run_id }}-${{ env.TRIGGERING_SHA_7 }}
55 | path: output
56 | - name: Deploy Manuscript
57 | if: github.ref == 'refs/heads/master' && github.event_name == 'push' && !github.event.repository.fork
58 | env:
59 | MANUBOT_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
60 | MANUBOT_SSH_PRIVATE_KEY: ${{ secrets.MANUBOT_SSH_PRIVATE_KEY }}
61 | CI_BUILD_WEB_URL: https://github.com/${{ github.repository }}/commit/${{ github.sha }}/checks
62 | CI_JOB_WEB_URL: https://github.com/${{ github.repository }}/runs/${{ github.run_id }}
63 | shell: bash --login {0}
64 | run: bash ci/deploy.sh
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated manuscript output files
2 | output/*
3 | !output/README.md
4 |
5 | webpage/v
6 |
7 | # When PDF building fails, a temporary symlink named images in the root
8 | # directory is not removed.
9 | /images
10 |
11 | # Manubot cache directory
12 | ci/cache
13 |
14 | # Pandoc filters downloaded during continuous integration setup
15 | build/pandoc/filters/spellcheck.lua
16 |
17 | # Python
18 | __pycache__/
19 | *.pyc
20 |
21 | # Jupyter Notebook
22 | .ipynb_checkpoints
23 |
24 | # Misc temporary files
25 | *.bak
26 |
27 | # System specific files
28 |
29 | ## Linux
30 | *~
31 | .Trash-*
32 |
33 | ## macOS
34 | .DS_Store
35 | ._*
36 | .Trashes
37 |
38 | ## Windows
39 | Thumbs.db
40 | [Dd]esktop.ini
41 |
42 | ## Text Editors
43 | .vscode
44 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | sudo: false
3 | language: generic
4 | services:
5 | - docker
6 | branches:
7 | only:
8 | - master
9 | env:
10 |   - SPELLCHECK=true BUILD_DOCX=true
11 | install:
12 | - source ci/install.sh
13 | script:
14 | - bash build/build.sh
15 | cache:
16 | directories:
17 | - ci/cache
18 |
19 | # Note: do not declare a second top-level `env:` key here — YAML duplicate
20 | # keys are last-wins in most parsers and would silently discard SPELLCHECK.
21 |
22 | deploy:
23 | provider: script
24 | script: bash ci/deploy.sh
25 | skip_cleanup: true
26 | on:
27 | branch: master
28 | condition: $TRAVIS_EVENT_TYPE = "push" && (-v MANUBOT_SSH_PRIVATE_KEY || "${!encrypted_*}" != "")
29 |
--------------------------------------------------------------------------------
/LICENSE-CC0.md:
--------------------------------------------------------------------------------
1 | # CC0 1.0 Universal
2 |
3 | ```
4 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
5 | ```
6 |
7 | ### Statement of Purpose
8 |
9 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
10 |
11 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
12 |
13 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
14 |
15 | 1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
16 |
17 | i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
18 |
19 | ii. moral rights retained by the original author(s) and/or performer(s);
20 |
21 | iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
22 |
23 | iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
24 |
25 | v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
26 |
27 | vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
28 |
29 | vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
30 |
31 | 2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
32 |
33 | 3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
34 |
35 | 4. __Limitations and Disclaimers.__
36 |
37 | a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
38 |
39 | b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
40 |
41 | c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
42 |
43 | d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
44 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # Creative Commons Attribution 4.0 International
2 |
3 | Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
4 |
5 | ### Using Creative Commons Public Licenses
6 |
7 | Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
8 |
9 | * __Considerations for licensors:__ Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. [More considerations for licensors](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors).
10 |
11 | * __Considerations for the public:__ By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. [More considerations for the public](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees).
12 |
13 | ## Creative Commons Attribution 4.0 International Public License
14 |
15 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
16 |
17 | ### Section 1 – Definitions.
18 |
19 | a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
20 |
21 | b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
22 |
23 | c. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
24 |
25 | d. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
26 |
27 | e. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
28 |
29 | f. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
30 |
31 | g. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
32 |
33 | h. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License.
34 |
35 | i. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
36 |
37 | j. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
38 |
39 | k. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.
40 |
41 | ### Section 2 – Scope.
42 |
43 | a. ___License grant.___
44 |
45 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
46 |
47 | A. reproduce and Share the Licensed Material, in whole or in part; and
48 |
49 | B. produce, reproduce, and Share Adapted Material.
50 |
51 | 2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
52 |
53 | 3. __Term.__ The term of this Public License is specified in Section 6(a).
54 |
55 | 4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
56 |
57 | 5. __Downstream recipients.__
58 |
59 | A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
60 |
61 | B. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
62 |
63 | 6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
64 |
65 | b. ___Other rights.___
66 |
67 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
68 |
69 | 2. Patent and trademark rights are not licensed under this Public License.
70 |
71 | 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
72 |
73 | ### Section 3 – License Conditions.
74 |
75 | Your exercise of the Licensed Rights is expressly made subject to the following conditions.
76 |
77 | a. ___Attribution.___
78 |
79 | 1. If You Share the Licensed Material (including in modified form), You must:
80 |
81 | A. retain the following if it is supplied by the Licensor with the Licensed Material:
82 |
83 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
84 |
85 | ii. a copyright notice;
86 |
87 | iii. a notice that refers to this Public License;
88 |
89 | iv. a notice that refers to the disclaimer of warranties;
90 |
91 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
92 |
93 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
94 |
95 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
96 |
97 | 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
98 |
99 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
100 |
101 | 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License.
102 |
103 | ### Section 4 – Sui Generis Database Rights.
104 |
105 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
106 |
107 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
108 |
109 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and
110 |
111 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
112 |
113 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
114 |
115 | ### Section 5 – Disclaimer of Warranties and Limitation of Liability.
116 |
117 | a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__
118 |
119 | b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__
120 |
121 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
122 |
123 | ### Section 6 – Term and Termination.
124 |
125 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
126 |
127 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
128 |
129 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
130 |
131 | 2. upon express reinstatement by the Licensor.
132 |
133 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
134 |
135 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
136 |
137 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
138 |
139 | ### Section 7 – Other Terms and Conditions.
140 |
141 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
142 |
143 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
144 |
145 | ### Section 8 – Interpretation.
146 |
147 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
148 |
149 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
150 |
151 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
152 |
153 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
154 |
155 | ```
156 | Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
157 |
158 | Creative Commons may be contacted at creativecommons.org
159 | ```
160 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Constructing Knowledge Graphs and Their Biomedical Applications
2 |
3 |
4 |
5 | [![HTML Manuscript](https://img.shields.io/badge/manuscript-HTML-blue.svg)](https://greenelab.github.io/knowledge-graph-review/)
6 | [![PDF Manuscript](https://img.shields.io/badge/manuscript-PDF-blue.svg)](https://greenelab.github.io/knowledge-graph-review/manuscript.pdf)
7 | [![GitHub Actions Status](https://github.com/greenelab/knowledge-graph-review/workflows/Manubot/badge.svg)](https://github.com/greenelab/knowledge-graph-review/actions)
8 |
9 | ## Manuscript description
10 |
11 |
12 |
13 | Knowledge graphs can support many biomedical applications. These graphs represent biomedical concepts and relationships in the form of nodes and edges. In this review, we discuss how these graphs are constructed and applied with a particular focus on how machine learning approaches are changing these processes.
14 |
15 | ## Manubot
16 |
17 |
18 |
19 | Manubot is a system for writing scholarly manuscripts via GitHub.
20 | Manubot automates citations and references, versions manuscripts using git, and enables collaborative writing via GitHub.
21 | An [overview manuscript](https://greenelab.github.io/meta-review/ "Open collaborative writing with Manubot") presents the benefits of collaborative writing with Manubot and its unique features.
22 | The [rootstock repository](https://git.io/fhQH1) is a general purpose template for creating new Manubot instances, as detailed in [`SETUP.md`](SETUP.md).
23 | See [`USAGE.md`](USAGE.md) for documentation on how to write a manuscript.
24 |
25 | Please open [an issue](https://git.io/fhQHM) for questions related to Manubot usage, bug reports, or general inquiries.
26 |
27 | ### Repository directories & files
28 |
29 | The directories are as follows:
30 |
31 | + [`content`](content) contains the manuscript source, which includes markdown files as well as inputs for citations and references.
32 | See [`USAGE.md`](USAGE.md) for more information.
33 | + [`output`](output) contains the outputs (generated files) from Manubot including the resulting manuscripts.
34 | You should not edit these files manually, because they will get overwritten.
35 | + [`webpage`](webpage) is a directory meant to be rendered as a static webpage for viewing the HTML manuscript.
36 | + [`build`](build) contains commands and tools for building the manuscript.
37 | + [`ci`](ci) contains files necessary for deployment via continuous integration.
38 |
39 | ### Local execution
40 |
41 | The easiest way to run Manubot is to use [continuous integration](#continuous-integration) to rebuild the manuscript when the content changes.
42 | If you want to build a Manubot manuscript locally, install the [conda](https://conda.io) environment as described in [`build`](build).
43 | Then, you can build the manuscript on POSIX systems by running the following commands from this root directory.
44 |
45 | ```sh
46 | # Activate the manubot conda environment (assumes conda version >= 4.4)
47 | conda activate manubot
48 |
49 | # Build the manuscript, saving outputs to the output directory
50 | bash build/build.sh
51 |
52 | # At this point, the HTML & PDF outputs will have been created. The remaining
53 | # commands are for serving the webpage to view the HTML manuscript locally.
54 | # This is required to view local images in the HTML output.
55 |
56 | # Configure the webpage directory
57 | manubot webpage
58 |
59 | # You can now open the manuscript webpage/index.html in a web browser.
60 | # Alternatively, open a local webserver at http://localhost:8000/ with the
61 | # following commands.
62 | cd webpage
63 | python -m http.server
64 | ```
65 |
66 | Sometimes it's helpful to monitor the content directory and automatically rebuild the manuscript when a change is detected.
67 | The following command, while running, will trigger both the `build.sh` script and `manubot webpage` command upon content changes:
68 |
69 | ```sh
70 | bash build/autobuild.sh
71 | ```
72 |
73 | ### Continuous Integration
74 |
75 | Whenever a pull request is opened, CI (continuous integration) will test whether the changes break the build process to generate a formatted manuscript.
82 | The build process aims to detect common errors, such as invalid citations.
83 | If your pull request build fails, see the CI logs for the cause of failure and revise your pull request accordingly.
84 |
85 | When a commit to the `master` branch occurs (for example, when a pull request is merged), CI builds the manuscript and writes the results to the [`gh-pages`](https://github.com/greenelab/knowledge-graph-review/tree/gh-pages) and [`output`](https://github.com/greenelab/knowledge-graph-review/tree/output) branches.
90 | The `gh-pages` branch uses [GitHub Pages](https://pages.github.com/) to host the following URLs:
91 |
92 | + **HTML manuscript** at https://greenelab.github.io/knowledge-graph-review/
93 | + **PDF manuscript** at https://greenelab.github.io/knowledge-graph-review/manuscript.pdf
94 |
95 | For continuous integration configuration details, see [`.github/workflows/manubot.yaml`](.github/workflows/manubot.yaml) if using GitHub Actions or [`.travis.yml`](.travis.yml) if using Travis CI.
96 |
97 | ## License
98 |
99 |
103 |
104 | [![License: CC BY 4.0](https://img.shields.io/badge/License%3A-CC%20BY%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by/4.0/)
105 | [![License: CC0 1.0](https://img.shields.io/badge/License%3A-CC0%201.0-lightgrey.svg)](https://creativecommons.org/publicdomain/zero/1.0/)
106 |
107 | Except when noted otherwise, the entirety of this repository is licensed under a CC BY 4.0 License ([`LICENSE.md`](LICENSE.md)), which allows reuse with attribution.
108 | Please attribute by linking to https://github.com/greenelab/knowledge-graph-review.
109 |
110 | Since CC BY is not ideal for code and data, certain repository components are also released under the CC0 1.0 public domain dedication ([`LICENSE-CC0.md`](LICENSE-CC0.md)).
111 | All files matched by the following glob patterns are dual licensed under CC BY 4.0 and CC0 1.0:
112 |
113 | + `*.sh`
114 | + `*.py`
115 | + `*.yml` / `*.yaml`
116 | + `*.json`
117 | + `*.bib`
118 | + `*.tsv`
119 | + `.gitignore`
120 |
121 | All other files are only available under CC BY 4.0, including:
122 |
123 | + `*.md`
124 | + `*.html`
125 | + `*.pdf`
126 | + `*.docx`
127 |
128 | Please open [an issue](https://github.com/greenelab/knowledge-graph-review/issues) for any question related to licensing.
129 |
--------------------------------------------------------------------------------
/SETUP.md:
--------------------------------------------------------------------------------
1 | # Table of contents
2 |
3 | - [Creating a new manuscript](#creating-a-new-manuscript)
4 | * [Configuration](#configuration)
5 | * [Create repository](#create-repository)
6 | * [Continuous integration](#continuous-integration)
7 | + [GitHub Actions](#github-actions)
8 | + [SSH Deploy Key](#ssh-deploy-key)
9 | - [Add the public key to GitHub](#add-the-public-key-to-github)
10 | - [Add the private key to GitHub](#add-the-private-key-to-github)
11 | + [Travis CI](#travis-ci)
12 | + [Previewing pull request builds with AppVeyor](#previewing-pull-request-builds-with-appveyor)
13 | * [README updates](#readme-updates)
14 | * [Finalize](#finalize)
15 | - [Merging upstream rootstock changes](#merging-upstream-rootstock-changes)
16 |
17 | _generated with [markdown-toc](https://ecotrust-canada.github.io/markdown-toc/)_
18 |
19 | # Creating a new manuscript
20 |
21 | These instructions detail how to create a new manuscript based off of the [`manubot/rootstock`](https://github.com/manubot/rootstock/) repository.
22 | The process can be a bit challenging, because it requires a few steps that are difficult to automate.
23 | However, you will only have to perform these steps once for each manuscript.
24 |
25 | These steps should be performed in a command-line shell (terminal), starting in the directory where you want the manuscript folder to be created.
26 | Setup is supported on Linux, macOS, and Windows.
27 | Windows setup requires [Git Bash](https://gitforwindows.org/) or [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/faq).
28 |
29 | ## Configuration
30 |
31 | First, you must configure two environment variables (`OWNER` and `REPO`).
32 | These variables specify the GitHub repository for the manuscript (i.e. `https://github.com/OWNER/REPO`).
33 | Make sure that the case of `OWNER` matches how your username is displayed on GitHub.
34 | In general, assume that all commands in this setup are case-sensitive.
35 | **Edit the following commands with your manuscript's information:**
36 |
37 | ```sh
38 | # GitHub username (change from manubot)
39 | OWNER=manubot
40 | # Repository name (change from rootstock)
41 | REPO=rootstock
42 | ```
43 |
44 | ## Create repository
45 |
46 | **Execute the remaining commands verbatim.**
47 | They do not need to be edited (if the setup works as intended).
48 |
49 | Next you must clone `manubot/rootstock` and reconfigure the remote repositories:
50 |
51 | ```sh
52 | # Clone manubot/rootstock
53 | git clone --single-branch https://github.com/manubot/rootstock.git $REPO
54 | cd $REPO
55 |
56 | # Configure remotes
57 | git remote add rootstock https://github.com/manubot/rootstock.git
58 |
59 | # Option A: Set origin URL using its web address
60 | git remote set-url origin https://github.com/$OWNER/$REPO.git
61 | # Option B: If GitHub SSH key access is enabled for OWNER, run the following command instead
62 | git remote set-url origin git@github.com:$OWNER/$REPO.git
63 | ```
64 |
65 | Next, you must manually create an empty GitHub repository at <https://github.com/new>.
66 | Make sure to use the same "Owner" and "Repository name" specified above.
67 | Do not initialize the repository, other than optionally adding a Description.
68 | Next, push your cloned manuscript:
69 |
70 | ```sh
71 | git push --set-upstream origin master
72 | ```
73 |
74 | ## Continuous integration
75 |
76 | Manubot integrates with cloud services to perform continuous integration (CI).
77 | For Manubot that means automatically building and deploying your manuscript.
78 | Manubot supports the following CI services:
79 |
80 | | Service | Default | Artifacts | Deployment | Config | Private Repos |
81 | |---------|---------|-----------|---------|--------|---------------|
82 | | [GitHub Actions](https://github.com/features/actions) | ✔️ | ✔️ | ✔️ | [`manubot.yaml`](.github/workflows/manubot.yaml) | 2,000 minutes per month |
83 | | [Travis CI](https://travis-ci.com) | ❌ | ❌ | ✔️ needs setup | [`.travis.yml`](.travis.yml) | 100 build trial |
84 | | [AppVeyor](https://www.appveyor.com/) | ❌ | ✔️ with PR comments | ❌ | [`.appveyor.yml`](.appveyor.yml) | 14 day trial |
85 |
86 | Notes on table fields:
87 |
88 | - **Default**: Whether the following uncollapsed setup instructions enable the service by default.
89 | - **Artifacts**: Manuscript outputs that are saved alongside the CI build logs.
90 | This is especially helpful for previewing changes that are under development in a pull request.
91 | Both GitHub Actions and AppVeyor upload the rendered manuscript as an artifact for pull request builds.
92 | However, only AppVeyor comments on pull requests with a download link to the artifacts ([example](https://github.com/manubot/rootstock/pull/262#issuecomment-519944731)).
93 | - **Deployment**: Whether the CI service can write outputs back to the GitHub repository (to the `output` and `gh-pages` branches).
94 | Deployment provides GitHub Pages with the latest manuscript version to serve to the manuscript's URL.
95 | GitHub Actions will deploy by default without any additional setup.
96 | Travis CI will only deploy if an SSH Private Key is provided.
97 | To avoid deploying a manuscript multiple times, disable GitHub Actions before providing an SSH Private Key to Travis.
98 | - **Config**: File configuring what operations CI will perform.
99 | Removing this file is one method to disable the CI service.
100 | - **Private Repos**: Quota for private repos.
101 | Only GitHub Actions supports cost-free builds of private repositories beyond a trial period.
102 | All services are cost-free for public repos.
103 |
104 | ### GitHub Actions
105 |
106 | GitHub Actions is the recommended default CI service because it requires no additional setup.
107 | To use GitHub Actions only, remove configuration files for other CI services:
108 |
109 | ```shell
110 | # remove Travis CI config
111 | git rm .travis.yml
112 | # remove AppVeyor config
113 | git rm .appveyor.yml
114 | # remove ci/install.sh if using neither Travis CI nor AppVeyor
115 | git rm ci/install.sh
116 | ```
117 |
118 | GitHub Actions is _usually_ able to deploy without any setup using the [`GITHUB_TOKEN`](https://help.github.com/en/actions/configuring-and-managing-workflows/authenticating-with-the-github_token) for authentication.
119 | GitHub Pages deployment using `GITHUB_TOKEN` recently started working on GitHub without an official announcement.
120 | If it does not work for you after completing this setup, try reselecting "gh-pages branch" as the Source for GitHub Pages in the repository Settings.
121 | GitHub Pages should now trigger on the next commit.
122 | If not, [let us know](https://github.com/manubot/rootstock/issues/new).
123 | For more reliable deployment on GitHub, you can also use an SSH Deploy Key instead (see below).
124 |
125 | Deploying on Travis CI requires creating an SSH Deploy Key.
126 | The following sections, collapsed by default, detail how to generate an SSH Deploy Key.
127 |
128 |
129 | Expand for SSH Deploy Key setup
130 |
131 | ### SSH Deploy Key
132 |
133 | Deployment on Travis CI requires an SSH Deploy Key.
134 | Previously, GitHub Actions also required an SSH Deploy Key, but now GitHub can deploy using the `GITHUB_TOKEN` secret.
135 | Therefore, users following the default configuration of deploying only via GitHub Actions can skip these steps.
136 | Otherwise, generate a deploy key so CI can write to the repository.
137 |
138 | ```sh
139 | # Generate deploy.key.pub (public) and deploy.key (private)
140 | ssh-keygen \
141 | -t rsa -b 4096 -N "" \
142 | -C "deploy@manubot.org" \
143 | -f ci/deploy.key
144 |
145 | # Encode deploy.key to remove newlines, writing encoded text to deploy.key.txt.
146 | # This is required for entry into the Travis settings.
147 | openssl base64 -A -in ci/deploy.key > ci/deploy.key.txt
148 | ```
149 |
150 | #### Add the public key to GitHub
151 |
152 | ```sh
153 | # Print the URL for adding the public key to GitHub
154 | echo "https://github.com/$OWNER/$REPO/settings/keys/new"
155 |
156 | # Print the public key for copy-pasting to GitHub
157 | cat ci/deploy.key.pub
158 | ```
159 |
160 | Go to the GitHub settings URL echoed above in a browser, and click "Add deploy key".
161 | For "Title", add a description like "Manubot Deploy Key".
162 | Copy-paste the contents of the `ci/deploy.key.pub` text file (printed above by `cat`) into the "Key" text box.
163 | Check the "Allow write access" box below.
164 | Finally, click "Add key".
165 |
166 | #### Add the private key to GitHub
167 |
168 | If you would like GitHub Actions to use SSH for deployment, rather than via HTTPS using `GITHUB_TOKEN`, perform the steps in this section.
169 | **Skip this section if solely using Travis CI for deployment.**
170 |
171 | ```sh
172 | # Print the URL for adding the private key to GitHub
173 | echo "https://github.com/$OWNER/$REPO/settings/secrets"
174 |
175 | # Print the encoded private key for copy-pasting to GitHub
176 | cat ci/deploy.key.txt && echo
177 | ```
178 |
179 | Next, go to the GitHub repository settings page (URL echoed above).
180 | Click "Add a new secret".
181 | For "Name", enter `MANUBOT_SSH_PRIVATE_KEY`.
182 | Next, copy-paste the content of `ci/deploy.key.txt` into "Value"
183 | (printed above by `cat`, including any trailing `=` characters if present).
184 |
185 |
186 |
187 | Expand for Travis CI setup
188 |
189 | ### Travis CI
190 |
191 | Travis CI is another option for continuous integration.
192 | Now you must manually enable Travis CI for the new repository at <https://travis-ci.com/account/repositories>.
193 | Click the `+` sign to "Add New Repository".
194 | If you don't see your repository listed, push the "Sync account" button.
195 | Finally, flick the repository's switch to enable CI.
196 |
197 | ```sh
198 | # Print the URL for adding the private key to Travis CI
199 | echo "https://travis-ci.com/$OWNER/$REPO/settings"
200 |
201 | # Print the encoded private key for copy-pasting to Travis CI
202 | cat ci/deploy.key.txt && echo
203 | ```
204 |
205 | Next, go to the Travis CI repository settings page (URL echoed above).
206 | Add a new record in the "Environment Variables" section.
207 | For "NAME", enter `MANUBOT_SSH_PRIVATE_KEY`.
208 | Next, copy-paste the content of `deploy.key.txt` into "VALUE"
209 | (printed above by `cat`, including any trailing `=` characters if present).
210 | Make sure "Display value in build logs" remains toggled off (the default).
211 |
212 | While in the Travis CI settings, activate the [limit concurrent jobs](https://blog.travis-ci.com/2014-07-18-per-repository-concurrency-setting/) toggle and enter `1` in the value field.
213 | This ensures previous Manubot builds deploy before subsequent ones begin.
214 |
215 | Once the public and private deploy keys have been copied to their cloud locations, you can optionally remove the untracked files:
216 |
217 | ```sh
218 | # Optionally remove untracked files
219 | rm ci/deploy.key*
220 | ```
221 |
222 |
223 |
224 |
225 | Expand for AppVeyor setup
226 |
227 | ### Previewing pull request builds with AppVeyor
228 |
229 | You can optionally enable AppVeyor continuous integration to view pull request builds.
230 | Unlike Travis CI, AppVeyor supports storing manuscripts generated during pull request builds as artifacts.
231 | These can be previewed to facilitate pull request review and ensure formatting and reference changes render as expected.
232 | When a pull request build runs successfully, **@AppVeyorBot** will comment on the pull request with a download link to the manuscript PDF.
233 |
234 | To enable AppVeyor, follow steps 1 and 2 of the [AppVeyor welcome](https://www.appveyor.com/docs/) to sign in to AppVeyor and add your manuscript repository as an AppVeyor project.
235 | The repository already contains an `.appveyor.yml` build configuration file, so no other setup is required.
236 | AppVeyor only runs when it detects changes that are likely to affect the manuscript.
237 |
238 |
239 | ## README updates
240 |
241 | The continuous integration configuration should now be complete.
242 | Now update `README.md` files to reference your new repository:
243 |
244 | ```shell
245 | # Perform substitutions
246 | sed "s/manubot\/rootstock/$OWNER\/$REPO/g" README.md > tmp && mv -f tmp README.md
247 | sed "s/manubot\.github\.io\/rootstock/$OWNER\.github\.io\/$REPO/g" README.md > tmp && mv -f tmp README.md
248 | ```
249 |
250 | ## Finalize
251 |
252 | The `content/02.delete-me.md` file details the Markdown syntax and formatting options available with Manubot.
253 | Remove it to reduce the content to a blank manuscript:
254 |
255 | ```shell
256 | # Remove deletable content file
257 | git rm content/02.delete-me.md
258 | ```
259 |
260 | Run `git status` or `git diff --color-words` to double check the changes thus far.
261 | If the changes look okay, commit and push:
262 |
263 | ```shell
264 | git add --update
265 | git commit --message "Brand repo to $OWNER/$REPO"
266 | git push origin master
267 | ```
268 |
269 | You should be good to go now.
270 | A good first step is to modify [`content/metadata.yaml`](content/metadata.yaml) with the relevant information for your manuscript.
271 |
272 | # Merging upstream rootstock changes
273 |
274 | This section will describe how to incorporate changes to rootstock that occurred since initializing your manuscript.
275 | You will want to do this if there are new enhancements or bugfixes that you want to incorporate.
276 | This process can be difficult, especially if conflicts have arisen, and is recommended only for advanced git users.
277 |
278 | It is recommended to do rootstock upgrades via a pull request to help you view the proposed changes and to ensure the build uses the updated environment.
279 | First, checkout a new branch to use as the pull request head branch:
280 |
281 | ```shell
282 | # checkout a new branch, named using the current date, i.e. rootstock-2018-11-16
283 | git checkout -b rootstock-$(date '+%Y-%m-%d')
284 | ```
285 |
286 | Second, pull the new commits from rootstock, but do not automerge:
287 |
288 | ```shell
289 | # if rootstock remote is not set, add it
290 | git config remote.rootstock.url || git remote add rootstock https://github.com/manubot/rootstock.git
291 |
292 | # pull the new commits from rootstock
293 | git pull --no-ff --no-rebase --no-commit rootstock master
294 | ```
295 |
296 | If all goes well, there won't be any conflicts.
297 | However, if there are conflicts, follow the suggested commands to resolve them.
298 |
299 | You can add the changes incrementally using `git add --patch`.
300 | This is helpful to see each upstream change.
301 | You may notice changes that affect how items in `content` are processed.
302 | If so, you should edit and stage `content` files as needed.
303 | When there are no longer any unstaged changes, then do `git commit`.
304 |
305 | If updating `master` via a pull request, proceed to push the commit to GitHub and open a pull request.
306 | Once the pull request is ready to merge, use GitHub's "Create a merge commit" option rather than "Squash and merge" or "Rebase and merge" to preserve the rootstock commit hashes.
307 |
308 | The environment for local builds does not automatically update when [`build/environment.yml`](build/environment.yml) changes.
309 | To update your local conda `manubot` environment with new changes, run:
310 |
311 | ```shell
312 | # update a local conda environment
313 | conda env update --file build/environment.yml
314 | ```
315 |
--------------------------------------------------------------------------------
/build/README.md:
--------------------------------------------------------------------------------
1 | # Building the manuscript
2 |
3 | [`build.sh`](build.sh) builds the repository.
4 | `bash build/build.sh` should be executed from the root directory of the repository.
5 | By default, `build.sh` creates HTML and PDF outputs.
6 | However, setting the `BUILD_PDF` environment variable to `false` will suppress PDF output.
7 | For example, run local builds using the command `BUILD_PDF=false bash build/build.sh`.
8 |
9 | To build a DOCX file of the manuscript, set the `BUILD_DOCX` environment variable to `true`.
10 | For example, use the command `BUILD_DOCX=true bash build/build.sh`.
11 | To export DOCX for all CI builds, set an environment variable (see docs for [GitHub Actions](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables) or [Travis CI](https://docs.travis-ci.com/user/environment-variables/#Defining-Variables-in-Repository-Settings)).
12 | Currently, equation numbers via `pandoc-eqnos` are not supported for DOCX output.
13 |
14 | Format conversion is done using [Pandoc](https://pandoc.org/MANUAL.html).
15 | `build.sh` calls `pandoc` commands using the options specified in [`pandoc/defaults`](pandoc/defaults).
16 | Each file specifies a set of pandoc `--defaults` options for a given format.
17 | To change the options, either edit the YAML files directly or add additional `--defaults` files.
18 |
19 | ## Environment
20 |
21 | Note: currently, **Windows is not supported**.
22 |
23 | Install the [conda](https://conda.io) environment specified in [`environment.yml`](environment.yml) by running the following commands from this directory:
24 |
25 | ```sh
26 | # Install the environment
27 | conda env create --file environment.yml
28 | ```
29 |
30 | If the `manubot` environment is already installed, but needs to be updated to reflect changes to `environment.yml`, use one of the following options:
31 |
32 | ```shell
33 | # option 1: update the existing environment.
34 | conda env update --file environment.yml
35 |
36 | # option 2: remove and reinstall the manubot environment.
37 | # Slower than option 1, but guarantees a fresh environment.
38 | conda env remove --name manubot
39 | conda env create --file environment.yml
40 | ```
41 |
42 | Activate with `conda activate manubot` (assumes `conda` version of [at least](https://github.com/conda/conda/blob/9d759d8edeb86569c25f6eb82053f09581013a2a/CHANGELOG.md#440-2017-12-20) 4.4).
43 | The environment should successfully install on both Linux and macOS.
44 | However, it will fail on Windows due to the [`pango`](https://anaconda.org/conda-forge/pango) dependency.
45 |
46 | Because the build process is dependent on having the appropriate version of the `manubot` Python package, it is necessary to use the version specified in `environment.yml`.
47 | The latest `manubot` release on PyPI may not be compatible with the latest version of this rootstock repository.
48 |
49 | ## Building PDFs
50 |
51 | If Docker is available, `build.sh` uses the [Athena](https://www.athenapdf.com/) [Docker image](https://hub.docker.com/r/arachnysdocker/athenapdf) to build the PDF.
52 | Otherwise, `build.sh` uses [WeasyPrint](https://weasyprint.org/) to build the PDF.
53 | It is common for WeasyPrint to generate many warnings and errors that can be safely ignored.
54 | Examples are shown below:
55 |
56 | ```text
57 | WARNING: Ignored `pointer-events: none` at 3:16, unknown property.
58 | WARNING: Ignored `font-display:auto` at 1:53114, descriptor not supported.
59 | ERROR: Failed to load font at "https://use.fontawesome.com/releases/v5.7.2/webfonts/fa-brands-400.eot#iefix"
60 | WARNING: Expected a media type, got only/**/screen
61 | ```
62 |
--------------------------------------------------------------------------------
/build/assets/custom-dictionary.txt:
--------------------------------------------------------------------------------
1 | personal_ws-1.1 en 22
2 | al
3 | doi
4 | eq
5 | et
6 | github
7 | isbn
8 | latex
9 | manubot
10 | orcid
11 | permalink
12 | pmc
13 | pmcid
14 | pmid
15 | pubmed
16 | rootstock
17 | s
18 | strikethrough
19 | svg
20 | svgs
21 | tbl
22 | unicode
23 | wikidata
24 |
--------------------------------------------------------------------------------
/build/assets/style.csl:
--------------------------------------------------------------------------------
1 |
2 |
77 |
--------------------------------------------------------------------------------
/build/autobuild.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ## autobuild.sh: automatically rebuild manuscript outputs and the webpage when content changes
4 | ## Depends on watchdog https://github.com/gorakhargosh/watchdog
5 |
6 | watchmedo shell-command \
7 | --wait \
8 | --command='bash build/build.sh && manubot webpage' \
9 | content
10 |
--------------------------------------------------------------------------------
/build/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## build.sh: compile manuscript outputs from content using Manubot and Pandoc

# Exit on any error, on use of an unset variable, and on failures
# anywhere within a pipeline.
set -o errexit \
    -o nounset \
    -o pipefail

# Set timezone used by Python for setting the manuscript's date
export TZ=Etc/UTC
# Default Python to read/write text files using UTF-8 encoding
export LC_ALL=en_US.UTF-8

# Generate reference information: process content/ into output/ (including
# output/manuscript.md consumed by the pandoc defaults below), caching
# retrieved citation metadata under ci/cache.
echo >&2 "Retrieving and processing reference metadata"
manubot process \
  --content-directory=content \
  --output-directory=output \
  --cache-directory=ci/cache \
  --skip-citations \
  --log-level=INFO

# Pandoc's configuration is specified via files of option defaults
# located in the $PANDOC_DATA_DIR/defaults directory.
PANDOC_DATA_DIR="${PANDOC_DATA_DIR:-build/pandoc}"

# Make output directory
mkdir -p output

# Create HTML output
# https://pandoc.org/MANUAL.html
echo >&2 "Exporting HTML manuscript"
pandoc --verbose \
  --data-dir="$PANDOC_DATA_DIR" \
  --defaults=common.yaml \
  --defaults=html.yaml

# Return null if docker command is missing, otherwise return path to docker
DOCKER_EXISTS="$(command -v docker || true)"

# Create PDF output (unless BUILD_PDF environment variable equals "false")
# If Docker is not available, use WeasyPrint to create PDF
if [ "${BUILD_PDF:-}" != "false" ] && [ -z "$DOCKER_EXISTS" ]; then
  echo >&2 "Exporting PDF manuscript using WeasyPrint"
  if [ -L images ]; then rm images; fi # if images is a symlink, remove it
  # Symlink content/images into the working directory so relative image
  # paths in the manuscript resolve during PDF rendering.
  ln -s content/images
  pandoc \
    --data-dir="$PANDOC_DATA_DIR" \
    --defaults=common.yaml \
    --defaults=html.yaml \
    --defaults=pdf-weasyprint.yaml
  rm images # remove the temporary symlink created above
fi

# If Docker is available, use athenapdf to create PDF
if [ "${BUILD_PDF:-}" != "false" ] && [ -n "$DOCKER_EXISTS" ]; then
  echo >&2 "Exporting PDF manuscript using Docker + Athena"
  if [ "${CI:-}" = "true" ]; then
    # Increase --delay for CI builds to ensure the webpage fully renders, even when the CI server is under high load.
    # Local builds default to a shorter --delay to minimize runtime, assuming proper rendering is less crucial.
    MANUBOT_ATHENAPDF_DELAY="${MANUBOT_ATHENAPDF_DELAY:-5000}"
    echo >&2 "Continuous integration build detected. Setting athenapdf --delay=$MANUBOT_ATHENAPDF_DELAY"
  fi
  if [ -d output/images ]; then rm -rf output/images; fi # if images is a directory, remove it
  # Copy images (dereferencing symlinks with -L) into output/ so they are
  # visible inside the container, which only mounts the output directory.
  cp -R -L content/images output/
  docker run \
    --rm \
    --shm-size=1g \
    --volume="$(pwd)/output:/converted/" \
    --security-opt=seccomp:unconfined \
    arachnysdocker/athenapdf:2.16.0 \
    athenapdf \
    --delay=${MANUBOT_ATHENAPDF_DELAY:-1100} \
    --pagesize=A4 \
    manuscript.html manuscript.pdf
  rm -rf output/images
fi

# Create DOCX output (if BUILD_DOCX environment variable equals "true")
if [ "${BUILD_DOCX:-}" = "true" ]; then
  echo >&2 "Exporting Word Docx manuscript"
  pandoc --verbose \
    --data-dir="$PANDOC_DATA_DIR" \
    --defaults=common.yaml \
    --defaults=docx.yaml
fi

# Spellcheck (if SPELLCHECK environment variable equals "true")
if [ "${SPELLCHECK:-}" = "true" ]; then
  # Point aspell at the project's custom dictionary of accepted terms.
  export ASPELL_CONF="add-extra-dicts $(pwd)/build/assets/custom-dictionary.txt; ignore-case true"

  # Identify and store spelling errors (sorted, case-insensitive, unique)
  pandoc \
    --data-dir="$PANDOC_DATA_DIR" \
    --lua-filter spellcheck.lua \
    output/manuscript.md \
    | sort -fu > output/spelling-errors.txt
  echo >&2 "Potential spelling errors:"
  cat output/spelling-errors.txt

  # Add additional forms of punctuation that Pandoc converts so that the
  # locations can be detected
  # Create a new expanded spelling errors file so that the saved artifact
  # contains only the original misspelled words
  cp output/spelling-errors.txt output/expanded-spelling-errors.txt
  grep "’" output/spelling-errors.txt | sed "s/’/'/g" >> output/expanded-spelling-errors.txt || true

  # Find locations of spelling errors
  # Use "|| true" after grep because otherwise this step of the pipeline will
  # return exit code 1 if any of the markdown files do not contain a
  # misspelled word
  cat output/expanded-spelling-errors.txt | while read word; do grep -ion "\<$word\>" content/*.md; done | sort -h -t ":" -k 1b,1 -k2,2 > output/spelling-error-locations.txt || true
  echo >&2 "Filenames and line numbers with potential spelling errors:"
  cat output/spelling-error-locations.txt

  rm output/expanded-spelling-errors.txt
fi

echo >&2 "Build complete"
120 |
--------------------------------------------------------------------------------
/build/environment.yml:
--------------------------------------------------------------------------------
# Conda environment specification for the Manubot manuscript build.
# Created by ci/install.sh via: conda env create --quiet --file build/environment.yml
name: manubot
channels:
  - conda-forge
dependencies:
  # Packages available from conda-forge (pinned for reproducible builds).
  - cairo=1.14.12
  - cairocffi=0.8.0
  - cffi=1.12.3
  - ghp-import=0.5.5
  - jinja2=2.11.2
  - jsonschema=3.2.0
  - pandas=1.0.3
  - pandoc=2.9.2
  - pango=1.40.14
  - pip=20.0
  - psutil=5.7.0
  - python=3.7.6
  - pyyaml=5.3
  - requests=2.23.0
  - watchdog=0.10.2
  - yamllint=1.21.0
  # Packages installed via pip; manubot itself is pinned to a specific
  # upstream commit rather than a released version.
  - pip:
    - errorhandler==2.0.1
    - git+https://github.com/manubot/manubot@890b76891f139a26d36cd9a4aa652f7e019501f8
    - jsonref==0.2
    - opentimestamps-client==0.7.0
    - opentimestamps==0.4.1
    - pandoc-eqnos==2.1.1
    - pandoc-fignos==2.2.0
    - pandoc-tablenos==2.1.1
    - pandoc-xnos==2.2.0
    - pybase62==0.4.3
    - pysha3==1.0.2
    - python-bitcoinlib==0.10.2
    - requests-cache==0.5.2
    - weasyprint==0.42.3
36 |
--------------------------------------------------------------------------------
/build/pandoc/defaults/common.yaml:
--------------------------------------------------------------------------------
# Pandoc --defaults shared between Manubot output formats.
# Loaded first; format-specific defaults (html.yaml, docx.yaml, ...) are
# layered on top via additional --defaults options in build.sh.
from: markdown
# Read the manuscript produced by `manubot process`.
input-file: output/manuscript.md
filters:
  # Number figures, equations, and tables (pandoc-xnos filter suite).
  - pandoc-fignos
  - pandoc-eqnos
  - pandoc-tablenos
  # Resolve Manubot citation keys, then format references with citeproc.
  - pandoc-manubot-cite
  - pandoc-citeproc
# Preserve the source's line wrapping in the output.
wrap: preserve
metadata:
  # Citation Style Language file controlling reference formatting.
  csl: build/assets/style.csl
  # Hyperlink in-text citations to their bibliography entries.
  link-citations: true
14 |
--------------------------------------------------------------------------------
/build/pandoc/defaults/docx.yaml:
--------------------------------------------------------------------------------
# Pandoc --defaults for DOCX output.
# Load on top of common defaults.
to: docx
output-file: output/manuscript.docx
# Reference document supplying the styles applied to the generated DOCX.
reference-doc: build/themes/default.docx
# Directories searched when resolving images and other linked resources.
resource-path:
  - '.'
  - content
9 |
--------------------------------------------------------------------------------
/build/pandoc/defaults/html.yaml:
--------------------------------------------------------------------------------
# Pandoc --defaults for HTML output.
# Load on top of common defaults.
to: html5
output-file: output/manuscript.html
# Theme and Manubot plugins appended after the document body, in order.
include-after-body:
  - build/themes/default.html
  - build/plugins/anchors.html
  - build/plugins/accordion.html
  - build/plugins/tooltips.html
  - build/plugins/jump-to-first.html
  - build/plugins/link-highlight.html
  - build/plugins/table-of-contents.html
  - build/plugins/lightbox.html
  - build/plugins/attributes.html
  - build/plugins/math.html
  - build/plugins/hypothesis.html
  - build/plugins/analytics.html
variables:
  # NOTE(review): empty `math` template variable — presumably suppresses the
  # template's default math header so the math plugin above takes over;
  # confirm against the HTML template.
  math: ''
html-math-method:
  method: mathjax
22 |
--------------------------------------------------------------------------------
/build/pandoc/defaults/pdf-weasyprint.yaml:
--------------------------------------------------------------------------------
# Pandoc --defaults for PDF output via weasyprint.
# Load on top of HTML defaults.
output-file: output/manuscript.pdf
pdf-engine: weasyprint
pdf-engine-opts:
  # Honor presentational HTML attributes (e.g. image width) when rendering.
  - '--presentational-hints'
# Override the HTML defaults' mathjax: webtex renders each equation
# server-side as an SVG image, so no client-side JavaScript is required.
html-math-method:
  method: webtex
  url: 'https://latex.codecogs.com/svg.latex?'
10 |
--------------------------------------------------------------------------------
/build/plugins/accordion.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
261 |
262 |
263 |
264 |
265 |
266 |
272 |
273 |
--------------------------------------------------------------------------------
/build/plugins/analytics.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/build/plugins/anchors.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
133 |
134 |
135 |
136 |
137 |
138 |
144 |
145 |
--------------------------------------------------------------------------------
/build/plugins/attributes.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
131 |
--------------------------------------------------------------------------------
/build/plugins/hypothesis.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
17 |
18 |
19 |
20 |
30 |
--------------------------------------------------------------------------------
/build/plugins/jump-to-first.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
261 |
262 |
263 |
264 |
265 |
266 |
272 |
273 |
--------------------------------------------------------------------------------
/build/plugins/link-highlight.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
185 |
--------------------------------------------------------------------------------
/build/plugins/math.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
11 |
12 |
13 |
14 |
24 |
--------------------------------------------------------------------------------
/build/plugins/table-of-contents.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
304 |
305 |
306 |
307 |
308 |
309 |
316 |
317 |
--------------------------------------------------------------------------------
/build/themes/default.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/greenelab/knowledge-graph-review/a09e874f90328073f260b072c47889648046d0f7/build/themes/default.docx
--------------------------------------------------------------------------------
/ci/.gitignore:
--------------------------------------------------------------------------------
1 | # SSH public and private keys
2 | deploy.key*
3 |
4 | # Output from travis encrypt-file (legacy)
5 | travis-encrypt-file.log
6 |
--------------------------------------------------------------------------------
/ci/README.md:
--------------------------------------------------------------------------------
1 | # Continuous integration tools
2 |
3 | This directory contains tools and files for continuous integration (CI).
4 | Specifically, [`deploy.sh`](deploy.sh) runs on successful `master` branch builds that are not pull requests.
5 | The contents of `../webpage` are committed to the `gh-pages` branch.
6 | The contents of `../output` are committed to the `output` branch.
7 |
8 | For more information on the CI implementation, see the CI setup documentation in `SETUP.md`.
9 |
--------------------------------------------------------------------------------
/ci/deploy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## deploy.sh: run during a CI build to deploy manuscript outputs to the output and gh-pages branches on GitHub.

# Set options for extra caution & debugging
set -o errexit \
    -o nounset \
    -o pipefail

# set environment variables for either Travis or GitHub Actions
# NOTE(review): with nounset enabled, each fallback variable must itself be
# set — e.g. GITHUB_REPOSITORY must exist whenever TRAVIS_REPO_SLUG does not,
# and TRAVIS_BUILD_WEB_URL whenever CI_BUILD_WEB_URL does not.
REPO_SLUG=${TRAVIS_REPO_SLUG:-$GITHUB_REPOSITORY}
COMMIT=${TRAVIS_COMMIT:-$GITHUB_SHA}
CI_BUILD_WEB_URL=${CI_BUILD_WEB_URL:-$TRAVIS_BUILD_WEB_URL}
CI_JOB_WEB_URL=${CI_JOB_WEB_URL:-$TRAVIS_JOB_WEB_URL}
BRANCH=${TRAVIS_BRANCH:-master}

# Add commit hash to the README: substitute the exported variables into
# webpage/README.md template placeholders using envsubst.
OWNER_NAME="$(dirname "$REPO_SLUG")"
REPO_NAME="$(basename "$REPO_SLUG")"
export REPO_SLUG COMMIT OWNER_NAME REPO_NAME
envsubst < webpage/README.md > webpage/README-complete.md
mv webpage/README-complete.md webpage/README.md

# Configure git identity from the most recent commit's author
git config --global push.default simple
git config --global user.email "$(git log --max-count=1 --format='%ae')"
git config --global user.name "$(git log --max-count=1 --format='%an')"
git checkout "$BRANCH"

# Configure deployment credentials.
# Default to SSH; switch to HTTPS only when an access token is provided
# and no SSH private key is present.
MANUBOT_DEPLOY_VIA_SSH=true
git remote set-url origin "git@github.com:$REPO_SLUG.git"
if [ -v MANUBOT_SSH_PRIVATE_KEY ] && [ "$MANUBOT_SSH_PRIVATE_KEY" != "" ]; then
  echo >&2 "[INFO] Detected MANUBOT_SSH_PRIVATE_KEY. Will deploy via SSH."
elif [ -v MANUBOT_ACCESS_TOKEN ] && [ "$MANUBOT_ACCESS_TOKEN" != "" ]; then
  echo >&2 "[INFO] Detected MANUBOT_ACCESS_TOKEN. Will deploy via HTTPS."
  MANUBOT_DEPLOY_VIA_SSH=false
  git remote set-url origin "https://$MANUBOT_ACCESS_TOKEN@github.com/$REPO_SLUG.git"
else
  echo >&2 "[INFO] Missing MANUBOT_SSH_PRIVATE_KEY and MANUBOT_ACCESS_TOKEN. Will deploy via SSH."
fi

if [ $MANUBOT_DEPLOY_VIA_SSH = "true" ]; then
  # Decrypt and add SSH key.
  # Key handling runs in a subshell so xtrace stays disabled for the
  # private-key operations even if the parent shell enables it.
  eval "$(ssh-agent -s)"
  (
  set +o xtrace # disable xtrace in subshell for private key operations
  if [ -v MANUBOT_SSH_PRIVATE_KEY ]; then
    # Preferred path: key supplied base64-encoded via environment variable.
    base64 --decode <<< "$MANUBOT_SSH_PRIVATE_KEY" | ssh-add -
  else
    # Legacy path: decrypt ci/deploy.key.enc with Travis-provided AES
    # key/IV variables (travis encrypt-file).
    echo >&2 "DeprecationWarning: Loading deploy.key from an encrypted file.
In the future, using the MANUBOT_ACCESS_TOKEN or MANUBOT_SSH_PRIVATE_KEY environment variable may be required."
    openssl aes-256-cbc \
      -K $encrypted_9befd6eddffe_key \
      -iv $encrypted_9befd6eddffe_iv \
      -in ci/deploy.key.enc \
      -out ci/deploy.key -d
    chmod 600 ci/deploy.key
    ssh-add ci/deploy.key
  fi
  )
fi

# Fetch and create gh-pages and output branches
# Travis does a shallow and single branch git clone
git remote set-branches --add origin gh-pages output
git fetch origin gh-pages:gh-pages output:output || \
  echo >&2 "[INFO] could not fetch gh-pages or output from origin."

# Configure versioned webpage and timestamp
manubot webpage \
  --timestamp \
  --no-ots-cache \
  --checkout=gh-pages \
  --version="$COMMIT"

# Commit message: reuse the source commit's subject line and link back to
# the triggering commit and CI build/job. "[ci skip]" prevents the deploy
# commits from themselves triggering new CI builds.
MESSAGE="\
$(git log --max-count=1 --format='%s')
[ci skip]

This build is based on
https://github.com/$REPO_SLUG/commit/$COMMIT.

This commit was created by the following CI build and job:
$CI_BUILD_WEB_URL
$CI_JOB_WEB_URL
"

# Deploy the manubot outputs to output
ghp-import \
  --push \
  --branch=output \
  --message="$MESSAGE" \
  output

# Deploy the webpage directory to gh-pages
ghp-import \
  --no-jekyll \
  --follow-links \
  --push \
  --branch=gh-pages \
  --message="$MESSAGE" \
  webpage

if [ $MANUBOT_DEPLOY_VIA_SSH = "true" ]; then
  # Workaround https://github.com/travis-ci/travis-ci/issues/8082
  ssh-agent -k
fi
110 |
--------------------------------------------------------------------------------
/ci/install-spellcheck.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## install-spellcheck.sh: run during a CI build to install Pandoc spellcheck dependencies.

# Set options for extra caution & debugging
set -o errexit \
    -o pipefail

# Install GNU Aspell and its English dictionaries used by the filter.
sudo apt-get update --yes
sudo apt-get install --yes aspell aspell-en
# Download the Pandoc Lua spellcheck filter, pinned to a specific commit,
# into build/pandoc/filters where build.sh's --data-dir makes it visible
# to pandoc's --lua-filter option.
wget --directory-prefix=build/pandoc/filters \
  https://github.com/pandoc/lua-filters/raw/13c3fa7e97206413609a48a82575cb43137e037f/spellcheck/spellcheck.lua
13 |
--------------------------------------------------------------------------------
/ci/install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## install.sh: run during a Travis CI or AppVeyor build to install the conda environment
## and the optional Pandoc spellcheck dependencies.

# Set options for extra caution & debugging
set -o errexit \
    -o pipefail

# Download and install a pinned Miniconda release into the home directory.
# "$HOME" is quoted throughout so paths containing spaces do not word-split.
wget https://repo.continuum.io/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh \
  --output-document miniconda.sh
bash miniconda.sh -b -p "$HOME/miniconda"
# Make the `conda` shell function available in this non-interactive shell.
source "$HOME/miniconda/etc/profile.d/conda.sh"
hash -r
# Auto-confirm prompts and keep the shell prompt unmodified on activation.
conda config \
  --set always_yes yes \
  --set changeps1 no
# Create and activate the pinned "manubot" environment.
conda env create --quiet --file build/environment.yml
conda list --name manubot
conda activate manubot

# Install Spellcheck filter for Pandoc (optional, gated on SPELLCHECK=true)
if [ "${SPELLCHECK:-}" = "true" ]; then
  bash ci/install-spellcheck.sh
fi
26 |
--------------------------------------------------------------------------------
/content/00.front-matter.md:
--------------------------------------------------------------------------------
1 | {##
2 | This file contains a Jinja2 front-matter template that adds version and authorship information.
3 | Changing the Jinja2 templates in this file may cause incompatibility with Manubot updates.
4 | Pandoc automatically inserts title from metadata.yaml, so it is not included in this template.
5 | ##}
6 |
7 | {## Uncomment & edit the following line to reference to a preprinted or published version of the manuscript.
8 | _A DOI-citable version of this manuscript is available at _.
9 | ##}
10 |
11 | {## Template to insert build date and source ##}
12 |
13 | This manuscript
14 | {% if manubot.ci_source is defined and manubot.ci_source.provider == "appveyor" -%}
15 | ([permalink]({{manubot.ci_source.artifact_url}}))
16 | {% elif manubot.html_url_versioned is defined -%}
17 | ([permalink]({{manubot.html_url_versioned}}))
18 | {% endif -%}
19 | was automatically generated
20 | {% if manubot.ci_source is defined -%}
21 | from [{{manubot.ci_source.repo_slug}}@{{manubot.ci_source.commit | truncate(length=7, end='', leeway=0)}}](https://github.com/{{manubot.ci_source.repo_slug}}/tree/{{manubot.ci_source.commit}})
22 | {% endif -%}
23 | on {{manubot.date}}.
24 |
25 |
26 | ## Authors
27 |
28 | {## Template for listing authors ##}
29 | {% for author in manubot.authors %}
30 | + **{{author.name}}**
31 | {%- if author.orcid is defined and author.orcid is not none %}
32 | {.inline_icon}
33 | [{{author.orcid}}](https://orcid.org/{{author.orcid}})
34 | {%- endif %}
35 | {%- if author.github is defined and author.github is not none %}
36 | · {.inline_icon}
37 | [{{author.github}}](https://github.com/{{author.github}})
38 | {%- endif %}
39 | {%- if author.twitter is defined and author.twitter is not none %}
40 | · {.inline_icon}
41 | [{{author.twitter}}](https://twitter.com/{{author.twitter}})
42 | {%- endif %}
43 |
44 | {%- if author.affiliations is defined and author.affiliations|length %}
45 | {{author.affiliations | join('; ')}}
46 | {%- endif %}
47 | {%- if author.funders is defined and author.funders|length %}
48 | · Funded by {{author.funders | join('; ')}}
49 | {%- endif %}
50 |
51 | {% endfor %}
52 |
--------------------------------------------------------------------------------
/content/01.abstract.md:
--------------------------------------------------------------------------------
1 | ## Abstract {.page_break_before}
2 |
3 | Knowledge graphs can support many biomedical applications.
4 | These graphs represent biomedical concepts and relationships in the form of nodes and edges.
5 | In this review, we discuss how these graphs are constructed and applied with a particular focus on how machine learning approaches are changing these processes.
6 | Biomedical knowledge graphs have often been constructed by integrating databases that were populated by experts via manual curation, but we are now seeing a more robust use of automated systems.
7 | A number of techniques are used to represent knowledge graphs, but often machine learning methods are used to construct a low-dimensional representation that can support many different applications.
8 | This representation is designed to preserve a knowledge graph’s local and/or global structure.
9 | Additional machine learning methods can be applied to this representation to make predictions within genomic, pharmaceutical, and clinical domains.
10 | We frame our discussion first around knowledge graph construction and then around unifying representational learning techniques and unifying applications.
11 | Advances in machine learning for biomedicine are creating new opportunities across many domains, and we note potential avenues for future work with knowledge graphs that appear particularly promising.
12 |
--------------------------------------------------------------------------------
/content/02.introduction.md:
--------------------------------------------------------------------------------
1 | ## Introduction
2 |
3 | Graphs are practical resources for many real-world applications.
4 | They have been used in social network mining to classify nodes [@doi:10.1007/978-1-4419-8462-3_5] and create recommendation systems [@doi:10.1145/3184558.3186904].
5 | They have also been used in natural language processing to interpret simple questions and use relational information to provide answers [@arxiv:1404.4326; @doi:10.1145/3038912.3052675].
6 | In a biomedical setting, graphs have been used to prioritize genes relevant to disease [@pmid:21121028; @doi:10.1101/651000; @doi:10.1109/CIBCB.2019.8791472; @doi:10.1093/bioinformatics/bty559], perform drug repurposing [@doi:10.7554/eLife.26726] and identify drug-target interactions [@doi:10.1371/journal.pcbi.1002574].
7 |
Within a biomedical setting, some graphs can be considered knowledge graphs, although precisely defining a knowledge graph is difficult because there are multiple conflicting definitions [@raw:KG_DEF].
9 | For this review, we define a biomedical knowledge graph as the following: a resource that integrates one or more expert-derived sources of information into a graph where nodes represent biomedical entities and edges represent relationships between two entities.
10 | This definition is consistent with other definitions found in the literature [@doi:10.3233/SW-160218;@doi:10.1109/MIC.2019.2928449;@doi:10.1016/j.eswa.2019.112948;@doi:10.1109/MSN.2016.030;@doi:10.1109/jproc.2015.2483592;@doi:10.1145/1242572.1242667;@doi:10.1109/TKDE.2017.2754499].
11 | Often relationships are considered unidirectional (e.g., a compound treats a disease, but a disease cannot treat a compound); however, there are cases where relationships can be considered bidirectional (e.g., a compound resembles another compound, or a gene interacts with another gene).
12 | A subset of graphs that meet our definition of a knowledge graph would be unsuitable for applications such as symbolic reasoning [@doi:10.1007/978-0-585-29599-2_11]; however, we chose a more liberal definition because it has been demonstrated that these broadly defined graphs have numerous uses throughout the literature.
13 | For example, Hetionet (Figure {@fig:hetionet_schema}) [@doi:10.7554/eLife.26726] would be considered a biomedical knowledge graph by this definition, and it has been used to identify drug repurposing opportunities [@doi:10.7554/eLife.26726].
14 | We do not consider databases like DISEASES [@doi:10.1016/j.ymeth.2014.11.020] and DrugBank [@doi:10.1093/nar/gkx1037] to be knowledge graphs.
15 | Although these resources contain essential information, they do not represent their data in the form of a graph.
16 |
17 | Biomedical knowledge graphs are often constructed from manually curated databases [@doi:10.7554/eLife.26726; @doi:10.1371/journal.pcbi.1002574; @doi:10.1038/s41467-017-00680-8; @doi:10.1186/s12859-016-1336-7; @doi:10.1016/j.jbi.2011.11.017].
18 | These databases provide previously established information that can be incorporated into a graph.
19 | For example, a graph using DISEASES [@doi:10.1016/j.ymeth.2014.11.020] as a resource would have genes and diseases as nodes, while edges added between nodes would represent an association between a gene and a disease.
20 | This example shows a single type of relationship; however, there are graphs that use databases with multiple relationships [@doi:10.7554/eLife.26726; @doi:10.1016/j.jbi.2008.03.004].
21 | In addition to manual curation, other approaches have used natural language processing techniques to construct knowledge graphs [@doi:10.1186/s12859-015-0549-5; @doi:10.1007/s10115-019-01351-4].
22 | One example used a text mining system to extract sentences that illustrate a protein's interaction with another protein [@doi:10.1016/j.knosys.2018.11.020].
23 | Once identified, these sentences can be incorporated as evidence to establish an edge in a knowledge graph.
24 |
25 | In this review we describe various approaches for constructing and applying knowledge graphs in a biomedical setting.
26 | We discuss the pros and cons of constructing a knowledge graph via manually curated databases and via text mining systems.
27 | We also compare assorted approaches for applying knowledge graphs to solve biomedical problems.
28 | Lastly, we conclude on the practicality of knowledge graphs and point out future applications that have yet to be explored.
29 |
30 | ![
31 | The metagraph (i.e., schema) of the knowledge graph used in the Rephetio project [@doi:10.7554/eLife.26726].
The authors of this project refer to their resource as a heterogeneous network (i.e., hetnet), and this network meets our definition of a knowledge graph.
33 | This resource depicts pharmacological and biomedical information in the form of nodes and edges.
34 | The nodes (circles) represent entities and edges (lines) represent relationships that are shared between two entities.
35 | The majority of edges in this metagraph are depicted as unidirectional, but some relationships can be considered bidirectional.
36 | ](https://raw.githubusercontent.com/hetio/het.io/e1ca4fd591e0aa01a3767bbf5597a910528f6f86/about/metagraph.png){#fig:hetionet_schema}
37 |
--------------------------------------------------------------------------------
/content/05.conclusion.md:
--------------------------------------------------------------------------------
1 | ## Conclusion
2 |
3 | Knowledge graphs are becoming widely used in biomedicine, and we expect their use to continue to grow.
4 | At the moment, most are constructed from databases derived from manual curation or from co-occurrences in text.
5 | We expect that machine learning approaches will play a key role in quickly deriving new findings from these graphs.
6 | Representing these knowledge graphs in a low dimensional space that captures a graph's local and global structure can enable many downstream machine learning analyses, and methods to capture this structure are an active area of research.
7 |
8 | As with any field, rigorous evaluation that can identify key factors that drive success is critical to moving the field forward.
9 | In regard to knowledge graphs, evaluation remains difficult.
10 | Experiments in this context require a significant amount of time and consequently resources [@doi:10.1093/jamia/ocy117; @doi:10.1155/2017/2498957].
Moving from open-ended and uncontrolled evaluations that consist of describing findings that are consistent with the literature to blinded evaluations of the literature that corroborate predictions and non-predictions would be a valuable first step.
12 | There are also well-documented biases related to node degree and degree distribution that must be considered for accurate evaluation [@url:https://greenelab.github.io/xswap-manuscript/].
13 | Furthermore, the diversity of applications hinders the development of a standardized set of expected evaluations.
14 |
15 | We anticipate that a fruitful avenue of research will be techniques that can produce low dimensional representations of knowledge graphs which distinguish between multiple node and edge types.
16 | There are many different sources of bias that lead to spurious edges or incompleteness, and modeling these biases may support better representations of knowledge graphs.
17 | It is a promising time for research into the construction and application of knowledge graphs.
The peer-reviewed literature is growing at an increasing rate, and maintaining a complete understanding is becoming increasingly challenging for scientists.
19 | One path that scientists can take to maintain awareness is to become hyper-focused on specific areas of knowledge graph literature.
20 | If advances in how these graphs are constructed, represented and applied can enable the linking of fields, we may be able to savor the benefits of this detailed knowledge without losing the broader contextual links.
21 |
--------------------------------------------------------------------------------
/content/90.back-matter.md:
--------------------------------------------------------------------------------
1 | ## References {.page_break_before}
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/content/images/figures/constituency_parse_tree_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/greenelab/knowledge-graph-review/a09e874f90328073f260b072c47889648046d0f7/content/images/figures/constituency_parse_tree_example.png
--------------------------------------------------------------------------------
/content/images/figures/dependency_parse_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/greenelab/knowledge-graph-review/a09e874f90328073f260b072c47889648046d0f7/content/images/figures/dependency_parse_example.png
--------------------------------------------------------------------------------
/content/images/figures/hetionet_metagraph_subgraph.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/content/images/figures/unifying_applications_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/greenelab/knowledge-graph-review/a09e874f90328073f260b072c47889648046d0f7/content/images/figures/unifying_applications_overview.png
--------------------------------------------------------------------------------
/content/images/figures/unifying_applications_overview.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/content/images/figures/unifying_techniques_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/greenelab/knowledge-graph-review/a09e874f90328073f260b072c47889648046d0f7/content/images/figures/unifying_techniques_overview.png
--------------------------------------------------------------------------------
/content/images/figures/unifying_techniques_overview.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/content/images/github.svg:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/content/images/orcid.svg:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/content/images/twitter.svg:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/content/manual-references.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": "url:https://github.com/manubot/rootstock",
4 | "type": "webpage",
5 | "URL": "https://github.com/manubot/rootstock",
6 | "title": "manubot/rootstock GitHub repository",
7 | "container-title": "GitHub",
8 | "issued": {
9 | "date-parts": [
10 | [
11 | 2019
12 | ]
13 | ]
14 | },
15 | "author": [
16 | {
17 | "given": "Daniel",
18 | "family": "Himmelstein"
19 | }
20 | ]
21 | },
22 | {
23 | "id": "eisenbach2006phpsyntaxtree",
24 | "author": [
25 | {
26 | "family": "Eisenbach",
27 | "given": "A"
28 | },
29 | {
30 | "family": "Eisenbach",
31 | "given": "M"
32 | }
33 | ],
34 | "issued": {
35 | "date-parts": [
36 | [
37 | 2006
38 | ]
39 | ]
40 | },
41 | "title": "PhpSyntaxTree tool",
42 | "type": "no-type"
43 | },
44 | {
45 | "id": "honnibal2017spacy",
46 | "author": [
47 | {
48 | "family": "Honnibal",
49 | "given": "Matthew"
50 | },
51 | {
52 | "family": "Montani",
53 | "given": "Ines"
54 | }
55 | ],
56 | "container-title": "To appear",
57 | "issue": "1",
58 | "issued": {
59 | "date-parts": [
60 | [
61 | 2017
62 | ]
63 | ]
64 | },
65 | "title": "Spacy 2: Natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing",
66 | "title-short": "Spacy 2",
67 | "type": "article-journal",
68 | "volume": "7"
69 | },
70 | {
71 | "author": [
72 | {
73 | "family": "Ehrlinger",
74 | "given": "Lisa"
75 | },
76 | {
77 | "family": "Wöß",
78 | "given": "Wolfram"
79 | }
80 | ],
81 | "container-title": "SEMANTiCS",
82 | "id": "raw:KG_DEF",
83 | "issued": {
84 | "date-parts": [
85 | [
86 | 2016
87 | ]
88 | ]
89 | },
90 | "title": "Towards a definition of knowledge graphs",
91 | "type": "paper-conference"
92 | },
93 | {
94 | "id": "raw:hanreplearn",
95 | "ISBN": "978-1-57735-770-4",
96 | "URL": "http://dl.acm.org/citation.cfm?id=3060832.3060837",
97 | "author": [
98 | {
99 | "family": "Han",
100 | "given": "Yufei"
101 | },
102 | {
103 | "family": "Shen",
104 | "given": "Yun"
105 | }
106 | ],
107 | "collection-title": "IJCAI’16",
108 | "container-title": "Proceedings of the twenty-fifth international joint conference on artificial intelligence",
109 | "issued": {
110 | "date-parts": [
111 | [
112 | 2016
113 | ]
114 | ]
115 | },
116 | "page": "1548-1554",
117 | "publisher": "AAAI Press",
118 | "publisher-place": "New York, New York, USA",
119 | "title": "Partially supervised graph embedding for positive unlabelled feature selection",
120 | "type": "paper-conference"
121 | },
122 | {
123 | "id": "raw:gongreplearn",
124 | "URL": "http://dl.acm.org/citation.cfm?id=2892753.2892809",
125 | "author": [
126 | {
127 | "family": "Gong",
128 | "given": "Chen"
129 | },
130 | {
131 | "family": "Tao",
132 | "given": "Dacheng"
133 | },
134 | {
135 | "family": "Yang",
136 | "given": "Jie"
137 | },
138 | {
139 | "family": "Fu",
140 | "given": "Keren"
141 | }
142 | ],
143 | "collection-title": "AAAI’14",
144 | "container-title": "Proceedings of the twenty-eighth aaai conference on artificial intelligence",
145 | "issued": {
146 | "date-parts": [
147 | [
148 | 2014
149 | ]
150 | ]
151 | },
152 | "page": "1847-1853",
153 | "publisher": "AAAI Press",
154 |     "publisher-place": "Québec City, Québec, Canada",
155 | "title": "Signed laplacian embedding for supervised dimension reduction",
156 | "type": "paper-conference"
157 | },
158 | {
159 | "id": "raw:biocreative/chemprot",
160 | "URL": "https://www.semanticscholar.org/paper/Overview-of-the-BioCreative-VI-chemical-protein-Krallinger-Rabal/eed781f498b563df5a9e8a241c67d63dd1d92ad5",
161 | "author": [
162 | {
163 | "family": "Krallinger",
164 | "given": "Martin"
165 | },
166 | {
167 | "family": "Rabal",
168 | "given": "Obdulia"
169 | },
170 | {
171 | "family": "Akhondi",
172 | "given": "Saber A"
173 | },
174 | {
175 | "literal": "others"
176 | }
177 | ],
178 | "container-title": "Proceedings of the sixth biocreative challenge evaluation workshop",
179 | "issued": {
180 | "date-parts": [
181 | [
182 | 2017
183 | ]
184 | ]
185 | },
186 | "page": "141-146",
187 | "title": "Overview of the biocreative vi chemical-protein interaction track",
188 | "type": "paper-conference",
189 | "volume": "1"
190 | },
191 | {
192 | "id": "raw:LLL",
193 | "author": [
194 | {
195 | "family": "Nédellec",
196 | "given": "C."
197 | }
198 | ],
199 | "container-title": "Proceedings of the learning language in logic 2005 workshop at the international conference on machine learning",
200 | "issued": {
201 | "date-parts": [
202 | [
203 | 2005
204 | ]
205 | ]
206 | },
207 | "title": "Learning language in logic - genic interaction extraction challenge",
208 | "type": "paper-conference"
209 | },
210 | {
211 | "id": "raw:IEPA",
212 | "URL": "http://helix-web.stanford.edu/psb02/ding.pdf",
213 | "author": [
214 | {
215 | "family": "Ding",
216 | "given": "Jing"
217 | },
218 | {
219 | "family": "Berleant",
220 | "given": "Daniel"
221 | },
222 | {
223 | "family": "Nettleton",
224 | "given": "Dan"
225 | },
226 | {
227 | "family": "Wurtele",
228 | "given": "Eve Syrkin"
229 | }
230 | ],
231 | "container-title": "Pacific symposium on biocomputing",
232 | "issued": {
233 | "date-parts": [
234 | [
235 | 2002
236 | ]
237 | ]
238 | },
239 | "keyword": "CAT CAT-REL-COOR CAT-REL-granularity levels",
240 | "page": "326-337",
241 | "title": "Mining medline: Abstracts, sentences, or phrases?",
242 | "title-short": "Mining medline",
243 | "type": "paper-conference"
244 | },
245 | {
246 | "id": "raw:GoodfellowDL",
247 | "ISBN": "0262035618, 9780262035613",
248 | "author": [
249 | {
250 | "family": "Goodfellow",
251 | "given": "Ian"
252 | },
253 | {
254 | "family": "Bengio",
255 | "given": "Yoshua"
256 | },
257 | {
258 | "family": "Courville",
259 | "given": "Aaron"
260 | }
261 | ],
262 | "issued": {
263 | "date-parts": [
264 | [
265 | 2016
266 | ]
267 | ]
268 | },
269 | "publisher": "The MIT Press",
270 | "title": "Deep learning",
271 | "type": "book"
272 | },
273 | {
274 | "id": "Thomas2011LearningTE",
275 | "author": [
276 | {
277 | "family": "Thomas",
278 | "given": "Philippe"
279 | },
280 | {
281 | "family": "Solt",
282 | "given": "Illés"
283 | },
284 | {
285 | "family": "Klinger",
286 | "given": "Roman"
287 | },
288 | {
289 | "family": "Leser",
290 | "given": "Ulf"
291 | }
292 | ],
293 | "issued": {
294 | "date-parts": [
295 | [
296 | 2011,
297 | 1
298 | ]
299 | ]
300 | },
301 | "title": "Learning protein protein interaction extraction using distant supervision",
302 | "type": "paper-conference"
303 | },
304 | {
305 | "id": "Jiezhon2018",
306 | "DOI": "10.1145/3159652.3159706",
307 | "ISBN": "9781450355810",
308 | "URL": "https://doi.org/10.1145/3159652.3159706",
309 | "author": [
310 | {
311 | "family": "Qiu",
312 | "given": "Jiezhong"
313 | },
314 | {
315 | "family": "Dong",
316 | "given": "Yuxiao"
317 | },
318 | {
319 | "family": "Ma",
320 | "given": "Hao"
321 | },
322 | {
323 | "family": "Li",
324 | "given": "Jian"
325 | },
326 | {
327 | "family": "Wang",
328 | "given": "Kuansan"
329 | },
330 | {
331 | "family": "Tang",
332 | "given": "Jie"
333 | }
334 | ],
335 | "collection-title": "WSDM ’18",
336 | "container-title": "Proceedings of the eleventh acm international conference on web search and data mining",
337 | "issued": {
338 | "date-parts": [
339 | [
340 | 2018
341 | ]
342 | ]
343 | },
344 | "keyword": "matrix factorization, network embedding, representation learning, social network, graph spectral",
345 | "page": "459-467",
346 | "publisher": "Association for Computing Machinery",
347 | "publisher-place": "New York, NY, USA",
348 | "title": "Network embedding as matrix factorization: Unifying deepwalk, line, pte, and node2vec",
349 | "title-short": "Network embedding as matrix factorization"
350 | },
351 | {
352 | "author": [
353 | {
354 | "family": "Bordes",
355 | "given": "Antoine"
356 | },
357 | {
358 | "family": "Usunier",
359 | "given": "Nicolas"
360 | },
361 | {
362 | "family": "García-Durán",
363 | "given": "Alberto"
364 | },
365 | {
366 | "family": "Weston",
367 | "given": "Jason"
368 | },
369 | {
370 | "family": "Yakhnenko",
371 | "given": "Oksana"
372 | }
373 | ],
374 | "container-title": "NIPS",
375 | "id": "raw:bordestranse",
376 | "issued": {
377 | "date-parts": [
378 | [
379 | 2013
380 | ]
381 | ]
382 | },
383 | "title": "Translating embeddings for modeling multi-relational data",
384 | "type": "paper-conference"
385 | },
386 | {
387 | "id": "raw:wangtransH",
388 | "URL": "http://dl.acm.org/citation.cfm?id=2893873.2894046",
389 | "author": [
390 | {
391 | "family": "Wang",
392 | "given": "Zhen"
393 | },
394 | {
395 | "family": "Zhang",
396 | "given": "Jianwen"
397 | },
398 | {
399 | "family": "Feng",
400 | "given": "Jianlin"
401 | },
402 | {
403 | "family": "Chen",
404 | "given": "Zheng"
405 | }
406 | ],
407 | "collection-title": "AAAI’14",
408 | "container-title": "Proceedings of the twenty-eighth aaai conference on artificial intelligence",
409 | "issued": {
410 | "date-parts": [
411 | [
412 | 2014
413 | ]
414 | ]
415 | },
416 | "page": "1112-1119",
417 | "publisher": "AAAI Press",
418 |     "publisher-place": "Québec City, Québec, Canada",
419 | "title": "Knowledge graph embedding by translating on hyperplanes",
420 | "type": "paper-conference"
421 | },
422 | {
423 | "id": "raw:lintransR",
424 | "ISBN": "0-262-51129-0",
425 | "URL": "http://dl.acm.org/citation.cfm?id=2886521.2886624",
426 | "author": [
427 | {
428 | "family": "Lin",
429 | "given": "Yankai"
430 | },
431 | {
432 | "family": "Liu",
433 | "given": "Zhiyuan"
434 | },
435 | {
436 | "family": "Sun",
437 | "given": "Maosong"
438 | },
439 | {
440 | "family": "Liu",
441 | "given": "Yang"
442 | },
443 | {
444 | "family": "Zhu",
445 | "given": "Xuan"
446 | }
447 | ],
448 | "collection-title": "AAAI’15",
449 | "container-title": "Proceedings of the twenty-ninth aaai conference on artificial intelligence",
450 | "issued": {
451 | "date-parts": [
452 | [
453 | 2015
454 | ]
455 | ]
456 | },
457 | "page": "2181-2187",
458 | "publisher": "AAAI Press",
459 | "publisher-place": "Austin, Texas",
460 | "title": "Learning entity and relation embeddings for knowledge graph completion"
461 | },
462 | {
463 | "id": "Baldi2011",
464 | "author": [
465 | {
466 | "family": "Baldi",
467 | "given": "Pierre"
468 | }
469 | ],
470 | "collection-title": "UTLW’11",
471 | "container-title": "Proceedings of the 2011 international conference on unsupervised and transfer learning workshop - volume 27",
472 | "issued": {
473 | "date-parts": [
474 | [
475 | 2011
476 | ]
477 | ]
478 | },
479 | "keyword": "hebbian learning, unsupervised learning, complexity, principal component analysis, clustering, deep architectures, compression, boolean, information theory, autoencoders",
480 | "page": "37-50",
481 | "publisher": "JMLR.org",
482 | "title": "Autoencoders, unsupervised learning and deep architectures",
483 | "type": "paper-conference"
484 | }
485 | ]
486 |
--------------------------------------------------------------------------------
/content/metadata.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Constructing Knowledge Graphs and Their Biomedical Applications"
3 | keywords:
4 | - knowledge-graphs
5 | - network-embeddings
6 | - text-mining
7 | - natural-language-processing
8 | - deep-learning
9 | - machine-learning
10 | - literature-review
11 | lang: en-US
12 | author_info:
13 | -
14 | github: danich1
15 | name: David N. Nicholson
16 | initials: DNN
17 | orcid: 0000-0003-0002-5761
18 | twitter: null
19 | email: dnicholson329@gmail.com
20 | affiliations:
21 | - Department of Systems Pharmacology and Translational Therapeutics, University of Pennsylvania
22 | funders:
23 | - The Gordon and Betty Moore Foundation (GBMF4552)
24 | - The National Institutes of Health (T32 HG000046)
25 | contributions:
26 | - Conceptualization
27 | - Funding Acquisition
28 | - Investigation
29 | - Writing - Original Draft
30 | - Visualization
31 | -
32 | github: cgreene
33 | name: Casey S. Greene
34 | initials: CSG
35 | orcid: 0000-0001-8713-9213
36 | twitter: greenescientist
37 | email: greenescientist@gmail.com
38 | affiliations:
39 | - Department of Systems Pharmacology and Translational Therapeutics, Perelman School of Medicine, University of Pennsylvania
40 | - Childhood Cancer Data Lab, Alex's Lemonade Stand Foundation
41 | funders:
42 | - The Gordon and Betty Moore Foundation (GBMF4552)
43 | - The National Institutes of Health (R01 HG010067)
44 | contributions:
45 | - Conceptualization
46 | - Funding Acquisition
47 | - Supervision
48 | - Writing - Review & Editing
49 |
--------------------------------------------------------------------------------
/content/response-to-reviewers.md:
--------------------------------------------------------------------------------
1 | # Response to Reviewers
2 |
3 | ## Reviewer 1
4 |
5 | >The authors review how knowledge graphs can be applied in the context of biomedical applications. I found the review to be well-written, highly organized and informative. I have no substantial suggestions and recommend acceptance.
6 |
7 | **Thank you. We appreciate the positive feedback on our manuscript.**
8 |
9 | >Minor suggestions which can be fixed by authors in the editorial process:
10 |
11 | > - Table 1: What is the organizing principle behind the order of the databases? Would it be better to order them alphabetically?
12 | > - "or though the use"?
13 | > - "into a subphrases"?
14 | > - Table 3: Maybe define or spell out the Types of Sentences.
15 | > - "This values within"?
16 | > - "uses a matrices"?
17 | > - "other effots"?
18 | > - "then determines how"?
19 | > - Figure 5b: Font too small
20 | > - "goal of recommend safe drugs"
21 |
22 | **We would like to thank you for noticing these typos. We have fixed each of these during our revision process.**
23 |
24 |
25 | ## Reviewer 2
26 |
27 | >The manuscript attempts to review methods for knowledge graphs and their biomedical applications.
28 | >The review is too general.
29 | >It tries to combine disparate lines of research ranging from dimensionality reduction, manifold learning, to graph community detection and frame it under knowledge graph learning.
30 | >Although no established definition of knowledge graph exists, an essential property of a knowledge graph is that it models relations between nodes, which don't apply to dimensionality methods (SVD,PCA,,etc.) and other methods such as node2vec.
31 | >Almost all of the discussions in this manuscript deals with methods that don't model relations.
32 | >The only part that correctly refers to a class knowledge graph methods is translational distance methods.
33 |
34 | **We agree with the reviewer that there is not a single, well-established definition of a knowledge graph in the literature.
35 | We identified a series of substantial revisions that we needed to make to the manuscript for clarity.
36 | We have made revisions to clearly specify a definition and refer to consistent definitions in the prior literature.
37 | For the purposes of this review, we defined a knowledge graph as:**
38 |
39 | > a resource that integrates one or more expert-derived sources of information into a graph where nodes represent biomedical entities and edges represent relationships between two entities.
40 |
41 | **This is a broader definition than some.
42 | For example, knowledge graphs that aren't suitable for symbolic reasoning would qualify under this definition.
43 | However, selecting this broader definition allows us to discuss a growing set of methods (including SVD, PCA, node2vec, and other strategies) for producing reduced representations that are then used for prediction.
44 | We took the reviewer's concerns to heart and now, throughout the manuscript, we clearly specify when transformations produce a representation that is no longer a knowledge graph (e.g., the SVD decomposition or node2vec embeddings) for the purposes of an application.**
45 |
46 | >The following are some comments:
47 |
48 | >What specifically do you mean by techniques that represent KGs and machine learning methods that are used to learn low-dimensional vectors?
49 |
50 | **For "techniques that represent knowledge graphs in a low dimensional space", we include methods such as node2vec and translational distance models that associate nodes and possibly edges to dense vectors.
51 | For these techniques, the idea is that these vectors capture the predominant features of a knowledge graph.
52 | This representation can make it easier to apply machine learning and other methods to perform tasks such as link prediction for biomedical applications.
53 | We now clearly specify that these reduced representations are not, themselves, knowledge graphs.**
54 |
55 | >at least references 1, 2, 5,6 and 2 are concerned with methods for graphs not necessarily knowledge graphs, in which the edge label (i.e., relation is essential to its definition).
56 |
57 | **We agree that not all of these references were about biomedical knowledge graphs.
58 | We have now clarified the first paragraph to discuss _graphs_, before diving into our definition of biomedical knowledge graphs in the second paragraph.**
59 |
60 | >Figure 1 doesn't show the relationship direction, For example, "causes", "binds" and other relations don't clearly specify the source and destination nodes which can be confusing.
61 | >Ideally, a knowledge graph should show that.
62 | >Please have a look at some knowledge graphs reviews in literature.
63 | >For example, https://arxiv.org/pdf/1503.00759.pdf
64 |
65 | **We appreciate the reviewer noticing this.
66 | We updated our figure to reflect edge directionality and updated the text to discuss edge directionality.**
67 |
68 | ```diff
69 | - A metagraph (schema) of the heterogeneous network used in the Rephetio project [..].
70 | - This undirected network depicts pharmacological and biomedical information.
71 | - The nodes (circles) represent entities and edges (lines) depict relational information between two entities.
72 |
73 | + The metagraph (i.e., schema) of the knowledge graph used in the Rephetio project [..].
74 | + The authors of this project refer to their resource as a heterogenous network (i.e., hetnet), and this network meets our definition of a knowledge graph.
75 | + This resources depicts pharmacological and biomedical information in the form of nodes and edges.
76 | + The nodes (circles) represent entities and edges (lines) represent relationships that are shared between two entities.
77 | + The majority of edges in this metagraph are depicted as unidirectional, but some relationships can be considered bidirectional.
78 | ```
79 |
80 | >"relatively precise data, but in low quantifies"?
81 |
82 | **We would like to thank the reviewer for pointing out this typo.
83 | As we were fixing this typo, we realized that the sentence itself was unclear and could be improved.
84 | We updated the text to clarify the point that we intended to make.**
85 |
86 | ```diff
87 | - Studies have shown that databases constructed in this fashion contain relatively precise data, but in low quantifies
88 | + Studies have shown that databases constructed in this fashion contain relatively precise data but the recall is low
89 | ```
90 |
91 | >what you refer to as "unifying techniques" is relational learning, I don't see why you refer to it in such an ambiguous way.
92 | >Furthermore, grouping the techniques into three is so broad and doesn't correctly represent knowledge graphs methods.
93 | >For example, matrix factorization and deep learning.
94 | >Matrix factorizations such as isomap, PCA, SVD and others are not knowledge graph representation techniques, but dimensionality reduction techniques.
95 |
96 | **We grouped techniques based on the methods that underlie them.
97 | We agree that the title for this section was overly broad.
98 | We have changed our title to refer to these as representational learning techniques, as this is a term used in the machine learning methods literature for techniques that aim to capture a low-dimensional representation with certain properties.
99 | Though the output of these methods is not, in and of itself, a knowledge graph, we expect that these methods can be applied to knowledge graphs and have updated text to be more explicit on this point.**
100 |
101 | ```diff
102 | - In the next sections we review the unifying techniques that construct this low dimensional space and unifying applications that use this space to solve biomedical problems.
103 |
104 | - Unifying Techniques
105 |
106 | + In the following sections we review methods that construct this low dimensional space (Unifying Representational Learning Techniques) and discuss applications that use this space to solve biomedical problems (Unifying Applications).
107 |
108 | + Unifying Representational Learning Techniques
109 | ```
110 |
111 | > More importantly, you don't show how they can be applied or used in the context of knowledge graphs as claimed?
112 |
113 | **We agree that the text wasn't explicit enough on how knowledge graphs are applied as input and have updated our text to fix this issue.**
114 |
115 | ```diff
116 | - In practice $U$ is usually used to represent nodes in a knowledge graph, but $V^{T}$ can also be used [..;..].
117 |
118 | +In practice $U$ is usually used to represent nodes in a knowledge graph and can be used as input for machine learning classifiers to perform tasks such as link prediction or node classification [@doi:10.1093/bioinformatics/btz718]; however,$V^{T}$ can also be used [..;..].
119 | ```
120 |
121 | ```diff
122 | - Common approaches involve using SVD, Laplacian eigenmaps or variants of the two to construct low dimensional representations.
123 |
124 | + Common approaches involve using SVD, Laplacian eigenmaps or variants of the two to decompose matrices into smaller rectangular forms.
125 | + Regarding knowledge graphs, the adjacency matrix ($A$) is the typical matrix that gets decomposed, but the laplacian matrix ($L=D-A$) can be used as well.
126 | ```
127 |
128 | >word2vec is a shallow neural network (one layer for projection, no activation or non-linearity is used in this layer), therefore it is deep learning, nor any of the methods which use similar techniques.
129 |
130 | **We agree that word2vec, node2vec and similar methods use a shallow neural network, which means these models aren't considered "deep learning".
131 | We updated the title to be "Neural networks" and replaced instances of deep learning with neural networks.**
132 |
133 | ```diff
134 | - Deep Learning
135 |
136 | - Deep learning is a paradigm that uses multiple non-linear transformations to map high dimensional data into a low dimensional space.
137 | - Many techniques that use deep learning on knowledge graphs are based on word2vec [..;..], a set of approaches that are widely used for natural language processing.
138 |
139 | + Neural Networks
140 |
141 | + Neural networks are a class of machine learning models inspired by the concept of biological neural networks [..].
142 | + These networks are reputable for making non-linear transformations of high dimensional data to solve classification and regression problems [..].
143 |
144 | + In the context of knowledge graphs, the most commonly used structures are based on word2vec [..;..].
145 | ```
146 |
--------------------------------------------------------------------------------
/output/README.md:
--------------------------------------------------------------------------------
1 | # Generated citation / reference files
2 |
3 | The `output` branch contains files automatically generated by the manuscript build process.
4 | It consists of the contents of the `output` directory of the `master` branch.
5 | These files are not tracked in `master`, but instead written to the `output` branch by continuous integration builds.
6 |
7 | ## Files
8 |
9 | This directory contains the following files:
10 |
11 | + [`citations.tsv`](citations.tsv) is a table of citations extracted from the manuscript and the corresponding standard citations and citation IDs.
12 | + [`manuscript.md`](manuscript.md) is a markdown document of all manuscript sections, with citation strings replaced by citation IDs.
13 | + [`references.json`](references.json) is CSL-JSON file of bibliographic item metadata ([see specification](https://github.com/citation-style-language/schema/blob/master/csl-data.json)) for all references.
14 | + [`variables.json`](variables.json) contains variables that were passed to the jinja2 templater. These variables contain those automatically generated by the manubot as well as those provided by the user via the `--template-variables-path` option.
15 |
16 | Pandoc consumes `manuscript.md` and `references.json` to create the formatted manuscript, which is exported to `manuscript.html`, `manuscript.pdf`, and optionally `manuscript.docx`.
17 |
--------------------------------------------------------------------------------
/webpage/README.md:
--------------------------------------------------------------------------------
1 | # Output directory containing the formatted manuscript
2 |
3 | The [`gh-pages`](https://github.com/$REPO_SLUG/tree/gh-pages) branch hosts the contents of this directory at .
4 | The permalink for this webpage version is .
5 | To redirect to the permalink for the latest manuscript version at anytime, use the link .
6 |
7 | ## Files
8 |
9 | This directory contains the following files, which are mostly ignored on the `master` branch:
10 |
11 | + [`index.html`](index.html) is an HTML manuscript.
12 | + [`manuscript.pdf`](manuscript.pdf) is a PDF manuscript.
13 |
14 | The `v` directory contains directories for each manuscript version.
15 | In general, a version is identified by the commit hash of the source content that created it.
16 |
17 | ### Timestamps
18 |
19 | The `*.ots` files in version directories are OpenTimestamps which can be used to verify manuscript existence at or before a given time.
20 | [OpenTimestamps](https://opentimestamps.org/) uses the Bitcoin blockchain to attest to file hash existence.
21 | The `deploy.sh` script run during continuous deployment creates the `.ots` files through its `manubot webpage` call.
22 | There is a delay before timestamps get confirmed by a Bitcoin block.
23 | Therefore, `.ots` files are initially incomplete and should be upgraded at a later time, so that they no longer rely on the availability of a calendar server to verify.
24 | The `manubot webpage` call during continuous deployment identifies files matched by `webpage/v/**/*.ots` and attempts to upgrade them.
25 | You can also manually upgrade timestamps, by running the following in the `gh-pages` branch:
26 |
27 | ```shell
28 | ots upgrade v/*/*.ots
29 | rm v/*/*.ots.bak
30 | git add v/*/*.ots
31 | ```
32 |
33 | Verifying timestamps with the `ots verify` command requires running a local bitcoin node with JSON-RPC configured, at this time.
34 |
35 | ## Source
36 |
37 | The manuscripts in this directory were built from
38 | [`$COMMIT`](https://github.com/$REPO_SLUG/commit/$COMMIT).
39 |
--------------------------------------------------------------------------------
/webpage/images:
--------------------------------------------------------------------------------
1 | v/latest/images
--------------------------------------------------------------------------------
/webpage/index.html:
--------------------------------------------------------------------------------
1 | v/latest/index.html
--------------------------------------------------------------------------------
/webpage/manuscript.pdf:
--------------------------------------------------------------------------------
1 | v/latest/manuscript.pdf
--------------------------------------------------------------------------------