├── .classpath
├── .gitignore
├── .gitmodules
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── CONDUCT.md
├── Dockerfile
├── README
├── TODO.rst
├── build.xml
├── dist
└── pdfmark.jar
├── lib
├── commons-logging-1.1.1.jar
├── commons-logging-adapters-1.1.1.jar
├── commons-logging-api-1.1.1.jar
├── httpclient-4.0.jar
├── httpcore-4.0.1.jar
├── httpcore-nio-4.0.1.jar
├── httpmime-4.0.jar
├── itextpdf-5.1.3.jar
├── jargs.jar
├── junit-4.8.jar
└── pdfbox-1.6.0.jar
├── pdfmark.jar
├── script
├── run-bin
└── run-test
├── src
└── org
│ └── crossref
│ └── pdfmark
│ ├── AnyXmpSchema.java
│ ├── ApiKey.java
│ ├── DumperMain.java
│ ├── FileInfo.java
│ ├── Main.java
│ ├── MarkBuilder.java
│ ├── MetadataGrabber.java
│ ├── PdfxSchema.java
│ ├── SchemaSet.java
│ ├── XPathHelpers.java
│ ├── XmlUtils.java
│ ├── XmpException.java
│ ├── XmpUtils.java
│ ├── prism
│ ├── Prism11Schema.java
│ └── Prism21Schema.java
│ ├── pub
│ └── Publisher.java
│ ├── test
│ ├── Main.java
│ ├── MarkBuilderTest.java
│ └── PdfInfoDirectory.java
│ └── unixref
│ ├── Book.java
│ ├── Dissertation.java
│ ├── Journal.java
│ ├── JournalArticle.java
│ ├── ReportPaper.java
│ ├── Standard.java
│ ├── Unixref.java
│ └── Work.java
└── test-data
├── nature-metadata-example.txt
├── pdfx-xmp-example.xmp
├── publisher-example.xml
├── random-xmp-example.xmp
├── test-pdf.pdf
├── unixref-example.json
└── unixref-example.xml
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | output
3 | *.swp
4 | *~
5 | .DS_Store
6 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "test-data/extended"]
2 | path = test-data/extended
3 | url = git@labs.crossref.org:pdf-test-data.git
4 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | pdfmark
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Mon Nov 23 14:41:40 GMT 2009
2 | eclipse.preferences.version=1
3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.6
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.source=1.6
13 |
--------------------------------------------------------------------------------
/CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Code of Conduct
2 |
3 | As contributors and maintainers of this project, we pledge to respect all people who
4 | contribute through reporting issues, posting feature requests, updating documentation,
5 | submitting pull requests or patches, and other activities.
6 |
7 | We are committed to making participation in this project a harassment-free experience for
8 | everyone, regardless of level of experience, gender, gender identity and expression,
9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 |
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 |
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
18 | from the project team.
19 |
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
21 | opening an issue or contacting one or more of the project maintainers.
22 |
23 | This Code of Conduct is adapted from the Contributor Covenant
24 | (http://contributor-covenant.org), version 1.0.0, available at
25 | http://contributor-covenant.org/version/1/0/0/
26 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:8
2 | COPY . /usr/src/myapp
3 | WORKDIR /usr/src/myapp
4 | CMD ["/usr/bin/java","-jar","dist/pdfmark.jar","-d","10.5555/12345678","test-data/test-pdf.pdf","-o","/out"]
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | ## pdfmark
2 |
3 | *2018/01/15:* `pdfmark` is not working with Java 9. You must use 8 or below.
4 | =====================================================
5 |
6 | Extended test data (in the form of lots of PDFs)
7 | can be inserted as a git submodule. You will need
8 | git access on labs.crossref.org to access the
9 | PDF test data repository. If you do, just perform
10 | this command to put the data into your local
11 | repository:
12 |
13 | $ git submodule update --init
14 |
15 | =====================================================
16 | To run with Docker
17 |
18 | - `docker build -t pdfmark .`
19 | - `docker run -v /tmp:/out pdfmark`
20 |
21 | You should find a file named '/tmp/test-pdf_xmp.pdf'
22 |
23 | You can inspect the metadata that has been added by running `exiftool` like this:
24 |
25 |
26 | `exiftool -a -G1 /tmp/test-pdf_xmp.pdf`
--------------------------------------------------------------------------------
/TODO.rst:
--------------------------------------------------------------------------------
1 | 10 minute items
2 | ==========================================================
3 |
4 | A few hours
5 | ==========================================================
6 |
7 | - Rewrite MarkBuilder as something more reusable.
8 | - Add file globbing.
9 | - Complete more unit tests.
10 | - Remove use of iText in favour of PDFBox (it does
11 | everything we need it to, while iText does a subset).
12 |
13 | Involved, hours or days
14 | ==========================================================
15 |
16 | - Expand MarkBuilder or whatever replaces it to
17 | support query response for non-article DOIs.
18 | - Expand unixref model to support non-article
19 | types.
20 | - Implement scanning of PDF documents.
--------------------------------------------------------------------------------
/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/dist/pdfmark.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/dist/pdfmark.jar
--------------------------------------------------------------------------------
/lib/commons-logging-1.1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-1.1.1.jar
--------------------------------------------------------------------------------
/lib/commons-logging-adapters-1.1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-adapters-1.1.1.jar
--------------------------------------------------------------------------------
/lib/commons-logging-api-1.1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-api-1.1.1.jar
--------------------------------------------------------------------------------
/lib/httpclient-4.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpclient-4.0.jar
--------------------------------------------------------------------------------
/lib/httpcore-4.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpcore-4.0.1.jar
--------------------------------------------------------------------------------
/lib/httpcore-nio-4.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpcore-nio-4.0.1.jar
--------------------------------------------------------------------------------
/lib/httpmime-4.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpmime-4.0.jar
--------------------------------------------------------------------------------
/lib/itextpdf-5.1.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/itextpdf-5.1.3.jar
--------------------------------------------------------------------------------
/lib/jargs.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/jargs.jar
--------------------------------------------------------------------------------
/lib/junit-4.8.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/junit-4.8.jar
--------------------------------------------------------------------------------
/lib/pdfbox-1.6.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/pdfbox-1.6.0.jar
--------------------------------------------------------------------------------
/pdfmark.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/pdfmark.jar
--------------------------------------------------------------------------------
/script/run-bin:
--------------------------------------------------------------------------------
#!/bin/sh
# Runs the pdfmark command-line tool (org.crossref.pdfmark.Main) from the
# compiled classes in bin/, passing all arguments straight through.

# Locate the project from this script's own position (<project>/script)
# instead of a machine-specific absolute path.
PROJ_HOME=$(cd "$(dirname "$0")/.." && pwd)
LIB_DIR=$PROJ_HOME/lib
BIN_DIR=$PROJ_HOME/bin

# "lib/*" is expanded by the JVM itself (Java 6 and later) to every jar in
# lib/, so the classpath always matches the jars that are actually shipped.
# The pattern is quoted so that the shell does not expand it first.
java -classpath "$LIB_DIR/*:$BIN_DIR" org.crossref.pdfmark.Main "$@"
7 |
--------------------------------------------------------------------------------
/script/run-test:
--------------------------------------------------------------------------------
#!/bin/sh
# Runs the pdfmark test entry point (org.crossref.pdfmark.test.Main) from the
# compiled classes in bin/, passing all arguments straight through.

# Locate the project from this script's own position (<project>/script)
# instead of a machine-specific absolute path.
PROJ_HOME=$(cd "$(dirname "$0")/.." && pwd)
LIB_DIR=$PROJ_HOME/lib
BIN_DIR=$PROJ_HOME/bin

# "lib/*" is expanded by the JVM itself (Java 6 and later) to every jar in
# lib/ (including junit), so the classpath always matches the shipped jars.
# The pattern is quoted so that the shell does not expand it first.
java -classpath "$LIB_DIR/*:$BIN_DIR" org.crossref.pdfmark.test.Main "$@"
7 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/AnyXmpSchema.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
20 | import com.itextpdf.text.xml.xmp.XmpSchema;
21 |
22 | public class AnyXmpSchema extends XmpSchema {
23 |
24 | public AnyXmpSchema(String nsPrefix, String nsUri) {
25 | super("xmlns:" + nsPrefix + "=\"" + nsUri + "\"");
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/ApiKey.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
/**
 * Holder for the API key constant used when looking up DOIs.
 */
public class ApiKey {

    /**
     * Key used for DOI lookups unless another one is supplied. Replace it
     * with your own API key; one can be requested at
     * http://www.crossref.org/requestaccount/ .
     */
    public static final String DEFAULT = "kward@crossref.org";

}
30 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/DumperMain.java:
--------------------------------------------------------------------------------
1 | package org.crossref.pdfmark;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.ByteArrayInputStream;
5 | import java.io.File;
6 | import java.io.FileInputStream;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 |
10 | import com.itextpdf.text.pdf.PdfReader;
11 |
12 | /**
13 | * Utility that dumps XMP data of a PDF to standard out.
14 | */
15 | public class DumperMain {
16 |
17 | public static void main(String[] args) {
18 | for (String filename : args) {
19 |
20 | File f = new File(filename);
21 | FileInputStream fileIn;
22 | PdfReader reader;
23 |
24 | try {
25 | fileIn = new FileInputStream(f);
26 | reader = new PdfReader(fileIn);
27 | byte[] merged = reader.getMetadata();
28 | ByteArrayInputStream bIn = new ByteArrayInputStream(merged);
29 | BufferedReader bR = new BufferedReader(new InputStreamReader(bIn));
30 | String line;
31 | while ((line = bR.readLine()) != null) {
32 | System.out.println(line);
33 | }
34 |
35 | reader.close();
36 | fileIn.close();
37 | } catch (IOException e) {
38 | System.err.println("Couldn't read file '" + filename + "'.");
39 | System.err.println(e);
40 | }
41 | }
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/FileInfo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
20 | import java.io.DataInputStream;
21 | import java.io.FileInputStream;
22 | import java.io.FileNotFoundException;
23 | import java.io.IOException;
24 |
/**
 * Outcome of reading a whole file into memory. Failures are reported through
 * the {@link #missing} and {@link #error} fields rather than by throwing.
 */
public class FileInfo {
    /** The path that was passed to {@link #readFileFully(String)}. */
    public String path;
    /** The complete file contents; left {@code null} when reading failed. */
    public byte[] data;
    /** {@code true} when opening the file raised a FileNotFoundException. */
    public boolean missing;
    /** The exception that stopped the read, or {@code null} on success. */
    public IOException error;

    /**
     * Reads the file at {@code filePath} completely.
     *
     * @param filePath path of the file to read
     * @return a FileInfo carrying either the file's bytes in {@link #data}
     *         or the failure in {@link #error} (with {@link #missing} set
     *         if the file could not be opened)
     */
    public static FileInfo readFileFully(String filePath) {
        FileInfo ni = new FileInfo();
        ni.path = filePath;

        try {
            FileInputStream fileIn = new FileInputStream(filePath);
            try {
                /* A growing buffer avoids re-copying everything read so far
                 * for each chunk. Fully qualified so that no extra import
                 * is required. */
                java.io.ByteArrayOutputStream collected =
                        new java.io.ByteArrayOutputStream();
                byte[] buff = new byte[1024];
                int read;
                while ((read = fileIn.read(buff, 0, buff.length)) > 0) {
                    collected.write(buff, 0, read);
                }
                ni.data = collected.toByteArray();
            } finally {
                /* Release the handle even when a read fails part-way. */
                fileIn.close();
            }
        } catch (FileNotFoundException e) {
            ni.missing = true;
            ni.error = e;
        } catch (IOException e) {
            ni.error = e;
        }

        return ni;
    }
}
61 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/Main.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 | import jargs.gnu.CmdLineParser;
20 |
21 | import java.io.File;
22 | import java.io.FileInputStream;
23 | import java.io.FileOutputStream;
24 | import java.io.IOException;
25 | import java.util.Map.Entry;
26 |
27 | import org.apache.pdfbox.cos.COSDictionary;
28 | import org.apache.pdfbox.cos.COSDocument;
29 | import org.apache.pdfbox.cos.COSName;
30 | import org.apache.pdfbox.cos.COSObject;
31 | import org.apache.pdfbox.exceptions.COSVisitorException;
32 | import org.apache.pdfbox.pdfparser.PDFParser;
33 | import org.apache.pdfbox.pdmodel.PDDocument;
34 | import org.apache.pdfbox.pdmodel.PDDocumentInformation;
35 |
36 | import com.itextpdf.text.DocumentException;
37 | import com.itextpdf.text.pdf.PdfReader;
38 | import com.itextpdf.text.pdf.PdfStamper;
39 | import com.itextpdf.text.pdf.PdfWriter;
40 | import com.itextpdf.text.xml.xmp.XmpSchema;
41 |
42 | import static jargs.gnu.CmdLineParser.Option;
43 |
44 | public class Main {
45 |
46 | private MetadataGrabber grabber;
47 |
48 | public static void printUsage() {
49 | System.err.println("Usage: pdfmark" +
50 | " [{-f, --force}]" +
51 | " [{-p, --xmp-file} xmp_file]" +
52 | " [{-o, --output-dir} output_dir] " +
53 | " [{-d, --doi} doi]" +
54 | " [--no-copyright]" +
55 | " [--rights-agent rights_agent_str]" +
56 | " [--api-key search_key]" +
57 | " pdf_files");
58 | }
59 |
60 | public static void printFutureUsage() {
61 | /* This will be correct once all features are implemented. */
62 | System.err.println("Usage: pdfmark" +
63 | " [{-f, --force}]" +
64 | " [{-p, --xmp-file} xmp_file]" +
65 | " [{-o, --output-dir} output_dir] " +
66 | " [{-d, --doi} doi]" +
67 | " [{-s, --search-for-doi]" +
68 | " [--no-copyright]" +
69 | " [--rights-agent rights_agent_str]" +
70 | " [--api-key search_key]" +
71 | " pdf_files");
72 | }
73 |
74 | public static void main(String[] args) {
75 | new Main(args);
76 | }
77 |
78 | private void shutDown() {
79 | grabber.shutDown();
80 | }
81 |
82 | public Main(String[] args) {
83 | if (args.length == 0) {
84 | printUsage();
85 | System.exit(2);
86 | }
87 |
88 | CmdLineParser parser = new CmdLineParser();
89 | Option provideXmpOp = parser.addStringOption('p', "xmp-file");
90 | Option overwriteOp = parser.addBooleanOption('f', "force");
91 | Option outputOp = parser.addStringOption('o', "output-dir");
92 | Option doiOp = parser.addStringOption('d', "doi");
93 | Option searchOp = parser.addBooleanOption('s', "search-for-doi");
94 | Option copyrightOp = parser.addBooleanOption("no-copyright");
95 | Option rightsOp = parser.addStringOption("rights-agent");
96 | Option apiKeyOp = parser.addStringOption("api-key");
97 |
98 | try {
99 | parser.parse(args);
100 | } catch (CmdLineParser.OptionException e) {
101 | printUsage();
102 | System.exit(2);
103 | }
104 |
105 | String optionalXmpPath = (String)
106 | parser.getOptionValue(provideXmpOp, "");
107 | String outputDir = (String)
108 | parser.getOptionValue(outputOp, "");
109 | String explicitDoi = (String)
110 | parser.getOptionValue(doiOp, "");
111 | boolean useTheForce = (Boolean)
112 | parser.getOptionValue(overwriteOp, Boolean.FALSE);
113 | boolean searchForDoi = (Boolean)
114 | parser.getOptionValue(searchOp, Boolean.FALSE);
115 | boolean noCopyright = (Boolean)
116 | parser.getOptionValue(copyrightOp, Boolean.FALSE);
117 | String rightsAgent = (String)
118 | parser.getOptionValue(rightsOp, "");
119 | String apiKey = (String)
120 | parser.getOptionValue(apiKeyOp, ApiKey.DEFAULT);
121 |
122 | if (!explicitDoi.equals("") && searchForDoi) {
123 | exitWithError(2, "-d and -s are mutually exclusive options.");
124 | }
125 |
126 | if (!outputDir.isEmpty() && !new File(outputDir).exists()) {
127 | exitWithError(2, "The output directory, '" + outputDir
128 | + "' does not exist.");
129 | }
130 |
131 | byte[] optionalXmpData = null;
132 |
133 | if (!optionalXmpPath.equals("")) {
134 | /* We will take XMP data from a file. */
135 | FileInfo xmpFile = FileInfo.readFileFully(optionalXmpPath);
136 | if (xmpFile.missing) {
137 | exitWithError(2, "Error: File '" + xmpFile.path
138 | + "' does not exist.");
139 | } else if (xmpFile.error != null) {
140 | exitWithError(2, "Error: Could not read '" + xmpFile.path
141 | + "' because of:\n" + xmpFile.error);
142 | }
143 |
144 | optionalXmpData = xmpFile.data;
145 | }
146 |
147 | grabber = new MetadataGrabber(apiKey);
148 |
149 | /* Now we're ready to merge our imported or generated XMP data with what
150 | * is already in each PDF. */
151 |
152 | for (String pdfFilePath : parser.getRemainingArgs()) {
153 | String outputPath = getOutFileName(pdfFilePath);
154 |
155 | /* Grab the leaf. */
156 | if (outputPath.contains(File.separator)) {
157 | String[] split = outputPath.split(File.separator);
158 | outputPath = split[split.length - 1];
159 | }
160 |
161 | if (!outputDir.isEmpty()) {
162 | outputPath = outputDir + File.separator + outputPath;
163 | } else {
164 | /* Output to the working directory. */
165 | }
166 |
167 | File pdfFile = new File(pdfFilePath);
168 | File outputFile = new File(outputPath);
169 |
170 | byte[] resolvedXmpData = null;
171 |
172 | if (!pdfFile.exists()) {
173 | exitWithError(2, "Error: File '" + pdfFilePath
174 | + "' does not exist.");
175 | }
176 |
177 | if (outputFile.exists() && !useTheForce) {
178 | exitWithError(2, "Error: File '" + outputPath
179 | + "' already exists.\nTry using -f (force).");
180 | }
181 |
182 | try {
183 | if (!useTheForce && isLinearizedPdf(new FileInputStream(pdfFile))) {
184 | exitWithError(2, "Error: '" + pdfFilePath + "' is a"
185 | + " linearized PDF and force is not specified."
186 | + " This tool will output non-linearized PDF."
187 | + "\nIf you don't mind that, use -f (force).");
188 | }
189 | } catch (IOException e) {
190 | exitWithError(2, "Error: Could not determine linearization"
191 | + " because of:\n" + e);
192 | }
193 |
194 | if (!explicitDoi.equals("")) {
195 | resolvedXmpData = getXmpForDoi(explicitDoi,
196 | !noCopyright,
197 | rightsAgent);
198 | }
199 |
200 | try {
201 | new File(outputFile.getPath() + ".tmp").deleteOnExit();
202 |
203 | FileInputStream fileIn = new FileInputStream(pdfFile);
204 | FileOutputStream fileOut = new FileOutputStream(outputFile.getPath() + ".tmp");
205 | PdfReader reader = new PdfReader(fileIn);
206 | PdfStamper stamper = new PdfStamper(reader, fileOut);
207 |
208 | byte[] merged = reader.getMetadata();
209 |
210 | if (optionalXmpData != null) {
211 | merged = XmpUtils.mergeXmp(merged, optionalXmpData);
212 | }
213 |
214 | if (resolvedXmpData != null) {
215 | merged = XmpUtils.mergeXmp(merged, resolvedXmpData);
216 | }
217 |
218 | stamper.setXmpMetadata(merged);
219 |
220 | stamper.close();
221 | reader.close();
222 |
223 | fileIn = new FileInputStream(outputFile.getPath() + ".tmp");
224 | writeInfoDictionary(fileIn, outputFile.getPath(), merged);
225 | } catch (IOException e) {
226 | exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
227 | + "' because of:\n" + e);
228 | } catch (DocumentException e) {
229 | exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
230 | + "' because of:\n" + e);
231 | } catch (XmpException e) {
232 | exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
233 | + "' because of:\n" + e);
234 | } catch (COSVisitorException e) {
235 | exitWithError(2, "Error: Couldn't write document info dictionary"
236 | + " because of:\n" + e);
237 | }
238 | }
239 |
240 | shutDown();
241 | }
242 |
243 | public static void writeInfoDictionary(FileInputStream in,
244 | String outputFile, byte[] xmp) throws IOException, COSVisitorException {
245 |
246 | PDFParser parser = new PDFParser(in);
247 | parser.parse();
248 |
249 | PDDocument document = parser.getPDDocument();
250 | PDDocumentInformation info = document.getDocumentInformation();
251 |
252 | for (Entry entry : XmpUtils.toInfo(xmp).entrySet()) {
253 | info.setCustomMetadataValue(entry.getKey(), entry.getValue());
254 | }
255 |
256 | document.setDocumentInformation(info);
257 | document.save(outputFile);
258 | document.close();
259 | }
260 |
261 | /**
262 | * According to the PDF Reference Manual (appendix F) a linearized PDF
263 | * must have as its first object after the PDF header an indirect
264 | * dictionary containing only direct objects. Among these objects one
265 | * must be assigned the key "Linearized", representing the linearized PDF
266 | * version number.
267 | *
268 | * @return true if the PDF read by reader is a linearized PDF.
269 | */
270 | public static boolean isLinearizedPdf(FileInputStream in) throws IOException {
271 | boolean isLinear = false;
272 |
273 | PDFParser parser = new PDFParser(in);
274 | parser.parse();
275 | COSDocument doc = parser.getDocument();
276 |
277 | for (Object o : doc.getObjects()) {
278 | COSObject obj = (COSObject) o;
279 | if (obj.getObject() instanceof COSDictionary) {
280 | COSDictionary dict = (COSDictionary) obj.getObject();
281 | for (Object key : dict.keyList()) {
282 | COSName name = (COSName) key;
283 | if ("Linearized".equals(name.getName())) {
284 | isLinear = true;
285 | break;
286 | }
287 | }
288 |
289 | if (isLinear) break;
290 | }
291 | }
292 |
293 | doc.close();
294 |
295 | return isLinear;
296 | }
297 |
298 | private byte[] getXmpForDoi(String doi, boolean genCr, String agent) {
299 | MarkBuilder builder = new MarkBuilder(genCr, agent) {
300 | @Override
301 | public void onFailure(String doi, int code, String msg) {
302 | if (code == MetadataGrabber.CRUMMY_XML_CODE) {
303 | exitWithError(2, "Failed to parse metadata XML because of:\n"
304 | + code + ": " + msg);
305 | } else {
306 | System.err.println();
307 | exitWithError(2, "Failed to retreive metadata because of:\n"
308 | + code + ": " + msg);
309 | }
310 | }
311 | };
312 | grabber.grabOne(doi, builder);
313 | System.out.println("Grabbing metadata for '" + doi + "'...");
314 | grabber.waitForEmpty();
315 |
316 | return builder.getXmpData();
317 | }
318 |
319 | private static String getOutFileName(String pdfFileName) {
320 | if (pdfFileName.endsWith(".pdf")) {
321 | return pdfFileName.substring(0, pdfFileName.length() - 4)
322 | + "_xmp.pdf";
323 | } else {
324 | return pdfFileName + "_xmp.pdf";
325 | }
326 | }
327 |
328 | private void exitWithError(int code, String error) {
329 | shutDown();
330 | System.err.println();
331 | System.err.println(error);
332 | System.exit(code);
333 | }
334 | }
335 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/MarkBuilder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
20 | import java.io.ByteArrayOutputStream;
21 | import java.io.IOException;
22 | import java.net.URI;
23 | import java.net.URISyntaxException;
24 |
25 | import javax.xml.xpath.XPathExpressionException;
26 |
27 | import org.crossref.pdfmark.prism.Prism21Schema;
28 | import org.crossref.pdfmark.pub.Publisher;
29 | import org.crossref.pdfmark.unixref.Unixref;
30 | import org.crossref.pdfmark.unixref.Work;
31 |
32 | import com.itextpdf.text.xml.xmp.DublinCoreSchema;
33 | import com.itextpdf.text.xml.xmp.XmpWriter;
34 |
35 | public abstract class MarkBuilder implements MetadataGrabber.Handler {
36 |
37 | private static URI DOI_RESOLVER;
38 | static {
39 | try {
40 | DOI_RESOLVER = new URI("http://dx.doi.org/");
41 | } catch (URISyntaxException e) {
42 | /* Not possible. */
43 | }
44 | }
45 |
46 | private byte[] xmpData;
47 |
48 | private Unixref unixref;
49 |
50 | private Publisher publisher;
51 |
52 | private boolean generateCopyright;
53 |
54 | private String rightsAgent;
55 |
56 | public MarkBuilder(boolean generateCopyright, String rightsAgent) {
57 | this.generateCopyright = generateCopyright;
58 | this.rightsAgent = rightsAgent;
59 | }
60 |
61 | @Override
62 | public void onMetadata(String requestedDoi, Unixref unixref) {
63 | this.unixref = unixref;
64 | }
65 |
66 | @Override
67 | public void onPublisher(String requestedDoi, Publisher pub) {
68 | this.publisher = pub;
69 | }
70 |
71 | @Override
72 | public void onComplete(String requestedDoi) {
73 | ByteArrayOutputStream bout = new ByteArrayOutputStream();
74 | SchemaSet schemaSet = new SchemaSet();
75 |
76 | try {
77 | Work work = null;
78 |
79 | switch (unixref.getType()) {
80 | case JOURNAL:
81 | work = unixref.getJournal();
82 | break;
83 | case BOOK:
84 | work = unixref.getBook();
85 | break;
86 | default:
87 | break;
88 | }
89 |
90 | if (work != null) {
91 | XmpWriter writer = new XmpWriter(bout);
92 |
93 | work.writeXmp(schemaSet);
94 |
95 | if (publisher != null) {
96 | if (generateCopyright) {
97 | String cp = getCopyright(work);
98 | Work.addToSchema(schemaSet.getDc(), DublinCoreSchema.RIGHTS, cp);
99 | Work.addToSchema(schemaSet.getPrism(), Prism21Schema.COPYRIGHT, cp);
100 | }
101 | Work.addToSchema(schemaSet.getDc(), DublinCoreSchema.PUBLISHER,
102 | publisher.getName());
103 | }
104 |
105 | Work.addToSchema(schemaSet.getPrism(), Prism21Schema.RIGHTS_AGENT,
106 | rightsAgent);
107 |
108 | writer.addRdfDescription(schemaSet.getDc());
109 | writer.addRdfDescription(schemaSet.getPrism());
110 | writer.close();
111 | }
112 |
113 | xmpData = bout.toByteArray();
114 | } catch (IOException e) {
115 | onFailure(requestedDoi, MetadataGrabber.CLIENT_EXCEPTION_CODE,
116 | e.toString());
117 | } catch (XPathExpressionException e) {
118 | onFailure(requestedDoi, MetadataGrabber.CLIENT_EXCEPTION_CODE,
119 | e.toString());
120 | }
121 | }
122 |
123 | private String getCopyright(Work work) throws XPathExpressionException {
124 | return "(C) " + work.getYear() + " " + publisher.getName();
125 | }
126 |
127 | public static String getUrlForDoi(String doi) {
128 | return DOI_RESOLVER.resolve(doi).toString();
129 | }
130 |
131 | public byte[] getXmpData() {
132 | return xmpData;
133 | }
134 |
135 | }
136 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/MetadataGrabber.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
20 | import java.io.IOException;
21 | import java.util.LinkedList;
22 | import java.util.Queue;
23 |
24 | import javax.xml.parsers.DocumentBuilder;
25 | import javax.xml.parsers.DocumentBuilderFactory;
26 | import javax.xml.parsers.ParserConfigurationException;
27 | import javax.xml.xpath.XPathExpressionException;
28 |
29 | import org.apache.http.HttpEntity;
30 | import org.apache.http.HttpResponse;
31 | import org.apache.http.StatusLine;
32 | import org.apache.http.params.HttpParams;
33 | import org.apache.http.params.BasicHttpParams;
34 | import org.apache.http.params.HttpConnectionParams;
35 | import org.apache.http.client.ClientProtocolException;
36 | import org.apache.http.client.HttpClient;
37 | import org.apache.http.client.methods.HttpGet;
38 | import org.apache.http.client.methods.HttpUriRequest;
39 | import org.apache.http.impl.client.DefaultHttpClient;
40 | import org.crossref.pdfmark.pub.Publisher;
41 | import org.crossref.pdfmark.unixref.Unixref;
42 | import org.w3c.dom.Document;
43 | import org.xml.sax.SAXException;
44 |
45 | public class MetadataGrabber {
46 |
47 | public static final int CLIENT_EXCEPTION_CODE = -1;
48 | public static final int CRUMMY_XML_CODE = -2;
49 | public static final int BAD_XPATH_CODE = -3;
50 |
51 | private static final String DOI_QUERY =
52 | "http://www.crossref.org/openurl/" +
53 | "?id=doi:{0}&noredirect=true" +
54 | "&pid={1}" +
55 | "&format=unixref";
56 |
57 | private static final String PUBLISHER_QUERY =
58 | "http://www.crossref.org/" +
59 | "getPrefixPublisher/" +
60 | "?prefix={0}";
61 |
62 | private static final String QUERY_TOKEN = "{0}";
63 |
64 | private static final String KEY_TOKEN = "{1}";
65 |
66 | private HttpClient client;
67 |
68 | private Queue requests;
69 |
70 | private DocumentBuilder builder;
71 |
72 | private boolean terminated;
73 |
74 | private Object monitor = new Object();
75 |
76 | private String apiKey;
77 |
78 | private enum RequestType {
79 | DOI,
80 | PUBLISHER,
81 | }
82 |
83 | private class RequestInfo {
84 | private String doi;
85 | private HttpUriRequest request;
86 | private Handler handler;
87 | private RequestType requestType;
88 |
89 | private RequestInfo(RequestType rt) {
90 | requestType = rt;
91 | }
92 |
93 | private RequestInfo withRequest(String location, String replacement) {
94 | String detokRequest = location.replace(QUERY_TOKEN, replacement);
95 | request = new HttpGet(detokRequest);
96 | return this;
97 | }
98 |
99 | private RequestInfo withRequest(String location, String replacement,
100 | String key) {
101 | String detokRequest = location.replace(QUERY_TOKEN, replacement);
102 | detokRequest = detokRequest.replace(KEY_TOKEN, key);
103 | request = new HttpGet(detokRequest);
104 | return this;
105 | }
106 |
107 | private RequestInfo withDoi(String doi) {
108 | this.doi = doi;
109 | return this;
110 | }
111 |
112 | private RequestInfo withHandler(Handler handler) {
113 | this.handler = handler;
114 | return this;
115 | }
116 |
117 | private void performOn(HttpClient client) {
118 | try {
119 | HttpResponse sponse = client.execute(request);
120 | HttpEntity entity = sponse.getEntity();
121 |
122 | if (entity != null) {
123 | Document doc = builder.parse(entity.getContent());
124 |
125 | if (requestType == RequestType.DOI) {
126 | Unixref unixref = new Unixref(doc);
127 | String ownerPrefix = unixref.getOwnerPrefix();
128 | handler.onMetadata(doi, unixref);
129 | if (!ownerPrefix.isEmpty()) {
130 | queuePubReq(doi, handler, unixref.getOwnerPrefix());
131 | } else {
132 | handler.onComplete(doi);
133 | }
134 | } else if (requestType == RequestType.PUBLISHER) {
135 | Publisher publisher = new Publisher(doc);
136 | handler.onPublisher(doi, publisher);
137 | handler.onComplete(doi);
138 | }
139 |
140 | } else {
141 | StatusLine sl = sponse.getStatusLine();
142 | handler.onFailure(doi,
143 | sl.getStatusCode(),
144 | sl.getReasonPhrase());
145 | }
146 | } catch (ClientProtocolException e) {
147 | handler.onFailure(doi, CLIENT_EXCEPTION_CODE, e.toString());
148 | } catch (IOException e) {
149 | handler.onFailure(doi, CLIENT_EXCEPTION_CODE, e.toString());
150 | } catch (SAXException e) {
151 | handler.onFailure(doi, CRUMMY_XML_CODE, e.toString());
152 | } catch (XPathExpressionException e) {
153 | handler.onFailure(doi, BAD_XPATH_CODE, e.toString());
154 | }
155 | }
156 | }
157 |
158 | public MetadataGrabber(String apiKey) {
159 | this.apiKey = apiKey;
160 |
161 | try {
162 | DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
163 | domFactory.setNamespaceAware(true);
164 | builder = domFactory.newDocumentBuilder();
165 | } catch (ParserConfigurationException e) {
166 | System.err.println("Error: Can't create an XML parser.");
167 | System.err.println(e);
168 | System.exit(2);
169 | }
170 |
171 | HttpParams httpParams = new BasicHttpParams();
172 | HttpConnectionParams.setConnectionTimeout(httpParams, 10000);
173 | HttpConnectionParams.setSoTimeout(httpParams, 10000);
174 |
175 | client = new DefaultHttpClient(httpParams);
176 | requests = new LinkedList();
177 |
178 | new Thread(new Runnable() {
179 | @Override
180 | public void run() {
181 | while (!terminated) {
182 | while (!requests.isEmpty()) {
183 | requests.peek().performOn(client);
184 |
185 | synchronized (monitor) {
186 | requests.remove();
187 | monitor.notifyAll();
188 | }
189 | }
190 |
191 | synchronized (monitor) {
192 | try {
193 | monitor.wait();
194 | } catch (InterruptedException e) {
195 | }
196 | }
197 | }
198 | }
199 | }).start();
200 | }
201 |
202 | public void shutDown() {
203 | terminated = true;
204 | synchronized (monitor) {
205 | monitor.notifyAll();
206 | }
207 | }
208 |
209 | public void grabOne(String doi, Handler handler) {
210 | requests.add(new RequestInfo(RequestType.DOI)
211 | .withDoi(doi)
212 | .withHandler(handler)
213 | .withRequest(DOI_QUERY, doi, apiKey));
214 |
215 | synchronized (monitor) {
216 | monitor.notifyAll();
217 | }
218 | /* Later, when we receive this response, we will queue
219 | * a RequestInfo to get publisher data. */
220 | }
221 |
222 | private void queuePubReq(String doi, Handler handler, String pubPrefix) {
223 | requests.add(new RequestInfo(RequestType.PUBLISHER)
224 | .withDoi(doi)
225 | .withHandler(handler)
226 | .withRequest(PUBLISHER_QUERY, pubPrefix));
227 |
228 | synchronized (monitor) {
229 | monitor.notifyAll();
230 | }
231 | }
232 |
233 | public void waitForEmpty() {
234 | synchronized (monitor) {
235 | while (true) {
236 | if (requests.isEmpty()) {
237 | break;
238 | }
239 | try {
240 | monitor.wait();
241 | } catch (InterruptedException e) {
242 | }
243 | }
244 | }
245 | }
246 |
247 | public interface Handler {
248 | public void onMetadata(String doi, Unixref metadata);
249 | public void onPublisher(String doi, Publisher publisher);
250 | public void onComplete(String doi);
251 | public void onFailure(String doi, int code, String msg);
252 | }
253 |
254 | }
255 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/PdfxSchema.java:
--------------------------------------------------------------------------------
1 | package org.crossref.pdfmark;
2 |
3 | import com.itextpdf.text.xml.xmp.XmpSchema;
4 |
5 | public class PdfxSchema extends XmpSchema {
6 |
7 | public static final String DEFAULT_XPATH_ID = "pdfx";
8 | public static final String DEFAULT_XPATH_URI
9 | = "http://ns.adobe.com/pdfx/1.3/";
10 |
11 | public static final String DOI = "doi";
12 |
13 | public PdfxSchema() {
14 | super("xmlns:"
15 | + DEFAULT_XPATH_ID
16 | + "=\"" + DEFAULT_XPATH_URI + "\"");
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/SchemaSet.java:
--------------------------------------------------------------------------------
1 | package org.crossref.pdfmark;
2 |
3 | import org.crossref.pdfmark.prism.Prism21Schema;
4 |
5 | import com.itextpdf.text.xml.xmp.DublinCoreSchema;
6 | import com.itextpdf.text.xml.xmp.XmpSchema;
7 |
8 | public class SchemaSet {
9 |
10 | private XmpSchema dc = new DublinCoreSchema();
11 | private XmpSchema prism = new Prism21Schema();
12 | private XmpSchema pdfx = new PdfxSchema();
13 |
14 | public XmpSchema getPrism() {
15 | return prism;
16 | }
17 |
18 | public XmpSchema getDc() {
19 | return dc;
20 | }
21 |
22 | public XmpSchema getPdfx() {
23 | return pdfx;
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/XPathHelpers.java:
--------------------------------------------------------------------------------
1 | package org.crossref.pdfmark;
2 |
3 | import java.util.ArrayList;
4 |
5 | import javax.xml.xpath.XPathConstants;
6 | import javax.xml.xpath.XPathExpression;
7 | import javax.xml.xpath.XPathExpressionException;
8 |
9 | import org.w3c.dom.Node;
10 |
11 | public final class XPathHelpers {
12 |
13 | private XPathHelpers() {
14 | }
15 |
16 | public static String orEmptyStr(XPathExpression xpe, Node n)
17 | throws XPathExpressionException {
18 | Node inner = (Node) xpe.evaluate(n, XPathConstants.NODE);
19 | return inner == null ? "" : inner.getTextContent();
20 | }
21 |
22 | public static String evalConcat(Node n, String delimiter, XPathExpression... exprs)
23 | throws XPathExpressionException {
24 | ArrayList results = new ArrayList();
25 | for (XPathExpression expr : exprs) {
26 | results.add(orEmptyStr(expr, n));
27 | }
28 |
29 | while (results.remove(""));
30 |
31 | String retn = "";
32 |
33 | for (String s : results) {
34 | retn += s;
35 | if (results.indexOf(s) != results.size() -1 ) {
36 | retn += delimiter;
37 | }
38 | }
39 |
40 | return retn;
41 | }
42 |
43 | public static Node oneOf(Node parent, XPathExpression... exprs)
44 | throws XPathExpressionException {
45 | for (XPathExpression expr : exprs) {
46 | Node child = (Node) expr.evaluate(parent, XPathConstants.NODE);
47 | if (child != null) {
48 | return child;
49 | }
50 | }
51 | return null;
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/XmlUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 CrossRef.org (email: support@crossref.org)
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation; either version 2 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program; if not, write to the Free Software
16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 | */
18 | package org.crossref.pdfmark;
19 |
20 | import java.util.UUID;
21 |
22 | import javax.xml.XMLConstants;
23 |
24 | import org.w3c.dom.Attr;
25 | import org.w3c.dom.Element;
26 | import org.w3c.dom.NamedNodeMap;
27 |
28 | public final class XmlUtils {
29 |
/** Private so that the class cannot be instantiated from outside. */
private XmlUtils() {
}
32 |
33 | /**
34 | * @return A String[] of length two, [prefix, URI].
35 | */
36 | public static String[] getNamespaceDeclaration(Element ele) {
37 | String prefixHint = null;
38 | String[] parts = ele.getNodeName().split(":");
39 | if (parts.length == 2) {
40 | prefixHint = parts[0];
41 | }
42 |
43 | return getNamespaceDeclaration(ele, prefixHint);
44 | }
45 |
46 | /**
47 | * @return A String[] of length two, [prefix, URI].
48 | */
49 | public static String[] getNamespaceDeclaration(Element ele, String prefixHint) {
50 | String[] ns = new String[2]; // prefix, URI
51 | NamedNodeMap attribs = ele.getAttributes();
52 |
53 | for (int i=0; i schemata = new HashMap();
81 |
82 | for (int i=0; i schemata, Element ele) {
98 | String propertyName = ele.getNodeName();
99 | String[] ns = XmlUtils.getNamespaceDeclaration(ele);
100 | XmpSchema schema = null;
101 |
102 | if (schemata.containsKey(ns[1])) {
103 | schema = schemata.get(ns[1]);
104 | } else {
105 | schema = new AnyXmpSchema(ns[0], ns[1]);
106 | schemata.put(ns[1], schema);
107 | }
108 |
109 | /* Should have either Text or a single . */
110 | boolean hasElementChildren = false;
111 | for (int i=0; i toInfo(byte[] xmp) throws XmpException {
222 | try {
223 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
224 | factory.setNamespaceAware(true);
225 |
226 | Map info = new HashMap();
227 |
228 | XmpSchema[] schemata = XmpUtils.parseSchemata(xmp);
229 | for (XmpSchema schema : schemata) {
230 | if (schema.getXmlns().contains("pdfx")) {
231 | for (Entry