In Google We Trust?

├── .classpath ├── .gitignore ├── .gitmodules ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── CONDUCT.md ├── Dockerfile ├── README ├── TODO.rst ├── build.xml ├── dist └── pdfmark.jar ├── lib ├── commons-logging-1.1.1.jar ├── commons-logging-adapters-1.1.1.jar ├── commons-logging-api-1.1.1.jar ├── httpclient-4.0.jar ├── httpcore-4.0.1.jar ├── httpcore-nio-4.0.1.jar ├── httpmime-4.0.jar ├── itextpdf-5.1.3.jar ├── jargs.jar ├── junit-4.8.jar └── pdfbox-1.6.0.jar ├── pdfmark.jar ├── script ├── run-bin └── run-test ├── src └── org │ └── crossref │ └── pdfmark │ ├── AnyXmpSchema.java │ ├── ApiKey.java │ ├── DumperMain.java │ ├── FileInfo.java │ ├── Main.java │ ├── MarkBuilder.java │ ├── MetadataGrabber.java │ ├── PdfxSchema.java │ ├── SchemaSet.java │ ├── XPathHelpers.java │ ├── XmlUtils.java │ ├── XmpException.java │ ├── XmpUtils.java │ ├── prism │ ├── Prism11Schema.java │ └── Prism21Schema.java │ ├── pub │ └── Publisher.java │ ├── test │ ├── Main.java │ ├── MarkBuilderTest.java │ └── PdfInfoDirectory.java │ └── unixref │ ├── Book.java │ ├── Dissertation.java │ ├── Journal.java │ ├── JournalArticle.java │ ├── ReportPaper.java │ ├── Standard.java │ ├── Unixref.java │ └── Work.java └── test-data ├── nature-metadata-example.txt ├── pdfx-xmp-example.xmp ├── publisher-example.xml ├── random-xmp-example.xmp ├── test-pdf.pdf ├── unixref-example.json └── unixref-example.xml /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | output 3 | *.swp 4 | *~ 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule 
"test-data/extended"] 2 | path = test-data/extended 3 | url = git@labs.crossref.org:pdf-test-data.git 4 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | pdfmark 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | #Mon Nov 23 14:41:40 GMT 2009 2 | eclipse.preferences.version=1 3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.6 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.source=1.6 13 | -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 
5 | 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | COPY . /usr/src/myapp 3 | WORKDIR /usr/src/myapp 4 | CMD ["/usr/bin/java","-jar","dist/pdfmark.jar","-d","10.5555/12345678","test-data/test-pdf.pdf","-o","/out"] -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ## pdfmark 2 | 3 | *2018/01/15:* `pdfmark` does not work with Java 9. You must use Java 8 or below.
4 | ===================================================== 5 | 6 | Extended test data (in the form of lots of PDFs) 7 | can be inserted as a git submodule. You will need 8 | git access on labs.crossref.org to access the 9 | PDF test data repository. If you do, just perform 10 | this command to put the data into your local 11 | repository: 12 | 13 | $ git submodule update --init 14 | 15 | ===================================================== 16 | To run with Docker 17 | 18 | - `docker build -t pdfmark .` 19 | - `docker run -v /tmp:/out pdfmark` 20 | 21 | You should find a file named '/tmp/test-pdf_xmp.pdf' 22 | 23 | You can see the metadata that has been added by using the `exiftool` like this: 24 | 25 | 26 | `exiftool -a -G1 /tmp/test-pdf_xmp.pdf` -------------------------------------------------------------------------------- /TODO.rst: -------------------------------------------------------------------------------- 1 | 10 minute items 2 | ========================================================== 3 | 4 | A few hours 5 | ========================================================== 6 | 7 | - Rewrite MarkBuilder as something more reusable. 8 | - Add file globbing. 9 | - Complete more unit tests. 10 | - Remove use of iText in favour of PDFBox (it does 11 | everything we need it to, while iText does a subset). 12 | 13 | Involved, hours or days 14 | ========================================================== 15 | 16 | - Expand MarkBuilder or whatever replaces it to 17 | support query response for non-article DOIs. 18 | - Expand unixref model to support non-article 19 | types. 20 | - Implement scanning of PDF documents. 
-------------------------------------------------------------------------------- /build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /dist/pdfmark.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/dist/pdfmark.jar -------------------------------------------------------------------------------- /lib/commons-logging-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-1.1.1.jar -------------------------------------------------------------------------------- /lib/commons-logging-adapters-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-adapters-1.1.1.jar -------------------------------------------------------------------------------- /lib/commons-logging-api-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/commons-logging-api-1.1.1.jar -------------------------------------------------------------------------------- /lib/httpclient-4.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpclient-4.0.jar 
-------------------------------------------------------------------------------- /lib/httpcore-4.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpcore-4.0.1.jar -------------------------------------------------------------------------------- /lib/httpcore-nio-4.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpcore-nio-4.0.1.jar -------------------------------------------------------------------------------- /lib/httpmime-4.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/httpmime-4.0.jar -------------------------------------------------------------------------------- /lib/itextpdf-5.1.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/itextpdf-5.1.3.jar -------------------------------------------------------------------------------- /lib/jargs.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/jargs.jar -------------------------------------------------------------------------------- /lib/junit-4.8.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/junit-4.8.jar -------------------------------------------------------------------------------- /lib/pdfbox-1.6.0.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/lib/pdfbox-1.6.0.jar -------------------------------------------------------------------------------- /pdfmark.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/pdfmark.jar -------------------------------------------------------------------------------- /script/run-bin: --------------------------------------------------------------------------------
#!/bin/sh
# Runs the pdfmark command line tool (org.crossref.pdfmark.Main) from a
# compiled checkout, forwarding all arguments unchanged.
#
# PROJ_HOME may be exported to point at the checkout; when it is unset the
# parent directory of this script's directory is used.
PROJ_HOME=${PROJ_HOME:-$(cd "$(dirname "$0")/.." && pwd)}
LIB_DIR=$PROJ_HOME/lib
BIN_DIR=$PROJ_HOME/bin

# "$LIB_DIR/*" is a classpath wildcard expanded by the java launcher itself
# (Java 6 and later), so every jar shipped in lib/ is picked up and the list
# cannot drift out of date again. It is quoted so the shell does not expand it.
java -classpath "$LIB_DIR/*:$BIN_DIR" org.crossref.pdfmark.Main "$@"
-------------------------------------------------------------------------------- /script/run-test: --------------------------------------------------------------------------------
#!/bin/sh
# Runs the pdfmark test entry point (org.crossref.pdfmark.test.Main) from a
# compiled checkout, forwarding all arguments unchanged.
#
# PROJ_HOME may be exported to point at the checkout; when it is unset the
# parent directory of this script's directory is used.
PROJ_HOME=${PROJ_HOME:-$(cd "$(dirname "$0")/.." && pwd)}
LIB_DIR=$PROJ_HOME/lib
BIN_DIR=$PROJ_HOME/bin

# The quoted classpath wildcard is expanded by the java launcher (Java 6 and
# later) and covers every jar in lib/, including JUnit.
java -classpath "$LIB_DIR/*:$BIN_DIR" org.crossref.pdfmark.test.Main "$@"
-------------------------------------------------------------------------------- /src/org/crossref/pdfmark/AnyXmpSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 |
* This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | 20 | import com.itextpdf.text.xml.xmp.XmpSchema; 21 |
/**
 * An {@link XmpSchema} for an arbitrary namespace, declared from a prefix
 * and a namespace URI supplied by the caller.
 */
public class AnyXmpSchema extends XmpSchema {

    /**
     * @param nsPrefix the namespace prefix, placed after "xmlns:"
     * @param nsUri    the namespace URI, placed between double quotes
     */
    public AnyXmpSchema(String nsPrefix, String nsUri) {
        super(namespaceDeclaration(nsPrefix, nsUri));
    }

    /** Builds the text xmlns:PREFIX="URI" that is handed to the superclass. */
    private static String namespaceDeclaration(String prefix, String uri) {
        StringBuilder declaration = new StringBuilder("xmlns:");
        declaration.append(prefix);
        declaration.append("=\"");
        declaration.append(uri);
        declaration.append("\"");
        return declaration.toString();
    }

}
-------------------------------------------------------------------------------- /src/org/crossref/pdfmark/ApiKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details.
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 |
/**
 * Holder for the API key that is used when none is given on the command line.
 */
public class ApiKey {

    /**
     * The default key used for DOI lookups. Main falls back to this value
     * when the --api-key option is absent. It should be changed to your own
     * API key, which can be obtained from
     * http://www.crossref.org/requestaccount/ .
     */
    public static final String DEFAULT = "kward@crossref.org";

}
-------------------------------------------------------------------------------- /src/org/crossref/pdfmark/DumperMain.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.ByteArrayInputStream; 5 | import java.io.File; 6 | import java.io.FileInputStream; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | 10 | import com.itextpdf.text.pdf.PdfReader; 11 | 12 | /** 13 | * Utility that dumps XMP data of a PDF to standard out.
14 | */ 15 | public class DumperMain { 16 | 17 | public static void main(String[] args) { 18 | for (String filename : args) { 19 | 20 | File f = new File(filename); 21 | FileInputStream fileIn; 22 | PdfReader reader; 23 | 24 | try { 25 | fileIn = new FileInputStream(f); 26 | reader = new PdfReader(fileIn); 27 | byte[] merged = reader.getMetadata(); 28 | ByteArrayInputStream bIn = new ByteArrayInputStream(merged); 29 | BufferedReader bR = new BufferedReader(new InputStreamReader(bIn)); 30 | String line; 31 | while ((line = bR.readLine()) != null) { 32 | System.out.println(line); 33 | } 34 | 35 | reader.close(); 36 | fileIn.close(); 37 | } catch (IOException e) { 38 | System.err.println("Couldn't read file '" + filename + "'."); 39 | System.err.println(e); 40 | } 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/FileInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | 20 | import java.io.DataInputStream; 21 | import java.io.FileInputStream; 22 | import java.io.FileNotFoundException; 23 | import java.io.IOException; 24 | 25 | public class FileInfo { 26 | public String path; 27 | public byte[] data; 28 | public boolean missing; 29 | public IOException error; 30 | 31 | public static FileInfo readFileFully(String filePath) { 32 | FileInfo ni = new FileInfo(); 33 | ni.path = filePath; 34 | 35 | try { 36 | FileInputStream fileIn = new FileInputStream(filePath); 37 | DataInputStream din = new DataInputStream(fileIn); 38 | 39 | byte[] buff = new byte[1024], xmpData = new byte[0]; 40 | int read = 0; 41 | while ((read = din.read(buff, 0, buff.length)) > 0) { 42 | byte[] tmp = new byte[xmpData.length + read]; 43 | System.arraycopy(xmpData, 0, tmp, 0, xmpData.length); 44 | System.arraycopy(buff, 0, tmp, xmpData.length, read); 45 | xmpData = tmp; 46 | } 47 | 48 | ni.data = xmpData; 49 | 50 | din.close(); 51 | } catch (FileNotFoundException e) { 52 | ni.missing = true; 53 | ni.error = e; 54 | } catch (IOException e) { 55 | ni.error = e; 56 | } 57 | 58 | return ni; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/Main.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | import jargs.gnu.CmdLineParser; 20 | 21 | import java.io.File; 22 | import java.io.FileInputStream; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | import java.util.Map.Entry; 26 | 27 | import org.apache.pdfbox.cos.COSDictionary; 28 | import org.apache.pdfbox.cos.COSDocument; 29 | import org.apache.pdfbox.cos.COSName; 30 | import org.apache.pdfbox.cos.COSObject; 31 | import org.apache.pdfbox.exceptions.COSVisitorException; 32 | import org.apache.pdfbox.pdfparser.PDFParser; 33 | import org.apache.pdfbox.pdmodel.PDDocument; 34 | import org.apache.pdfbox.pdmodel.PDDocumentInformation; 35 | 36 | import com.itextpdf.text.DocumentException; 37 | import com.itextpdf.text.pdf.PdfReader; 38 | import com.itextpdf.text.pdf.PdfStamper; 39 | import com.itextpdf.text.pdf.PdfWriter; 40 | import com.itextpdf.text.xml.xmp.XmpSchema; 41 | 42 | import static jargs.gnu.CmdLineParser.Option; 43 |
/**
 * Command line entry point of pdfmark. For every PDF named on the command
 * line it merges the XMP already in the file with XMP taken from an optional
 * file (-p) and/or generated for a DOI (-d), then writes the result to
 * "NAME_xmp.pdf" in the output directory (-o) or the working directory.
 * Every error terminates the JVM with exit status 2.
 */
public class Main {

    /** Suffix of the scratch file the stamped PDF is written to before the final save. */
    private static final String TMP_SUFFIX = ".tmp";

    /** Metadata client; stays null until the command line has been validated. */
    private MetadataGrabber grabber;

    /** Prints the synopsis of the currently supported options to standard error. */
    public static void printUsage() {
        System.err.println("Usage: pdfmark" +
                " [{-f, --force}]" +
                " [{-p, --xmp-file} xmp_file]" +
                " [{-o, --output-dir} output_dir] " +
                " [{-d, --doi} doi]" +
                " [--no-copyright]" +
                " [--rights-agent rights_agent_str]" +
                " [--api-key search_key]" +
                " pdf_files");
    }

    /** Prints the synopsis including options that are not implemented yet. */
    public static void printFutureUsage() {
        /* This will be correct once all features are implemented. */
        System.err.println("Usage: pdfmark" +
                " [{-f, --force}]" +
                " [{-p, --xmp-file} xmp_file]" +
                " [{-o, --output-dir} output_dir] " +
                " [{-d, --doi} doi]" +
                " [{-s, --search-for-doi}]" +
                " [--no-copyright]" +
                " [--rights-agent rights_agent_str]" +
                " [--api-key search_key]" +
                " pdf_files");
    }

    public static void main(String[] args) {
        new Main(args);
    }

    /**
     * Shuts the metadata grabber down if it has been created. The null guard
     * matters: exitWithError calls this for validation errors that are
     * detected before the grabber exists.
     */
    private void shutDown() {
        if (grabber != null) {
            grabber.shutDown();
        }
    }

    /**
     * Parses the command line and processes every PDF file named on it.
     *
     * @param args the raw command line arguments
     */
    public Main(String[] args) {
        if (args.length == 0) {
            printUsage();
            System.exit(2);
        }

        CmdLineParser parser = new CmdLineParser();
        Option provideXmpOp = parser.addStringOption('p', "xmp-file");
        Option overwriteOp = parser.addBooleanOption('f', "force");
        Option outputOp = parser.addStringOption('o', "output-dir");
        Option doiOp = parser.addStringOption('d', "doi");
        Option searchOp = parser.addBooleanOption('s', "search-for-doi");
        Option copyrightOp = parser.addBooleanOption("no-copyright");
        Option rightsOp = parser.addStringOption("rights-agent");
        Option apiKeyOp = parser.addStringOption("api-key");

        try {
            parser.parse(args);
        } catch (CmdLineParser.OptionException e) {
            printUsage();
            System.exit(2);
        }

        String optionalXmpPath = (String)
                parser.getOptionValue(provideXmpOp, "");
        String outputDir = (String)
                parser.getOptionValue(outputOp, "");
        String explicitDoi = (String)
                parser.getOptionValue(doiOp, "");
        boolean useTheForce = (Boolean)
                parser.getOptionValue(overwriteOp, Boolean.FALSE);
        boolean searchForDoi = (Boolean)
                parser.getOptionValue(searchOp, Boolean.FALSE);
        boolean noCopyright = (Boolean)
                parser.getOptionValue(copyrightOp, Boolean.FALSE);
        String rightsAgent = (String)
                parser.getOptionValue(rightsOp, "");
        String apiKey = (String)
                parser.getOptionValue(apiKeyOp, ApiKey.DEFAULT);

        if (!explicitDoi.equals("") && searchForDoi) {
            exitWithError(2, "-d and -s are mutually exclusive options.");
        }

        if (!outputDir.isEmpty() && !new File(outputDir).exists()) {
            exitWithError(2, "The output directory, '" + outputDir
                    + "' does not exist.");
        }

        byte[] optionalXmpData = null;

        if (!optionalXmpPath.equals("")) {
            /* We will take XMP data from a file. */
            FileInfo xmpFile = FileInfo.readFileFully(optionalXmpPath);
            if (xmpFile.missing) {
                exitWithError(2, "Error: File '" + xmpFile.path
                        + "' does not exist.");
            } else if (xmpFile.error != null) {
                exitWithError(2, "Error: Could not read '" + xmpFile.path
                        + "' because of:\n" + xmpFile.error);
            }

            optionalXmpData = xmpFile.data;
        }

        grabber = new MetadataGrabber(apiKey);

        /* Now we're ready to merge our imported or generated XMP data with what
         * is already in each PDF. */

        for (String pdfFilePath : parser.getRemainingArgs()) {
            /* Grab the leaf. File.getName() is used instead of
             * String.split(File.separator) because split() takes a regular
             * expression and the Windows separator, a lone backslash, is not
             * a valid one. */
            String outputPath = new File(getOutFileName(pdfFilePath)).getName();

            if (!outputDir.isEmpty()) {
                outputPath = outputDir + File.separator + outputPath;
            }
            /* Otherwise output goes to the working directory. */

            File pdfFile = new File(pdfFilePath);
            File outputFile = new File(outputPath);

            byte[] resolvedXmpData = null;

            if (!pdfFile.exists()) {
                exitWithError(2, "Error: File '" + pdfFilePath
                        + "' does not exist.");
            }

            if (outputFile.exists() && !useTheForce) {
                exitWithError(2, "Error: File '" + outputPath
                        + "' already exists.\nTry using -f (force).");
            }

            if (!useTheForce) {
                rejectLinearized(pdfFile, pdfFilePath);
            }

            if (!explicitDoi.equals("")) {
                resolvedXmpData = getXmpForDoi(explicitDoi,
                                               !noCopyright,
                                               rightsAgent);
            }

            try {
                stampPdf(pdfFile, outputFile, optionalXmpData, resolvedXmpData);
            } catch (IOException e) {
                exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
                        + "' because of:\n" + e);
            } catch (DocumentException e) {
                exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
                        + "' because of:\n" + e);
            } catch (XmpException e) {
                exitWithError(2, "Error: Couldn't handle '" + pdfFilePath
                        + "' because of:\n" + e);
            } catch (COSVisitorException e) {
                exitWithError(2, "Error: Couldn't write document info dictionary"
                        + " because of:\n" + e);
            }
        }

        shutDown();
    }

    /** Exits with an error when pdfFile is linearized or cannot be inspected. */
    private void rejectLinearized(File pdfFile, String pdfFilePath) {
        FileInputStream probe = null;
        try {
            probe = new FileInputStream(pdfFile);
            if (isLinearizedPdf(probe)) {
                exitWithError(2, "Error: '" + pdfFilePath + "' is a"
                        + " linearized PDF and force is not specified."
                        + " This tool will output non-linearized PDF."
                        + "\nIf you don't mind that, use -f (force).");
            }
        } catch (IOException e) {
            exitWithError(2, "Error: Could not determine linearization"
                    + " because of:\n" + e);
        } finally {
            closeQuietly(probe);
        }
    }

    /**
     * Stamps the merged XMP into a scratch copy of pdfFile, then saves that
     * copy, with a matching info dictionary, as outputFile.
     */
    private static void stampPdf(File pdfFile, File outputFile,
            byte[] providedXmp, byte[] resolvedXmp)
            throws IOException, DocumentException, XmpException, COSVisitorException {

        File tmpFile = new File(outputFile.getPath() + TMP_SUFFIX);
        tmpFile.deleteOnExit();

        FileInputStream pdfIn = null;
        FileOutputStream tmpOut = null;
        FileInputStream tmpIn = null;

        try {
            pdfIn = new FileInputStream(pdfFile);
            tmpOut = new FileOutputStream(tmpFile);
            PdfReader reader = new PdfReader(pdfIn);
            PdfStamper stamper = new PdfStamper(reader, tmpOut);

            byte[] merged = reader.getMetadata();

            if (providedXmp != null) {
                merged = XmpUtils.mergeXmp(merged, providedXmp);
            }

            if (resolvedXmp != null) {
                merged = XmpUtils.mergeXmp(merged, resolvedXmp);
            }

            stamper.setXmpMetadata(merged);

            stamper.close();
            reader.close();

            tmpIn = new FileInputStream(tmpFile);
            writeInfoDictionary(tmpIn, outputFile.getPath(), merged);
        } finally {
            /* Several files may be processed in one run, so release every
             * handle per file instead of leaving it to JVM exit. */
            closeQuietly(tmpIn);
            closeQuietly(tmpOut);
            closeQuietly(pdfIn);
        }
    }

    /** Closes a stream, ignoring null and any failure of the close itself. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                /* Nothing useful can be done about a failed close. */
            }
        }
    }

    /**
     * Parses the PDF read from {@code in}, adds the entries derived from
     * {@code xmp} to its document information dictionary as custom metadata
     * values and saves the document to {@code outputFile}.
     *
     * @param in         stream positioned at the start of a PDF
     * @param outputFile path the modified document is saved to
     * @param xmp        XMP data the info entries are derived from
     */
    public static void writeInfoDictionary(FileInputStream in,
            String outputFile, byte[] xmp) throws IOException, COSVisitorException {

        PDFParser parser = new PDFParser(in);
        parser.parse();

        PDDocument document = parser.getPDDocument();
        try {
            PDDocumentInformation info = document.getDocumentInformation();

            /* NOTE(review): assumes XmpUtils.toInfo returns Map<String, String>;
             * the type arguments were missing from the listing - confirm. */
            for (Entry<String, String> entry : XmpUtils.toInfo(xmp).entrySet()) {
                info.setCustomMetadataValue(entry.getKey(), entry.getValue());
            }

            document.setDocumentInformation(info);
            document.save(outputFile);
        } finally {
            document.close();
        }
    }

    /**
     * According to the PDF Reference Manual (appendix F) a linearized PDF
     * must have as its first object after the PDF header an indirect
     * dictionary containing only direct objects. Among these objects one
     * must be assigned the key "Linearized", representing the linearized PDF
     * version number.
     *
     * This implementation reports true as soon as any dictionary object of
     * the document has a key named "Linearized".
     *
     * @param in stream positioned at the start of a PDF
     * @return true if the PDF read from in is a linearized PDF.
     */
    public static boolean isLinearizedPdf(FileInputStream in) throws IOException {
        PDFParser parser = new PDFParser(in);
        parser.parse();
        COSDocument doc = parser.getDocument();

        try {
            for (Object o : doc.getObjects()) {
                COSObject obj = (COSObject) o;
                if (obj.getObject() instanceof COSDictionary) {
                    COSDictionary dict = (COSDictionary) obj.getObject();
                    for (Object key : dict.keyList()) {
                        COSName name = (COSName) key;
                        if ("Linearized".equals(name.getName())) {
                            return true;
                        }
                    }
                }
            }

            return false;
        } finally {
            doc.close();
        }
    }

    /**
     * Requests the metadata for doi, blocks until the grabber has no pending
     * work and returns the XMP built from the response. A failed lookup
     * terminates the program.
     */
    private byte[] getXmpForDoi(String doi, boolean genCr, String agent) {
        MarkBuilder builder = new MarkBuilder(genCr, agent) {
            @Override
            public void onFailure(String doi, int code, String msg) {
                if (code == MetadataGrabber.CRUMMY_XML_CODE) {
                    exitWithError(2, "Failed to parse metadata XML because of:\n"
                            + code + ": " + msg);
                } else {
                    System.err.println();
                    exitWithError(2, "Failed to retrieve metadata because of:\n"
                            + code + ": " + msg);
                }
            }
        };
        grabber.grabOne(doi, builder);
        System.out.println("Grabbing metadata for '" + doi + "'...");
        grabber.waitForEmpty();

        return builder.getXmpData();
    }

    /** Maps "NAME.pdf" to "NAME_xmp.pdf"; any other name gets "_xmp.pdf" appended. */
    private static String getOutFileName(String pdfFileName) {
        if (pdfFileName.endsWith(".pdf")) {
            return pdfFileName.substring(0, pdfFileName.length() - 4)
                    + "_xmp.pdf";
        } else {
            return pdfFileName + "_xmp.pdf";
        }
    }

    /** Shuts the grabber down, prints error to standard error and exits with code. */
    private void exitWithError(int code, String error) {
        shutDown();
        System.err.println();
        System.err.println(error);
        System.exit(code);
    }
}
--------------------------------------------------------------------------------
/src/org/crossref/pdfmark/MarkBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | 20 | import java.io.ByteArrayOutputStream; 21 | import java.io.IOException; 22 | import java.net.URI; 23 | import java.net.URISyntaxException; 24 | 25 | import javax.xml.xpath.XPathExpressionException; 26 | 27 | import org.crossref.pdfmark.prism.Prism21Schema; 28 | import org.crossref.pdfmark.pub.Publisher; 29 | import org.crossref.pdfmark.unixref.Unixref; 30 | import org.crossref.pdfmark.unixref.Work; 31 | 32 | import com.itextpdf.text.xml.xmp.DublinCoreSchema; 33 | import com.itextpdf.text.xml.xmp.XmpWriter; 34 |
/**
 * A MetadataGrabber.Handler that remembers the metadata and publisher it is
 * handed and, on completion, turns them into XMP bytes available through
 * {@link #getXmpData()}. Subclasses supply onFailure.
 */
public abstract class MarkBuilder implements MetadataGrabber.Handler {

    /** Base URI that DOIs are resolved against in getUrlForDoi. */
    private static final URI DOI_RESOLVER = URI.create("http://dx.doi.org/");

    private final boolean generateCopyright;

    private final String rightsAgent;

    private Unixref unixref;

    private Publisher publisher;

    private byte[] xmpData;

    /**
     * @param generateCopyright whether a copyright statement is added when a
     *                          publisher is known
     * @param rightsAgent       value written as the PRISM rights agent
     */
    public MarkBuilder(boolean generateCopyright, String rightsAgent) {
        this.generateCopyright = generateCopyright;
        this.rightsAgent = rightsAgent;
    }

    @Override
    public void onMetadata(String requestedDoi, Unixref unixref) {
        this.unixref = unixref;
    }

    @Override
    public void onPublisher(String requestedDoi, Publisher pub) {
        this.publisher = pub;
    }

    /**
     * Builds the XMP for the work received earlier and stores it. Work types
     * other than journal and book yield an empty byte array. I/O and XPath
     * problems are reported through onFailure with CLIENT_EXCEPTION_CODE.
     */
    @Override
    public void onComplete(String requestedDoi) {
        ByteArrayOutputStream xmpOut = new ByteArrayOutputStream();
        SchemaSet schemas = new SchemaSet();

        try {
            Work work = selectWork();

            if (work != null) {
                writeXmp(work, schemas, xmpOut);
            }

            xmpData = xmpOut.toByteArray();
        } catch (IOException e) {
            onFailure(requestedDoi, MetadataGrabber.CLIENT_EXCEPTION_CODE,
                    e.toString());
        } catch (XPathExpressionException e) {
            onFailure(requestedDoi, MetadataGrabber.CLIENT_EXCEPTION_CODE,
                    e.toString());
        }
    }

    /** Picks the journal or book out of the stored unixref; null for any other type. */
    private Work selectWork() {
        switch (unixref.getType()) {
        case JOURNAL:
            return unixref.getJournal();
        case BOOK:
            return unixref.getBook();
        default:
            return null;
        }
    }

    /** Fills the Dublin Core and PRISM schemas from work and writes them to out. */
    private void writeXmp(Work work, SchemaSet schemas, ByteArrayOutputStream out)
            throws IOException, XPathExpressionException {
        XmpWriter writer = new XmpWriter(out);

        work.writeXmp(schemas);

        if (publisher != null) {
            if (generateCopyright) {
                String copyright = getCopyright(work);
                Work.addToSchema(schemas.getDc(), DublinCoreSchema.RIGHTS, copyright);
                Work.addToSchema(schemas.getPrism(), Prism21Schema.COPYRIGHT, copyright);
            }
            Work.addToSchema(schemas.getDc(), DublinCoreSchema.PUBLISHER,
                    publisher.getName());
        }

        Work.addToSchema(schemas.getPrism(), Prism21Schema.RIGHTS_AGENT,
                rightsAgent);

        writer.addRdfDescription(schemas.getDc());
        writer.addRdfDescription(schemas.getPrism());
        writer.close();
    }

    /** Formats the copyright line as "(C) YEAR PUBLISHER". */
    private String getCopyright(Work work) throws XPathExpressionException {
        StringBuilder line = new StringBuilder("(C) ");
        line.append(work.getYear());
        line.append(" ");
        line.append(publisher.getName());
        return line.toString();
    }

    /** Resolves doi against the DOI resolver base URI and returns the result as text. */
    public static String getUrlForDoi(String doi) {
        URI resolved = DOI_RESOLVER.resolve(doi);
        return resolved.toString();
    }

    /** Returns the XMP bytes stored by onComplete, or null if none were stored. */
    public byte[] getXmpData() {
        return xmpData;
    }

}
-------------------------------------------------------------------------------- /src/org/crossref/pdfmark/MetadataGrabber.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details.
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | 20 | import java.io.IOException; 21 | import java.util.LinkedList; 22 | import java.util.Queue; 23 | 24 | import javax.xml.parsers.DocumentBuilder; 25 | import javax.xml.parsers.DocumentBuilderFactory; 26 | import javax.xml.parsers.ParserConfigurationException; 27 | import javax.xml.xpath.XPathExpressionException; 28 | 29 | import org.apache.http.HttpEntity; 30 | import org.apache.http.HttpResponse; 31 | import org.apache.http.StatusLine; 32 | import org.apache.http.params.HttpParams; 33 | import org.apache.http.params.BasicHttpParams; 34 | import org.apache.http.params.HttpConnectionParams; 35 | import org.apache.http.client.ClientProtocolException; 36 | import org.apache.http.client.HttpClient; 37 | import org.apache.http.client.methods.HttpGet; 38 | import org.apache.http.client.methods.HttpUriRequest; 39 | import org.apache.http.impl.client.DefaultHttpClient; 40 | import org.crossref.pdfmark.pub.Publisher; 41 | import org.crossref.pdfmark.unixref.Unixref; 42 | import org.w3c.dom.Document; 43 | import org.xml.sax.SAXException; 44 | 45 | public class MetadataGrabber { 46 | 47 | public static final int CLIENT_EXCEPTION_CODE = -1; 48 | public static final int CRUMMY_XML_CODE = -2; 49 | public static final int BAD_XPATH_CODE = -3; 50 | 51 | private static final String DOI_QUERY = 52 | "http://www.crossref.org/openurl/" + 53 | "?id=doi:{0}&noredirect=true" + 54 | "&pid={1}" + 55 | "&format=unixref"; 56 | 57 | private static final String PUBLISHER_QUERY = 58 | "http://www.crossref.org/" + 59 | "getPrefixPublisher/" + 60 | "?prefix={0}"; 61 | 62 | private static final String QUERY_TOKEN = "{0}"; 63 | 64 | private static final String KEY_TOKEN = "{1}"; 65 | 66 | private HttpClient client; 67 | 68 | 
private Queue requests; 69 | 70 | private DocumentBuilder builder; 71 | 72 | private boolean terminated; 73 | 74 | private Object monitor = new Object(); 75 | 76 | private String apiKey; 77 | 78 | private enum RequestType { 79 | DOI, 80 | PUBLISHER, 81 | } 82 | 83 | private class RequestInfo { 84 | private String doi; 85 | private HttpUriRequest request; 86 | private Handler handler; 87 | private RequestType requestType; 88 | 89 | private RequestInfo(RequestType rt) { 90 | requestType = rt; 91 | } 92 | 93 | private RequestInfo withRequest(String location, String replacement) { 94 | String detokRequest = location.replace(QUERY_TOKEN, replacement); 95 | request = new HttpGet(detokRequest); 96 | return this; 97 | } 98 | 99 | private RequestInfo withRequest(String location, String replacement, 100 | String key) { 101 | String detokRequest = location.replace(QUERY_TOKEN, replacement); 102 | detokRequest = detokRequest.replace(KEY_TOKEN, key); 103 | request = new HttpGet(detokRequest); 104 | return this; 105 | } 106 | 107 | private RequestInfo withDoi(String doi) { 108 | this.doi = doi; 109 | return this; 110 | } 111 | 112 | private RequestInfo withHandler(Handler handler) { 113 | this.handler = handler; 114 | return this; 115 | } 116 | 117 | private void performOn(HttpClient client) { 118 | try { 119 | HttpResponse sponse = client.execute(request); 120 | HttpEntity entity = sponse.getEntity(); 121 | 122 | if (entity != null) { 123 | Document doc = builder.parse(entity.getContent()); 124 | 125 | if (requestType == RequestType.DOI) { 126 | Unixref unixref = new Unixref(doc); 127 | String ownerPrefix = unixref.getOwnerPrefix(); 128 | handler.onMetadata(doi, unixref); 129 | if (!ownerPrefix.isEmpty()) { 130 | queuePubReq(doi, handler, unixref.getOwnerPrefix()); 131 | } else { 132 | handler.onComplete(doi); 133 | } 134 | } else if (requestType == RequestType.PUBLISHER) { 135 | Publisher publisher = new Publisher(doc); 136 | handler.onPublisher(doi, publisher); 137 | 
handler.onComplete(doi); 138 | } 139 | 140 | } else { 141 | StatusLine sl = sponse.getStatusLine(); 142 | handler.onFailure(doi, 143 | sl.getStatusCode(), 144 | sl.getReasonPhrase()); 145 | } 146 | } catch (ClientProtocolException e) { 147 | handler.onFailure(doi, CLIENT_EXCEPTION_CODE, e.toString()); 148 | } catch (IOException e) { 149 | handler.onFailure(doi, CLIENT_EXCEPTION_CODE, e.toString()); 150 | } catch (SAXException e) { 151 | handler.onFailure(doi, CRUMMY_XML_CODE, e.toString()); 152 | } catch (XPathExpressionException e) { 153 | handler.onFailure(doi, BAD_XPATH_CODE, e.toString()); 154 | } 155 | } 156 | } 157 | 158 | public MetadataGrabber(String apiKey) { 159 | this.apiKey = apiKey; 160 | 161 | try { 162 | DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); 163 | domFactory.setNamespaceAware(true); 164 | builder = domFactory.newDocumentBuilder(); 165 | } catch (ParserConfigurationException e) { 166 | System.err.println("Error: Can't create an XML parser."); 167 | System.err.println(e); 168 | System.exit(2); 169 | } 170 | 171 | HttpParams httpParams = new BasicHttpParams(); 172 | HttpConnectionParams.setConnectionTimeout(httpParams, 10000); 173 | HttpConnectionParams.setSoTimeout(httpParams, 10000); 174 | 175 | client = new DefaultHttpClient(httpParams); 176 | requests = new LinkedList(); 177 | 178 | new Thread(new Runnable() { 179 | @Override 180 | public void run() { 181 | while (!terminated) { 182 | while (!requests.isEmpty()) { 183 | requests.peek().performOn(client); 184 | 185 | synchronized (monitor) { 186 | requests.remove(); 187 | monitor.notifyAll(); 188 | } 189 | } 190 | 191 | synchronized (monitor) { 192 | try { 193 | monitor.wait(); 194 | } catch (InterruptedException e) { 195 | } 196 | } 197 | } 198 | } 199 | }).start(); 200 | } 201 | 202 | public void shutDown() { 203 | terminated = true; 204 | synchronized (monitor) { 205 | monitor.notifyAll(); 206 | } 207 | } 208 | 209 | public void grabOne(String doi, Handler 
handler) { 210 | requests.add(new RequestInfo(RequestType.DOI) 211 | .withDoi(doi) 212 | .withHandler(handler) 213 | .withRequest(DOI_QUERY, doi, apiKey)); 214 | 215 | synchronized (monitor) { 216 | monitor.notifyAll(); 217 | } 218 | /* Later, when we receive this response, we will queue 219 | * a RequestInfo to get publisher data. */ 220 | } 221 | 222 | private void queuePubReq(String doi, Handler handler, String pubPrefix) { 223 | requests.add(new RequestInfo(RequestType.PUBLISHER) 224 | .withDoi(doi) 225 | .withHandler(handler) 226 | .withRequest(PUBLISHER_QUERY, pubPrefix)); 227 | 228 | synchronized (monitor) { 229 | monitor.notifyAll(); 230 | } 231 | } 232 | 233 | public void waitForEmpty() { 234 | synchronized (monitor) { 235 | while (true) { 236 | if (requests.isEmpty()) { 237 | break; 238 | } 239 | try { 240 | monitor.wait(); 241 | } catch (InterruptedException e) { 242 | } 243 | } 244 | } 245 | } 246 | 247 | public interface Handler { 248 | public void onMetadata(String doi, Unixref metadata); 249 | public void onPublisher(String doi, Publisher publisher); 250 | public void onComplete(String doi); 251 | public void onFailure(String doi, int code, String msg); 252 | } 253 | 254 | } 255 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/PdfxSchema.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark; 2 | 3 | import com.itextpdf.text.xml.xmp.XmpSchema; 4 | 5 | public class PdfxSchema extends XmpSchema { 6 | 7 | public static final String DEFAULT_XPATH_ID = "pdfx"; 8 | public static final String DEFAULT_XPATH_URI 9 | = "http://ns.adobe.com/pdfx/1.3/"; 10 | 11 | public static final String DOI = "doi"; 12 | 13 | public PdfxSchema() { 14 | super("xmlns:" 15 | + DEFAULT_XPATH_ID 16 | + "=\"" + DEFAULT_XPATH_URI + "\""); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- 
/src/org/crossref/pdfmark/SchemaSet.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark; 2 | 3 | import org.crossref.pdfmark.prism.Prism21Schema; 4 | 5 | import com.itextpdf.text.xml.xmp.DublinCoreSchema; 6 | import com.itextpdf.text.xml.xmp.XmpSchema; 7 | 8 | public class SchemaSet { 9 | 10 | private XmpSchema dc = new DublinCoreSchema(); 11 | private XmpSchema prism = new Prism21Schema(); 12 | private XmpSchema pdfx = new PdfxSchema(); 13 | 14 | public XmpSchema getPrism() { 15 | return prism; 16 | } 17 | 18 | public XmpSchema getDc() { 19 | return dc; 20 | } 21 | 22 | public XmpSchema getPdfx() { 23 | return pdfx; 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/XPathHelpers.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark; 2 | 3 | import java.util.ArrayList; 4 | 5 | import javax.xml.xpath.XPathConstants; 6 | import javax.xml.xpath.XPathExpression; 7 | import javax.xml.xpath.XPathExpressionException; 8 | 9 | import org.w3c.dom.Node; 10 | 11 | public final class XPathHelpers { 12 | 13 | private XPathHelpers() { 14 | } 15 | 16 | public static String orEmptyStr(XPathExpression xpe, Node n) 17 | throws XPathExpressionException { 18 | Node inner = (Node) xpe.evaluate(n, XPathConstants.NODE); 19 | return inner == null ? "" : inner.getTextContent(); 20 | } 21 | 22 | public static String evalConcat(Node n, String delimiter, XPathExpression... 
exprs) 23 | throws XPathExpressionException { 24 | ArrayList results = new ArrayList(); 25 | for (XPathExpression expr : exprs) { 26 | results.add(orEmptyStr(expr, n)); 27 | } 28 | 29 | while (results.remove("")); 30 | 31 | String retn = ""; 32 | 33 | for (String s : results) { 34 | retn += s; 35 | if (results.indexOf(s) != results.size() -1 ) { 36 | retn += delimiter; 37 | } 38 | } 39 | 40 | return retn; 41 | } 42 | 43 | public static Node oneOf(Node parent, XPathExpression... exprs) 44 | throws XPathExpressionException { 45 | for (XPathExpression expr : exprs) { 46 | Node child = (Node) expr.evaluate(parent, XPathConstants.NODE); 47 | if (child != null) { 48 | return child; 49 | } 50 | } 51 | return null; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/XmlUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark; 19 | 20 | import java.util.UUID; 21 | 22 | import javax.xml.XMLConstants; 23 | 24 | import org.w3c.dom.Attr; 25 | import org.w3c.dom.Element; 26 | import org.w3c.dom.NamedNodeMap; 27 | 28 | public final class XmlUtils { 29 | 30 | private XmlUtils() { 31 | } 32 | 33 | /** 34 | * @return A String[] of length two, [prefix, URI]. 35 | */ 36 | public static String[] getNamespaceDeclaration(Element ele) { 37 | String prefixHint = null; 38 | String[] parts = ele.getNodeName().split(":"); 39 | if (parts.length == 2) { 40 | prefixHint = parts[0]; 41 | } 42 | 43 | return getNamespaceDeclaration(ele, prefixHint); 44 | } 45 | 46 | /** 47 | * @return A String[] of length two, [prefix, URI]. 48 | */ 49 | public static String[] getNamespaceDeclaration(Element ele, String prefixHint) { 50 | String[] ns = new String[2]; // prefix, URI 51 | NamedNodeMap attribs = ele.getAttributes(); 52 | 53 | for (int i=0; i schemata = new HashMap(); 81 | 82 | for (int i=0; i schemata, Element ele) { 98 | String propertyName = ele.getNodeName(); 99 | String[] ns = XmlUtils.getNamespaceDeclaration(ele); 100 | XmpSchema schema = null; 101 | 102 | if (schemata.containsKey(ns[1])) { 103 | schema = schemata.get(ns[1]); 104 | } else { 105 | schema = new AnyXmpSchema(ns[0], ns[1]); 106 | schemata.put(ns[1], schema); 107 | } 108 | 109 | /* Should have either Text or a single . 
*/ 110 | boolean hasElementChildren = false; 111 | for (int i=0; i toInfo(byte[] xmp) throws XmpException { 222 | try { 223 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 224 | factory.setNamespaceAware(true); 225 | 226 | Map info = new HashMap(); 227 | 228 | XmpSchema[] schemata = XmpUtils.parseSchemata(xmp); 229 | for (XmpSchema schema : schemata) { 230 | if (schema.getXmlns().contains("pdfx")) { 231 | for (Entry entry : schema.entrySet()) { 232 | Object value = entry.getValue(); 233 | 234 | String key = (String) entry.getKey(); 235 | String[] parts = key.split(":"); 236 | String infoKey = parts.length == 2 ? parts[1] : parts[0]; 237 | 238 | String val = (String) entry.getValue(); 239 | 240 | if (val.toLowerCase().contains("") 241 | || val.toLowerCase().contains("")) { 242 | val = "" + val + ""; 243 | DocumentBuilder builder = factory.newDocumentBuilder(); 244 | Document doc = builder.parse(new ByteArrayInputStream(val.getBytes())); 245 | 246 | NodeList nodes = doc.getElementsByTagName("rdf:li"); 247 | for (int i=0; i element. 89 | */ 90 | public String getPreferredIssn() throws XPathExpressionException { 91 | if (preferredIssn == null) { 92 | // Assumption: All ISSNs for a journal are provided in unixref. 
93 | Node n = (Node) PRINT_ISSN_EXPR.evaluate(journalNode, 94 | XPathConstants.NODE); 95 | 96 | if (n == null) { 97 | n = (Node) ANY_ISSN_EXPR.evaluate(journalNode, 98 | XPathConstants.NODE); 99 | } 100 | 101 | if (n != null) { 102 | preferredIssn = n.getTextContent(); 103 | } else { 104 | preferredIssn = ""; 105 | } 106 | } 107 | return preferredIssn; 108 | } 109 | 110 | public String getElectronicIssn() throws XPathExpressionException { 111 | if (electronicIssn == null) { 112 | electronicIssn = XPathHelpers.orEmptyStr(ELECTRONIC_ISSN_EXPR, 113 | journalNode); 114 | } 115 | return electronicIssn; 116 | } 117 | 118 | public String getDoi() throws XPathExpressionException { 119 | if (doi == null) { 120 | doi = XPathHelpers.orEmptyStr(DOI_EXPR, journalNode); 121 | } 122 | return doi; 123 | } 124 | 125 | public String getVolume() throws XPathExpressionException { 126 | if (volume == null) { 127 | volume = XPathHelpers.orEmptyStr(VOLUME_EXPR, journalNode); 128 | } 129 | return volume; 130 | } 131 | 132 | public String getIssue() throws XPathExpressionException { 133 | if (issue == null) { 134 | issue = XPathHelpers.orEmptyStr(ISSUE_EXPR, journalNode); 135 | } 136 | return issue; 137 | } 138 | 139 | public String getFullTitle() throws XPathExpressionException { 140 | if (title == null) { 141 | title = XPathHelpers.orEmptyStr(TITLE_EXPR, journalNode); 142 | } 143 | return title; 144 | } 145 | 146 | public String getYear() throws XPathExpressionException { 147 | return getArticle().getYear(); 148 | } 149 | 150 | public void writeXmp(SchemaSet schemaSet) throws XPathExpressionException { 151 | JournalArticle article = getArticle(); 152 | XmpSchema dc = schemaSet.getDc(); 153 | XmpSchema prism = schemaSet.getPrism(); 154 | XmpSchema pdfx = schemaSet.getPdfx(); 155 | 156 | addToSchema(dc, DublinCoreSchema.CREATOR, article.getContributors()); 157 | addToSchema(dc, DublinCoreSchema.TITLE, article.getTitles()); 158 | addToSchema(dc, DublinCoreSchema.DATE, article.getDate()); 
159 | addToSchema(dc, DublinCoreSchema.IDENTIFIER, article.getDoi()); 160 | 161 | addToSchema(prism, Prism21Schema.PUBLICATION_DATE, article.getDate()); 162 | addToSchema(prism, Prism21Schema.DOI, article.getDoi()); 163 | addToSchema(prism, Prism21Schema.ISSN, getPreferredIssn()); 164 | addToSchema(prism, Prism21Schema.E_ISSN, getElectronicIssn()); 165 | addToSchema(prism, Prism21Schema.ISSUE_IDENTIFIER, getDoi()); 166 | addToSchema(prism, Prism21Schema.PUBLICATION_NAME, getFullTitle()); 167 | addToSchema(prism, Prism21Schema.VOLUME, getVolume()); 168 | addToSchema(prism, Prism21Schema.NUMBER, getIssue()); 169 | addToSchema(prism, Prism21Schema.STARTING_PAGE, article.getFirstPage()); 170 | addToSchema(prism, Prism21Schema.ENDING_PAGE, article.getLastPage()); 171 | addToSchema(prism, Prism21Schema.URL, MarkBuilder.getUrlForDoi(article.getDoi())); 172 | 173 | addToSchema(pdfx, PdfxSchema.DOI, article.getDoi()); 174 | } 175 | 176 | } 177 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/unixref/JournalArticle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark.unixref; 19 | 20 | import javax.xml.xpath.XPath; 21 | import javax.xml.xpath.XPathConstants; 22 | import javax.xml.xpath.XPathExpression; 23 | import javax.xml.xpath.XPathExpressionException; 24 | 25 | import org.crossref.pdfmark.XPathHelpers; 26 | import org.w3c.dom.Document; 27 | import org.w3c.dom.Node; 28 | import org.w3c.dom.NodeList; 29 | 30 | public class JournalArticle { 31 | 32 | private static XPathExpression DOI_EXPR; 33 | private static XPathExpression FIRST_PAGE_EXPR; 34 | private static XPathExpression LAST_PAGE_EXPR; 35 | 36 | private Node articleNode; 37 | 38 | private String[] titles, contributors; 39 | 40 | private String publishedDate, doi, firstPage, lastPage, year; 41 | 42 | public JournalArticle(Document doc, Node newArticleNode) 43 | throws XPathExpressionException { 44 | articleNode = newArticleNode; 45 | 46 | XPath xpath = Unixref.getXPath(doc); 47 | 48 | DOI_EXPR = xpath.compile("doi_data/doi"); 49 | FIRST_PAGE_EXPR = xpath.compile("pages/first_page"); 50 | LAST_PAGE_EXPR = xpath.compile("pages/last_page"); 51 | } 52 | 53 | public String[] getTitles() throws XPathExpressionException { 54 | if (titles == null) { 55 | titles = Unixref.getTitles(articleNode); 56 | } 57 | return titles; 58 | } 59 | 60 | public String[] getContributors() throws XPathExpressionException { 61 | if (contributors == null) { 62 | contributors = Unixref.getContributors(articleNode); 63 | } 64 | return contributors; 65 | } 66 | 67 | public String getDate() throws XPathExpressionException { 68 | if (publishedDate == null) { 69 | publishedDate = Unixref.getPublicationDate(articleNode); 70 | } 71 | return publishedDate; 72 | } 73 | 74 | public String getFirstPage() throws XPathExpressionException { 75 | if 
(firstPage == null) { 76 | firstPage = XPathHelpers.orEmptyStr(FIRST_PAGE_EXPR, articleNode); 77 | } 78 | return firstPage; 79 | } 80 | 81 | public String getLastPage() throws XPathExpressionException { 82 | if (lastPage == null) { 83 | lastPage = XPathHelpers.orEmptyStr(LAST_PAGE_EXPR, articleNode); 84 | } 85 | return lastPage; 86 | } 87 | 88 | public String getDoi() throws XPathExpressionException { 89 | if (doi == null) { 90 | doi = XPathHelpers.orEmptyStr(DOI_EXPR, articleNode); 91 | } 92 | return doi; 93 | } 94 | 95 | public String getYear() throws XPathExpressionException { 96 | if (year == null) { 97 | year = Unixref.getPublicationYear(articleNode); 98 | } 99 | return year; 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/unixref/ReportPaper.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark.unixref; 2 | 3 | public class ReportPaper { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/unixref/Standard.java: -------------------------------------------------------------------------------- 1 | package org.crossref.pdfmark.unixref; 2 | 3 | import javax.xml.xpath.XPath; 4 | import javax.xml.xpath.XPathExpression; 5 | import javax.xml.xpath.XPathExpressionException; 6 | 7 | import org.crossref.pdfmark.SchemaSet; 8 | import org.crossref.pdfmark.XPathHelpers; 9 | import org.w3c.dom.Document; 10 | import org.w3c.dom.Node; 11 | 12 | public class Standard extends Work { 13 | 14 | private static XPathExpression REPORT_META_EXPR; 15 | private static XPathExpression CONFPROC_META_EXPR; 16 | private static XPathExpression DISSERTATION_META_EXPR; 17 | 18 | private static XPathExpression ISSUE_NUMBER_EXPR; 19 | private static XPathExpression EDITION_NUMBER_EXPR; 20 | private static XPathExpression ISSN_EXPR; 21 | private static XPathExpression ISBN_EXPR; 
22 | private static XPathExpression SINGLE_CONTRIBUTOR_EXPR; 23 | private static XPathExpression DOI_EXPR; 24 | private static XPathExpression INSTITUTION_NAME_EXPR; 25 | private static XPathExpression INSTITUTION_LOC_EXPR; 26 | private static XPathExpression NUMBER_EXPR; 27 | 28 | private Node workNode; 29 | private Node mdNode; 30 | 31 | private String[] titles, contributors; 32 | 33 | private String publicationDate, issueNumber, editionNumber, issn, isbn, 34 | doi, singleContributor, institutionName, institutionLocation, 35 | number; 36 | 37 | public Standard(Document doc, Node workNode) throws XPathExpressionException { 38 | this.workNode = workNode; 39 | 40 | XPath xpath = Unixref.getXPath(doc); 41 | 42 | REPORT_META_EXPR = xpath.compile("report"); 43 | 44 | mdNode = XPathHelpers.oneOf(workNode, REPORT_META_EXPR, 45 | CONFPROC_META_EXPR, DISSERTATION_META_EXPR); 46 | } 47 | 48 | public String[] getTitles() throws XPathExpressionException { 49 | if (titles == null) { 50 | titles = Unixref.getTitles(mdNode); 51 | } 52 | return titles; 53 | } 54 | 55 | public String[] getContributors() throws XPathExpressionException { 56 | if (contributors == null) { 57 | contributors = Unixref.getContributors(mdNode); 58 | } 59 | return contributors; 60 | } 61 | 62 | public String getPublicationDate() throws XPathExpressionException { 63 | if (publicationDate == null) { 64 | publicationDate = Unixref.getPublicationDate(mdNode); 65 | } 66 | return publicationDate; 67 | } 68 | 69 | @Override 70 | public String getYear() throws XPathExpressionException { 71 | // TODO Auto-generated method stub 72 | return null; 73 | } 74 | 75 | @Override 76 | public void writeXmp(SchemaSet dcPrism) throws XPathExpressionException { 77 | // TODO Auto-generated method stub 78 | 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/org/crossref/pdfmark/unixref/Unixref.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 CrossRef.org (email: support@crossref.org) 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | package org.crossref.pdfmark.unixref; 19 | 20 | import javax.xml.xpath.XPath; 21 | import javax.xml.xpath.XPathConstants; 22 | import javax.xml.xpath.XPathExpression; 23 | import javax.xml.xpath.XPathExpressionException; 24 | import javax.xml.xpath.XPathFactory; 25 | 26 | import org.crossref.pdfmark.XPathHelpers; 27 | import org.w3c.dom.Document; 28 | import org.w3c.dom.Node; 29 | import org.w3c.dom.NodeList; 30 | 31 | public class Unixref { 32 | 33 | public enum Type { 34 | JOURNAL, 35 | BOOK, 36 | DISSERTATION, 37 | CONFERENCE, 38 | REPORT_PAPER, 39 | STANDARD, 40 | OTHER, 41 | } 42 | 43 | /* Work type */ 44 | private static XPathExpression JOURNAL_EXPR; 45 | private static XPathExpression BOOK_EXPR; 46 | private static XPathExpression DISSERTATION_EXPR; 47 | private static XPathExpression CONFERENCE_EXPR; 48 | private static XPathExpression REPORT_PAPER_EXPR; 49 | private static XPathExpression STANDARD_EXPR; 50 | private static XPathExpression OWNER_PREFIX_EXPR; 51 | 52 | /* Publication date */ 53 | private static XPathExpression DATE_EXPR; 54 | 
private static XPathExpression YEAR_EXPR; 55 | private static XPathExpression MONTH_EXPR; 56 | private static XPathExpression DAY_EXPR; 57 | 58 | /* Titles and authors */ 59 | private static XPathExpression TITLES_EXPR; 60 | private static XPathExpression AUTHORS_EXPR; 61 | private static XPathExpression GIVEN_NAME_EXPR; 62 | private static XPathExpression SURNAME_EXPR; 63 | 64 | private XPath xpath; 65 | 66 | private Document doc; 67 | 68 | private String ownerPrefix; 69 | 70 | public static XPath getXPath(Document doc) { 71 | XPathFactory factory = XPathFactory.newInstance(); 72 | XPath xpath = factory.newXPath(); 73 | return xpath; 74 | } 75 | 76 | public Unixref(Document doc) throws XPathExpressionException { 77 | this.doc = doc; 78 | this.xpath = getXPath(doc); 79 | 80 | JOURNAL_EXPR = xpath.compile("//journal"); 81 | BOOK_EXPR = xpath.compile("//book"); 82 | DISSERTATION_EXPR = xpath.compile("//dissertation"); 83 | CONFERENCE_EXPR = xpath.compile("//conference"); 84 | REPORT_PAPER_EXPR = xpath.compile("//report-paper"); 85 | STANDARD_EXPR = xpath.compile("//standard"); 86 | OWNER_PREFIX_EXPR = xpath.compile("//doi_record/@owner"); 87 | 88 | DATE_EXPR = xpath.compile("publication_date"); 89 | DAY_EXPR = xpath.compile("day"); 90 | MONTH_EXPR = xpath.compile("month"); 91 | YEAR_EXPR = xpath.compile("year"); 92 | 93 | TITLES_EXPR = xpath.compile("titles/title"); 94 | AUTHORS_EXPR = xpath.compile("contributors/person_name" 95 | + "[@contributor_role='author']"); 96 | GIVEN_NAME_EXPR = xpath.compile("given_name"); 97 | SURNAME_EXPR = xpath.compile("surname"); 98 | } 99 | 100 | public String getOwnerPrefix() throws XPathExpressionException { 101 | if (ownerPrefix == null) { 102 | ownerPrefix = XPathHelpers.orEmptyStr(OWNER_PREFIX_EXPR, doc); 103 | } 104 | return ownerPrefix; 105 | } 106 | 107 | public Type getType() throws XPathExpressionException { 108 | try { 109 | if (JOURNAL_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 110 | .equals(Boolean.TRUE)) { 111 | return 
Type.JOURNAL; 112 | } else if ((Boolean) BOOK_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 113 | .equals(Boolean.TRUE)) { 114 | return Type.BOOK; 115 | } else if ((Boolean) DISSERTATION_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 116 | .equals(Boolean.TRUE)) { 117 | return Type.DISSERTATION; 118 | } else if ((Boolean) CONFERENCE_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 119 | .equals(Boolean.TRUE)) { 120 | return Type.CONFERENCE; 121 | } else if ((Boolean) REPORT_PAPER_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 122 | .equals(Boolean.TRUE)) { 123 | return Type.REPORT_PAPER; 124 | } else if ((Boolean) STANDARD_EXPR.evaluate(doc, XPathConstants.BOOLEAN) 125 | .equals(Boolean.TRUE)) { 126 | return Type.STANDARD; 127 | } 128 | } catch (XPathExpressionException e) { 129 | /* Do nothing. */ 130 | } 131 | return Type.OTHER; 132 | } 133 | 134 | public Journal getJournal() throws XPathExpressionException { 135 | return new Journal(doc, (Node) JOURNAL_EXPR.evaluate(doc, XPathConstants.NODE)); 136 | } 137 | 138 | public Book getBook() throws XPathExpressionException { 139 | return new Book(doc, (Node) BOOK_EXPR.evaluate(doc, XPathConstants.NODE)); 140 | } 141 | 142 | static String getPublicationDate(Node work) throws XPathExpressionException { 143 | String date = ""; 144 | Node pubDate = (Node) DATE_EXPR.evaluate(work, XPathConstants.NODE); 145 | 146 | if (pubDate != null) { 147 | date = XPathHelpers.evalConcat(pubDate, "-", YEAR_EXPR, MONTH_EXPR, 148 | DAY_EXPR); 149 | } 150 | 151 | return date; 152 | } 153 | 154 | static String getPublicationYear(Node work) throws XPathExpressionException { 155 | String year = ""; 156 | Node pubDate = (Node) DATE_EXPR.evaluate(work, XPathConstants.NODE); 157 | 158 | if (pubDate != null) { 159 | year = XPathHelpers.orEmptyStr(YEAR_EXPR, pubDate); 160 | } 161 | 162 | return year; 163 | } 164 | 165 | static String[] getContributors(Node work) throws XPathExpressionException { 166 | NodeList s = (NodeList) AUTHORS_EXPR.evaluate(work, 
XPathConstants.NODESET); 167 | 168 | String[] names = new String[s.getLength()]; 169 | 170 | for (int i=0; i 1) { 26 | XmpArray bag = new XmpArray(XmpArray.ORDERED); 27 | for (String val : vals) { 28 | bag.add(val); 29 | } 30 | schema.setProperty(key, bag); 31 | } 32 | } 33 | 34 | public abstract void writeXmp(SchemaSet schemaSet) throws XPathExpressionException; 35 | 36 | public abstract String getYear() throws XPathExpressionException; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /test-data/nature-metadata-example.txt: -------------------------------------------------------------------------------- 1 | me="prism.copyright" content="© 2009 Nature Publishing Group" /> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | link title="schema(DC)" rel="schema.dc" href="http://purl.org/dc/elements/1.1/" /> 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /test-data/pdfx-xmp-example.xmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | application/pdf 7 | 10.1016/j.aca.2010.07.015 8 | 9 | 10 | Preparation of a reference mussel tissue material for polycyclic aromatic hydrocarbons and trace metals determination 11 | 12 | 13 | 14 | 15 | Patricia Navarro 16 | Luis Bartolomé 17 | Juan Carlos Raposo 18 | Olatz Zuloaga 19 | Gorka Arana 20 | Nestor Etxebarria 21 | 22 | 23 | 24 | 25 | Analytica Chimica Acta, 675 (2010) 91-96. 10.1016/j.aca.2010.07.015 26 | 27 | 28 | 29 | 30 | Elsevier B.V. 31 | 32 | 33 | 34 | 35 | Quality control 36 | Laboratory reference material 37 | Mussel tissue 38 | Polycyclic aromatic hydrocarbons 39 | Trace metals 40 | 41 | 42 | 43 | 45 | journal 46 | Analytica Chimica Acta 47 | © 2010 Elsevier B.V. All rights reserved. 
48 | 0003-2670 49 | 675 50 | 1 51 | 18 August 2010 52 | 2010-08-18 53 | 91-96 54 | 91 55 | 96 56 | 10.1016/j.aca.2010.07.015 57 | http://dx.doi.org/10.1016/j.aca.2010.07.015 58 | 59 | 61 | Elsevier 62 | 2010-08-24T09:06:08+02:00 63 | 2010-08-01T04:36:20Z 64 | 2010-08-24T09:06:08+02:00 65 | 66 | 68 | True 69 | 70 | 72 | Acrobat Distiller 8.1.0 (Windows) 73 | Unknown 74 | Quality control; Laboratory reference material; Mussel tissue; Polycyclic aromatic hydrocarbons; Trace metals 75 | 76 | 78 | uuid:16b5ae77-6164-45a5-a950-9a1d91d80a2f 79 | uuid:3607da1d-24f0-4999-8469-f7b87b2b6366 80 | 81 | 83 | noindex 84 | 2010-04-23 85 | true 86 | 6.1 87 | 88 | 89 | sciencedirect.com 90 | elsevier.com 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /test-data/publisher-example.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Emerald 4 | 5 | 60-62 Toller Lane 6 | 7 | Bradford 8 | 9 | W Yorkshire, 10 | BD8 9BY 11 | UK 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test-data/random-xmp-example.xmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | XMP metadata Exif IPTC PSIR file I/O 6 | Copyright 4008, Adobe Systems Incorporated, all rights reserved. 7 | True 8 | Acrobat Distiller 8.1.0 (Windows) 9 | 10 | 12 | 2008-09-16T08:43:43-07:00 13 | FrameMaker 7.2 14 | 2008-09-16T08:19:40Z 15 | 2008-09-16T08:43:43-07:00 16 | 17 | 19 | application/pdf 20 | 21 | 22 | XMP Specification Part 3: Storage in Files 23 | 24 | 25 | 26 | 27 | Adobe Developer Technologies 28 | 29 | 30 | 31 | 32 | Storage and handling of XMP in files, and legacy metadata in still image file formats. 
33 | 34 | 35 | 36 | 38 | Copyright 2008, Adobe Systems Incorporated, all rights reserved. 39 | True 40 | 41 | 43 | uuid:a2a0d182-7b1c-4801-a22c-d610115116bd 44 | uuid:1a365cee-e070-4b52-8278-db5e46b20a4c 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /test-data/test-pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrossRef/pdfmark/8a5dd0596467741ac446f73eb93be74321ad4e5b/test-data/test-pdf.pdf -------------------------------------------------------------------------------- /test-data/unixref-example.json: -------------------------------------------------------------------------------- 1 | { 2 | "xsi:schemaLocation": "http:\/\/www.crossref.org\/xschema\/1.1 http:\/\/www.crossref.org\/schema\/unixref1.1.xsd http:\/\/www.crossref.org\/xschema\/1.0 http:\/\/www.crossref.org\/schema\/unixref1.0.xsd", 3 | "doi_record": { 4 | "timestamp": "2008-10-09 12:12:14.0", 5 | "crossref": { 6 | "journal": { 7 | "journal_article": { 8 | "doi_data": { 9 | "resource": "http:\/\/hdl.handle.net\/2027\/spo.3336451.0009.101", 10 | "doi": "10.3998\/3336451.0009.101" 11 | }, 12 | "publication_date": { 13 | "media_type": "print", 14 | "month": "01", 15 | "day": "31", 16 | "year": "2006" 17 | }, 18 | "contributors": [ 19 | { 20 | "sequence": "first", 21 | "contributor_role": "author", 22 | "given_name": "Geoffrey", 23 | "surname": "Bilder" 24 | } 25 | ], 26 | "publication_type": "full_text", 27 | "titles": { 28 | "title": "In Google We Trust?" 
29 | } 30 | }, 31 | "journal_metadata": { 32 | "doi_data": { 33 | "resource": "http:\/\/www.journalofelectronicpublishing.org\/", 34 | "doi": "10.3998\/jep" 35 | }, 36 | "abbrev_title": "J Electron Publ", 37 | "issn": { 38 | "media_type": "electronic", 39 | "content": "10802711" 40 | }, 41 | "full_title": "Journal of Electronic Publishing" 42 | }, 43 | "journal_issue": { 44 | "publication_date": { 45 | "media_type": "print", 46 | "month": "01", 47 | "day": "31", 48 | "year": "2006" 49 | }, 50 | "journal_volume": { 51 | "volume": "9" 52 | }, 53 | "issue": "1" 54 | } 55 | } 56 | }, 57 | "owner": "10.3998", 58 | "xmlns": "http:\/\/www.crossref.org\/xschema\/1.0" 59 | }, 60 | "xmlns:xsi": "http:\/\/www.w3.org\/2001\/XMLSchema-instance", 61 | "xmlns": "http:\/\/www.crossref.org\/xschema\/1.1" 62 | } -------------------------------------------------------------------------------- /test-data/unixref-example.xml: -------------------------------------------------------------------------------- 1 | 2 | Journal of Electronic PublishingJ Electron Publ1080271110.3998/jephttp://www.journalofelectronicpublishing.org/0131200691In Google We Trust?GeoffreyBilder0131200610.3998/3336451.0009.101http://hdl.handle.net/2027/spo.3336451.0009.101 4 | --------------------------------------------------------------------------------