├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── README.md ├── anhalytics-annotate └── src │ └── main │ ├── java │ └── fr │ │ └── inria │ │ └── anhalytics │ │ └── annotate │ │ ├── Annotator.java │ │ ├── AnnotatorWorker.java │ │ ├── KeyTermAnnotatorWorker.java │ │ ├── NerdAnnotatorWorker.java │ │ ├── PDFQuantitiesAnnotatorWorker.java │ │ ├── QuantitiesAnnotatorWorker.java │ │ ├── exceptions │ │ ├── AnnotatorNotAvailableException.java │ │ ├── ClientException.java │ │ └── UnreachableAnnotateServiceException.java │ │ ├── main │ │ └── Main.java │ │ └── services │ │ ├── AnnotateService.java │ │ ├── KeyTermExtractionService.java │ │ ├── PDFQuantitiesService.java │ │ └── QuantitiesService.java │ └── resources │ └── log4j2.xml ├── anhalytics-commons └── src │ ├── main │ └── java │ │ └── fr │ │ └── inria │ │ └── anhalytics │ │ └── commons │ │ ├── dao │ │ ├── AbstractDAOFactory.java │ │ ├── AddressDAO.java │ │ ├── Conference_EventDAO.java │ │ ├── DAO.java │ │ ├── DatabaseConnection.java │ │ ├── DocumentDAO.java │ │ ├── Document_OrganisationDAO.java │ │ ├── In_SerialDAO.java │ │ ├── MongoDAOFactory.java │ │ ├── MonographDAO.java │ │ ├── PersonDAO.java │ │ ├── PublicationDAO.java │ │ ├── PublisherDAO.java │ │ ├── anhalytics │ │ │ ├── AffiliationDAO.java │ │ │ ├── DAOFactory.java │ │ │ ├── LocationDAO.java │ │ │ └── OrganisationDAO.java │ │ └── biblio │ │ │ ├── AbstractBiblioDAOFactory.java │ │ │ └── BiblioDAOFactory.java │ │ ├── data │ │ ├── Annotation.java │ │ ├── BiblioObject.java │ │ ├── BinaryFile.java │ │ ├── File.java │ │ └── Processings.java │ │ ├── entities │ │ ├── Address.java │ │ ├── Affiliation.java │ │ ├── Author.java │ │ ├── Collection.java │ │ ├── Conference.java │ │ ├── Conference_Event.java │ │ ├── Country.java │ │ ├── Document.java │ │ ├── Document_Identifier.java │ │ ├── Document_Organisation.java │ │ ├── Editor.java │ │ ├── In_Serial.java │ │ ├── Journal.java │ │ ├── Location.java │ │ ├── Monograph.java │ │ ├── Organisation.java │ │ ├── 
Organisation_Identifier.java │ │ ├── Organisation_Name.java │ │ ├── PART_OF.java │ │ ├── Person.java │ │ ├── Person_Identifier.java │ │ ├── Person_Name.java │ │ ├── Publication.java │ │ ├── Publisher.java │ │ └── Serial_Identifier.java │ │ ├── exceptions │ │ ├── DataException.java │ │ ├── DirectoryNotFoundException.java │ │ ├── FileNotFoundException.java │ │ ├── PropertyException.java │ │ ├── ServiceException.java │ │ └── SystemException.java │ │ ├── main │ │ └── Main.java │ │ ├── managers │ │ ├── MongoCollectionsInterface.java │ │ ├── MongoDataManager.java │ │ ├── MongoFileManager.java │ │ └── MongoManager.java │ │ ├── properties │ │ ├── AnnotateProperties.java │ │ ├── CommonsProperties.java │ │ ├── HarvestProperties.java │ │ ├── IndexProperties.java │ │ └── KbProperties.java │ │ └── utilities │ │ ├── JaroWinkler.java │ │ ├── KeyGen.java │ │ ├── NamespaceContextMap.java │ │ ├── ScriptRunner.java │ │ └── Utilities.java │ └── test │ └── java │ └── fr │ └── inria │ └── anhalytics │ └── commons │ └── utilities │ └── UtilitiesTest.java ├── anhalytics-harvest ├── list-halSample-quantities.txt ├── src │ ├── main │ │ ├── java │ │ │ └── fr │ │ │ │ └── inria │ │ │ │ └── anhalytics │ │ │ │ └── harvest │ │ │ │ ├── converters │ │ │ │ ├── HalTEIConverter.java │ │ │ │ ├── IstexTEIConverter.java │ │ │ │ └── MetadataConverter.java │ │ │ │ ├── crossref │ │ │ │ ├── CrossRef.java │ │ │ │ ├── CrossRefBiblioData.java │ │ │ │ └── OpenUrl.java │ │ │ │ ├── exceptions │ │ │ │ ├── BinaryNotAvailableException.java │ │ │ │ ├── GrobidTimeoutException.java │ │ │ │ └── UnreachableGrobidServiceException.java │ │ │ │ ├── grobid │ │ │ │ ├── AssetLegendExtracter.java │ │ │ │ ├── GrobidAnnexWorker.java │ │ │ │ ├── GrobidFulltextWorker.java │ │ │ │ ├── GrobidProcess.java │ │ │ │ ├── GrobidService.java │ │ │ │ ├── GrobidSimpleFulltextWorker.java │ │ │ │ └── GrobidWorker.java │ │ │ │ ├── harvesters │ │ │ │ ├── HALOAIPMHHarvester.java │ │ │ │ ├── Harvester.java │ │ │ │ └── IstexHarvester.java │ │ │ │ ├── 
main │ │ │ │ └── Main.java │ │ │ │ ├── parsers │ │ │ │ ├── HALOAIPMHDomParser.java │ │ │ │ └── OAIPMHPathsItf.java │ │ │ │ ├── service │ │ │ │ ├── AnhalyticsAssetService.java │ │ │ │ └── Application.java │ │ │ │ └── teibuild │ │ │ │ ├── Steps.java │ │ │ │ ├── TeiBuilderWorker.java │ │ │ │ └── TeiCorpusBuilderProcess.java │ │ └── resources │ │ │ ├── application.properties │ │ │ └── log4j2.xml │ └── test │ │ ├── java │ │ └── fr │ │ │ └── inria │ │ │ └── anhalytics │ │ │ └── harvest │ │ │ └── teibuild │ │ │ └── TeiBuildIntegrationTest.java │ │ └── resources │ │ ├── 5a7c4c00b64afc92264bd313.hal.corpus.tei.xml │ │ ├── 5a7c4c00b64afc92264bd313.hal.grobid.tei.xml │ │ ├── 5a7c4c00b64afc92264bd313.hal.tei.xml │ │ ├── 5a87113db64a8b35ae8e6916.istex.corpus.tei.xml │ │ ├── 5a87113db64a8b35ae8e6916.istex.grobid.tei.xml │ │ └── 5a87113db64a8b35ae8e6916.istex.tei.xml └── tmp │ └── .gitkeep ├── anhalytics-index └── src │ ├── main │ ├── java │ │ ├── fr │ │ │ └── inria │ │ │ │ └── anhalytics │ │ │ │ └── index │ │ │ │ ├── DocumentIndexer.java │ │ │ │ ├── Indexer.java │ │ │ │ ├── IndexingPreprocess.java │ │ │ │ ├── KnowledgeBaseIndexer.java │ │ │ │ ├── exceptions │ │ │ │ ├── ElasticSearchConfigurationException.java │ │ │ │ ├── IndexNotCreatedException.java │ │ │ │ └── IndexingServiceException.java │ │ │ │ └── main │ │ │ │ └── Main.java │ │ └── org │ │ │ └── json │ │ │ ├── JSONArray.java │ │ │ ├── JSONObject.java │ │ │ ├── JsonTapasML.java │ │ │ └── XML.java │ └── resources │ │ ├── elasticSearch │ │ ├── analyzer.json │ │ ├── annotation_keyterm.json │ │ ├── annotation_nerd.json │ │ ├── annotation_pdf_quantities.json │ │ ├── annotation_quantities.json │ │ ├── kbauthors.json │ │ ├── kborganisations.json │ │ ├── kbpublications.json │ │ └── npl.json │ │ └── log4j2.xml │ └── test │ └── resources │ ├── hal-01110586v1.final.tei.xml │ └── hal-01110668v1.final.tei.xml ├── anhalytics-kb └── src │ ├── main │ ├── java │ │ └── fr │ │ │ └── inria │ │ │ └── anhalytics │ │ │ └── kb │ │ │ ├── datamine │ 
│ │ ├── HALPaths.java │ │ │ ├── IstexMiner.java │ │ │ ├── KnowledgeBaseFeeder.java │ │ │ └── TeiPaths.java │ │ │ ├── exceptions │ │ │ └── NumberOfCoAuthorsExceededException.java │ │ │ ├── main │ │ │ └── Main.java │ │ │ └── stax │ │ │ ├── PublicationTeiDocumentStaxHandler.java │ │ │ ├── StaxParserContentHandler.java │ │ │ └── StaxUtils.java │ └── resources │ │ └── log4j2.xml │ └── test │ ├── java │ └── fr │ │ └── inria │ │ └── anhalytics │ │ └── kb │ │ └── stax │ │ └── PublicationTeiDocumentStaxHandlerTest.java │ └── resources │ ├── hal-00576900.corpus.tei.xml │ └── inria-00510267.corpus.tei.xml ├── anhalytics.sh ├── build.gradle ├── config ├── anhalytics.default.properties └── anhalytics.test.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── logs └── placeholder ├── settings.gradle └── sql ├── anhalyticsDB.sql └── biblioDB.sql /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | **/build/* 3 | **/tmp/* 4 | **/target/* 5 | config/local 6 | tempPDF/* 7 | tempTEI/* 8 | tempPdf/* 9 | *.log* 10 | *.out* 11 | *.DS_Store 12 | *.old 13 | *.bak 14 | *nb-configuration.xml 15 | *.iml 16 | **/*.iml 17 | .idea 18 | config/anhalytics.properties -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "anhalytics-er"] 2 | path = anhalytics-er 3 | url = https://github.com/anHALytics/anhalytics-er.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: true 3 | 4 | jdk: 5 | - openjdk8 6 | 7 | env: 8 | global: 9 | - GRADLE_OPTS="-Xmx512m" 10 | 11 | script: 12 | - ./gradlew build --stacktrace --info --no-daemon 13 | 14 | after_success: 15 | - ./gradlew jacocoRootReport 
coveralls --no-daemon 16 | 17 | before_cache: 18 | - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock 19 | - rm -fr $HOME/.gradle/caches/*/plugin-resolution/ 20 | 21 | cache: 22 | directories: 23 | - $HOME/.gradle/caches/ 24 | - $HOME/.gradle/wrapper/ 25 | 26 | notifications: 27 | slack: 28 | secure: yoEiartTToGgIIkxbtvQHsPYXjZK0CPDsNN3KJ6ZHuObdt0KbEihrhR3DWikWn3B+oCW5HQpVZoU8GtYxS5PlJguXLbEO44sqWXaCktEM4vvJILDsjLc6WkL1lkDXZU2z/F9y81f7l9NTmDkIxcRgsDUBmGz59alDw3mq1VGEL7Gq3LjP7PhgdmECN42I4lBE4S6H5a6sJNAr1PQ4zc2/xntu529mesn7VKDTF5FLm/ubSFlOBtKrIMBIxG7w9SYVZBwlX5EGtgSj4yojbWjPkhtPjyBcuzCv6tTus1Z21iDt2Iu9NK4fZPqZPE8tNPZuliwoFBfU/Dq9tP/qP9WRvxztX71vSucFbVm672nrdxeepgWK6+6RI7+wgKOZyAa8awNrv4hZUvicKO9tJwHzh/GbavaQ6+keLfkOifmO4Jh7d02zVQCswb+DuPbcV4U/FPTm5CK8bWx+eVrNN0oGTBkkp2b2mudDEgi/7TUOru/DD3P5mkosigRSGCRRDdoXl4uvObKP9BvlsFOMg3pt3U8L6GduHKi7l0uv5bfIIbc61Nq97LH7eDXEufsSRa/fLFSARBguO+e/9/jd92CVNJkedGqkTLwU37cdThXkEOgOhDnMUcfV6CayV++BKpHoSvOdBEoOzyz5DY/DZNbODMkaMC3IlqNspWnJOPrYqE= 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AnHALytics Core 2 | 3 | [![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html) 4 | [![Documentation Status](https://readthedocs.org/projects/anhalytics/badge/?version=latest)](https://readthedocs.org/projects/anhalytics/?badge=latest) 5 | 6 | AnHALytics is a project aiming at creating an analytic platform for the [HAL research archive](https://hal.archives-ouvertes.fr) or other scientific Open Access repositories, exploring various analytic aspects such as search/discovery, activity and collaboration statistics, trend/technology maps, knowledge graph and data visualization. The project is supported by an [ADT Inria](http://www.inria.fr/en/research/research-teams/technological-development-at-inria) grant and good will :). 
7 | 8 | This core module realizes the data ingestion process and supports the core back-end functionalities of the system. 9 | 10 | ## Documentation 11 | 12 | Please visit the [anHALytics documentation](http://anHALytics.readthedocs.io) for more detailed information on the project, including how to install, build and run the application. 13 | 14 | ## License 15 | 16 | This code is distributed under the [Apache 2.0 license](http://www.apache.org/licenses/LICENSE-2.0). 17 | 18 | ## Warning 19 | 20 | AnHALytics is at an early stage and is a work in progress. It is evolving rapidly and is certainly not production ready! 21 | 22 | So far, only the [HAL repository](https://hal.archives-ouvertes.fr/?lang=en) (the French national scientific archive) is supported. 23 | 24 | ## People 25 | 26 | - Achraf Azhar 27 | - [Patrice Lopez](https://github.com/kermitt2) 28 | 29 | If you are interested in contributing to the project, please contact the people listed above. -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/AnnotatorWorker.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate; 2 | 3 | import fr.inria.anhalytics.commons.data.BiblioObject; 4 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 5 | 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | /** 10 | * Runnable for annotating HAL documents.
11 | * 12 | * @author Achraf, Patrice 13 | */ 14 | public abstract class AnnotatorWorker implements Runnable { 15 | 16 | private static final Logger logger = LoggerFactory.getLogger(AnnotatorWorker.class); 17 | protected MongoFileManager mm = null; 18 | protected BiblioObject biblioObject = null; 19 | protected String annotationsCollection; 20 | 21 | public AnnotatorWorker(MongoFileManager mongoManager, 22 | BiblioObject biblioObject, 23 | String annotationsCollection) { 24 | this.mm = mongoManager; 25 | this.biblioObject = biblioObject; 26 | this.annotationsCollection = annotationsCollection; 27 | } 28 | 29 | @Override 30 | public void run() { 31 | long startTime = System.nanoTime(); 32 | logger.info("\t\t " + Thread.currentThread().getName() + " Start. Processing = "+biblioObject.getRepositoryDocId()); 33 | processCommand(); 34 | long endTime = System.nanoTime(); 35 | logger.info("\t\t " + Thread.currentThread().getName() + " End. :" + (endTime - startTime) / 1000000 + " ms"); 36 | } 37 | protected abstract void processCommand() ; 38 | protected abstract String annotateDocument() ; 39 | 40 | /** 41 | * return documentId of the file being annotated. 
42 | */ 43 | public String getRepositoryDocId() { 44 | return biblioObject.getRepositoryDocId(); 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return this.biblioObject.getRepositoryDocId(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/KeyTermAnnotatorWorker.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate; 2 | 3 | import fr.inria.anhalytics.annotate.services.KeyTermExtractionService; 4 | import fr.inria.anhalytics.commons.data.BiblioObject; 5 | import fr.inria.anhalytics.commons.data.Processings; 6 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 7 | import fr.inria.anhalytics.commons.managers.MongoCollectionsInterface; 8 | 9 | import java.io.IOException; 10 | 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import org.apache.commons.io.IOUtils; 15 | 16 | /** 17 | * Runnable that uses the KeyTerm REST service for annotating available Grobid TEI 18 | * documents.Resulting JSON annotations are then stored in MongoDB as persistent 19 | * storage. 
20 | * 21 | * @author Achraf, Patrice 22 | */ 23 | public class KeyTermAnnotatorWorker extends AnnotatorWorker { 24 | 25 | private static final Logger logger = LoggerFactory.getLogger(KeyTermAnnotatorWorker.class); 26 | 27 | public KeyTermAnnotatorWorker(MongoFileManager mongoManager, 28 | BiblioObject biblioObject) { 29 | super(mongoManager, biblioObject, MongoCollectionsInterface.KEYTERM_ANNOTATIONS); 30 | } 31 | 32 | @Override 33 | protected void processCommand() { 34 | try { 35 | boolean inserted = mm.insertAnnotation(annotateDocument(), annotationsCollection); 36 | 37 | if (inserted) { 38 | mm.updateBiblioObjectStatus(biblioObject, Processings.KEYTERM, false); 39 | logger.info("\t\t " + Thread.currentThread().getName() + ": " + biblioObject.getRepositoryDocId() + " annotated by the KeyTerm extraction and disambiguation service."); 40 | } else { 41 | logger.info("\t\t " + Thread.currentThread().getName() + ": " 42 | + biblioObject.getRepositoryDocId() + " error occured trying to annotate Keyterms."); 43 | } 44 | } catch (Exception ex) { 45 | logger.error("\t\t " + Thread.currentThread().getName() + ": TEI could not be processed by the keyterm extractor: " + biblioObject.getRepositoryDocId()); 46 | ex.printStackTrace(); 47 | } 48 | } 49 | 50 | /** 51 | * Annotation of a complete document with extracted disambiguated key terms. 
52 | */ 53 | @Override 54 | protected String annotateDocument() { 55 | // NOTE: the part bellow should be used in the future for improving the keyterm disambiguation 56 | // by setting a custom domain context which helps the disambiguation (so don't remove it ;) 57 | 58 | /*List halDomainTexts = new ArrayList(); 59 | List halDomains = new ArrayList(); 60 | List meSHDescriptors = new ArrayList(); 61 | 62 | // get the HAL domain 63 | NodeList classes = docTei.getElementsByTagName("classCode"); 64 | for (int p = 0; p < classes.getLength(); p++) { 65 | Node node = classes.item(p); 66 | if (node.getNodeType() == Node.ELEMENT_NODE) { 67 | Element e = (Element) (node); 68 | // filter on attribute @scheme="halDomain" 69 | String scheme = e.getAttribute("scheme"); 70 | if ((scheme != null) && scheme.equals("halDomain")) { 71 | halDomainTexts.add(e.getTextContent()); 72 | String n_att = e.getAttribute("n"); 73 | halDomains.add(n_att); 74 | } else if ((scheme != null) && scheme.equals("mesh")) { 75 | meSHDescriptors.add(e.getTextContent()); 76 | } 77 | } 78 | }*/ 79 | StringBuffer json = new StringBuffer(); 80 | try { 81 | json.append("{ \"repositoryDocId\" : \"" + biblioObject.getRepositoryDocId() 82 | + "\",\"anhalyticsId\" : \"" + biblioObject.getAnhalyticsId() 83 | + "\",\"isIndexed\" : \"" + false 84 | + "\", \"keyterm\" : "); 85 | String jsonText = null; 86 | //call keyterm service on the grobid TEI. 87 | String tei = biblioObject.getGrobidTei()!= null ? 
biblioObject.getGrobidTei() : biblioObject.getTeiCorpus(); 88 | KeyTermExtractionService keyTermService = new KeyTermExtractionService(IOUtils.toInputStream(tei, "UTF-8")); 89 | jsonText = keyTermService.runKeyTermExtraction(); 90 | if (jsonText != null) { 91 | json.append(jsonText).append("}"); 92 | } else { 93 | json.append("{} }"); 94 | } 95 | } catch (IOException e) { 96 | logger.error(Thread.currentThread().getName() + ": TEI could not be processed by the keyterm extractor: " + biblioObject.getRepositoryDocId(), e); 97 | return null; 98 | } 99 | return json.toString(); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/PDFQuantitiesAnnotatorWorker.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate; 2 | 3 | import fr.inria.anhalytics.annotate.services.PDFQuantitiesService; 4 | import fr.inria.anhalytics.commons.data.BiblioObject; 5 | import fr.inria.anhalytics.commons.data.Processings; 6 | import fr.inria.anhalytics.commons.managers.MongoCollectionsInterface; 7 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | 13 | /** 14 | * Annotates the PDF with the quantities along with the boudingBoxes. 
15 | */ 16 | public class PDFQuantitiesAnnotatorWorker extends AnnotatorWorker { 17 | 18 | private static final Logger logger = LoggerFactory.getLogger(PDFQuantitiesAnnotatorWorker.class); 19 | 20 | public PDFQuantitiesAnnotatorWorker(MongoFileManager mongoManager, 21 | BiblioObject biblioObject) { 22 | super(mongoManager, biblioObject, MongoCollectionsInterface.PDF_QUANTITIES_ANNOTATIONS); 23 | } 24 | 25 | @Override 26 | protected void processCommand() { 27 | // get all the elements having an attribute id and annotate their text content 28 | boolean inserted = mm.insertAnnotation(annotateDocument(), annotationsCollection); 29 | if (inserted) { 30 | mm.updateBiblioObjectStatus(biblioObject, Processings.PDFQUANTITIES, false); 31 | logger.info("\t\t " + Thread.currentThread().getName() + ": " 32 | + biblioObject.getRepositoryDocId() + " annotated by the QUANTITIES service."); 33 | } else { 34 | logger.info("\t\t " + Thread.currentThread().getName() + ": " 35 | + biblioObject.getRepositoryDocId() + " error occured trying to annotate with QUANTITIES."); 36 | } 37 | 38 | } 39 | 40 | @Override 41 | protected String annotateDocument() { 42 | StringBuffer json = new StringBuffer(); 43 | try { 44 | /*String filepath = Utilities.storeTmpFile(((IstexFile)file).getStream()); 45 | try { 46 | ((IstexFile)file).getStream().close(); 47 | } catch (IOException ex) { 48 | throw new DataException("File stream can't be closed.", ex); 49 | }*/ 50 | 51 | json.append("{ \"repositoryDocId\" : \"" + biblioObject.getRepositoryDocId() 52 | + "\",\"anhalyticsId\" : \"" + biblioObject.getAnhalyticsId() 53 | // + "\", \"date\" :\"" + date 54 | + "\",\"isIndexed\" : \"" + false 55 | + "\", \"annotation\" : "); 56 | String jsonText = null; 57 | 58 | PDFQuantitiesService quantitiesService = new PDFQuantitiesService(biblioObject.getPdf().getStream()); 59 | jsonText = quantitiesService.processPDFQuantities(); 60 | if (jsonText != null) { 61 | json.append(jsonText).append("}"); 62 | } else { 63 | 
json.append("{} }"); 64 | } 65 | biblioObject.getPdf().getStream().close(); 66 | } catch (Exception ex) { 67 | logger.error("\t\t " + Thread.currentThread().getName() + ": PDF could not be processed by the quantities extractor: "); 68 | ex.printStackTrace(); 69 | return null; 70 | } 71 | return json.toString(); 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/exceptions/AnnotatorNotAvailableException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.exceptions; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 4 | 5 | /** 6 | * 7 | * @author patrice 8 | */ 9 | public class AnnotatorNotAvailableException extends ServiceException { 10 | 11 | public AnnotatorNotAvailableException(String message) { 12 | super(message); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/exceptions/ClientException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.exceptions; 2 | 3 | public class ClientException extends RuntimeException { 4 | 5 | public ClientException() { 6 | super(); 7 | } 8 | 9 | public ClientException(String message) { 10 | super(message); 11 | } 12 | 13 | public ClientException(String message, Throwable cause) { 14 | super(message, cause); 15 | } 16 | 17 | public ClientException(Throwable cause) { 18 | super(cause); 19 | } 20 | 21 | protected ClientException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 22 | super(message, cause, enableSuppression, writableStackTrace); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- 
/anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/exceptions/UnreachableAnnotateServiceException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.exceptions; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.SystemException; 4 | 5 | /** 6 | * 7 | * @author achraf 8 | */ 9 | public class UnreachableAnnotateServiceException extends SystemException{ 10 | public UnreachableAnnotateServiceException(int responseCode, String serviceName) { 11 | super(serviceName+" service is not alive. HTTP error: " + responseCode); 12 | } 13 | 14 | public UnreachableAnnotateServiceException(String message, Throwable cause) { 15 | super(message, cause); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/main/Main.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.main; 2 | 3 | import fr.inria.anhalytics.annotate.Annotator; 4 | import fr.inria.anhalytics.commons.data.Processings; 5 | import fr.inria.anhalytics.commons.exceptions.PropertyException; 6 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 7 | import fr.inria.anhalytics.commons.properties.AnnotateProperties; 8 | import fr.inria.anhalytics.commons.utilities.Utilities; 9 | import java.net.UnknownHostException; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | /** 16 | * Main class that implements command for annotating TEIs and saving the result 17 | * to Mongodb. 
18 | * 19 | * @author Achraf, Patrice 20 | */ 21 | public class Main { 22 | 23 | private static final Logger logger = LoggerFactory.getLogger(Main.class); 24 | 25 | private static List availableCommands = new ArrayList() { 26 | { 27 | add("annotateAll"); 28 | add("annotateNerd"); 29 | add("annotateKeyTerm"); 30 | add("annotateQuantities"); 31 | add("annotateQuantitiesFromPDF"); 32 | } 33 | }; 34 | 35 | public static void main(String[] args) throws UnknownHostException { 36 | try { 37 | AnnotateProperties.init("anhalytics.properties"); 38 | } catch (PropertyException e) { 39 | logger.error(e.getMessage()); 40 | return; 41 | } 42 | 43 | if (processArgs(args)) { 44 | Utilities.setTmpPath(AnnotateProperties.getTmp()); 45 | Main main = new Main(); 46 | main.processCommand(); 47 | } else { 48 | System.out.println(getHelp()); 49 | return; 50 | } 51 | } 52 | 53 | private void processCommand() { 54 | String process = AnnotateProperties.getProcessName(); 55 | 56 | try { 57 | Annotator annotator = new Annotator(); 58 | if (process.equals("annotateNerd")) { 59 | annotator.annotate(Processings.NERD); 60 | } else if (process.equals("annotateKeyTerm")) { 61 | annotator.annotate(Processings.KEYTERM); 62 | } else if (process.equals("annotateAll")) { 63 | annotator.annotate(Processings.NERD); 64 | annotator.annotate(Processings.KEYTERM); 65 | } else if (process.equals("annotateQuantities")) { 66 | annotator.annotate(Processings.QUANTITIES); 67 | } else if (process.equals("annotateQuantitiesFromPDF")) { 68 | annotator.annotate(Processings.PDFQUANTITIES); 69 | } 70 | } catch (ServiceException se) { 71 | logger.error(se.getMessage()); 72 | } 73 | } 74 | 75 | protected static boolean processArgs(final String[] args) { 76 | String currArg; 77 | boolean result = true; 78 | 79 | if (args.length == 0) { 80 | result = false; 81 | } else { 82 | for (int i = 0; i < args.length; i++) { 83 | currArg = args[i]; 84 | if (currArg.equals("-h")) { 85 | result = false; 86 | break; 87 | } else if 
(currArg.equals("-multiThread")) { 88 | AnnotateProperties.setIsMultiThread(true); 89 | continue; 90 | } else if (currArg.equals("-exe")) { 91 | String command = args[i + 1]; 92 | if (availableCommands.contains(command)) { 93 | AnnotateProperties.setProcessName(command); 94 | i++; 95 | continue; 96 | } else { 97 | System.err.println("-exe value should be one value from this list: " + availableCommands); 98 | result = false; 99 | break; 100 | } 101 | } else if (currArg.equals("--reset")) { 102 | AnnotateProperties.setReset(true); 103 | i++; 104 | continue; 105 | } else { 106 | result = false; 107 | } 108 | } 109 | } 110 | return result; 111 | } 112 | 113 | protected static String getHelp() { 114 | final StringBuffer help = new StringBuffer(); 115 | help.append("HELP ANHALYTICS-ANNOTATE \n"); 116 | help.append("-h: displays help\n"); 117 | help.append("-multiThread: enables using multiple threads to annotate\n"); 118 | help.append("-nodates: fetches entries from database with no date filtering.\n"); 119 | help.append("-exe: gives the command to execute. 
The value should be one of these : \n"); 120 | help.append("--reset: updates all the documents (beware about versions/updates) : \n"); 121 | help.append("\t" + availableCommands + "\n"); 122 | return help.toString(); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/services/AnnotateService.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.services; 2 | 3 | import fr.inria.anhalytics.commons.properties.AnnotateProperties; 4 | import fr.inria.anhalytics.annotate.exceptions.UnreachableAnnotateServiceException; 5 | import fr.inria.anhalytics.commons.data.Processings; 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.net.HttpURLConnection; 9 | import java.net.URL; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | 14 | /** 15 | * Call of annotate services via its REST web services. Data to be sent to the service 16 | * is given as a stream, which could be textual, xml, PDF or whatever. 17 | * 18 | */ 19 | public abstract class AnnotateService { 20 | 21 | private static final Logger logger = LoggerFactory.getLogger(AnnotateService.class); 22 | 23 | //protected String input = null; 24 | protected InputStream input = null; 25 | 26 | public AnnotateService(InputStream input) { 27 | this.input = input; 28 | } 29 | 30 | /** 31 | * Checks if Annotating service is responding and available. 
32 | * 33 | * @return boolean 34 | */ 35 | public static boolean isAnnotateServiceReady(Processings annotator_type) throws UnreachableAnnotateServiceException { 36 | logger.info("Checking " + annotator_type + " service..."); 37 | int responseCode = 0; 38 | HttpURLConnection conn = null; 39 | try { 40 | String urlString = ""; 41 | if (annotator_type == Processings.NERD) { 42 | urlString = AnnotateProperties.getNerdHost() 43 | + (AnnotateProperties.getNerdPort().isEmpty() ? "" : ":" + AnnotateProperties.getNerdPort()) + "/isalive"; 44 | } else if (annotator_type == Processings.QUANTITIES) { 45 | urlString = AnnotateProperties.getQuantitiesHost() 46 | + (AnnotateProperties.getQuantitiesPort().isEmpty() ? "" : ":" + AnnotateProperties.getQuantitiesPort()) + "/isalive"; 47 | } else { 48 | // keyterm isalive checking not implemented yet. 49 | logger.info(annotator_type + " service is ok and can be used."); 50 | return true; 51 | } 52 | URL url = new URL(urlString); 53 | conn = (HttpURLConnection) url.openConnection(); 54 | logger.info(urlString); 55 | conn.setDoOutput(true); 56 | conn.setRequestMethod("GET"); 57 | responseCode = conn.getResponseCode(); 58 | } catch (IOException e) { 59 | throw new UnreachableAnnotateServiceException(responseCode, annotator_type.toString()); 60 | } 61 | if (responseCode != 200) { 62 | logger.error(annotator_type + " service is not alive."); 63 | throw new UnreachableAnnotateServiceException(responseCode, annotator_type.toString()); 64 | } 65 | conn.disconnect(); 66 | logger.info(annotator_type + " service is ok and can be used."); 67 | return true; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/services/KeyTermExtractionService.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.services; 2 | 3 | import org.slf4j.Logger; 4 | import 
org.slf4j.LoggerFactory; 5 | 6 | import fr.inria.anhalytics.commons.properties.AnnotateProperties; 7 | 8 | import java.io.*; 9 | import java.io.IOException; 10 | import java.io.OutputStream; 11 | import java.io.InputStream; 12 | import java.nio.charset.Charset; 13 | import java.net.HttpRetryException; 14 | import java.net.ConnectException; 15 | 16 | import org.apache.http.entity.mime.content.StringBody; 17 | import org.apache.http.entity.mime.MultipartEntity; 18 | import org.apache.http.entity.mime.HttpMultipartMode; 19 | import java.net.HttpURLConnection; 20 | import java.net.MalformedURLException; 21 | import java.net.URL; 22 | 23 | import org.apache.commons.io.IOUtils; 24 | 25 | /** 26 | * Perform a key term extraction for a document and disambiguate the resulting 27 | * terms. We use the tool via its REST web services, similarly as for the NERD. 28 | * The resulting extraction can be used to annotate a document as a whole, so 29 | * without stand-off position to particular chunks of texts. 30 | * 31 | * @author Achraf, Patrice 32 | */ 33 | public class KeyTermExtractionService extends AnnotateService { 34 | 35 | private static final Logger logger = LoggerFactory.getLogger(KeyTermExtractionService.class); 36 | 37 | static private String RESOURCEPATH = "processKeyTermArticleTEI"; 38 | 39 | public KeyTermExtractionService(InputStream teiStream) { 40 | super(teiStream); 41 | } 42 | 43 | /** 44 | * Call the Keyterm extraction service on server for a TEI document. 45 | * 46 | * @return the resulting extracted disambiguated terms in JSON 47 | */ 48 | public String runKeyTermExtraction() { 49 | StringBuffer output = new StringBuffer(); 50 | try { 51 | URL url = new URL(AnnotateProperties.getKeytermHost() + 52 | (AnnotateProperties.getKeytermPort().isEmpty() ? 
"" : ":" + AnnotateProperties.getKeytermPort()) + "/" + RESOURCEPATH); 53 | HttpURLConnection conn = (HttpURLConnection) url.openConnection(); 54 | conn.setDoOutput(true); 55 | conn.setRequestMethod("POST"); 56 | 57 | // Note: to be review, we could maybe use the InputStream to stream the file part of the 58 | // multipartEntity without the intermediate String conversion 59 | String inputString = IOUtils.toString(input, "UTF-8"); 60 | StringBody contentBody = new StringBody(inputString, Charset.forName("UTF-8")); 61 | MultipartEntity multipartEntity = new MultipartEntity(HttpMultipartMode.STRICT); 62 | multipartEntity.addPart("file", contentBody); 63 | 64 | conn.setRequestProperty("Content-Type", multipartEntity.getContentType().getValue()); 65 | OutputStream out = conn.getOutputStream(); 66 | try { 67 | multipartEntity.writeTo(out); 68 | } finally { 69 | out.close(); 70 | } 71 | 72 | if (conn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { 73 | throw new HttpRetryException("Failed: service not available - HTTP error code : " 74 | + conn.getResponseCode(), conn.getResponseCode()); 75 | } 76 | 77 | if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) { 78 | throw new RuntimeException("Failed: HTTP error code : " 79 | + conn.getResponseCode()); 80 | } 81 | 82 | BufferedReader br = new BufferedReader(new InputStreamReader((conn.getInputStream()))); 83 | String line = null; 84 | while ((line = br.readLine()) != null) { 85 | output.append(line); 86 | output.append(" "); 87 | } 88 | br.close(); 89 | conn.disconnect(); 90 | } catch (ConnectException e) { 91 | e.printStackTrace(); 92 | try { 93 | Thread.sleep(20000); 94 | runKeyTermExtraction(); 95 | } catch (InterruptedException ex) { 96 | Thread.currentThread().interrupt(); 97 | } 98 | } catch (HttpRetryException e) { 99 | e.printStackTrace(); 100 | try { 101 | Thread.sleep(20000); 102 | runKeyTermExtraction(); 103 | } catch (InterruptedException ex) { 104 | Thread.currentThread().interrupt(); 105 | } 106 
| } catch (MalformedURLException e) { 107 | e.printStackTrace(); 108 | } catch (IOException e) { 109 | e.printStackTrace(); 110 | } 111 | return output.toString().trim(); 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/services/PDFQuantitiesService.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.annotate.services; 2 | 3 | import fr.inria.anhalytics.commons.properties.AnnotateProperties; 4 | import fr.inria.anhalytics.commons.utilities.KeyGen; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.File; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.InputStreamReader; 11 | import java.io.OutputStream; 12 | 13 | import java.net.HttpRetryException; 14 | import java.net.HttpURLConnection; 15 | import java.net.MalformedURLException; 16 | import java.net.URL; 17 | 18 | import org.apache.commons.io.IOUtils; 19 | import org.apache.commons.io.FileUtils; 20 | import org.apache.http.entity.mime.HttpMultipartMode; 21 | import org.apache.http.entity.mime.MultipartEntity; 22 | import org.apache.http.entity.mime.content.FileBody; 23 | 24 | /*import org.codehaus.jackson.map.ObjectMapper; 25 | import org.codehaus.jackson.node.ArrayNode; 26 | import org.codehaus.jackson.node.ObjectNode;*/ 27 | 28 | import com.fasterxml.jackson.core.*; 29 | import com.fasterxml.jackson.databind.*; 30 | import com.fasterxml.jackson.databind.node.*; 31 | import com.fasterxml.jackson.annotation.*; 32 | import com.fasterxml.jackson.core.io.*; 33 | 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | /** 38 | * Call the quantity annotation service for a PDF input. Annotations will be enriched with 39 | * coordinates of annotations in the original PDF document. 
*/
public class PDFQuantitiesService extends AnnotateService {

    private static final Logger logger = LoggerFactory.getLogger(PDFQuantitiesService.class);

    static private String REQUEST_PDF_QUANTITIES = "annotateQuantityPDF";

    public PDFQuantitiesService(InputStream inputPDF) {
        super(inputPDF);
    }

    /**
     * Call the Quantities PDF annotation service on server.
     *
     * The input stream is first copied to a temporary file under
     * AnnotateProperties.getTmp(), which is sent as the "input" multipart part
     * and deleted once the call is over.
     *
     * @return the resulting annotation in JSON (response lines joined with
     * single spaces); an empty or partial string when an I/O error occurs
     * @throws RuntimeException when the service answers with a status other
     * than 200 or 503
     */
    public String processPDFQuantities() {
        StringBuffer output = new StringBuffer();
        HttpURLConnection conn = null;
        File file = null;
        try {
            URL url = new URL(AnnotateProperties.getQuantitiesHost()
                    + (AnnotateProperties.getQuantitiesPort().isEmpty() ? "" : ":" + AnnotateProperties.getQuantitiesPort()) + "/" + REQUEST_PDF_QUANTITIES);
            // Log the URL actually used: the configured host already carries
            // its scheme, otherwise new URL(...) above would have failed.
            logger.info(url.toString());
            conn = (HttpURLConnection) url.openConnection();
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");

            // note: how to pass directly the stream in the multipartEntity?
            // - we could if we know the length of the stream
            file = new File(AnnotateProperties.getTmp(), KeyGen.getKey());
            FileUtils.copyInputStreamToFile(input, file);
            FileBody fileBody = new FileBody(file);
            MultipartEntity multipartEntity = new MultipartEntity(HttpMultipartMode.STRICT);
            multipartEntity.addPart("input", fileBody);

            conn.setRequestProperty("Content-Type", multipartEntity.getContentType().getValue());
            OutputStream out = conn.getOutputStream();
            try {
                multipartEntity.writeTo(out);
            } finally {
                out.close();
            }

            int status = conn.getResponseCode();
            if (status == HttpURLConnection.HTTP_UNAVAILABLE) {
                throw new HttpRetryException("Failed : HTTP error code : "
                        + status, status);
            }

            if (status != HttpURLConnection.HTTP_OK) {
                // getErrorStream() returns null when the server sent no error body.
                InputStream errorStream = conn.getErrorStream();
                String errorBody = errorStream == null ? "" : IOUtils.toString(errorStream, "UTF-8");
                throw new RuntimeException("Failed : HTTP error code : "
                        + status + " " + errorBody);
            }
            logger.info("Response " + status);

            InputStream in = conn.getInputStream();
            try {
                // NOTE(review): decoded as UTF-8 instead of the platform default;
                // confirm the service answers in UTF-8.
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
                String line = null;
                while ((line = br.readLine()) != null) {
                    output.append(line);
                    output.append(" ");
                }
            } finally {
                IOUtils.closeQuietly(in);
            }
        } catch (MalformedURLException e) {
            logger.error("Invalid quantities service URL.", e);
        } catch (IOException e) {
            logger.error("I/O error while calling the quantities PDF service.", e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
            // The temporary copy of the PDF is only needed for the upload.
            FileUtils.deleteQuietly(file);
        }
        return output.toString().trim();
    }

}
--------------------------------------------------------------------------------
/anhalytics-annotate/src/main/java/fr/inria/anhalytics/annotate/services/QuantitiesService.java:
--------------------------------------------------------------------------------
package
fr.inria.anhalytics.annotate.services;

import fr.inria.anhalytics.commons.properties.AnnotateProperties;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;


/**
 * Calls the quantity annotation service for a text input.
 *
 * @author azhar
 */
public class QuantitiesService extends AnnotateService {

    private static final Logger logger = LoggerFactory.getLogger(QuantitiesService.class);

    static private String REQUEST_TEXT_QUANTITIES = "processQuantityText";

    public QuantitiesService(InputStream input) {
        super(input);
    }

    /**
     * Call the Quantities text annotation service on server.
     *
     * The input is read as UTF-8 and posted as "text=" followed by the content.
     *
     * @return the resulting annotation in JSON (response lines joined with
     * single spaces), null when the service answers with a status other than
     * 200, or an empty/partial string when an I/O error occurs
     */
    public String processTextQuantities() {
        StringBuffer output = new StringBuffer();
        HttpURLConnection conn = null;
        OutputStream os = null;
        BufferedReader br = null;
        try {
            URL url = new URL(AnnotateProperties.getQuantitiesHost()
                    + (AnnotateProperties.getQuantitiesPort().isEmpty() ? "" : ":" + AnnotateProperties.getQuantitiesPort()) + "/" + REQUEST_TEXT_QUANTITIES);
            conn = (HttpURLConnection) url.openConnection();
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");
            // NOTE(review): the body below is "text=..." rather than JSON, while the
            // declared content type is JSON; left untouched, confirm against the service.
            conn.setRequestProperty("Content-Type", "application/json; charset=utf8");

            /*ObjectMapper mapper = new ObjectMapper();
            ObjectNode node = mapper.createObjectNode();
            node.put("text", input);
            byte[] postDataBytes = node.toString().getBytes("UTF-8");*/

            String inputString = IOUtils.toString(input, "UTF-8");
            String piece = "text=" + inputString;
            byte[] postDataBytes = piece.getBytes("UTF-8");

            os = conn.getOutputStream();
            os.write(postDataBytes);
            os.flush();

            int status = conn.getResponseCode();
            logger.info("Response " + status);
            if (status != HttpURLConnection.HTTP_OK) {
                logger.error("Failed annotating text segment: HTTP error code : "
                        + status);
                return null;
            }

            // NOTE(review): decoded as UTF-8 instead of the platform default;
            // confirm the service answers in UTF-8.
            br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
            String line = null;
            while ((line = br.readLine()) != null) {
                output.append(line);
                output.append(" ");
            }
        } catch (MalformedURLException e) {
            logger.error("Invalid quantities service URL.", e);
        } catch (IOException e) {
            logger.error("I/O error while calling the quantities text service.", e);
        } finally {
            // Release everything on every path, including the early return above.
            IOUtils.closeQuietly(br);
            IOUtils.closeQuietly(os);
            if (conn != null) {
                conn.disconnect();
            }
        }
        return output.toString().trim();
    }

}
--------------------------------------------------------------------------------
/anhalytics-annotate/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 |
--------------------------------------------------------------------------------
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/AbstractDAOFactory.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import fr.inria.anhalytics.commons.dao.anhalytics.DAOFactory; 4 | 5 | /** 6 | * 7 | * @author azhar 8 | */ 9 | public abstract class AbstractDAOFactory { 10 | 11 | public static final int DAO_FACTORY = 0; 12 | 13 | public static final int MONGO_DAO_FACTORY = 1; 14 | 15 | public abstract DAO getDocumentDAO(); 16 | 17 | public abstract DAO getPublicationDAO(); 18 | 19 | public abstract DAO getMonographDAO(); 20 | 21 | public abstract DAO getPublisherDAO(); 22 | 23 | public abstract DAO getAddressDAO(); 24 | 25 | public abstract DAO getAffiliationDAO(); 26 | 27 | public abstract DAO getConference_EventDAO(); 28 | 29 | public abstract DAO getIn_SerialDAO(); 30 | 31 | public abstract DAO getLocationDAO(); 32 | 33 | public abstract DAO getDocument_OrganisationDAO(); 34 | 35 | public abstract DAO getOrganisationDAO(); 36 | 37 | public abstract DAO getPersonDAO(); 38 | 39 | public abstract void openTransaction(); 40 | 41 | public abstract void endTransaction(); 42 | 43 | public abstract void rollback(); 44 | 45 | public static AbstractDAOFactory getFactory(int type) { 46 | switch (type) { 47 | case DAO_FACTORY: 48 | return new DAOFactory(); 49 | case MONGO_DAO_FACTORY: 50 | return new MongoDAOFactory(); 51 | default: 52 | return null; 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/DAO.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import java.sql.Connection; 4 | import java.sql.PreparedStatement; 5 | import java.sql.SQLException; 6 | 7 | public abstract class DAO { 8 | 9 | protected Connection connect = null; 10 | 11 | public 
DAO(Connection conn) { 12 | this.connect = conn; 13 | } 14 | 15 | /** 16 | * @param obj 17 | * @return boolean 18 | */ 19 | public abstract boolean create(E obj) throws SQLException; 20 | 21 | /** 22 | * @param obj 23 | * @return boolean 24 | */ 25 | public abstract boolean delete(E obj) throws SQLException; 26 | 27 | /** 28 | * @param obj 29 | * @return boolean 30 | */ 31 | public abstract boolean update(E obj) throws SQLException; 32 | 33 | /** 34 | * @param id 35 | * @return T 36 | */ 37 | public abstract E find(T id) throws SQLException; 38 | 39 | public static void closeQuietly(PreparedStatement ps) { 40 | if (ps != null) { 41 | try { 42 | ps.close(); 43 | } catch (SQLException se) { 44 | //ignoring 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/DatabaseConnection.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 4 | import fr.inria.anhalytics.commons.properties.CommonsProperties; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.SQLException; 11 | 12 | /** 13 | * @author azhar 14 | */ 15 | public class DatabaseConnection { 16 | 17 | private static final Logger logger = LoggerFactory.getLogger(DatabaseConnection.class); 18 | private static Connection connectDB; 19 | private static Connection connectBiblioDB; 20 | 21 | public static Connection getDBInstance() { 22 | try { 23 | if (connectDB == null) { 24 | 25 | final String mysqlPort = CommonsProperties.getMysql_port().isEmpty() ? 
"" : ":" + CommonsProperties.getMysql_port(); 26 | final String url = "jdbc:mysql://" 27 | + CommonsProperties.getMysql_host() + 28 | mysqlPort + "/" + CommonsProperties.getMysql_db() + "?characterEncoding=utf8"; 29 | 30 | connectDB = DriverManager.getConnection(url, CommonsProperties.getMysql_user(), CommonsProperties.getMysql_pass()); 31 | } 32 | } catch (SQLException e) { 33 | throw new ServiceException("Can't connect to MySQL. ", e); 34 | } 35 | return connectDB; 36 | } 37 | 38 | public static Connection getBiblioDBInstance() { 39 | try { 40 | if (connectBiblioDB == null) { 41 | 42 | final String mysqlPort = CommonsProperties.getMysql_port().isEmpty() ? "" : ":" + CommonsProperties.getMysql_port(); 43 | final String url = "jdbc:mysql://" 44 | + CommonsProperties.getMysql_host() + 45 | mysqlPort + "/" + CommonsProperties.getMysql_bibliodb() + "?characterEncoding=utf8"; 46 | 47 | connectBiblioDB = DriverManager.getConnection(url, CommonsProperties.getMysql_user(), CommonsProperties.getMysql_pass()); 48 | 49 | } 50 | } catch (SQLException e) { 51 | throw new ServiceException("Can't connect to MySQL. 
", e); 52 | } 53 | return connectBiblioDB; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/Document_OrganisationDAO.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException; 4 | import fr.inria.anhalytics.commons.entities.Document_Organisation; 5 | import fr.inria.anhalytics.commons.entities.Organisation; 6 | 7 | import java.sql.Connection; 8 | import java.sql.PreparedStatement; 9 | import java.sql.SQLException; 10 | import java.sql.Statement; 11 | 12 | /** 13 | * @author achraf 14 | */ 15 | public class Document_OrganisationDAO extends DAO { 16 | 17 | private static final String SQL_INSERT 18 | = "INSERT INTO DOCUMENT_ORGANISATION (docID, organisationID) VALUES (?, ?)"; 19 | 20 | public Document_OrganisationDAO(Connection conn) { 21 | super(conn); 22 | } 23 | 24 | @Override 25 | public boolean create(Document_Organisation obj) throws SQLException { 26 | boolean result = false; 27 | if (obj.getDoc() == null || obj.getOrgs() == null) { 28 | throw new IllegalArgumentException("No Document nor organisation is already created, the Affiliation ID is not null."); 29 | } 30 | 31 | PreparedStatement statement = null; 32 | 33 | try { 34 | statement = connect.prepareStatement(SQL_INSERT, Statement.RETURN_GENERATED_KEYS); 35 | 36 | 37 | for (Organisation org : obj.getOrgs()) { 38 | try { 39 | statement.setString(1, obj.getDoc().getDocID()); 40 | statement.setLong(2, org.getOrganisationId()); 41 | statement.executeUpdate(); 42 | 43 | result = true; 44 | } catch (MySQLIntegrityConstraintViolationException e) { 45 | } 46 | } 47 | } finally { 48 | closeQuietly(statement); 49 | } 50 | 51 | return result; 52 | } 53 | 54 | @Override 55 | public boolean delete(Document_Organisation obj) { 56 | throw 
new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 57 | } 58 | 59 | @Override 60 | public boolean update(Document_Organisation obj) { 61 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 62 | } 63 | 64 | @Override 65 | public Document_Organisation find(Long id) { 66 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/MongoDAOFactory.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import fr.inria.anhalytics.commons.dao.AbstractDAOFactory; 4 | import fr.inria.anhalytics.commons.dao.DAO; 5 | 6 | /** 7 | * 8 | * @author achraf 9 | */ 10 | public class MongoDAOFactory extends AbstractDAOFactory { 11 | 12 | 13 | @Override 14 | public DAO getDocumentDAO() { 15 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 16 | } 17 | 18 | @Override 19 | public DAO getPublicationDAO() { 20 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 21 | } 22 | 23 | @Override 24 | public DAO getMonographDAO() { 25 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 26 | } 27 | 28 | @Override 29 | public DAO getPublisherDAO() { 30 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
31 | } 32 | 33 | @Override 34 | public DAO getAddressDAO() { 35 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 36 | } 37 | 38 | @Override 39 | public DAO getAffiliationDAO() { 40 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 41 | } 42 | 43 | @Override 44 | public DAO getConference_EventDAO() { 45 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 46 | } 47 | 48 | @Override 49 | public DAO getIn_SerialDAO() { 50 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 51 | } 52 | 53 | @Override 54 | public DAO getLocationDAO() { 55 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 56 | } 57 | 58 | @Override 59 | public DAO getOrganisationDAO() { 60 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 61 | } 62 | 63 | @Override 64 | public DAO getPersonDAO() { 65 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 66 | } 67 | 68 | @Override 69 | public void openTransaction() { 70 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 71 | } 72 | 73 | @Override 74 | public void endTransaction() { 75 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 76 | } 77 | 78 | @Override 79 | public void rollback() { 80 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
81 | } 82 | 83 | @Override 84 | public DAO getDocument_OrganisationDAO() { 85 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/MonographDAO.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import fr.inria.anhalytics.commons.entities.Monograph; 4 | 5 | import java.sql.*; 6 | import java.util.logging.Level; 7 | import java.util.logging.Logger; 8 | 9 | /** 10 | * @author azhar 11 | */ 12 | public class MonographDAO extends DAO { 13 | 14 | private static final String SQL_INSERT 15 | = "INSERT INTO MONOGRAPH (type, title, shortname) VALUES (?, ?, ?)"; 16 | 17 | private static final String SQL_SELECT_MONOGR_BY_ID 18 | = "SELECT * FROM MONOGRAPH WHERE monographID = ?"; 19 | 20 | public MonographDAO(Connection conn) { 21 | super(conn); 22 | } 23 | 24 | @Override 25 | public boolean create(Monograph obj) { 26 | boolean result = false; 27 | if (obj.getMonographID() != null) { 28 | throw new IllegalArgumentException("Monograph is already created, the Monograph ID is not null."); 29 | } 30 | 31 | PreparedStatement statement = null; 32 | try { 33 | statement = connect.prepareStatement(SQL_INSERT, Statement.RETURN_GENERATED_KEYS); 34 | statement.setString(1, obj.getType()); 35 | statement.setString(2, obj.getTitle()); 36 | statement.setString(3, obj.getShortname()); 37 | 38 | int code = statement.executeUpdate(); 39 | ResultSet rs = statement.getGeneratedKeys(); 40 | 41 | if (rs.next()) { 42 | obj.setMonographID(rs.getLong(1)); 43 | } 44 | 45 | result = true; 46 | } catch (SQLException ex) { 47 | Logger.getLogger(DocumentDAO.class.getName()).log(Level.SEVERE, null, ex); 48 | } finally { 49 | closeQuietly(statement); 50 | } 51 | return result; 52 | 
} 53 | 54 | @Override 55 | public boolean delete(Monograph obj) { 56 | return false; 57 | } 58 | 59 | @Override 60 | public boolean update(Monograph obj) { 61 | return false; 62 | } 63 | 64 | @Override 65 | public Monograph find(Long monographID) { 66 | Monograph monograph = new Monograph(); 67 | PreparedStatement preparedStatement = null; 68 | try { 69 | preparedStatement = this.connect.prepareStatement(SQL_SELECT_MONOGR_BY_ID); 70 | //preparedStatement.setFetchSize(Integer.MIN_VALUE); 71 | preparedStatement.setLong(1, monographID); 72 | ResultSet result = preparedStatement.executeQuery(); 73 | if (result.first()) { 74 | monograph = new Monograph( 75 | monographID, 76 | result.getString("type"), 77 | result.getString("title"), 78 | result.getString("shortname")); 79 | } 80 | } catch (SQLException e) { 81 | e.printStackTrace(); 82 | } finally { 83 | closeQuietly(preparedStatement); 84 | } 85 | return monograph; 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/PublisherDAO.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao; 2 | 3 | import fr.inria.anhalytics.commons.entities.Publisher; 4 | import java.sql.Connection; 5 | import java.sql.PreparedStatement; 6 | import java.sql.ResultSet; 7 | import java.sql.SQLException; 8 | import java.sql.Statement; 9 | import org.slf4j.LoggerFactory; 10 | 11 | /** 12 | * 13 | * @author azhar 14 | */ 15 | public class PublisherDAO extends DAO { 16 | 17 | private static final org.slf4j.Logger logger = LoggerFactory.getLogger(PublisherDAO.class); 18 | private static final String SQL_INSERT 19 | = "INSERT INTO PUBLISHER (name) VALUES (?)"; 20 | 21 | private static final String SQL_SELECT_PUBLISHER_BY_NAME 22 | = "SELECT * FROM PUBLISHER WHERE name = ?"; 23 | 24 | private static final String SQL_SELECT_PUBLISHER_BY_ID 25 | = "SELECT * FROM 
PUBLISHER WHERE publisherID = ?"; 26 | 27 | public PublisherDAO(Connection conn) { 28 | super(conn); 29 | } 30 | 31 | @Override 32 | public boolean create(Publisher obj) throws SQLException { 33 | boolean result = false; 34 | if (obj.getPublisherID() != null) { 35 | throw new IllegalArgumentException("Publisher is already created, the document ID is not null."); 36 | } 37 | Publisher foundObj = findPublisherIfAlreadyStored(obj); 38 | if (foundObj != null) { 39 | obj.setPublisherID(obj.getPublisherID()); 40 | } else { 41 | PreparedStatement statement = null; 42 | try { 43 | statement = connect.prepareStatement(SQL_INSERT, Statement.RETURN_GENERATED_KEYS); 44 | statement.setString(1, obj.getName()); 45 | int code = statement.executeUpdate(); 46 | ResultSet rs = statement.getGeneratedKeys(); 47 | 48 | if (rs.next()) { 49 | obj.setPublisherID(rs.getLong(1)); 50 | } 51 | 52 | result = true; 53 | } finally { 54 | closeQuietly(statement); 55 | } 56 | } 57 | return result; 58 | } 59 | 60 | @Override 61 | public boolean delete(Publisher obj) { 62 | return false; 63 | } 64 | 65 | @Override 66 | public boolean update(Publisher obj) { 67 | return false; 68 | } 69 | 70 | @Override 71 | public Publisher find(Long publisher_id) throws SQLException { 72 | Publisher publisher = new Publisher(); 73 | PreparedStatement preparedStatement = null; 74 | preparedStatement = this.connect.prepareStatement(SQL_SELECT_PUBLISHER_BY_ID); 75 | try { 76 | preparedStatement.setFetchSize(Integer.MIN_VALUE); 77 | preparedStatement.setLong(1, publisher_id); 78 | ResultSet rs = preparedStatement.executeQuery(); 79 | if (rs.first()) { 80 | publisher = new Publisher( 81 | publisher_id, 82 | rs.getString("name") 83 | ); 84 | } 85 | } catch (SQLException ex) { 86 | logger.error(ex.getMessage()); 87 | } finally { 88 | closeQuietly(preparedStatement); 89 | } 90 | return publisher; 91 | } 92 | 93 | private Publisher findPublisherIfAlreadyStored(Publisher obj) throws SQLException { 94 | Publisher publisher = 
null; 95 | PreparedStatement preparedStatement = null; 96 | preparedStatement = this.connect.prepareStatement(SQL_SELECT_PUBLISHER_BY_NAME); 97 | try { 98 | preparedStatement.setString(1, obj.getName()); 99 | ResultSet rs = preparedStatement.executeQuery(); 100 | if (rs.first()) { 101 | publisher = new Publisher( 102 | rs.getLong("publisherID"), 103 | rs.getString("name") 104 | ); 105 | } 106 | } catch (SQLException ex) { 107 | logger.error(ex.getMessage()); 108 | } finally { 109 | closeQuietly(preparedStatement); 110 | } 111 | return publisher; 112 | 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/anhalytics/DAOFactory.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.dao.anhalytics; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.PropertyException; 4 | import fr.inria.anhalytics.commons.properties.CommonsProperties; 5 | import fr.inria.anhalytics.commons.dao.AbstractDAOFactory; 6 | import fr.inria.anhalytics.commons.dao.DAO; 7 | import fr.inria.anhalytics.commons.dao.DatabaseConnection; 8 | import fr.inria.anhalytics.commons.dao.DocumentDAO; 9 | import fr.inria.anhalytics.commons.dao.In_SerialDAO; 10 | import fr.inria.anhalytics.commons.dao.AddressDAO; 11 | import fr.inria.anhalytics.commons.dao.PublisherDAO; 12 | import fr.inria.anhalytics.commons.dao.MonographDAO; 13 | import fr.inria.anhalytics.commons.dao.PublicationDAO; 14 | import fr.inria.anhalytics.commons.dao.Conference_EventDAO; 15 | import fr.inria.anhalytics.commons.dao.Document_OrganisationDAO; 16 | import fr.inria.anhalytics.commons.dao.PersonDAO; 17 | import java.sql.Connection; 18 | import java.sql.SQLException; 19 | import java.util.logging.Level; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | /** 24 | * 25 | * @author azhar 26 | */ 27 | public class DAOFactory 
extends AbstractDAOFactory {

    private static final Logger logger = LoggerFactory.getLogger(DAOFactory.class);

    /** Connection shared by every DAO created by this factory. */
    protected static Connection conn = null;

    /**
     * Loads anhalytics.properties and fetches the shared connection from
     * DatabaseConnection, unless a connection is already held.
     *
     * @throws PropertyException when the properties cannot be loaded
     */
    public static void initConnection() {
        if (conn == null) {
            try {
                CommonsProperties.init("anhalytics.properties", false);
            } catch (Exception exp) {
                throw new PropertyException("Cannot open file of properties anhalytics.properties", exp);
            }
            conn = DatabaseConnection.getDBInstance();
        }
    }

    public DAO getDocumentDAO() {
        return new DocumentDAO(conn);
    }

    public DAO getAddressDAO() {
        return new AddressDAO(conn);
    }

    public DAO getAffiliationDAO() {
        return new AffiliationDAO(conn);
    }

    public DAO getConference_EventDAO() {
        return new Conference_EventDAO(conn);
    }

    public DAO getIn_SerialDAO() {
        return new In_SerialDAO(conn);
    }

    public DAO getLocationDAO() {
        return new LocationDAO(conn);
    }

    public DAO getMonographDAO() {
        return new MonographDAO(conn);
    }

    public DAO getOrganisationDAO() {
        return new OrganisationDAO(conn);
    }

    public DAO getPersonDAO() {
        return new PersonDAO(conn);
    }

    public DAO getPublicationDAO() {
        return new PublicationDAO(conn);
    }

    public DAO getPublisherDAO() {
        return new PublisherDAO(conn);
    }

    /** Disables auto-commit on the shared connection; a failure is logged only. */
    public void openTransaction() {
        try {
            conn.setAutoCommit(false);
            logger.info("Storing entry");
        } catch (SQLException e) {
            logger.error("There was an error disabling autocommit", e);
        }
    }

    /** Commits the pending changes on the shared connection; a failure is logged only. */
    public void endTransaction() {
        try {
            conn.commit();
            logger.info("Stored");
        } catch (SQLException ex) {
            logger.error("Error happened while commiting the changes.", ex);
        }
    }

    /** Rolls the current transaction back; a failure is logged only. */
    public void rollback() {
        try {
            // We rollback the transaction, to the last SavePoint!
            conn.rollback();
            logger.info("The transaction was rollback.");
        } catch (SQLException e1) {
            logger.error("There was an error making a rollback", e1);

        }
    }

    /**
     * Closes the shared connection, if any, and forgets it so that the next
     * initConnection() asks DatabaseConnection for a connection again.
     */
    public static void closeConnection() {
        if (conn == null) {
            // Nothing was opened, or it has already been closed.
            return;
        }
        try {
            conn.close();
        } catch (SQLException ex) {
            logger.error("Error while closing the database connection.", ex);
        } finally {
            conn = null;
        }
    }

    @Override
    public DAO getDocument_OrganisationDAO() {
        return new Document_OrganisationDAO(conn);
    }
}
--------------------------------------------------------------------------------
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/biblio/AbstractBiblioDAOFactory.java:
--------------------------------------------------------------------------------
package fr.inria.anhalytics.commons.dao.biblio;

import fr.inria.anhalytics.commons.dao.DAO;


/**
 *
 * @author azhar
 */
public abstract class AbstractBiblioDAOFactory {

    public static final int DAO_FACTORY = 0;

    public static final int MONGO_DAO_FACTORY = 1;

    public abstract DAO getDocumentDAO();

    public abstract DAO getPublicationDAO();

    public abstract DAO getMonographDAO();

    public abstract DAO getPublisherDAO();

    public abstract DAO getAddressDAO();

    public abstract DAO getConference_EventDAO();

    public abstract DAO getIn_SerialDAO();

    public abstract DAO getPersonDAO();

    public abstract void openTransaction();

    public abstract void endTransaction();

    public abstract void rollback();

    /**
     * Creates the factory matching the given selector.
     *
     * @param type DAO_FACTORY (the only selector currently handled)
     * @return a new BiblioDAOFactory, or null for any other value
     */
    public static AbstractBiblioDAOFactory getFactory(int type) {
        switch (type) {
            case DAO_FACTORY:
                return new fr.inria.anhalytics.commons.dao.biblio.BiblioDAOFactory();
            //case MONGO_DAO_FACTORY:
            //    return new BiblioMongoDAOFactory();
            default:
                return null;
        }
    }
}

-------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/dao/biblio/BiblioDAOFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package fr.inria.anhalytics.commons.dao.biblio; 7 | 8 | import fr.inria.anhalytics.commons.exceptions.PropertyException; 9 | import fr.inria.anhalytics.commons.properties.CommonsProperties; 10 | import fr.inria.anhalytics.commons.dao.AddressDAO; 11 | import fr.inria.anhalytics.commons.dao.Conference_EventDAO; 12 | import fr.inria.anhalytics.commons.dao.DatabaseConnection; 13 | import fr.inria.anhalytics.commons.dao.DAO; 14 | import fr.inria.anhalytics.commons.dao.DocumentDAO; 15 | import fr.inria.anhalytics.commons.dao.In_SerialDAO; 16 | import fr.inria.anhalytics.commons.dao.MonographDAO; 17 | import fr.inria.anhalytics.commons.dao.PersonDAO; 18 | import fr.inria.anhalytics.commons.dao.PublicationDAO; 19 | import fr.inria.anhalytics.commons.dao.PublisherDAO; 20 | import java.sql.Connection; 21 | import java.sql.SQLException; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | /** 26 | * 27 | * @author achraf 28 | */ 29 | public class BiblioDAOFactory extends AbstractBiblioDAOFactory { 30 | 31 | private static final Logger logger = LoggerFactory.getLogger(BiblioDAOFactory.class); 32 | protected static Connection conn = null; 33 | 34 | public static void initConnection() { 35 | if (conn == null) { 36 | try { 37 | CommonsProperties.init("anhalytics.properties", false); 38 | } catch (Exception exp) { 39 | throw new PropertyException("Cannot open file of harvest properties ingest.properties", exp); 40 | } 41 | conn = DatabaseConnection.getBiblioDBInstance(); 42 | } 43 | } 44 | 45 | public 
DAO getDocumentDAO() { 46 | return new DocumentDAO(conn); 47 | } 48 | 49 | public DAO getAddressDAO() { 50 | return new AddressDAO(conn); 51 | } 52 | 53 | public DAO getConference_EventDAO() { 54 | return new Conference_EventDAO(conn); 55 | } 56 | 57 | public DAO getIn_SerialDAO() { 58 | return new In_SerialDAO(conn); 59 | } 60 | 61 | public DAO getMonographDAO() { 62 | return new MonographDAO(conn); 63 | } 64 | 65 | public DAO getPersonDAO() { 66 | return new PersonDAO(conn); 67 | } 68 | 69 | public DAO getPublicationDAO() { 70 | return new PublicationDAO(conn); 71 | } 72 | 73 | public DAO getPublisherDAO() { 74 | return new PublisherDAO(conn); 75 | } 76 | 77 | public void openTransaction() { 78 | try { 79 | conn.setAutoCommit(false); 80 | logger.info("Storing entry"); 81 | } catch (SQLException e) { 82 | logger.error("There was an error disabling autocommit"); 83 | } 84 | } 85 | 86 | public void endTransaction() { 87 | try { 88 | conn.commit(); 89 | logger.info("Entry stored"); 90 | } catch (SQLException ex) { 91 | logger.error("Error happened while commiting the changes."); 92 | } 93 | } 94 | 95 | public void rollback() { 96 | try { 97 | // We rollback the transaction, to the last SavePoint! 
98 | conn.rollback(); 99 | logger.info("The transaction was rollback."); 100 | } catch (SQLException e1) { 101 | logger.error("There was an error making a rollback"); 102 | 103 | } 104 | } 105 | 106 | public static void closeConnection() { 107 | try { 108 | conn.close(); 109 | } catch (SQLException ex) { 110 | ex.printStackTrace(); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/data/Annotation.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.data; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class Annotation { 8 | private String json; 9 | private String repositoryDocId; 10 | private String anhalyticsId; 11 | private boolean isIndexed; 12 | 13 | public Annotation(String json, String repositoryDocId, String anhalyticsId, boolean isIndexed) { 14 | this.json = json; 15 | this.repositoryDocId = repositoryDocId; 16 | this.anhalyticsId = anhalyticsId; 17 | this.isIndexed = isIndexed; 18 | } 19 | 20 | /** 21 | * @return the doi 22 | */ 23 | public String getJson() { 24 | return json; 25 | } 26 | 27 | /** 28 | * @param doi the doi to set 29 | */ 30 | public void setJson(String json) { 31 | this.json = json; 32 | } 33 | 34 | /** 35 | * @return the repositoryDocId 36 | */ 37 | public String getRepositoryDocId() { 38 | return repositoryDocId; 39 | } 40 | 41 | /** 42 | * @param repositoryDocId the repositoryDocId to set 43 | */ 44 | public void setRepositoryDocId(String repositoryDocId) { 45 | this.repositoryDocId = repositoryDocId; 46 | } 47 | 48 | /** 49 | * @return the anhalyticsId 50 | */ 51 | public String getAnhalyticsId() { 52 | return anhalyticsId; 53 | } 54 | 55 | /** 56 | * @param anhalyticsId the anhalyticsId to set 57 | */ 58 | public void setAnhalyticsId(String anhalyticsId) { 59 | this.anhalyticsId = anhalyticsId; 60 | } 61 | 62 | /** 63 | * @return the 
isIndexed 64 | */ 65 | public boolean isIsIndexed() { 66 | return isIndexed; 67 | } 68 | 69 | /** 70 | * @param isIndexed the isIndexed to set 71 | */ 72 | public void setIsIndexed(boolean isIndexed) { 73 | this.isIndexed = isIndexed; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/data/BinaryFile.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.data; 2 | 3 | import java.io.InputStream; 4 | 5 | /** 6 | * 7 | * @author azhar 8 | * Represents the file attached to the tei. 9 | */ 10 | public class BinaryFile extends File { 11 | 12 | //date the file is available 13 | private String embargoDate; 14 | //is it an annex or the main file 15 | private boolean isAnnexFile; 16 | private InputStream stream = null; 17 | 18 | public BinaryFile(){}; 19 | public BinaryFile(String source, String url, String repositoryDocId, String doi, String documentType, String fileType, String fileName, String repositoryDocVersion, String anhalyticsId, String embargoDate){ 20 | super(source, url, repositoryDocId, repositoryDocVersion, anhalyticsId, documentType, fileType, fileName, doi); 21 | this.embargoDate = embargoDate; 22 | } 23 | 24 | /** 25 | * @return the embargoDate 26 | */ 27 | public String getEmbargoDate() { 28 | return embargoDate; 29 | } 30 | 31 | /** 32 | * @param embargoDate the embargoDate to set 33 | */ 34 | public void setEmbargoDate(String embargoDate) { 35 | this.embargoDate = embargoDate; 36 | } 37 | 38 | /** 39 | * @return the isAnnexFile 40 | */ 41 | public boolean isIsAnnexFile() { 42 | return isAnnexFile; 43 | } 44 | 45 | /** 46 | * @param isAnnexFile the isAnnexFile to set 47 | */ 48 | public void setIsAnnexFile(boolean isAnnexFile) { 49 | this.isAnnexFile = isAnnexFile; 50 | } 51 | 52 | /** 53 | * @return the stream 54 | */ 55 | public InputStream getStream() { 56 | return stream; 
/**
 * Descriptor of a harvested resource: where it comes from, how it is
 * identified in the source repository and in Anhalytics, and what kind of
 * file it is. Plain mutable bean without validation.
 *
 * @author azhar
 */
public class File {

    // Provenance
    private String source;
    private String url; // link of the resource

    // Identification
    private String repositoryDocId;
    private String repositoryDocVersion;
    private String anhalyticsId;
    private String doi;

    // Nature of the file
    private String documentType;
    private String fileType;
    private String fileName;

    /** Creates a descriptor with every property left {@code null}. */
    public File() {
    }

    /**
     * Creates a fully populated descriptor; arguments are stored as given.
     */
    public File(String source, String url, String repositoryDocId, String repositoryDocVersion, String anhalyticsId, String documentType, String fileType, String fileName, String doi) {
        this.source = source;
        this.url = url;
        this.repositoryDocId = repositoryDocId;
        this.repositoryDocVersion = repositoryDocVersion;
        this.anhalyticsId = anhalyticsId;
        this.doi = doi;
        this.documentType = documentType;
        this.fileType = fileType;
        this.fileName = fileName;
    }

    /** @return the source */
    public String getSource() {
        return this.source;
    }

    /** @param source the source to set */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the link of the resource */
    public String getUrl() {
        return this.url;
    }

    /** @param url the link of the resource */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the document id in the source repository */
    public String getRepositoryDocId() {
        return this.repositoryDocId;
    }

    /** @param repositoryDocId the document id in the source repository */
    public void setRepositoryDocId(String repositoryDocId) {
        this.repositoryDocId = repositoryDocId;
    }

    /** @return the document version in the source repository */
    public String getRepositoryDocVersion() {
        return this.repositoryDocVersion;
    }

    /** @param repositoryDocVersion the document version in the source repository */
    public void setRepositoryDocVersion(String repositoryDocVersion) {
        this.repositoryDocVersion = repositoryDocVersion;
    }

    /** @return the Anhalytics id */
    public String getAnhalyticsId() {
        return this.anhalyticsId;
    }

    /** @param anhalyticsId the Anhalytics id */
    public void setAnhalyticsId(String anhalyticsId) {
        this.anhalyticsId = anhalyticsId;
    }

    /** @return the DOI */
    public String getDoi() {
        return this.doi;
    }

    /** @param doi the DOI to set */
    public void setDoi(String doi) {
        this.doi = doi;
    }

    /** @return the document type */
    public String getDocumentType() {
        return this.documentType;
    }

    /** @param documentType the document type to set */
    public void setDocumentType(String documentType) {
        this.documentType = documentType;
    }

    /** @return the file type */
    public String getFileType() {
        return this.fileType;
    }

    /** @param fileType the file type to set */
    public void setFileType(String fileType) {
        this.fileType = fileType;
    }

    /** @return the file name */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to set */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }
}
-------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/data/Processings.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.data; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public enum Processings { 8 | 9 | GROBID("grobid"), 10 | NERD("nerd"), 11 | KEYTERM("keyterm"), 12 | QUANTITIES("quantities"), 13 | PDFQUANTITIES("PDFQUANTITIES"); 14 | 15 | private String name; 16 | 17 | private Processings(String name) { 18 | this.name = name; 19 | } 20 | 21 | public String getName() { 22 | return name; 23 | } 24 | 25 | public static boolean contains(String test) { 26 | for (Processings c : Processings.values()) { 27 | if (c.getName().equals(test)) { 28 | return true; 29 | } 30 | } 31 | return false; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Address.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Address { 11 | 12 | private Long addressId; 13 | private String addrLine = ""; 14 | private String postBox = ""; 15 | private String postCode = ""; 16 | private String settlement = ""; 17 | private String region = ""; 18 | private Country country; 19 | 20 | public Address() { 21 | } 22 | 23 | public Address(Long addressId, String addrLine, String postBox, String postCode, String settlement, String region, Country country) { 24 | this.addressId = addressId; 25 | this.addrLine = addrLine; 26 | this.postBox = postBox; 27 | this.postCode = postCode; 28 | this.settlement = settlement; 29 | this.region = region; 30 | this.country = country; 31 | } 32 | 33 | /** 34 | * @return the addressId 
35 | */ 36 | public Long getAddressId() { 37 | return addressId; 38 | } 39 | 40 | /** 41 | * @param addressId the addressId to set 42 | */ 43 | public void setAddressId(Long addressId) { 44 | this.addressId = addressId; 45 | } 46 | 47 | /** 48 | * @return the addrLine 49 | */ 50 | public String getAddrLine() { 51 | return addrLine; 52 | } 53 | 54 | /** 55 | * @param addrLine the addrLine to set 56 | */ 57 | public void setAddrLine(String addrLine) { 58 | if (addrLine.length() > 150) { 59 | addrLine = addrLine.substring(0, 149); 60 | } 61 | this.addrLine = addrLine; 62 | } 63 | 64 | /** 65 | * @return the postBox 66 | */ 67 | public String getPostBox() { 68 | return postBox; 69 | } 70 | 71 | /** 72 | * @param postBox the postBox to set 73 | */ 74 | public void setPostBox(String postBox) { 75 | if (postBox.length() > 45) { 76 | postBox = postBox.substring(0, 44); 77 | } 78 | this.postBox = postBox; 79 | } 80 | 81 | /** 82 | * @return the postCode 83 | */ 84 | public String getPostCode() { 85 | return postCode; 86 | } 87 | 88 | /** 89 | * @param postCode the postCode to set 90 | */ 91 | public void setPostCode(String postCode) { 92 | if (postCode.length() > 45) { 93 | postCode = postCode.substring(0, 44); 94 | } 95 | this.postCode = postCode; 96 | } 97 | 98 | /** 99 | * @return the Settlement 100 | */ 101 | public String getSettlement() { 102 | return settlement; 103 | } 104 | 105 | /** 106 | * @param Settlement the Settlement to set 107 | */ 108 | public void setSettlement(String settlement) { 109 | if (settlement.length() > 45) { 110 | settlement = settlement.substring(0, 44); 111 | } 112 | this.settlement = settlement; 113 | } 114 | 115 | /** 116 | * @return the region 117 | */ 118 | public String getRegion() { 119 | return region; 120 | } 121 | 122 | /** 123 | * @param region the region to set 124 | */ 125 | public void setRegion(String region) { 126 | if (region.length() > 45) { 127 | region = region.substring(0, 44); 128 | } 129 | this.region = region; 130 | } 
131 | 132 | /** 133 | * @return the country 134 | */ 135 | public Country getCountry() { 136 | return country; 137 | } 138 | 139 | /** 140 | * @param country the country to set 141 | */ 142 | public void setCountry(Country country) { 143 | this.country = country; 144 | } 145 | 146 | public Map getAddressDocument() { 147 | Map addressDocument = new HashMap(); 148 | addressDocument.put("addressId", this.getAddressId()); 149 | addressDocument.put("addrLine", this.getAddrLine()); 150 | if (this.getCountry() != null) { 151 | addressDocument.put("country", this.getCountry().getIso()); 152 | } else { 153 | addressDocument.put("country", ""); 154 | } 155 | addressDocument.put("postBox", this.getPostBox()); 156 | addressDocument.put("postCode", this.getPostCode()); 157 | addressDocument.put("region", this.getRegion()); 158 | addressDocument.put("settlement", this.getSettlement()); 159 | return addressDocument; 160 | 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Affiliation.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Date; 5 | import java.util.List; 6 | 7 | /** 8 | * 9 | * @author azhar 10 | */ 11 | public class Affiliation { 12 | private Long affiliationId; 13 | private List organisations; 14 | private Person person; 15 | private Date from_date; 16 | private Date until_date; 17 | 18 | public Affiliation(){} 19 | public Affiliation(Long affiliationId ,List organisations ,Person person ,Date from_date ,Date until_date){ 20 | this.affiliationId = affiliationId; 21 | this.organisations = organisations; 22 | this.person = person; 23 | this.from_date = from_date; 24 | this.until_date = until_date; 25 | } 26 | 27 | /** 28 | * @return the affiliationId 29 | */ 30 | public Long getAffiliationId() { 31 | return 
affiliationId; 32 | } 33 | 34 | /** 35 | * @param affiliationId the affiliationId to set 36 | */ 37 | public void setAffiliationId(Long affiliationId) { 38 | this.affiliationId = affiliationId; 39 | } 40 | 41 | /** 42 | * @return the organisation 43 | */ 44 | public List getOrganisations() { 45 | return organisations; 46 | } 47 | 48 | /** 49 | * @param organisation the organisation to set 50 | */ 51 | public void setOrganisation(List organisations) { 52 | this.organisations = organisations; 53 | } 54 | 55 | /** 56 | * @param organisation the organisation to set 57 | */ 58 | public void addOrganisation(Organisation organisation) { 59 | if(this.organisations == null) 60 | this.organisations = new ArrayList(); 61 | this.organisations.add(organisation); 62 | } 63 | 64 | /** 65 | * @return the person 66 | */ 67 | public Person getPerson() { 68 | return person; 69 | } 70 | 71 | /** 72 | * @param person the person to set 73 | */ 74 | public void setPerson(Person person) { 75 | this.person = person; 76 | } 77 | 78 | /** 79 | * @return the from_date 80 | */ 81 | public Date getFrom_date() { 82 | return from_date; 83 | } 84 | 85 | /** 86 | * @param from_date the from_date to set 87 | */ 88 | public void setFrom_date(Date from_date) { 89 | this.from_date = from_date; 90 | } 91 | 92 | /** 93 | * @return the until_date 94 | */ 95 | public Date getUntil_date() { 96 | return until_date; 97 | } 98 | 99 | /** 100 | * @param until_date the until_date to set 101 | */ 102 | public void setUntil_date(Date until_date) { 103 | this.until_date = until_date; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Author.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class Author { 8 | 9 | private Document document; 10 | private 
Person person; 11 | private int rank; 12 | private int correp; 13 | 14 | public Author() { 15 | } 16 | 17 | public Author(Document document, Person person, int rank, int correp) { 18 | this.document = document; 19 | this.person = person; 20 | this.rank = rank; 21 | this.correp = correp; 22 | } 23 | 24 | /** 25 | * @return the document 26 | */ 27 | public Document getDocument() { 28 | return document; 29 | } 30 | 31 | /** 32 | * @param document the document to set 33 | */ 34 | public void setDocument(Document document) { 35 | this.document = document; 36 | } 37 | 38 | /** 39 | * @return the person 40 | */ 41 | public Person getPerson() { 42 | return person; 43 | } 44 | 45 | /** 46 | * @param person the person to set 47 | */ 48 | public void setPerson(Person person) { 49 | this.person = person; 50 | } 51 | 52 | /** 53 | * @return the rank 54 | */ 55 | public int getRank() { 56 | return rank; 57 | } 58 | 59 | /** 60 | * @param rank the rank to set 61 | */ 62 | public void setRank(int rank) { 63 | this.rank = rank; 64 | } 65 | 66 | /** 67 | * @return the correp 68 | */ 69 | public int getCorrep() { 70 | return correp; 71 | } 72 | 73 | /** 74 | * @param correp the correp to set 75 | */ 76 | public void setCorrep(int correp) { 77 | this.correp = correp; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Collection.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Collection { 11 | 12 | private Long collectionID; 13 | private String title = ""; 14 | 15 | public Collection() { 16 | } 17 | 18 | public Collection(Long collectionID, String title) { 19 | this.collectionID = collectionID; 20 | this.title = title; 21 | } 22 | 23 | /** 24 | * @return the 
/**
 * A conference, identified by {@code confID} and described by its title.
 * Plain mutable bean without validation; the title defaults to "".
 *
 * @author azhar
 */
public class Conference {

    private Long confID;
    private String title = "";

    /** Creates a conference with no id and an empty title. */
    public Conference() {
    }

    /** Creates a conference with the given id and title, stored as given. */
    public Conference(Long confID, String title) {
        this.confID = confID;
        this.title = title;
    }

    /** @return the confID */
    public Long getConfID() {
        return this.confID;
    }

    /** @param confID the confID to set */
    public void setConfID(Long confID) {
        this.confID = confID;
    }

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to set */
    public void setTitle(String title) {
        this.title = title;
    }
}
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Conference_Event.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Conference_Event { 11 | 12 | private Long conf_eventID; 13 | private String start_date = ""; 14 | private String end_date = ""; 15 | private Monograph monograph; 16 | private Conference conference; 17 | private Address address; 18 | 19 | public Conference_Event() { 20 | } 21 | 22 | public Conference_Event(Long conf_eventID, String start_date, String end_date, Monograph monograph, Conference conference, Address address) { 23 | this.conf_eventID = conf_eventID; 24 | this.start_date = start_date; 25 | this.end_date = end_date; 26 | this.monograph = monograph; 27 | this.conference = conference; 28 | this.address = address; 29 | } 30 | 31 | /** 32 | * @return the conf_eventID 33 | */ 34 | public Long getConf_eventID() { 35 | return conf_eventID; 36 | } 37 | 38 | /** 39 | * @param conf_eventID the conf_eventID to set 40 | */ 41 | public void setConf_eventID(Long conf_eventID) { 42 | this.conf_eventID = conf_eventID; 43 | } 44 | 45 | /** 46 | * @return the start_date 47 | */ 48 | public String getStart_date() { 49 | return start_date; 50 | } 51 | 52 | /** 53 | * @param start_date the start_date to set 54 | */ 55 | public void setStart_date(String start_date) { 56 | if (start_date.length() > 45) { 57 | start_date = start_date.substring(0, 44); 58 | } 59 | this.start_date = start_date; 60 | } 61 | 62 | /** 63 | * @return the end_date 64 | */ 65 | public String getEnd_date() { 66 | return end_date; 67 | } 68 | 69 | /** 70 | * @param end_date the end_date to set 71 | */ 72 | public void setEnd_date(String end_date) { 73 | if (end_date.length() > 45) { 74 | end_date = end_date.substring(0, 44); 75 | } 76 | this.end_date = end_date; 77 
/**
 * A country, identified by {@code countryID} and by its ISO code.
 *
 * {@link #setIso(String)} caps the code at two characters; the all-args
 * constructor stores its arguments unchanged.
 *
 * @author azhar
 */
public class Country {

    /** Longest ISO code accepted unmodified by {@link #setIso(String)}. */
    private static final int ISO_MAX_LENGTH = 2;

    private Long countryID;
    private String iso = "";

    public Country() {
    }

    /** Creates a country with the given id and ISO code, stored as given. */
    public Country(Long countryID, String iso) {
        this.countryID = countryID;
        this.iso = iso;
    }

    /**
     * @return the countryID
     */
    public Long getCountryID() {
        return countryID;
    }

    /**
     * @param countryID the countryID to set
     */
    public void setCountryID(Long countryID) {
        this.countryID = countryID;
    }

    /**
     * @return the iso
     */
    public String getIso() {
        return iso;
    }

    /**
     * Sets the ISO code, keeping at most its first two characters.
     *
     * The previous code cut an over-long value with {@code substring(0, 1)},
     * keeping a single character (e.g. "FRA" became "F"); it now keeps two
     * ("FR"). {@code null} is mapped to the empty string, matching the field
     * initialiser, instead of raising a NullPointerException.
     *
     * @param iso the iso to set
     */
    public void setIso(String iso) {
        if (iso == null) {
            iso = "";
        } else if (iso.length() > ISO_MAX_LENGTH) {
            iso = iso.substring(0, ISO_MAX_LENGTH);
        }
        this.iso = iso;
    }
}
So this should compare User by ID 59 | * only. 60 | * 61 | * @see java.lang.Object#equals(java.lang.Object) 62 | */ 63 | @Override 64 | public boolean equals(Object other) { 65 | return (other instanceof Document) && (docID != null) 66 | ? docID.equals(((Document) other).docID) 67 | : (other == this); 68 | } 69 | 70 | /** 71 | * The user ID is unique for each User. So User with same ID should return 72 | * same hashcode. 73 | * 74 | * @see java.lang.Object#hashCode() 75 | */ 76 | @Override 77 | public int hashCode() { 78 | return (docID != null) 79 | ? (this.getClass().hashCode() + docID.hashCode()) 80 | : super.hashCode(); 81 | } 82 | 83 | /** 84 | * Returns the String representation of this User. Not required, it just 85 | * pleases reading logs. 86 | * 87 | * @see java.lang.Object#toString() 88 | */ 89 | @Override 90 | public String toString() { 91 | return String.format("User[docID=%d,version=%s]", 92 | docID, version); 93 | } 94 | 95 | public Map getDocumentDocument() { 96 | Map documentDocument = new HashMap(); 97 | documentDocument.put("docID", this.getDocID()); 98 | documentDocument.put("version", this.getVersion()); 99 | 100 | List> identifiers = new ArrayList>(); 101 | for (Document_Identifier di : this.getDocument_Identifiers()) { 102 | Map identifier = new HashMap(); 103 | identifier.put("id", di.getId()); 104 | identifier.put("type", di.getType()); 105 | identifiers.add(identifier); 106 | } 107 | documentDocument.put("identifiers", identifiers); 108 | 109 | return documentDocument; 110 | } 111 | 112 | 113 | /** 114 | * @return the document_Identifiers 115 | */ 116 | public void addDocument_Identifier(Document_Identifier di) { 117 | if (this.document_Identifiers == null) { 118 | this.document_Identifiers = new ArrayList(); 119 | } 120 | document_Identifiers.add(di); 121 | } 122 | 123 | /** 124 | * @return the document_Identifiers 125 | */ 126 | public List getDocument_Identifiers() { 127 | if (this.document_Identifiers == null) { 128 | 
/**
 * One identifier of a document: the identifier value and its type.
 *
 * The setters cap the value at 150 characters and the type at 55; the
 * two-argument constructor stores its arguments unchanged.
 *
 * @author azhar
 */
public class Document_Identifier {

    /** Longest id accepted unmodified by {@link #setId(String)}. */
    private static final int ID_MAX_LENGTH = 150;
    /** Longest type accepted unmodified by {@link #setType(String)}. */
    private static final int TYPE_MAX_LENGTH = 55;

    private String id = "";
    private String type = "";

    /** Creates an identifier with the given value and type, stored as given. */
    public Document_Identifier(String id, String type) {
        this.id = id;
        this.type = type;
    }

    public Document_Identifier() {
    }

    /**
     * Caps {@code value} at {@code maxLength} characters.
     *
     * The previous code cut an over-long value to {@code maxLength - 1}
     * characters although values of exactly {@code maxLength} were accepted;
     * the full {@code maxLength} characters are now kept. {@code null} is
     * mapped to the empty string, matching the field initialisers, instead of
     * raising a NullPointerException.
     */
    private static String truncate(String value, int maxLength) {
        if (value == null) {
            return "";
        }
        return value.length() > maxLength ? value.substring(0, maxLength) : value;
    }

    /**
     * @return the id
     */
    public String getId() {
        return id;
    }

    /**
     * @param id the id to set; capped at 150 characters, null becomes ""
     */
    public void setId(String id) {
        this.id = truncate(id, ID_MAX_LENGTH);
    }

    /**
     * @return the type
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type to set; capped at 55 characters, null becomes ""
     */
    public void setType(String type) {
        this.type = truncate(type, TYPE_MAX_LENGTH);
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    public String getVersion() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}
this.person = person; 19 | this.publication = publication; 20 | } 21 | 22 | /** 23 | * @return the rank 24 | */ 25 | public int getRank() { 26 | return rank; 27 | } 28 | 29 | /** 30 | * @param rank the rank to set 31 | */ 32 | public void setRank(int rank) { 33 | this.rank = rank; 34 | } 35 | 36 | /** 37 | * @return the person 38 | */ 39 | public Person getPerson() { 40 | return person; 41 | } 42 | 43 | /** 44 | * @param person the person to set 45 | */ 46 | public void setPerson(Person person) { 47 | this.person = person; 48 | } 49 | 50 | /** 51 | * @return the publication 52 | */ 53 | public Publication getPublication() { 54 | return publication; 55 | } 56 | 57 | /** 58 | * @param publication the publication to set 59 | */ 60 | public void setPublication(Publication publication) { 61 | this.publication = publication; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/In_Serial.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class In_Serial { 8 | private Monograph mg; 9 | private Journal j; 10 | private Collection c; 11 | private String volume=""; 12 | private String issue=""; 13 | public In_Serial(){} 14 | public In_Serial(Monograph mg, Journal j, Collection c, String volume, String issue){ 15 | this.mg = mg; 16 | this.j = j; 17 | this.c = c; 18 | this.volume = volume; 19 | this.issue = issue; 20 | } 21 | 22 | /** 23 | * @return the mg 24 | */ 25 | public Monograph getMg() { 26 | return mg; 27 | } 28 | 29 | /** 30 | * @param mg the mg to set 31 | */ 32 | public void setMg(Monograph mg) { 33 | this.mg = mg; 34 | } 35 | 36 | /** 37 | * @return the j 38 | */ 39 | public Journal getJ() { 40 | return j; 41 | } 42 | 43 | /** 44 | * @param j the j to set 45 | */ 46 | public void setJ(Journal j) { 47 | this.j = 
j; 48 | } 49 | 50 | /** 51 | * @return the c 52 | */ 53 | public Collection getC() { 54 | return c; 55 | } 56 | 57 | /** 58 | * @param c the c to set 59 | */ 60 | public void setC(Collection c) { 61 | this.c = c; 62 | } 63 | 64 | /** 65 | * @return the volume 66 | */ 67 | public String getVolume() { 68 | return volume; 69 | } 70 | 71 | /** 72 | * @param volume the volume to set 73 | */ 74 | public void setVolume(String volume) { 75 | if(volume.length() > 45) 76 | volume = volume.substring(0, 44); 77 | this.volume = volume; 78 | } 79 | 80 | /** 81 | * @return the number 82 | */ 83 | public String getIssue() { 84 | return issue; 85 | } 86 | 87 | /** 88 | * @param number the number to set 89 | */ 90 | public void setIssue(String issue) { 91 | if(issue.length() > 45) 92 | issue = issue.substring(0, 44); 93 | this.issue = issue; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Journal.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Journal { 11 | 12 | private Long journalID; 13 | 14 | private String title = ""; 15 | 16 | public Journal() { 17 | } 18 | 19 | public Journal(Long journalID, String title) { 20 | this.journalID = journalID; 21 | this.title = title; 22 | } 23 | 24 | /** 25 | * @return the journalID 26 | */ 27 | public Long getJournalID() { 28 | return journalID; 29 | } 30 | 31 | /** 32 | * @param journalID the journalID to set 33 | */ 34 | public void setJournalID(Long journalID) { 35 | this.journalID = journalID; 36 | } 37 | 38 | /** 39 | * @return the title 40 | */ 41 | public String getTitle() { 42 | return title; 43 | } 44 | 45 | /** 46 | * @param title the title to set 47 | */ 48 | public void setTitle(String title) { 49 | 
this.title = title; 50 | } 51 | 52 | public Map getJournalDocument() { 53 | Map journalDocument = new HashMap(); 54 | journalDocument.put("journalID", this.getJournalID()); 55 | journalDocument.put("title", this.getTitle()); 56 | return journalDocument; 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Location.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.Date; 4 | 5 | /** 6 | * 7 | * @author azhar 8 | */ 9 | public class Location { 10 | private Long locationId; 11 | private Organisation organisation; 12 | private Address address; 13 | private Date from_date; 14 | private Date until_date; 15 | public Location(){} 16 | public Location(Long locationId, Organisation organisation, Address address, Date from_date, Date until_date){ 17 | this.locationId = locationId; 18 | this.organisation = organisation; 19 | this.address = address; 20 | this.from_date = from_date; 21 | this.until_date = until_date; 22 | } 23 | 24 | /** 25 | * @return the locationId 26 | */ 27 | public Long getLocationId() { 28 | return locationId; 29 | } 30 | 31 | /** 32 | * @param locationId the locationId to set 33 | */ 34 | public void setLocationId(Long locationId) { 35 | this.locationId = locationId; 36 | } 37 | 38 | /** 39 | * @return the organisation 40 | */ 41 | public Organisation getOrganisation() { 42 | return organisation; 43 | } 44 | 45 | /** 46 | * @param organisation the organisation to set 47 | */ 48 | public void setOrganisation(Organisation organisation) { 49 | this.organisation = organisation; 50 | } 51 | 52 | /** 53 | * @return the address 54 | */ 55 | public Address getAddress() { 56 | return address; 57 | } 58 | 59 | /** 60 | * @param address the address to set 61 | */ 62 | public void setAddress(Address address) { 63 | this.address = 
address; 64 | } 65 | 66 | /** 67 | * @return the from_date 68 | */ 69 | public Date getFrom_date() { 70 | return from_date; 71 | } 72 | 73 | /** 74 | * @param from_date the from_date to set 75 | */ 76 | public void setFrom_date(Date from_date) { 77 | this.from_date = from_date; 78 | } 79 | 80 | /** 81 | * @return the until_date 82 | */ 83 | public Date getUntil_date() { 84 | return until_date; 85 | } 86 | 87 | /** 88 | * @param until_date the until_date to set 89 | */ 90 | public void setUntil_date(Date until_date) { 91 | this.until_date = until_date; 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Monograph.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Monograph { 11 | 12 | private Long monographID; 13 | private String type = ""; 14 | private String title = ""; 15 | private String shortname = ""; 16 | 17 | public Monograph() { 18 | } 19 | 20 | public Monograph(Long monographID, String type, String title, String shortname) { 21 | this.monographID = monographID; 22 | this.type = type; 23 | this.title = title; 24 | this.shortname = shortname; 25 | } 26 | 27 | /** 28 | * @return the monographID 29 | */ 30 | public Long getMonographID() { 31 | return monographID; 32 | } 33 | 34 | /** 35 | * @param monographID the monographID to set 36 | */ 37 | public void setMonographID(Long monographID) { 38 | this.monographID = monographID; 39 | } 40 | 41 | /** 42 | * @return the type 43 | */ 44 | public String getType() { 45 | return type; 46 | } 47 | 48 | /** 49 | * @param type the type to set 50 | */ 51 | public void setType(String type) { 52 | if(type.length() > 45) 53 | type = type.substring(0, 44); 54 | this.type = type; 55 | } 56 | 57 | /** 
58 | * @return the title 59 | */ 60 | public String getTitle() { 61 | return title; 62 | } 63 | 64 | /** 65 | * @param title the title to set 66 | */ 67 | public void setTitle(String title) { 68 | this.title = title; 69 | } 70 | 71 | /** 72 | * @return the shortname 73 | */ 74 | public String getShortname() { 75 | return shortname; 76 | } 77 | 78 | /** 79 | * @param shortname the shortname to set 80 | */ 81 | public void setShortname(String shortname) { 82 | if(shortname.length() > 45) 83 | shortname = shortname.substring(0, 44); 84 | this.shortname = shortname; 85 | } 86 | 87 | public Map getMonographDocument() { 88 | Map monographDocument = new HashMap(); 89 | monographDocument.put("monographID", this.getMonographID()); 90 | monographDocument.put("title", this.getTitle()); 91 | monographDocument.put("type", this.getType()); 92 | monographDocument.put("shortname", this.getShortname()); 93 | return monographDocument; 94 | 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Organisation_Identifier.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class Organisation_Identifier { 8 | 9 | private String id = ""; 10 | private String type = ""; 11 | 12 | public Organisation_Identifier(String id, String type) { 13 | this.id = id; 14 | this.type = type; 15 | } 16 | 17 | public Organisation_Identifier() { 18 | } 19 | 20 | /** 21 | * @return the id 22 | */ 23 | public String getId() { 24 | return id; 25 | } 26 | 27 | /** 28 | * @param id the id to set 29 | */ 30 | public void setId(String id) { 31 | this.id = id; 32 | } 33 | 34 | /** 35 | * @return the type 36 | */ 37 | public String getType() { 38 | return type; 39 | } 40 | 41 | /** 42 | * @param type the type to set 43 | */ 44 | public void setType(String type) { 45 
| this.type = type; 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Organisation_Name.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import fr.inria.anhalytics.commons.utilities.Utilities; 4 | import java.util.Date; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * 10 | * @author azhar 11 | */ 12 | public class Organisation_Name { 13 | 14 | private Long organisation_nameid; 15 | private String name = ""; 16 | private Date lastupdate_date; 17 | 18 | public Organisation_Name(){}; 19 | public Organisation_Name(Long organisation_nameid, String name, Date lastupdate_date) { 20 | this.organisation_nameid = organisation_nameid; 21 | this.name = name; 22 | this.lastupdate_date = lastupdate_date; 23 | } 24 | 25 | /** 26 | * @return the name 27 | */ 28 | public String getName() { 29 | return name; 30 | } 31 | 32 | /** 33 | * @param name the name to set 34 | */ 35 | public void setName(String name) { 36 | this.name = name; 37 | } 38 | 39 | /** 40 | * @return the lastupdate_date 41 | */ 42 | public Date getLastupdate_date() { 43 | return lastupdate_date; 44 | } 45 | 46 | /** 47 | * @param publication_date the new last-update date 48 | */ 49 | public void setLastupdate_date(Date publication_date) { 50 | this.lastupdate_date = publication_date; 51 | } 52 | /** Builds a map with "date" (the last-update date passed through Utilities.formatDate) and "name". */ 53 | public Map getOrganisationNameDocument() { 54 | Map organisationNameDocument = new HashMap(); 55 | organisationNameDocument.put("date", Utilities.formatDate(this.getLastupdate_date())); 56 | organisationNameDocument.put("name", this.getName()); 57 | return organisationNameDocument; 58 | 59 | } 60 | /** Hash code based on the name alone, in line with equals; null-safe. */ @Override public int hashCode() { return (name == null) ? 0 : name.hashCode(); } 61 | /** Two instances are equal when their names are equal. */ @Override 62 | public boolean equals(Object object) 63 | { 64 | boolean sameSame = false; 65 | 66 | if (object != null && object instanceof Organisation_Name) 67 | { 68 | 
sameSame = this.name.equals(((Organisation_Name) object).name); 69 | } 70 | return sameSame; 71 | } 72 | 73 | /** 74 | * @return the organisation_nameid 75 | */ 76 | public Long getOrganisation_nameid() { 77 | return organisation_nameid; 78 | } 79 | 80 | /** 81 | * @param organisation_nameid the organisation_nameid to set 82 | */ 83 | public void setOrganisation_nameid(Long organisation_nameid) { 84 | this.organisation_nameid = organisation_nameid; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/PART_OF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package fr.inria.anhalytics.commons.entities; 7 | 8 | import java.util.Date; 9 | 10 | /** 11 | * 12 | * @author achraf 13 | */ 14 | public class PART_OF { 15 | private Organisation organisation_mother; 16 | private Date fromDate; 17 | private Date untilDate; 18 | 19 | public PART_OF(){} 20 | public PART_OF(Organisation organisation_mother, Date fromDate, Date untilDate){ 21 | this.organisation_mother = organisation_mother; 22 | this.fromDate = fromDate; 23 | this.untilDate = untilDate; 24 | } 25 | 26 | /** 27 | * @return the organisation_motherId 28 | */ 29 | public Organisation getOrganisation_mother() { 30 | return organisation_mother; 31 | } 32 | 33 | /** 34 | * @param organisation_motherId the organisation_motherId to set 35 | */ 36 | public void setOrganisation_mother(Organisation organisation_mother) { 37 | this.organisation_mother = organisation_mother; 38 | } 39 | 40 | /** 41 | * @return the fromDate 42 | */ 43 | public Date getFromDate() { 44 | return fromDate; 45 | } 46 | 47 | /** 48 | * @param fromDate the fromDate to set 49 | */ 50 | public void 
setFromDate(Date fromDate) { 51 | this.fromDate = fromDate; 52 | } 53 | 54 | /** 55 | * @return the untilDate 56 | */ 57 | public Date getUntilDate() { 58 | return untilDate; 59 | } 60 | 61 | /** 62 | * @param untilDate the untilDate to set 63 | */ 64 | public void setUntilDate(Date untilDate) { 65 | this.untilDate = untilDate; 66 | } 67 | /** Hash code taken from the mother organisation's id (0 when either is missing), consistent with equals. */ @Override public int hashCode() { Organisation mother = this.getOrganisation_mother(); return (mother == null || mother.getOrganisationId() == null) ? 0 : mother.getOrganisationId().hashCode(); } 68 | /** Equal when it is the same instance, or when both mother organisations have the same non-null id; never throws on missing data. */ @Override 69 | public boolean equals(Object object) { 70 | if (this == object) { return true; } 71 | if (!(object instanceof PART_OF)) { return false; } 72 | Organisation mine = this.getOrganisation_mother(); 73 | Organisation theirs = ((PART_OF) object).getOrganisation_mother(); 74 | if (mine == null || theirs == null || mine.getOrganisationId() == null) { return false; } 75 | 76 | return mine.getOrganisationId().equals(theirs.getOrganisationId()); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Person_Identifier.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class Person_Identifier { 8 | 9 | private String id = ""; 10 | private String type = ""; 11 | 12 | public Person_Identifier() { 13 | } 14 | 15 | public Person_Identifier(String id, String type) { 16 | this.id = id; 17 | this.type = type; 18 | } 19 | 20 | /** 21 | * @return the id 22 | */ 23 | public String getId() { 24 | return id; 25 | } 26 | 27 | /** 28 | * @param id the id to set 29 | */ 30 | public void setId(String id) { 31 | if (id.length() > 150) { 32 | id = id.substring(0, 149); 33 | } 34 | this.id = id; 35 | } 36 | 37 | /** 38 | * @return the type 39 | */ 40 | public String getType() { 41 | return type; 42 | } 43 | 44 | /** 45 | * @param type the type to set 46 | */ 47 | public void setType(String type) { 48 | if (type.length() > 45) { 49 | type = type.substring(0, 44); 50 | } 51 | this.type = type; 52 | } 53 | } 54 | --------------------------------------------------------------------------------
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Person_Name.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.Date; 4 | 5 | /** 6 | * 7 | * @author azhar 8 | */ 9 | public class Person_Name { 10 | 11 | private Long personNameId; 12 | private Long personId; 13 | private String title = ""; 14 | private String fullname = ""; 15 | private String forename = ""; 16 | private String middlename = ""; 17 | private String surname = ""; 18 | private Date lastupdate_date; 19 | 20 | 21 | public Person_Name() { 22 | } 23 | public Person_Name(Long personNameId, Long personId, String fullname, String forename, String middlename, String surname, String title, Date lastupdate_date) { 24 | this.personNameId = personNameId; 25 | this.personId = personId; 26 | this.title = title; 27 | this.fullname = fullname; 28 | this.forename = forename; 29 | this.middlename = middlename; 30 | this.surname = surname; 31 | this.lastupdate_date = lastupdate_date; 32 | } 33 | 34 | /** 35 | * @return the title 36 | */ 37 | public String getTitle() { 38 | return title; 39 | } 40 | 41 | /** 42 | * @param title the title to set 43 | */ 44 | public void setTitle(String title) { 45 | if(title.length() > 45) 46 | title = title.substring(0, 44); 47 | this.title = title; 48 | } 49 | 50 | /** 51 | * @return the fullname 52 | */ 53 | public String getFullname() { 54 | return fullname; 55 | } 56 | 57 | /** 58 | * @param fullname the fullname to set 59 | */ 60 | public void setFullname(String fullname) { 61 | if(fullname.length() > 150) 62 | fullname = fullname.substring(0, 149); 63 | this.fullname = fullname; 64 | } 65 | 66 | /** 67 | * @return the forename 68 | */ 69 | public String getForename() { 70 | return forename; 71 | } 72 | 73 | /** 74 | * @param forename the forename to set 75 | */ 76 | public void setForename(String forename) { 77 | if(forename.length() > 150) 78 | 
forename = forename.substring(0, 149); 79 | this.forename = forename; 80 | } 81 | 82 | /** 83 | * @return the middlename 84 | */ 85 | public String getMiddlename() { 86 | return middlename; 87 | } 88 | 89 | /** 90 | * @param middlename the middlename to set 91 | */ 92 | public void setMiddlename(String middlename) { 93 | if(middlename.length() > 45) 94 | middlename = middlename.substring(0, 44); 95 | this.middlename = middlename; 96 | } 97 | 98 | /** 99 | * @return the surname 100 | */ 101 | public String getSurname() { 102 | return surname; 103 | } 104 | 105 | /** 106 | * @param surname the surname to set 107 | */ 108 | public void setSurname(String surname) { 109 | if(surname.length() > 150) 110 | surname = surname.substring(0, 149); 111 | this.surname = surname; 112 | } 113 | 114 | /** 115 | * @return the personNameId 116 | */ 117 | public Long getPersonNameId() { 118 | return personNameId; 119 | } 120 | 121 | /** 122 | * @param personNameId the personNameId to set 123 | */ 124 | public void setPersonNameId(Long personNameId) { 125 | this.personNameId = personNameId; 126 | } 127 | 128 | /** 129 | * @return the personId 130 | */ 131 | public Long getPersonId() { 132 | return personId; 133 | } 134 | 135 | /** 136 | * @param personId the personId to set 137 | */ 138 | public void setPersonId(Long personId) { 139 | this.personId = personId; 140 | } 141 | 142 | /** 143 | * @return the lastupdate_date 144 | */ 145 | public Date getLastupdate_date() { 146 | return lastupdate_date; 147 | } 148 | 149 | /** 150 | * @param lastupdate_date the lastupdate_date to set 151 | */ 152 | public void setLastupdate_date(Date lastupdate_date) { 153 | this.lastupdate_date = lastupdate_date; 154 | } 155 | /** Hash code based on fullname alone, consistent with equals; null-safe. */ @Override public int hashCode() { return (fullname == null) ? 0 : fullname.hashCode(); } 156 | /** Two names are equal when their fullname values are equal (both null counts as equal). */ @Override 157 | public boolean equals(Object object) 158 | { 159 | boolean isEqual= false; 160 | 161 | if (object != null && object instanceof Person_Name) 162 | { 163 | String otherFullname = ((Person_Name) object).fullname; isEqual = (this.fullname == null) ? (otherFullname == null) : this.fullname.equals(otherFullname); 164 | } 165 | 166 | return isEqual; 167 | 
} 168 | 169 | } 170 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Publisher.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * 8 | * @author azhar 9 | */ 10 | public class Publisher { 11 | 12 | private Long publisherID; 13 | private String name = ""; 14 | 15 | public Publisher() { 16 | } 17 | 18 | public Publisher(Long publisherID, String name) { 19 | this.publisherID = publisherID; 20 | this.name = name; 21 | } 22 | 23 | /** 24 | * @return the publisherID 25 | */ 26 | public Long getPublisherID() { 27 | return publisherID; 28 | } 29 | 30 | /** 31 | * @param publisherID the publisherID to set 32 | */ 33 | public void setPublisherID(Long publisherID) { 34 | this.publisherID = publisherID; 35 | } 36 | 37 | /** 38 | * @return the name 39 | */ 40 | public String getName() { 41 | return name; 42 | } 43 | 44 | /** 45 | * @param name the name to set 46 | */ 47 | public void setName(String name) { 48 | if(name.length() > 150) 49 | name = name.substring(0, 149); 50 | this.name = name; 51 | } 52 | 53 | public Map getPublisherDocument() { 54 | Map publisherDocument = new HashMap(); 55 | publisherDocument.put("publisherID", this.getPublisherID()); 56 | publisherDocument.put("name", this.getName()); 57 | return publisherDocument; 58 | 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/entities/Serial_Identifier.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.entities; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class Serial_Identifier { 8 | private Long serial_IdentifierID; 9 | private String id; 10 | 
private String type; 11 | private Journal journal; 12 | private Collection collection; 13 | 14 | 15 | public Serial_Identifier(){} 16 | /** Full constructor; an id longer than 45 characters is cut to its first 44, a null id is stored as is. */ 17 | public Serial_Identifier(Long serial_IdentifierID, String id, String type, Journal journal, Collection collection){ 18 | if(id != null && id.length() > 45) 19 | id = id.substring(0, 44); 20 | this.id = id; 21 | this.serial_IdentifierID = serial_IdentifierID; 22 | this.type = type; 23 | this.journal = journal; 24 | this.collection = collection; 25 | } 26 | 27 | /** 28 | * @return the serial_IdentifierID 29 | */ 30 | public Long getSerial_IdentifierID() { 31 | return serial_IdentifierID; 32 | } 33 | 34 | /** 35 | * @param serial_IdentifierID the serial_IdentifierID to set 36 | */ 37 | public void setSerial_IdentifierID(Long serial_IdentifierID) { 38 | this.serial_IdentifierID = serial_IdentifierID; 39 | } 40 | 41 | /** 42 | * @return the id 43 | */ 44 | public String getId() { 45 | return id; 46 | } 47 | 48 | /** 49 | * @param id the id to set; longer than 45 characters is cut to its first 44, null is stored as is 50 | */ 51 | public void setId(String id) { 52 | if(id != null && id.length() > 45) 53 | id = id.substring(0, 44); 54 | this.id = id; 55 | } 56 | 57 | /** 58 | * @return the type 59 | */ 60 | public String getType() { 61 | return type; 62 | } 63 | 64 | /** 65 | * @param type the type to set 66 | */ 67 | public void setType(String type) { 68 | this.type = type; 69 | } 70 | 71 | /** 72 | * @return the journal 73 | */ 74 | public Journal getJournal() { 75 | return journal; 76 | } 77 | 78 | /** 79 | * @param journal the journal to set 80 | */ 81 | public void setJournal(Journal journal) { 82 | this.journal = journal; 83 | } 84 | 85 | /** 86 | * @return the collection 87 | */ 88 | public Collection getCollection() { 89 | return collection; 90 | } 91 | 92 | /** 93 | * @param collection the collection to set 94 | */ 95 | public void setCollection(Collection collection) { 96 | this.collection = collection; 97 | } 98 | } 99 | --------------------------------------------------------------------------------
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/DataException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | /** 4 | * Created by lfoppiano on 12/10/16. 5 | * 6 | * This class represent an exception when the content causes some problems. 7 | * Parsing, extraction, null values. 8 | */ 9 | public class DataException extends RuntimeException { 10 | 11 | public DataException() { 12 | super(); 13 | } 14 | 15 | public DataException(String message) { 16 | super(message); 17 | } 18 | 19 | public DataException(Throwable cause) { 20 | super(cause); 21 | } 22 | 23 | public DataException(String message, Throwable cause) { 24 | super(message, cause); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/DirectoryNotFoundException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public class DirectoryNotFoundException extends RuntimeException { 8 | 9 | public DirectoryNotFoundException() { 10 | super(); 11 | } 12 | 13 | public DirectoryNotFoundException(String message) { 14 | super(message); 15 | } 16 | 17 | public DirectoryNotFoundException(Throwable cause) { 18 | super(cause); 19 | } 20 | 21 | public DirectoryNotFoundException(String message, Throwable cause) { 22 | super(message, cause); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/FileNotFoundException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public class 
FileNotFoundException extends Exception{ 8 | public FileNotFoundException() { 9 | super(); 10 | } 11 | 12 | public FileNotFoundException(String message) { 13 | super(message); 14 | } 15 | 16 | public FileNotFoundException(Throwable cause) { 17 | super(cause); 18 | } 19 | 20 | public FileNotFoundException(String message, Throwable cause) { 21 | super(message, cause); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/PropertyException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public class PropertyException extends RuntimeException { 8 | 9 | private static final long serialVersionUID = -3337770841815682150L; 10 | 11 | public PropertyException() { 12 | super(); 13 | } 14 | 15 | public PropertyException(String message) { 16 | super(message); 17 | } 18 | 19 | public PropertyException(Throwable cause) { 20 | super(cause); 21 | } 22 | 23 | public PropertyException(String message, Throwable cause) { 24 | super(message, cause); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/ServiceException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | /** 4 | * Created by lfoppiano on 12/10/16. 5 | * 6 | * This class represent problems due to external services, third-party libraries, databases access. 
7 | */ 8 | public class ServiceException extends RuntimeException { 9 | 10 | public ServiceException() { 11 | super(); 12 | } 13 | 14 | public ServiceException(String message) { 15 | super(message); 16 | } 17 | 18 | public ServiceException(Throwable cause) { 19 | super(cause); 20 | } 21 | 22 | public ServiceException(String message, Throwable cause) { 23 | super(message, cause); 24 | } 25 | } -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/exceptions/SystemException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.exceptions; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * Created by lfoppiano on 14/06/16. 7 | * 8 | * This class represent general problems not related to content. For content problem check #DataException 9 | */ 10 | public class SystemException extends RuntimeException { 11 | 12 | public SystemException() { 13 | super(); 14 | } 15 | 16 | public SystemException(String message) { 17 | super(message); 18 | } 19 | 20 | public SystemException(Throwable cause) { 21 | super(cause); 22 | } 23 | 24 | public SystemException(String message, Throwable cause) { 25 | super(message, cause); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/managers/MongoCollectionsInterface.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.managers; 2 | 3 | /** 4 | * All MongoDb collections are listed here. 
5 | * 6 | * @author Achraf 7 | */ 8 | public interface MongoCollectionsInterface { 9 | 10 | // PL note: could be changed to an enum class 11 | 12 | 13 | // bibliographic objects -- NOTE(review): the previous comment was a copy of the TO_REQUEST_LATER one; confirm the intended description 14 | public static final String BIBLIO_OBJECTS = "biblio_objects"; 15 | 16 | // where anhalytics identifiers are generated 17 | public static final String IDENTIFIERS = "identifiers"; 18 | // mainly for grobid process to analyze extraction performance 19 | public static final String HARVEST_DIAGNOSTIC = "diagnostic"; 20 | // files that can't be downloaded and will be processed later 21 | public static final String TO_REQUEST_LATER = "to_request_later"; 22 | // source metadata tei 23 | public static final String METADATAS_TEIS = "metadata_teis"; 24 | // binary files, pdf 25 | public static final String BINARIES = "binaries"; 26 | // publications annexes 27 | public static final String PUB_ANNEXES = "pub_annexes"; 28 | // tei generated using metadata and fulltext 29 | public static final String TEI_CORPUS = "tei_corpus"; 30 | // tei generated using metadata and fulltext 31 | public static final String METADATA_WITHFULLTEXT_TEIS = "metadata_teis_fulltext"; 32 | // tei extracted using grobid 33 | public static final String GROBID_TEIS = "grobid_teis"; 34 | // assets provided by grobid after tei extraction 35 | public static final String GROBID_ASSETS = "grobid_assets"; 36 | // text mining annotations 37 | public static final String NERD_ANNOTATIONS = "nerd_annotations"; 38 | public static final String KEYTERM_ANNOTATIONS = "keyterm_annotations"; 39 | public static final String QUANTITIES_ANNOTATIONS = "quantities_annotations"; 40 | public static final String PDF_QUANTITIES_ANNOTATIONS = "pdf_quantities_annotations"; 41 | // metadata harvested from other repositories 42 | public static final String ISTEX_TEIS = "istex_teis"; 43 | 44 | public static final String ISTEX_PDFS = "istex_pdfs"; 45 | public static final String 
ARXIV_METADATA = "arxiv_metadata"; 46 | public static String CROSSREF_METADATAS = "crossref_metadata"; 47 | } 48 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/managers/MongoDataManager.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.managers; 2 | 3 | import java.io.IOException; 4 | import java.net.UnknownHostException; 5 | 6 | /** 7 | * 8 | * @author achraf 9 | */ 10 | public class MongoDataManager extends MongoManager { 11 | 12 | /** 13 | * A static {@link MongoManager} object containing MongoManager instance 14 | * that can be used from different locations.. 15 | */ 16 | private static MongoDataManager mongoManager = null; 17 | 18 | public MongoDataManager(boolean isTest) throws IOException { 19 | super(isTest); 20 | initDatabase(); 21 | } 22 | 23 | /** 24 | * Returns the shared {@link MongoDataManager} instance, creating it on first 25 | * call. Synchronized so that concurrent first callers cannot each build one. 26 | * 27 | * @return the shared instance ({@code isTest} is only honoured when it is created) 28 | */ 29 | public static synchronized MongoDataManager getInstance(boolean isTest) throws IOException { 30 | if (mongoManager == null) { 31 | return getNewInstance(isTest); 32 | } else { 33 | return mongoManager; 34 | } 35 | } 36 | 37 | /** 38 | * Creates a new {@link MongoFilesManager} object, initializes it and 39 | * returns it.
{@inheritDoc #MongoFilesManager()} 40 | * 41 | * @return MongoFilesManager 42 | */ 43 | protected static synchronized MongoDataManager getNewInstance(boolean isTest) throws IOException { 44 | mongoManager = new MongoDataManager(isTest); 45 | return mongoManager; 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/managers/MongoManager.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.managers; 2 | 3 | import com.mongodb.*; 4 | 5 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 6 | import fr.inria.anhalytics.commons.properties.CommonsProperties; 7 | 8 | import java.io.IOException; 9 | 10 | import java.net.ConnectException; 11 | import java.net.UnknownHostException; 12 | 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import static org.apache.commons.lang3.StringUtils.isNotBlank; 17 | 18 | /** 19 | * Abstract class to be sub-classed to use mongoDB service. 
20 | * 21 | * @author Achraf 22 | */ 23 | abstract class MongoManager { 24 | 25 | private static final Logger LOGGER = LoggerFactory.getLogger(MongoManager.class); 26 | 27 | private MongoClient mongo = null; 28 | 29 | protected DB db = null; 30 | 31 | 32 | public MongoClientOptions.Builder builder = new MongoClientOptions.Builder(); 33 | 34 | public MongoManager(boolean isTest) { 35 | try { 36 | CommonsProperties.init("anhalytics.properties", isTest); 37 | } catch (Exception e) { 38 | LOGGER.error(e.getMessage()); 39 | } 40 | 41 | try { 42 | builder.socketKeepAlive(true); 43 | MongoClientOptions options = builder.build(); 44 | 45 | 46 | if (isNotBlank(CommonsProperties.getMongodbUser())) { 47 | final MongoCredential credential = MongoCredential.createCredential(CommonsProperties.getMongodbUser(), CommonsProperties.getMongodbDb(), CommonsProperties.getMongodbPass().toCharArray()); 48 | mongo = new MongoClient( 49 | new ServerAddress( 50 | CommonsProperties.getMongodbServer(), 51 | CommonsProperties.getMongodbPort()), 52 | credential, 53 | options); 54 | } else { 55 | mongo = new MongoClient( 56 | new ServerAddress( 57 | CommonsProperties.getMongodbServer(), 58 | CommonsProperties.getMongodbPort()), 59 | options); 60 | } 61 | 62 | 63 | LOGGER.info("Mongodb is running on server : " + CommonsProperties.getMongodbServer() + " port : " + CommonsProperties.getMongodbPort()); 64 | if (!mongo.getDatabaseNames().contains(CommonsProperties.getMongodbDb())) { 65 | LOGGER.info("MongoDB database " + CommonsProperties.getMongodbDb() + " does not exist and will be created"); 66 | } 67 | } catch (MongoException ex) { 68 | throw new ServiceException("MongoDB is not UP, the process will be halted."); 69 | } 70 | } 71 | 72 | /** 73 | * Initializes database if it exists and create it otherwise. 
74 | */ 75 | protected void initDatabase() { 76 | db = mongo.getDB(CommonsProperties.getMongodbDb()); 77 | LOGGER.info("Mongodb is connecting to : " + CommonsProperties.getMongodbDb() + "."); 78 | if (!mongo.getDatabaseNames().contains(CommonsProperties.getMongodbDb())) { 79 | BasicDBObject commandArguments = new BasicDBObject(); 80 | commandArguments.put("user", CommonsProperties.getMongodbUser()); 81 | commandArguments.put("pwd", CommonsProperties.getMongodbPass()); 82 | String[] roles = {"readWrite"}; 83 | commandArguments.put("roles", roles); 84 | BasicDBObject command = new BasicDBObject("createUser", 85 | commandArguments); 86 | db.command(command); 87 | } 88 | } 89 | 90 | public DBCollection getCollection(String collectionName) { 91 | /* can t check with gridfs collection ! 92 | boolean collectionFound = db.collectionExists(collectionName); 93 | 94 | if (!collectionFound) { 95 | LOGGER.debug("MongoDB collection " + collectionName + " does not exist and will be created"); 96 | } 97 | */ 98 | return db.getCollection(collectionName); 99 | } 100 | 101 | public void close() { 102 | mongo.close(); 103 | } 104 | 105 | public void setDB(DB currentDB) { 106 | this.db = currentDB; 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/properties/KbProperties.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.properties; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.PropertyException; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.util.Properties; 7 | 8 | /** 9 | * 10 | * @author achraf 11 | */ 12 | public class KbProperties { 13 | 14 | private static String processName; 15 | 16 | private static String fromDate; 17 | 18 | private static String untilDate; 19 | 20 | private static boolean processByDate = true; 21 | 22 | private static boolean 
reset; 23 | // Loads config/<properties_filename> from the working directory; throws PropertyException if it cannot be read. 24 | // NOTE(review): the loaded Properties object is never read afterwards - confirm whether fields should be populated from it. 25 | public static void init(String properties_filename) { 26 | Properties props = new Properties(); 27 | try { 28 | File file = new File(System.getProperty("user.dir")); 29 | try (FileInputStream in = new FileInputStream(file.getAbsolutePath()+File.separator+"config"+File.separator+properties_filename)) { props.load(in); } 30 | } catch (Exception exp) { 31 | throw new PropertyException("Cannot open file " + properties_filename, exp); 32 | } 33 | } 34 | 35 | /** 36 | * @return the processName 37 | */ 38 | public static String getProcessName() { 39 | return processName; 40 | } 41 | 42 | /** 43 | * @param processName the processName to set 44 | */ 45 | public static void setProcessName(String processname) { 46 | processName = processname; 47 | } 48 | 49 | /** 50 | * @return the fromDate 51 | */ 52 | public static String getFromDate() { 53 | return fromDate; 54 | } 55 | 56 | /** 57 | * @param fromDate the fromDate to set 58 | */ 59 | public static void setFromDate(String fromdate) { 60 | fromDate = fromdate; 61 | } 62 | 63 | /** 64 | * @return the untilDate 65 | */ 66 | public static String getUntilDate() { 67 | return untilDate; 68 | } 69 | 70 | /** 71 | * @param untilDate the untilDate to set 72 | */ 73 | public static void setUntilDate(String untildate) { 74 | untilDate = untildate; 75 | } 76 | 77 | /** 78 | * @return the reset 79 | */ 80 | public static boolean isReset() { 81 | return reset; 82 | } 83 | 84 | /** 85 | * @param isreset the reset to set 86 | */ 87 | public static void setReset(boolean isreset) { 88 | reset = isreset; 89 | } 90 | 91 | /** 92 | * @return the processByDate 93 | */ 94 | public static boolean isProcessByDate() { 95 | return processByDate; 96 | } 97 | 98 | /** 99 | * @param aProcessByDate the processByDate to set 100 | */ 101 | public static void setProcessByDate(boolean aProcessByDate) { 102 | processByDate = aProcessByDate; 103 | } 104 | } 105 | --------------------------------------------------------------------------------
/anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/utilities/JaroWinkler.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.utilities; 2 | 3 | import java.util.Arrays; 4 | 5 | /** 6 | * The Jaro–Winkler distance metric is designed and best suited for short 7 | * strings such as person names, and to detect typos; it is (roughly) a 8 | * variation of Damerau-Levenshtein, where the substitution of 2 close 9 | * characters is considered less important then the substitution of 2 characters 10 | * that a far from each other. 11 | * Jaro-Winkler was developed in the area of record linkage (duplicate 12 | * detection) (Winkler, 1990). It returns a value in the interval [0.0, 1.0]. 13 | * The distance is computed as 1 - Jaro-Winkler similarity. 14 | * https://github.com/tdebatty/java-string-similarity 15 | */ 16 | public class JaroWinkler { 17 | 18 | public JaroWinkler() { 19 | 20 | } 21 | 22 | public JaroWinkler(double threshold) { 23 | this.setThreshold(threshold); 24 | } 25 | 26 | private double threshold = 0.7; 27 | 28 | /** 29 | * Sets the threshold used to determine when Winkler bonus should be used. 30 | * Set to a negative value to get the Jaro distance. 31 | * Default value is 0.7 32 | * 33 | * @param threshold the new value of the threshold 34 | */ 35 | public final void setThreshold(double threshold) { 36 | this.threshold = threshold; 37 | } 38 | 39 | /** 40 | * Returns the current value of the threshold used for adding the Winkler 41 | * bonus. The default value is 0.7. 42 | * 43 | * @return the current value of the threshold 44 | */ 45 | public double getThreshold() { 46 | return threshold; 47 | } 48 | 49 | public double similarity(String s1, String s2) { 50 | int[] mtp = matches(s1, s2); 51 | float m = mtp[0]; 52 | if (m == 0) { 53 | return 0f; 54 | } 55 | float j = ((m / s1.length() + m / s2.length() + (m - mtp[1]) / m)) / 3; 56 | float jw = j < getThreshold() ? 
j : j + Math.min(0.1f, 1f / mtp[3]) * mtp[2] 57 | * (1 - j); 58 | return jw; 59 | } 60 | 61 | 62 | public double distance(String s1, String s2) { 63 | return 1.0 - similarity(s1, s2); 64 | } 65 | 66 | 67 | private int[] matches(String s1, String s2) { 68 | String max, min; 69 | if (s1.length() > s2.length()) { 70 | max = s1; 71 | min = s2; 72 | } else { 73 | max = s2; 74 | min = s1; 75 | } 76 | int range = Math.max(max.length() / 2 - 1, 0); 77 | int[] matchIndexes = new int[min.length()]; 78 | Arrays.fill(matchIndexes, -1); 79 | boolean[] matchFlags = new boolean[max.length()]; 80 | int matches = 0; 81 | for (int mi = 0; mi < min.length(); mi++) { 82 | char c1 = min.charAt(mi); 83 | for (int xi = Math.max(mi - range, 0), 84 | xn = Math.min(mi + range + 1, max.length()); xi < xn; xi++) { 85 | if (!matchFlags[xi] && c1 == max.charAt(xi)) { 86 | matchIndexes[mi] = xi; 87 | matchFlags[xi] = true; 88 | matches++; 89 | break; 90 | } 91 | } 92 | } 93 | char[] ms1 = new char[matches]; 94 | char[] ms2 = new char[matches]; 95 | for (int i = 0, si = 0; i < min.length(); i++) { 96 | if (matchIndexes[i] != -1) { 97 | ms1[si] = min.charAt(i); 98 | si++; 99 | } 100 | } 101 | for (int i = 0, si = 0; i < max.length(); i++) { 102 | if (matchFlags[i]) { 103 | ms2[si] = max.charAt(i); 104 | si++; 105 | } 106 | } 107 | int transpositions = 0; 108 | for (int mi = 0; mi < ms1.length; mi++) { 109 | if (ms1[mi] != ms2[mi]) { 110 | transpositions++; 111 | } 112 | } 113 | int prefix = 0; 114 | for (int mi = 0; mi < min.length(); mi++) { 115 | if (s1.charAt(mi) == s2.charAt(mi)) { 116 | prefix++; 117 | } else { 118 | break; 119 | } 120 | } 121 | return new int[]{matches, transpositions / 2, prefix, max.length()}; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/utilities/KeyGen.java: -------------------------------------------------------------------------------- 1 | package 
fr.inria.anhalytics.commons.utilities; 2 | /** 3 | * Generate a random key. 4 | * 5 | * @author Florian Zipser 6 | */ 7 | public class KeyGen { 8 | /** 9 | * Minimum length for a decent key 10 | */ 11 | public static final int MIN_LENGTH = 10; 12 | 13 | /** 14 | * The random number generator. 15 | */ 16 | protected static java.util.Random r = new java.util.Random(); 17 | 18 | /** 19 | * Set of characters that is valid. Must be printable, memorable, and "won't 20 | * break HTML" (i.e., not ' <', '>', '&', '=', ...). or break shell commands 21 | * (i.e., not ' <', '>', '$', '!', ...). I, L and O are good to leave out, 22 | * as are numeric zero and one. 23 | */ 24 | protected static final char[] goodChar = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 25 | 'h', 'j', 'k', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 26 | 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 27 | 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 28 | '2', '3', '4', '5', '6', '7', '8', '9'}; 29 | 30 | /** 31 | * Generate a Password object with a random password. 32 | * @return a generated key 33 | */ 34 | public static String getKey() { 35 | StringBuilder sb = new StringBuilder(); 36 | for (int i = 0; i < MIN_LENGTH; i++) { 37 | sb.append(goodChar[r.nextInt(goodChar.length)]); 38 | } 39 | return sb.toString(); 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /anhalytics-commons/src/main/java/fr/inria/anhalytics/commons/utilities/NamespaceContextMap.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.utilities; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | import java.util.Iterator; 7 | import java.util.Map; 8 | import java.util.Set; 9 | import javax.xml.XMLConstants; 10 | import javax.xml.namespace.NamespaceContext; 11 | 12 | /** 13 | * An implementation of 15 | * NamespaceContext . 
Instances are immutable. 16 | * 17 | * Code from http://illegalargumentexception.blogspot.com/2009/05/java-using-xpath-with-namespaces-and.html 18 | * Free to reuse. 19 | * 20 | * @author McDowell 21 | */ 22 | public final class NamespaceContextMap implements 23 | NamespaceContext { 24 | 25 | private final Map prefixMap; 26 | private final Map> nsMap; 27 | 28 | /** 29 | * Constructor that takes a map of XML prefix-namespaceURI values. A defensive 30 | * copy is made of the map. An IllegalArgumentException will be thrown if the 31 | * map attempts to remap the standard prefixes defined in the NamespaceContext 32 | * contract. 33 | * 34 | * @param prefixMappings 35 | * a map of prefix:namespaceURI values 36 | */ 37 | public NamespaceContextMap(Map prefixMappings) { 38 | prefixMap = createPrefixMap(prefixMappings); 39 | nsMap = createNamespaceMap(prefixMap); 40 | } 41 | 42 | /** 43 | * Convenience constructor. 44 | * 45 | * @param mappingPairs 46 | * pairs of prefix-namespaceURI values 47 | */ 48 | public NamespaceContextMap(String... mappingPairs) { 49 | this(toMap(mappingPairs)); 50 | } 51 | 52 | private static Map toMap( 53 | String... 
mappingPairs) { 54 | Map prefixMappings = new HashMap( 55 | mappingPairs.length / 2); 56 | for (int i = 0; i < mappingPairs.length; i++) { 57 | prefixMappings 58 | .put(mappingPairs[i], mappingPairs[++i]); 59 | } 60 | return prefixMappings; 61 | } 62 | 63 | private Map createPrefixMap( 64 | Map prefixMappings) { 65 | Map prefixMap = new HashMap( 66 | prefixMappings); 67 | addConstant(prefixMap, XMLConstants.XML_NS_PREFIX, 68 | XMLConstants.XML_NS_URI); 69 | addConstant(prefixMap, XMLConstants.XMLNS_ATTRIBUTE, 70 | XMLConstants.XMLNS_ATTRIBUTE_NS_URI); 71 | return Collections.unmodifiableMap(prefixMap); 72 | } 73 | 74 | private void addConstant(Map prefixMap, 75 | String prefix, String nsURI) { 76 | String previous = prefixMap.put(prefix, nsURI); 77 | if (previous != null && !previous.equals(nsURI)) { 78 | throw new IllegalArgumentException(prefix + " -> " 79 | + previous + "; see NamespaceContext contract"); 80 | } 81 | } 82 | 83 | private Map> createNamespaceMap( 84 | Map prefixMap) { 85 | Map> nsMap = new HashMap>(); 86 | for (Map.Entry entry : prefixMap 87 | .entrySet()) { 88 | String nsURI = entry.getValue(); 89 | Set prefixes = nsMap.get(nsURI); 90 | if (prefixes == null) { 91 | prefixes = new HashSet(); 92 | nsMap.put(nsURI, prefixes); 93 | } 94 | prefixes.add(entry.getKey()); 95 | } 96 | for (Map.Entry> entry : nsMap 97 | .entrySet()) { 98 | Set readOnly = Collections 99 | .unmodifiableSet(entry.getValue()); 100 | entry.setValue(readOnly); 101 | } 102 | return nsMap; 103 | } 104 | 105 | @Override 106 | public String getNamespaceURI(String prefix) { 107 | checkNotNull(prefix); 108 | String nsURI = prefixMap.get(prefix); 109 | return nsURI == null ? XMLConstants.NULL_NS_URI : nsURI; 110 | } 111 | 112 | @Override 113 | public String getPrefix(String namespaceURI) { 114 | checkNotNull(namespaceURI); 115 | Set set = nsMap.get(namespaceURI); 116 | return set == null ? 
null : set.iterator().next(); 117 | } 118 | // An unbound namespace URI yields an empty iterator (NamespaceContext contract) instead of a NullPointerException. 119 | @Override 120 | public Iterator getPrefixes(String namespaceURI) { 121 | checkNotNull(namespaceURI); 122 | Set set = nsMap.get(namespaceURI); 123 | return set == null ? Collections.<String>emptySet().iterator() : set.iterator(); 124 | } 125 | 126 | private void checkNotNull(String value) { 127 | if (value == null) { 128 | throw new IllegalArgumentException("null"); 129 | } 130 | } 131 | 132 | /** 133 | * @return an unmodifiable map of the mappings in the form prefix-namespaceURI 134 | */ 135 | public Map getMap() { 136 | return prefixMap; 137 | } 138 | 139 | } -------------------------------------------------------------------------------- /anhalytics-commons/src/test/java/fr/inria/anhalytics/commons/utilities/UtilitiesTest.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.commons.utilities; 2 | 3 | 4 | import org.junit.Test; 5 | 6 | import static org.hamcrest.CoreMatchers.is; 7 | import static org.junit.Assert.assertThat; 8 | 9 | /** 10 | * @author azhar 11 | */ 12 | public class UtilitiesTest { 13 | 14 | @Test 15 | public void testDaysInMonths_standardMonths_30() throws Exception { 16 | assertThat(Utilities.daysInMonth(2400, 4), is(30)); 17 | assertThat(Utilities.daysInMonth(2400, 6), is(30)); 18 | assertThat(Utilities.daysInMonth(2400, 9), is(30)); 19 | assertThat(Utilities.daysInMonth(2400, 11), is(30)); 20 | 21 | assertThat(Utilities.daysInMonth(2003, 4), is(30)); 22 | assertThat(Utilities.daysInMonth(2003, 6), is(30)); 23 | assertThat(Utilities.daysInMonth(2003, 9), is(30)); 24 | assertThat(Utilities.daysInMonth(2003, 11), is(30)); 25 | } 26 | 27 | @Test 28 | public void testDaysInMonths_standardMonths_31() throws Exception { 29 | assertThat(Utilities.daysInMonth(2400, 1), is(31)); 30 | assertThat(Utilities.daysInMonth(2400, 3), is(31)); 31 | assertThat(Utilities.daysInMonth(2400, 5), is(31)); 32 | assertThat(Utilities.daysInMonth(2400, 7), is(31)); 33 | assertThat(Utilities.daysInMonth(2400, 8),
is(31)); 34 | assertThat(Utilities.daysInMonth(2400, 10), is(31)); 35 | assertThat(Utilities.daysInMonth(2400, 12), is(31)); 36 | 37 | assertThat(Utilities.daysInMonth(2003, 1), is(31)); 38 | assertThat(Utilities.daysInMonth(2003, 3), is(31)); 39 | assertThat(Utilities.daysInMonth(2003, 5), is(31)); 40 | assertThat(Utilities.daysInMonth(2003, 7), is(31)); 41 | assertThat(Utilities.daysInMonth(2003, 8), is(31)); 42 | assertThat(Utilities.daysInMonth(2003, 10), is(31)); 43 | assertThat(Utilities.daysInMonth(2003, 12), is(31)); 44 | } 45 | 46 | @Test 47 | public void testDaysInMonth_february_nbOfDaysShouldAdaptAccordingToYear() throws Exception { 48 | assertThat(Utilities.daysInMonth(2400, 2), is(29)); 49 | assertThat(Utilities.daysInMonth(1996, 2), is(29)); 50 | assertThat(Utilities.daysInMonth(2003, 2), is(28)); 51 | } 52 | 53 | 54 | @Test 55 | public void testCompleteDate_alreadyCompletedDate_shouldWork() throws Exception { 56 | assertThat(Utilities.completeDate("2015-06-16"), is("2015-06-16")); 57 | } 58 | 59 | @Test 60 | public void testCompleteDate_emptyDate_shouldReturnEmptyValue() throws Exception { 61 | assertThat(Utilities.completeDate(""), is("")); 62 | } 63 | 64 | @Test 65 | public void testCompleteDate_onlyYear_shouldWork() throws Exception { 66 | assertThat(Utilities.completeDate("2015-"), is("2015-12-31")); 67 | } 68 | 69 | @Test 70 | public void testCompleteDate_onlyYearShort_shouldReturnEmptyValue() throws Exception { 71 | assertThat(Utilities.completeDate("15"), is("")); 72 | } 73 | 74 | @Test 75 | public void testCompleteDate_onlyYearComplete_shouldReturnCompletedDate() throws Exception { 76 | assertThat(Utilities.completeDate("2015"), is("2015-12-31")); 77 | } 78 | 79 | @Test 80 | public void testCompleteDate_onlyYM_shouldReturnCompletedDate() throws Exception { 81 | assertThat(Utilities.completeDate("2015-12"), is("2015-12-31")); 82 | } 83 | 84 | @Test 85 | public void testCompleteDate_onlyYM_Feb_shouldReturnCompletedDate() throws Exception { 86 | 
assertThat(Utilities.completeDate("2015-02"), is("2015-02-28")); 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /anhalytics-harvest/list-halSample-quantities.txt: -------------------------------------------------------------------------------- 1 | hal-00643787 2 | hal-00702344 3 | hal-00720564 4 | hal-00852260 5 | hal-00924047 6 | hal-01078051 7 | hal-01205501 8 | hal-01223150 9 | hal-01278820 10 | hal-01321536 11 | halshs-01279855 12 | inserm-00676663 13 | inserm-00752141 14 | inserm-00851058 15 | inserm-01196815 16 | hal-00563000 17 | hal-00651627 18 | hal-00297631 19 | hal-01070389 20 | inserm-00504792 21 | hal-00499067 22 | hal-00317008 23 | 24 | hal-00915599 25 | hal-00595947 26 | hal-01278907 27 | jpa-00232521 28 | hal-00962359 29 | inserm-00721680 30 | hal-00535466 31 | hal-01269917 32 | hal-00987664 33 | jpa-00231626 34 | hal-00568675 35 | hal-00915599 36 | jpa-00251546 -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/converters/MetadataConverter.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.converters; 2 | 3 | import fr.inria.anhalytics.commons.data.BiblioObject; 4 | import org.w3c.dom.Document; 5 | import org.w3c.dom.Element; 6 | 7 | /** 8 | * 9 | * @author azhar 10 | */ 11 | public interface MetadataConverter { 12 | Element convertMetadataToTEIHeader(Document metadata, Document newTEIcorpus, BiblioObject biblio) ; 13 | } 14 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/crossref/CrossRefBiblioData.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.crossref; 2 | 3 | /** 4 | * Represents metadata needed to find DOI. 
5 | * 6 | * @author azhar 7 | */ 8 | public class CrossRefBiblioData { 9 | 10 | private String doi; 11 | private String aut; 12 | private String title; 13 | private String journalTitle; 14 | private String volume; 15 | private String pageRange; 16 | private int beginPage; 17 | 18 | public CrossRefBiblioData() { 19 | } 20 | 21 | public CrossRefBiblioData(String doi, String aut, String title, String journalTitle, String volume, String pageRange, int beginPage) { 22 | this.doi = doi; 23 | this.aut = aut; 24 | this.title = title; 25 | this.journalTitle = journalTitle; 26 | this.volume = volume; 27 | this.pageRange = pageRange; 28 | this.beginPage = beginPage; 29 | } 30 | 31 | ; 32 | /** 33 | * @return the doi 34 | */ 35 | public String getDoi() { 36 | return doi; 37 | } 38 | 39 | /** 40 | * @param doi the doi to set 41 | */ 42 | public void setDoi(String doi) { 43 | this.doi = doi; 44 | } 45 | 46 | /** 47 | * @return the aut 48 | */ 49 | public String getAut() { 50 | return aut; 51 | } 52 | 53 | /** 54 | * @param aut the aut to set 55 | */ 56 | public void setAut(String aut) { 57 | this.aut = aut; 58 | } 59 | 60 | /** 61 | * @return the title 62 | */ 63 | public String getTitle() { 64 | return title; 65 | } 66 | 67 | /** 68 | * @param title the title to set 69 | */ 70 | public void setTitle(String title) { 71 | this.title = title; 72 | } 73 | 74 | /** 75 | * @return the journalTitle 76 | */ 77 | public String getJournalTitle() { 78 | return journalTitle; 79 | } 80 | 81 | /** 82 | * @param journalTitle the journalTitle to set 83 | */ 84 | public void setJournalTitle(String journalTitle) { 85 | this.journalTitle = journalTitle; 86 | } 87 | 88 | /** 89 | * @return the volume 90 | */ 91 | public String getVolume() { 92 | return volume; 93 | } 94 | 95 | /** 96 | * @param volume the volume to set 97 | */ 98 | public void setVolume(String volume) { 99 | this.volume = volume; 100 | } 101 | 102 | /** 103 | * @return the firstPage 104 | */ 105 | public String getPageRange() { 
106 | return pageRange; 107 | } 108 | 109 | /** 110 | * @param pageRange the firstPage to set 111 | */ 112 | public void setPageRange(String pageRange) { 113 | this.pageRange = pageRange; 114 | } 115 | 116 | /** 117 | * @return the beginPage 118 | */ 119 | public int getBeginPage() { 120 | return beginPage; 121 | } 122 | 123 | /** 124 | * @param beginPage the beginPage to set 125 | */ 126 | public void setBeginPage(int beginPage) { 127 | this.beginPage = beginPage; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/crossref/OpenUrl.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.crossref; 2 | 3 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | /** 8 | * 9 | * @author azhar 10 | */ 11 | public class OpenUrl { 12 | 13 | private static final Logger logger = LoggerFactory.getLogger(OpenUrl.class); 14 | private MongoFileManager mm; 15 | 16 | private static final String IstexURL 17 | = "http://api.istex.fr/document/openurl?url_ver=rft_id=info:doi/%s"; 18 | 19 | public OpenUrl() { 20 | this.mm = MongoFileManager.getInstance(false); 21 | } 22 | 23 | public void getIstexUrl() { 24 | // if (mm.initIdentifiersWithoutPdfUrl()) { 25 | // while (mm.hasMore()) { 26 | // try { 27 | // Identifier id = mm.nextIdentifier(); 28 | // String currentAnhalyticsId = id.getAnhalyticsId(); 29 | // logger.info("################################" + currentAnhalyticsId + "####################"); 30 | // URL url = new URL(String.format(IstexURL, id.getDoi())); 31 | // logger.info("Sending: " + url.toString()); 32 | // HttpURLConnection urlConn = null; 33 | // try { 34 | // urlConn = (HttpURLConnection) url.openConnection(); 35 | // } catch (Exception e) { 36 | // try { 37 | // urlConn = (HttpURLConnection) 
url.openConnection(); 38 | // } catch (Exception e2) { 39 | // urlConn = null; 40 | // throw new Exception("An exception occured while running Grobid.", e2); 41 | // } 42 | // } 43 | // if (urlConn != null) { 44 | // try { 45 | // urlConn.setDoOutput(true); 46 | // urlConn.setDoInput(true); 47 | // urlConn.setRequestMethod("GET"); 48 | // 49 | // urlConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); 50 | // if (urlConn.getResponseCode() == 200) { 51 | // String foundurl = urlConn.getURL().toString(); 52 | // logger.info("URL found : " + foundurl); 53 | // //mm.updateIdentifier(doi); 54 | // //mm.insertBinary(); 55 | // } 56 | // urlConn.disconnect(); 57 | // } catch (Exception e) { 58 | // e.printStackTrace(); 59 | // } 60 | // 61 | // } 62 | // } catch (Exception e) { 63 | // e.printStackTrace(); 64 | // } 65 | // } 66 | // logger.info("Done."); 67 | // } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/exceptions/BinaryNotAvailableException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.exceptions; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 4 | 5 | /** 6 | * 7 | * @author Achraf 8 | */ 9 | public class BinaryNotAvailableException extends ServiceException { 10 | 11 | private static final long serialVersionUID = -3337770841815682150L; 12 | 13 | public BinaryNotAvailableException() { 14 | super(); 15 | } 16 | 17 | public BinaryNotAvailableException(String message) { 18 | super(message); 19 | } 20 | 21 | public BinaryNotAvailableException(Throwable cause) { 22 | super(cause); 23 | } 24 | 25 | public BinaryNotAvailableException(String message, Throwable cause) { 26 | super(message, cause); 27 | } 28 | } -------------------------------------------------------------------------------- 
/anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/exceptions/GrobidTimeoutException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package fr.inria.anhalytics.harvest.exceptions; 7 | 8 | /** 9 | * 10 | * @author azhar 11 | */ 12 | public class GrobidTimeoutException extends RuntimeException { 13 | 14 | private static final long serialVersionUID = -3337770841815682150L; 15 | 16 | public GrobidTimeoutException() { 17 | super(); 18 | } 19 | 20 | public GrobidTimeoutException(String message) { 21 | super(message); 22 | } 23 | 24 | public GrobidTimeoutException(Throwable cause) { 25 | super(cause); 26 | } 27 | 28 | public GrobidTimeoutException(String message, Throwable cause) { 29 | super(message, cause); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/exceptions/UnreachableGrobidServiceException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.exceptions; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.SystemException; 4 | import java.io.IOException; 5 | 6 | /** 7 | * 8 | * @author achraf 9 | */ 10 | public class UnreachableGrobidServiceException extends SystemException { 11 | 12 | public UnreachableGrobidServiceException(int responseCode) { 13 | super("Grobid service is not alive. 
/**
 * Looks up the legend of an extracted asset inside a TEI document.
 *
 * @author achraf
 */
public class AssetLegendExtracter {

    /**
     * Returns the text content of the first {@code figDesc} found under the
     * parent element of the {@code graphic} whose {@code url} attribute equals
     * {@code filename}.
     *
     * If several {@code graphic} elements carry the same url, the last one
     * that has a {@code figDesc} wins (same as the historical behaviour).
     * The lookup is best effort: any parsing problem is reported on stderr and
     * the legend found so far (usually {@code null}) is returned.
     *
     * @param filename  value expected in the {@code url} attribute of the graphic
     * @param teiStream TEI document to search; it is read but not closed here
     * @return the legend text, or {@code null} when none could be found
     */
    static String extractLegendFromTei(String filename, InputStream teiStream) {
        String legend = null;
        try {
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            docFactory.setValidating(false);
            //docFactory.setNamespaceAware(true);

            // The TEI comes from processed third-party documents: never let the
            // parser resolve external entities or fetch external DTDs (XXE).
            disableFeature(docFactory, "http://xml.org/sax/features/external-general-entities");
            disableFeature(docFactory, "http://xml.org/sax/features/external-parameter-entities");
            disableFeature(docFactory, "http://apache.org/xml/features/nonvalidating/load-external-dtd");

            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document tei = docBuilder.parse(teiStream);

            NodeList graphics = tei.getElementsByTagName("graphic");
            for (int j = 0; j < graphics.getLength(); j++) {
                Element graphic = (Element) graphics.item(j);
                if (!graphic.getAttribute("url").equals(filename)) {
                    continue;
                }
                // A graphic that is the document root has no element parent;
                // skip it instead of failing the whole lookup on a bad cast.
                if (!(graphic.getParentNode() instanceof Element)) {
                    continue;
                }
                Element figure = (Element) graphic.getParentNode();
                NodeList figDescs = figure.getElementsByTagName("figDesc");
                if (figDescs.getLength() > 0) {
                    legend = figDescs.item(0).getTextContent();
                }
            }
        } catch (Exception e) {
            // Best effort: report and fall through with whatever was found.
            e.printStackTrace();
        }
        return legend;
    }

    /**
     * Switches a parser feature off, ignoring parsers that do not know it so
     * that hardening never prevents the extraction itself.
     */
    private static void disableFeature(DocumentBuilderFactory factory, String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            // Feature not supported by this parser implementation: nothing to disable.
        }
    }

}
10 | * 11 | * @author Achraf 12 | */ 13 | public class GrobidFulltextWorker extends GrobidWorker { 14 | 15 | private static final Logger logger = LoggerFactory.getLogger(GrobidFulltextWorker.class); 16 | 17 | public GrobidFulltextWorker(BiblioObject biblioObject, String date, int start, int end) throws ParserConfigurationException { 18 | super(biblioObject, start, end); 19 | } 20 | 21 | // @Override 22 | // protected void saveExtractions(String resultDirectoryPath) { 23 | // String tei = null; 24 | // try { 25 | // File directoryPath = new File(resultDirectoryPath); 26 | // if (directoryPath.exists()) { 27 | // File[] files = directoryPath.listFiles(); 28 | // if (files != null) { 29 | // for (final File currFile : files) { 30 | // if (currFile.getName().toLowerCase().endsWith(".png")) { 31 | // InputStream targetStream = FileUtils.openInputStream(currFile); 32 | // mm.insertGrobidAssetDocument(targetStream, repositoryDocId, anhalyticsId,currFile.getName(), date); 33 | // targetStream.close(); 34 | // } else if (currFile.getName().toLowerCase().endsWith(".xml")) { 35 | // tei = Utilities.readFile(currFile.getAbsolutePath()); 36 | // tei = Utilities.trimEncodedCharaters(tei); 37 | // tei = generateIdsTeiDoc(tei); 38 | // System.out.println(repositoryDocId); 39 | // mm.insertGrobidTei(tei, repositoryDocId, anhalyticsId, date); 40 | // } 41 | // } 42 | // } 43 | // } 44 | // } catch (Exception ex) { 45 | // logger.error(ex.getMessage(), ex.getCause()); 46 | // } 47 | // } 48 | } 49 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/grobid/GrobidProcess.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.grobid; 2 | 3 | import com.mongodb.Mongo; 4 | import fr.inria.anhalytics.commons.data.BiblioObject; 5 | import fr.inria.anhalytics.commons.data.BinaryFile; 6 | import 
fr.inria.anhalytics.commons.data.Processings; 7 | import fr.inria.anhalytics.commons.exceptions.DataException; 8 | import fr.inria.anhalytics.harvest.exceptions.UnreachableGrobidServiceException; 9 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 10 | import fr.inria.anhalytics.commons.properties.HarvestProperties; 11 | import java.util.Arrays; 12 | import java.util.List; 13 | import java.util.concurrent.ExecutorService; 14 | import java.util.concurrent.Executors; 15 | import java.util.concurrent.TimeUnit; 16 | import javax.xml.parsers.ParserConfigurationException; 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | 20 | /** 21 | * Processes the PDFs using Grobid. 22 | * @author Achraf 23 | */ 24 | public class GrobidProcess { 25 | 26 | private static final Logger logger = LoggerFactory.getLogger(GrobidProcess.class); 27 | 28 | private MongoFileManager mm; 29 | 30 | public GrobidProcess() { 31 | this.mm = MongoFileManager.getInstance(false); 32 | } 33 | 34 | static final public List toBeGrobidified 35 | = Arrays.asList("ART", "COMM", "OUV", "POSTER", "DOUV", "PATENT", "REPORT", "COUV", "OTHER", "UNDEFINED"); 36 | 37 | /** 38 | * Extracts the TEI using the available PDF. 
39 | */ 40 | public void processFulltexts() { 41 | BinaryFile bf = null; 42 | try { 43 | if (GrobidService.isGrobidOk()) { 44 | ExecutorService executor = Executors.newFixedThreadPool(HarvestProperties.getNbThreads()); 45 | int start = -1; 46 | int end = -1; 47 | 48 | boolean initResult; 49 | if (HarvestProperties.isReset()) { 50 | initResult = mm.initObjects(null, MongoFileManager.ONLY_WITH_FULLTEXT_PROCESS); 51 | } else { 52 | initResult = mm.initObjects(null, MongoFileManager.ONLY_WITH_FULLTEXT_NOT_PROCESSED_GROBID_PROCESS); 53 | } 54 | 55 | if (initResult) { 56 | while (mm.hasMore()) { 57 | BiblioObject biblioObject = mm.nextBiblioObject(); 58 | // if (toBeGrobidified.contains(biblioObject.getPublicationType().split("_")[0])) { 59 | 60 | if (!biblioObject.getIsWithFulltext()) { 61 | logger.info("\t\t No fulltext available for : "+biblioObject.getRepositoryDocId()+", Skipping..."); 62 | continue; 63 | } 64 | if (!HarvestProperties.isReset() && mm.isProcessed(Processings.GROBID)) { 65 | logger.info("\t\t Already grobidified, Skipping..."); 66 | continue; 67 | } 68 | 69 | try { 70 | bf = new BinaryFile(); 71 | 72 | if (biblioObject.getSource().equalsIgnoreCase("hal")) { 73 | start = 2; 74 | } 75 | 76 | bf.setStream(mm.getFulltext(biblioObject)); 77 | //dont run it if stream is null 78 | if(bf.getStream()==null) 79 | throw new DataException("PDF stream is null"); 80 | biblioObject.setPdf(bf); 81 | Runnable worker = new GrobidSimpleFulltextWorker(biblioObject, start, end); 82 | executor.execute(worker); 83 | } catch (ParserConfigurationException exp) { 84 | logger.error("An error occured while processing the file " + bf.getRepositoryDocId() 85 | + ". Continuing the process for the other files.", exp); 86 | } catch (DataException dataexp) { 87 | logger.error("Can't get the fulltext PDF for " + bf.getRepositoryDocId() 88 | + ".", dataexp); 89 | } 90 | // } 91 | } 92 | } 93 | 94 | executor.shutdown(); 95 | logger.info("Jobs done, shutting down thread pool. 
The executor will wait 2 minutes before forcing the shutdown."); 96 | try { 97 | if (!executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES)) { 98 | executor.shutdownNow(); 99 | } 100 | } catch (InterruptedException e) { 101 | executor.shutdownNow(); 102 | } 103 | } 104 | logger.info("Finished all threads"); 105 | } catch (UnreachableGrobidServiceException ugse) { 106 | logger.error(ugse.getMessage()); 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/grobid/GrobidSimpleFulltextWorker.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.grobid; 2 | 3 | import fr.inria.anhalytics.commons.data.BiblioObject; 4 | import fr.inria.anhalytics.commons.data.Processings; 5 | import fr.inria.anhalytics.commons.exceptions.DataException; 6 | import fr.inria.anhalytics.harvest.exceptions.GrobidTimeoutException; 7 | import fr.inria.anhalytics.commons.utilities.Utilities; 8 | import java.io.File; 9 | import java.io.IOException; 10 | import javax.xml.parsers.ParserConfigurationException; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | /** 15 | * 16 | * @author Patrice 17 | */ 18 | class GrobidSimpleFulltextWorker extends GrobidWorker { 19 | 20 | private static final Logger logger = LoggerFactory.getLogger(GrobidSimpleFulltextWorker.class); 21 | 22 | public GrobidSimpleFulltextWorker(BiblioObject biblioObject, int start, int end) throws ParserConfigurationException { 23 | super(biblioObject, start, end); 24 | } 25 | 26 | @Override 27 | protected void processCommand() { 28 | try { 29 | GrobidService grobidService = new GrobidService(this.start, this.end, true); 30 | // configured for HAL, first page is added to the document 31 | 32 | String filepath = Utilities.storeTmpFile(biblioObject.getPdf().getStream()); 33 | 34 | try { 35 | 
biblioObject.getPdf().getStream().close(); 36 | } catch (IOException ex) { 37 | throw new DataException("File stream can't be closed.", ex); 38 | } 39 | file = new File(filepath); 40 | double mb = file.length() / (1024.0 * 1024.0); 41 | 42 | // for now we extract just files with less size (avoid thesis..which may take long time) 43 | if (mb <= 15) { 44 | logger.info("\t\t "+Thread.currentThread().getName() +": TEI extraction for : " + biblioObject.getRepositoryDocId() + " sizing :" + mb + "mb"); 45 | String tei = grobidService.runFullTextGrobid(filepath).trim(); 46 | tei = generateIdsGrobidTeiDoc(tei); 47 | 48 | boolean inserted = mm.insertGrobidTei(tei, biblioObject.getAnhalyticsId()); 49 | if (inserted) { 50 | this.saveExtractedDOI(tei); 51 | mm.updateBiblioObjectStatus(biblioObject, Processings.GROBID, false); 52 | logger.info("\t\t "+Thread.currentThread().getName() +": " + biblioObject.getRepositoryDocId() + " processed."); 53 | } else 54 | logger.error("\t\t "+Thread.currentThread().getName() +": Problem occured while saving " + biblioObject.getRepositoryDocId() + " grobid TEI."); 55 | } else { 56 | logger.info("\t\t "+Thread.currentThread().getName() +": can't extract TEI for : " + biblioObject.getRepositoryDocId() + "size too large : " + mb + "mb"); 57 | } 58 | 59 | } catch (GrobidTimeoutException e) { 60 | mm.save(biblioObject.getRepositoryDocId(), "processGrobid", "timed out"); 61 | logger.warn(Thread.currentThread().getName() +"Processing of " + biblioObject.getRepositoryDocId() + " timed out"); 62 | } catch (RuntimeException e) { 63 | e.printStackTrace(); 64 | logger.error("\t\t "+Thread.currentThread().getName() +": error occurred while processing " + biblioObject.getRepositoryDocId()); 65 | mm.save(biblioObject.getRepositoryDocId(), "processGrobid", e.getMessage()); 66 | logger.error(e.getMessage(), e.getCause()); 67 | } catch (IOException ex) { 68 | logger.error(ex.getMessage(), ex.getCause()); 69 | } 70 | boolean success = false; 71 | 
if(file.exists()) { 72 | success = file.delete(); 73 | if (!success) { 74 | logger.error( 75 | Thread.currentThread().getName() +": Deletion of temporary image files failed for file '" + file.getAbsolutePath() + "'"); 76 | }else 77 | logger.info("\t\t "+Thread.currentThread().getName() +" :"+ file.getAbsolutePath() +" deleted."); 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/parsers/OAIPMHPathsItf.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.parsers; 2 | 3 | import org.w3c.dom.Node; 4 | import org.w3c.dom.NodeList; 5 | 6 | /** 7 | * 8 | * @author Achraf 9 | */ 10 | 11 | /* to be reviewed and moved !!*/ 12 | 13 | public interface OAIPMHPathsItf { 14 | public final static String ListRecordsElement = "ListRecords"; 15 | public final static String RecordElement = "record"; 16 | public final static String TeiElement = "metadata"; 17 | public final static String IdElementPath = "text/body/listBibl/biblFull/publicationStmt/idno[@type='halId']"; 18 | public final static String ResumptionToken = "resumptionToken"; 19 | public final static String AnnexesUrlsElement = "text/body/listBibl/biblFull/editionStmt/edition[@type='current']/ref[@type='annex']"; 20 | public final static String FileElement = "text/body/listBibl/biblFull/editionStmt/edition[@type='current']/ref[@type='file'][1]"; 21 | public final static String EditionElement = "text/body/listBibl/biblFull/editionStmt/edition[@type='current']"; 22 | public final static String RefPATH = "text/body/listBibl/biblFull/publicationStmt/idno[@type='halRef']"; 23 | public final static String DoiPATH = "text/body/listBibl/biblFull/sourceDesc/biblStruct/idno[@type='doi']"; 24 | public final static String PublicationTypePATH = "text/body/listBibl/biblFull/profileDesc/textClass/classCode[@scheme='halTypology']"; 25 | public final 
static String DomainsPATH = "text/body/listBibl/biblFull/profileDesc/textClass/classCode[@scheme='halDomain']"; 26 | 27 | /* note: these are HAL specific types */ 28 | enum ConsideredTypes { 29 | ART, COMM, OUV, POSTER, DOUV, PATENT, REPORT, THESE, HDR, LECTURE, COUV, OTHER, UNDEFINED //IMG, VIDEO, AUDIOS, SON, MAP 30 | }; 31 | } 32 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/service/AnhalyticsAssetService.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.service; 2 | 3 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 4 | import org.apache.commons.io.IOUtils; 5 | import org.apache.commons.lang3.StringUtils; 6 | 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | 10 | /** 11 | * Grobid assets provider. 12 | * 13 | * @author Achraf 14 | */ 15 | import java.util.concurrent.atomic.AtomicLong; 16 | 17 | import org.springframework.web.bind.annotation.*; 18 | import org.springframework.http.MediaType; 19 | 20 | import javax.servlet.http.HttpServletResponse; 21 | import javax.servlet.ServletOutputStream; 22 | 23 | @RestController 24 | public class AnhalyticsAssetService { 25 | 26 | private MongoFileManager mm; 27 | 28 | private static String KEY = "key"; // :) 29 | 30 | public AnhalyticsAssetService() throws IOException { 31 | this.mm = MongoFileManager.getInstance(false); 32 | // Properties prop = new Properties(); 33 | // try { 34 | // ClassLoader classLoader = AnhalyticsAssetService.class.getClassLoader(); 35 | // prop.load(classLoader.getResourceAsStream("harvest.properties")); 36 | // } catch (Exception exp) { 37 | // throw new PropertyException("Cannot open file of harvest.properties", exp); 38 | // } 39 | // KEY = prop.getProperty("harvest.service_key"); 40 | } 41 | 42 | // @Path("asset") 43 | // @GET 44 | // public Response getImage(@QueryParam("id") String 
id, @QueryParam("filename") String filename, @QueryParam("key") String key) { 45 | // 46 | // Response response = null; 47 | // InputStream is = null; 48 | // if (StringUtils.equals(key, KEY)) { 49 | // try { 50 | // is = mm.getFulltextByAnhalyticsId(id); 51 | // if (is == null) { 52 | // response = Response.status(Status.NOT_FOUND).build(); 53 | // } else { 54 | // response = Response 55 | // .ok() 56 | // .type("image/png") 57 | // .entity(IOUtils.toByteArray(is)) 58 | // .header("Content-Disposition", "attachment; filename=\"" + filename + "\"") 59 | // .build(); 60 | // } 61 | // 62 | // } catch (Exception exp) { 63 | // response = Response.status(Status.INTERNAL_SERVER_ERROR).type("text/plain").entity(exp.getMessage()).build(); 64 | // } finally { 65 | // IOUtils.closeQuietly(is); 66 | // } 67 | // } else { 68 | // response = Response.status(Status.UNAUTHORIZED).type("text/plain").build(); 69 | // } 70 | // return response; 71 | // } 72 | 73 | @ResponseBody 74 | @RequestMapping(value = "/pdf", method = RequestMethod.GET, produces = MediaType.IMAGE_PNG_VALUE) 75 | public void getPDF(@RequestParam(value="id") String id, @RequestParam(value="key") String key, HttpServletResponse response) throws IOException { 76 | InputStream is = null; 77 | System.out.println(id); 78 | if (StringUtils.equals(key, KEY)) { 79 | try { 80 | is = mm.getFulltextByAnhalyticsId(id); 81 | if (is == null) { 82 | response.setStatus(HttpServletResponse.SC_NOT_FOUND); 83 | } else { 84 | response.setStatus(HttpServletResponse.SC_OK); 85 | response.addHeader("content-type", "application/pdf"); 86 | IOUtils.copy(is, response.getOutputStream()); 87 | response.addHeader("Content-Disposition", "filename=\"" + id + ".pdf\""); 88 | response.addHeader("Access-Control-Allow-Origin", "*"); 89 | response.addHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT"); 90 | response.addHeader("Access-Control-Allow-Headers", "Range"); 91 | response.addHeader("Access-Control-Expose-Headers", 
"Accept-Ranges, Content-Encoding, Content-Length, Content-Range"); 92 | } 93 | 94 | } catch (Exception exp) { 95 | response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); 96 | response.addHeader("content-type", "text/plain"); 97 | ServletOutputStream sout = response.getOutputStream(); 98 | sout.print(exp.getMessage()); 99 | } finally { 100 | IOUtils.closeQuietly(is); 101 | } 102 | } else { 103 | response.setStatus(HttpServletResponse.SC_UNAUTHORIZED); 104 | response.addHeader("content-type", "text/plain"); 105 | } 106 | } 107 | 108 | @RequestMapping(value = "/hello", method = RequestMethod.GET, produces = MediaType.TEXT_HTML_VALUE) 109 | public String sayHtmlHello() { 110 | return " " + "" + "Hello Anhalytics" + "" 111 | + "

" + "Hello Anhalytics" + "

" + " "; 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/service/Application.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.service; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class Application { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(Application.class, args); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/teibuild/Steps.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.teibuild; 2 | 3 | public enum Steps { 4 | TRANSFORM, 5 | APPEND_FULLTEXT; 6 | } -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/java/fr/inria/anhalytics/harvest/teibuild/TeiCorpusBuilderProcess.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.harvest.teibuild; 2 | 3 | import fr.inria.anhalytics.commons.data.BiblioObject; 4 | import fr.inria.anhalytics.commons.managers.MongoFileManager; 5 | import fr.inria.anhalytics.commons.properties.HarvestProperties; 6 | import fr.inria.anhalytics.harvest.exceptions.UnreachableGrobidServiceException; 7 | import fr.inria.anhalytics.harvest.grobid.GrobidService; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.util.concurrent.*; 12 | 13 | import static org.apache.commons.lang3.StringUtils.lowerCase; 14 | 15 | /** 16 | * Appends available harvested/extracted data to the teiCorpus. 
17 | * 18 | * @author Achraf 19 | */ 20 | public class TeiCorpusBuilderProcess { 21 | 22 | private static final Logger logger = LoggerFactory.getLogger(TeiCorpusBuilderProcess.class); 23 | 24 | private MongoFileManager mm; 25 | 26 | public TeiCorpusBuilderProcess() { 27 | this.mm = MongoFileManager.getInstance(false); 28 | } 29 | 30 | /** 31 | * Formats the metadata and initializes the TEICorpus Based on the Grobid 32 | * standard and pub2TEI. Pub2TEI equivalent. 33 | */ 34 | public void transformMetadata() { 35 | 36 | ExecutorService executor = Executors.newFixedThreadPool(HarvestProperties.getNbThreads()); 37 | 38 | boolean initResult = false; 39 | if (HarvestProperties.isReset()) { 40 | initResult = mm.initObjects(lowerCase(HarvestProperties.getSource())); 41 | } else { 42 | initResult = mm.initObjects(lowerCase(HarvestProperties.getSource()), MongoFileManager.ONLY_NOT_PROCESSED_TRANSFORM_METADATA_PROCESS); 43 | } 44 | 45 | if (initResult) { 46 | while (mm.hasMore()) { 47 | BiblioObject biblioObject = mm.nextBiblioObject(); 48 | if (!HarvestProperties.isReset() && biblioObject.getIsProcessedByPub2TEI()) { 49 | logger.info("\t\t Already transformed, Skipping... " + biblioObject.getRepositoryDocId()); 50 | continue; 51 | } 52 | biblioObject.setMetadata(mm.getMetadata(biblioObject)); 53 | Runnable worker = new TeiBuilderWorker(biblioObject, Steps.TRANSFORM); 54 | executor.execute(worker); 55 | } 56 | } 57 | 58 | executor.shutdown(); 59 | logger.info("Jobs done, shutting down thread pool. The executor will wait 1 minutes before forcing off. "); 60 | try { 61 | if (!executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES)) { 62 | executor.shutdownNow(); 63 | } 64 | } catch (InterruptedException e) { 65 | executor.shutdownNow(); 66 | } 67 | logger.info("Finished all threads"); 68 | } 69 | 70 | /** 71 | * Appends Grobid TEI to preexisting TEICorpus. Completes the missing 72 | * metadata parts, abstract, keywords, publication date, and authors 73 | * affiliations. 
74 | */ 75 | public void addGrobidFulltextToTEICorpus() { 76 | ExecutorService executor = Executors.newFixedThreadPool(HarvestProperties.getNbThreads()); 77 | try { 78 | if (!GrobidService.isGrobidOk()) { 79 | return; 80 | } 81 | boolean initResult = false; 82 | 83 | if (HarvestProperties.isReset()) { 84 | initResult = mm.initObjects(null, MongoFileManager.ONLY_WITH_FULLTEXT_PROCESS); 85 | } else { 86 | initResult = mm.initObjects(null, MongoFileManager.ONLY_NOT_PROCESSED_FULLTEXT_APPEND_PROCESS); 87 | } 88 | 89 | if (initResult) { 90 | while (mm.hasMore()) { 91 | BiblioObject biblioObject = mm.nextBiblioObject(); 92 | if (!HarvestProperties.isReset() && biblioObject.getIsFulltextAppended()) { 93 | logger.info("\t\t Fulltext already appended, Skipping... " + biblioObject.getRepositoryDocId()); 94 | continue; 95 | } 96 | //grobid tei and tei corpus with metadata initialisation should be available. 97 | if (!biblioObject.getIsProcessedByPub2TEI()) { 98 | logger.info("\t\t Metadata TEI not found, first consider creating TEI from metadata, Skipping... " + biblioObject.getRepositoryDocId()); 99 | continue; 100 | } 101 | Runnable worker = new TeiBuilderWorker(biblioObject, Steps.APPEND_FULLTEXT); 102 | executor.execute(worker); 103 | } 104 | } 105 | 106 | 107 | executor.shutdown(); 108 | logger.info("Jobs done, shutting down thread pool. 
"); 109 | try { 110 | if (!executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES)) { 111 | executor.shutdownNow(); 112 | } 113 | } catch (InterruptedException e) { 114 | executor.shutdownNow(); 115 | } 116 | logger.info("Finished all threads"); 117 | } catch (UnreachableGrobidServiceException ugse) { 118 | logger.error(ugse.getMessage()); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | server.port = 8070 -------------------------------------------------------------------------------- /anhalytics-harvest/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /anhalytics-harvest/tmp/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anHALytics/anhalytics-core/77243bc437857a0031d1b14d28b80a7a4402e00d/anhalytics-harvest/tmp/.gitkeep -------------------------------------------------------------------------------- /anhalytics-index/src/main/java/fr/inria/anhalytics/index/exceptions/ElasticSearchConfigurationException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.index.exceptions; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public class ElasticSearchConfigurationException extends IndexingServiceException { 8 | private static final long serialVersionUID = -3337770841815682150L; 9 | 10 | public ElasticSearchConfigurationException() { 11 | super(); 12 | } 13 | 14 | public ElasticSearchConfigurationException(String 
message) { 15 | super(message); 16 | } 17 | 18 | public ElasticSearchConfigurationException(Throwable cause) { 19 | super(cause); 20 | } 21 | 22 | public ElasticSearchConfigurationException(String message, Throwable cause) { 23 | super(message, cause); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /anhalytics-index/src/main/java/fr/inria/anhalytics/index/exceptions/IndexNotCreatedException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.index.exceptions; 2 | 3 | /** 4 | * 5 | * @author azhar 6 | */ 7 | public class IndexNotCreatedException extends IndexingServiceException{ 8 | public IndexNotCreatedException() { 9 | super(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /anhalytics-index/src/main/java/fr/inria/anhalytics/index/exceptions/IndexingServiceException.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.index.exceptions; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 4 | 5 | /** 6 | * 7 | * @author azhar 8 | */ 9 | public class IndexingServiceException extends ServiceException { 10 | private static final long serialVersionUID = -3337770841815682150L; 11 | 12 | public IndexingServiceException() { 13 | super(); 14 | } 15 | 16 | public IndexingServiceException(String message) { 17 | super(message); 18 | } 19 | 20 | public IndexingServiceException(Throwable cause) { 21 | super(cause); 22 | } 23 | 24 | public IndexingServiceException(String message, Throwable cause) { 25 | super(message, cause); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /anhalytics-index/src/main/resources/elasticSearch/analyzer.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysis" : { 3 | "analyzer" : { 4 | 
"case_insensitive_keyword" : { 5 | "type" : "custom", 6 | "tokenizer" : "keyword", 7 | "filter" : "lowercase" 8 | }, 9 | "case_insensitive" : { 10 | "type" : "custom", 11 | "tokenizer" : "standard", 12 | "filter" : "lowercase" 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /anhalytics-index/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/datamine/HALPaths.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.kb.datamine; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public interface HALPaths { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/datamine/IstexMiner.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.kb.datamine; 2 | 3 | /** 4 | * 5 | * @author achraf 6 | */ 7 | public class IstexMiner { 8 | public void extractMetadata() { 9 | 10 | 11 | 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/datamine/TeiPaths.java: -------------------------------------------------------------------------------- 1 | 2 | package fr.inria.anhalytics.kb.datamine; 3 | 4 | /** 5 | * 6 | * @author achraf 7 | */ 8 | public interface TeiPaths { 9 | 10 | public final static String MetadataElement = "/teiCorpus/teiHeader"; 11 | public final static String FulltextTeiHeaderAuthors = 
"/teiCorpus/TEI/teiHeader/fileDesc/sourceDesc/biblStruct/analytic/author"; 12 | public final static String MonogrElement = "/teiCorpus/teiHeader/fileDesc/sourceDesc/biblStruct/monogr"; 13 | public final static String IdnoElement = "/teiCorpus/teiHeader/fileDesc/sourceDesc/biblStruct/idno"; 14 | public final static String TitleElement = "/teiCorpus/teiHeader/fileDesc/titleStmt/title"; 15 | public final static String LanguageElement = "/teiCorpus/teiHeader/profileDesc/langUsage/language"; 16 | public final static String TypologyElement = "/teiCorpus/teiHeader/profileDesc/textClass/classCode[@scheme=\"typology\"]"; 17 | public final static String SubmissionDateElement = "/teiCorpus/teiHeader/fileDesc/editionStmt/edition[@type=\"current\"]/date[@type=\"whenSubmitted\"]"; 18 | public final static String DomainElement = "/teiCorpus/teiHeader/profileDesc/textClass/classCode[@scheme=\"domain\"]"; 19 | public final static String EditorElement = "/teiCorpus/teiHeader/fileDesc/sourceDesc/biblStruct/analytic/editor"; 20 | public final static String AuthorElement = "/teiCorpus/teiHeader/fileDesc/sourceDesc/biblStruct/analytic/author"; 21 | 22 | } 23 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/exceptions/NumberOfCoAuthorsExceededException.java: -------------------------------------------------------------------------------- 1 | 2 | package fr.inria.anhalytics.kb.exceptions; 3 | 4 | /** 5 | * 6 | * @author azhar 7 | */ 8 | public class NumberOfCoAuthorsExceededException 9 | extends Exception{ 10 | public NumberOfCoAuthorsExceededException() { 11 | super(); 12 | } 13 | 14 | public NumberOfCoAuthorsExceededException(String message) { 15 | super(message); 16 | } 17 | 18 | public NumberOfCoAuthorsExceededException(Throwable cause) { 19 | super(cause); 20 | } 21 | 22 | public NumberOfCoAuthorsExceededException(String message, Throwable cause) { 23 | super(message, cause); 24 | } 25 | } 26 | 
-------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/main/Main.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.kb.main; 2 | 3 | import fr.inria.anhalytics.commons.exceptions.ServiceException; 4 | import fr.inria.anhalytics.kb.datamine.KnowledgeBaseFeeder; 5 | import fr.inria.anhalytics.commons.properties.KbProperties; 6 | import java.net.UnknownHostException; 7 | import java.sql.SQLException; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.Scanner; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | /** 15 | * 16 | * @author Achraf 17 | */ 18 | public class Main { 19 | 20 | private static final Logger logger = LoggerFactory.getLogger(Main.class); 21 | 22 | private static List availableCommands = new ArrayList() { 23 | { 24 | add("initKnowledgeBase"); 25 | add("initCitationKnowledgeBase"); 26 | add("deduplicate"); 27 | } 28 | }; 29 | 30 | public static void main(String[] args) throws UnknownHostException, SQLException { 31 | try { 32 | KbProperties.init("anhalytics.properties"); 33 | } catch (Exception exp) { 34 | logger.error(exp.getMessage()); 35 | return; 36 | } 37 | 38 | if (processArgs(args)) { 39 | Main main = new Main(); 40 | main.processCommand(); 41 | } else { 42 | System.err.println(getHelp()); 43 | return; 44 | } 45 | } 46 | 47 | private void processCommand() throws UnknownHostException, SQLException { 48 | Scanner sc = new Scanner(System.in); 49 | char reponse = ' '; 50 | String process = KbProperties.getProcessName(); 51 | try { 52 | KnowledgeBaseFeeder kbf = new KnowledgeBaseFeeder(); 53 | if (process.equals("initKnowledgeBase")) { 54 | //Initiates HAL knowledge base and creates working corpus TEI. 
55 | kbf.initKnowledgeBase(); 56 | } else if (process.equals("initCitationKnowledgeBase")) { 57 | kbf.processCitations(); 58 | } 59 | } catch (ServiceException se) { 60 | logger.error("Error: ", se); 61 | } 62 | return; 63 | } 64 | 65 | protected static boolean processArgs(final String[] pArgs) { 66 | boolean result = true; 67 | if (pArgs.length == 0) { 68 | result = false; 69 | } else { 70 | String currArg; 71 | for (int i = 0; i < pArgs.length; i++) { 72 | currArg = pArgs[i]; 73 | if (currArg.equals("-h")) { 74 | result = false; 75 | break; 76 | } else if (currArg.equals("-exe")) { 77 | String command = pArgs[i + 1]; 78 | if (availableCommands.contains(command)) { 79 | KbProperties.setProcessName(command); 80 | i++; 81 | continue; 82 | } else { 83 | System.err.println("-exe value should be one value from this list: " + availableCommands); 84 | result = false; 85 | break; 86 | } 87 | } else if (currArg.equals("--reset")) { 88 | KbProperties.setReset(true); 89 | i++; 90 | continue; 91 | } else { 92 | result = false; 93 | break; 94 | } 95 | } 96 | } 97 | return result; 98 | } 99 | 100 | protected static String getHelp() { 101 | final StringBuffer help = new StringBuffer(); 102 | help.append("HELP ANHALYTICS_KNOWLEDGE_BASE\n"); 103 | help.append("-h: displays help\n"); 104 | help.append("-dFromDate: filter start date for the process, make sure it follows the pattern : yyyy-MM-dd\n"); 105 | help.append("-dUntilDate: filter until date for the process, make sure it follows the pattern : yyyy-MM-dd\n"); 106 | help.append("-nodates: fetches entries from database with no date filtering.\n"); 107 | help.append("-exe: gives the command to execute. 
The value should be one of these : \n"); 108 | help.append("--reset: updates all the documents (beware about versions/updates) : \n"); 109 | help.append("\t" + availableCommands + "\n"); 110 | return help.toString(); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/stax/StaxParserContentHandler.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.kb.stax; 2 | 3 | import org.codehaus.stax2.XMLStreamReader2; 4 | 5 | public interface StaxParserContentHandler { 6 | 7 | void onStartDocument(XMLStreamReader2 reader); 8 | 9 | void onEndDocument(XMLStreamReader2 reader); 10 | 11 | void onStartElement(XMLStreamReader2 reader); 12 | 13 | void onEndElement(XMLStreamReader2 reader); 14 | 15 | void onCharacter(XMLStreamReader2 reader); 16 | } -------------------------------------------------------------------------------- /anhalytics-kb/src/main/java/fr/inria/anhalytics/kb/stax/StaxUtils.java: -------------------------------------------------------------------------------- 1 | package fr.inria.anhalytics.kb.stax; 2 | 3 | import org.codehaus.stax2.XMLStreamReader2; 4 | 5 | import javax.xml.namespace.QName; 6 | import javax.xml.stream.XMLStreamConstants; 7 | import javax.xml.stream.XMLStreamException; 8 | import javax.xml.stream.XMLStreamReader; 9 | import java.util.HashMap; 10 | import java.util.Map; 11 | 12 | /** 13 | * Created by ac86559 on 04/09/14. 
14 | */ 15 | public class StaxUtils { 16 | 17 | public static void traverse(XMLStreamReader2 reader, final StaxParserContentHandler contentHandler) throws XMLStreamException { 18 | traverse(reader, new StaxClosure() { 19 | @Override 20 | public void process(XMLStreamReader streamReader) throws XMLStreamException { 21 | XMLStreamReader2 reader = (XMLStreamReader2) streamReader; 22 | switch (reader.getEventType()) { 23 | case XMLStreamReader.START_DOCUMENT: 24 | contentHandler.onStartDocument(reader); 25 | break; 26 | case XMLStreamReader.START_ELEMENT: 27 | contentHandler.onStartElement(reader); 28 | break; 29 | case XMLStreamReader.END_ELEMENT: 30 | contentHandler.onEndElement(reader); 31 | break; 32 | case XMLStreamConstants.CHARACTERS: 33 | contentHandler.onCharacter(reader); 34 | break; 35 | case XMLStreamReader.END_DOCUMENT: 36 | contentHandler.onEndDocument(reader); 37 | break; 38 | } 39 | } 40 | }); 41 | } 42 | 43 | public static void traverse(XMLStreamReader streamReader, StaxClosure closure) throws XMLStreamException { 44 | while (streamReader.hasNext()) { 45 | streamReader.next(); 46 | closure.process(streamReader); 47 | } 48 | } 49 | 50 | public static void traverse(XMLStreamReader streamReader, String[] tags, StaxClosure closure) throws XMLStreamException { 51 | while (streamReader.hasNext()) { 52 | streamReader.next(); 53 | if (XMLStreamReader.START_ELEMENT == streamReader.getEventType()) { 54 | String localPart = streamReader.getName().getLocalPart(); 55 | for (String tag : tags) { 56 | if (tag.equals(localPart)) { 57 | closure.process(streamReader); 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | public static Map parse(XMLStreamReader streamReader, EndCondition endCondition, String... 
tags) throws XMLStreamException { 65 | Map mapping = new HashMap(); 66 | while (streamReader.hasNext() && !endCondition.mustExit(streamReader)) { 67 | streamReader.next(); 68 | if (XMLStreamReader.START_ELEMENT == streamReader.getEventType()) { 69 | String localPart = streamReader.getName().getLocalPart(); 70 | for (String tag : tags) { 71 | if (tag.equals(localPart)) { 72 | streamReader.next(); 73 | if (XMLStreamReader.CHARACTERS == streamReader.getEventType()) { 74 | mapping.put(tag, streamReader.getText()); 75 | } 76 | } 77 | } 78 | } 79 | } 80 | return mapping; 81 | } 82 | 83 | public static String getAttributeByLocalName(XMLStreamReader reader, String localName) { 84 | String result = ""; 85 | for (int i = 0; i < reader.getAttributeCount(); i++) { 86 | QName attribute = reader.getAttributeName(i); 87 | if (attribute != null && attribute.getLocalPart().equals(localName)) { 88 | result = reader.getAttributeValue(i); 89 | } 90 | } 91 | return result; 92 | } 93 | 94 | 95 | public interface StaxClosure { 96 | void process(XMLStreamReader streamReader) throws XMLStreamException; 97 | } 98 | 99 | public interface EndCondition { 100 | boolean mustExit(XMLStreamReader streamReader) throws XMLStreamException; 101 | } 102 | 103 | public static class ReachedClosingTagCondition implements EndCondition { 104 | 105 | private String endTagName; 106 | 107 | public ReachedClosingTagCondition(String tagName) { 108 | this.endTagName = tagName; 109 | } 110 | 111 | @Override 112 | public boolean mustExit(XMLStreamReader streamReader) throws XMLStreamException { 113 | return XMLStreamReader.END_ELEMENT == streamReader.getEventType() 114 | && endTagName.equals(streamReader.getName().getLocalPart()); 115 | } 116 | } 117 | 118 | 119 | } 120 | -------------------------------------------------------------------------------- /anhalytics-kb/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 
| 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /config/anhalytics.default.properties: -------------------------------------------------------------------------------- 1 | # MongoDB host 2 | commons.mongodb_host=localhost 3 | commons.mongodb_port=27017 4 | commons.mongodb_db=anhalytics 5 | commons.mongodb_user=admin 6 | commons.mongodb_pass=password 7 | 8 | # Grobid service 9 | harvest.grobid_host=http://localhost:8080/api 10 | harvest.nbThreads=3 11 | 12 | harvest.tmpPath=tmp 13 | harvest.service_key=8uk797Nw74 14 | 15 | 16 | harvest.crossref_host=doi.crossref.org/servlet 17 | harvest.crossref_id=id 18 | harvest.crossref_pw=pw 19 | 20 | # Mysql host 21 | kb.mysql_host=localhost 22 | kb.mysql_port=3306 23 | kb.mysql_db=anhalytics 24 | kb.mysql_bibliodb=anhalytics_biblio 25 | kb.mysql_user=anhalytics 26 | kb.mysql_pass=anhalytics 27 | 28 | # nerd 29 | annotate.nerd_host=localhost 30 | annotate.nerd_port=8090 31 | annotate.nerd.nbThreads=4 32 | 33 | annotate.keyterm_host=localhost 34 | annotate.keyterm_port=8070 35 | annotate.keyterm.nbThreads=4 36 | 37 | annotate.quantities_host=localhost 38 | annotate.quantities_port=8060 39 | annotate.quantities.nbThreads=1 40 | annotate.quantities.tmp=tmp 41 | 42 | # ElasticSearch host 43 | index.elasticSearch_host=localhost 44 | index.elasticSearch_port=9300 45 | index.elasticSearch_cluster=anhalytics_cluster2711 46 | index.elasticSearch_kbIndexName=anhalytics_kb 47 | index.elasticSearch_keytermAnnotsIndexName=annotations_keyterm 48 | index.elasticSearch_TeisIndexName=anhalytics_teis 49 | index.elasticSearch_TeisTypeName=anhalytics_teis 50 | index.elasticSearch_nerdAnnotsIndexName=annotations_nerd 51 | index.elasticSearch_quantitiesAnnotsIndexName=annotations_quantities 52 | -------------------------------------------------------------------------------- 
/config/anhalytics.test.properties: -------------------------------------------------------------------------------- 1 | # MongoDB host 2 | commons.mongodb_host=localhost 3 | commons.mongodb_port=27017 4 | commons.mongodb_db=anhalytics 5 | commons.mongodb_user=admin 6 | commons.mongodb_pass=password 7 | 8 | #source 9 | harvest.source=hal 10 | #OAI endpoint 11 | harvest.api_url=http://api.archives-ouvertes.fr/oai/hal 12 | # Grobid service 13 | harvest.grobid_host=https://traces1.inria.fr/grobid/api 14 | harvest.grobid_port= 15 | harvest.nbThreads=3 16 | 17 | harvest.tmpPath=anhalytics-harvest/tmp 18 | harvest.service_key=8uk797Nw74 19 | 20 | 21 | harvest.crossref_host=doi.crossref.org/servlet 22 | harvest.crossref_id=id 23 | harvest.crossref_pw=pw 24 | 25 | # Mysql host 26 | kb.mysql_host=localhost 27 | kb.mysql_port=3306 28 | kb.mysql_db=anhalytics 29 | kb.mysql_bibliodb=anhalytics_biblio 30 | kb.mysql_user=user 31 | kb.mysql_pass=pass 32 | 33 | # nerd 34 | annotate.nerd_host=https://traces1.inria.fr/nerd 35 | annotate.nerd_port= 36 | annotate.nerd.nbThreads=4 37 | 38 | annotate.keyterm_host=https://traces1.inria.fr/keyterm 39 | annotate.keyterm_port= 40 | annotate.keyterm.nbThreads=4 41 | 42 | 43 | annotate.quantities_host=https://traces1.inria.fr/quantities 44 | annotate.quantities_port= 45 | annotate.quantities.nbThreads=4 46 | annotate.quantities.tmp=tmp 47 | 48 | # ElasticSearch host 49 | index.elasticSearch_host=localhost 50 | index.elasticSearch_port=9200 51 | index.elasticSearch_cluster=anhalytics_cluster2711 52 | index.elasticSearch_nerdAnnotsIndexName=annotations_nerd 53 | index.elasticSearch_keytermAnnotsIndexName=annotations_keyterm 54 | index.elasticSearch_TeisIndexName=anhalytics_teis 55 | index.elasticSearch_TeisTypeName=anhalytics_teis 56 | index.elasticSearch_kbIndexName=anhalytics_kb 57 | index.elasticSearch_quantitiesAnnotsIndexName=annotations_quantities 58 | -------------------------------------------------------------------------------- 
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anHALytics/anhalytics-core/77243bc437857a0031d1b14d28b80a7a4402e00d/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
#Tue Nov 20 12:17:05 CET 2018
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip

--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem DIRNAME is the directory holding this script, APP_BASE_NAME its file name without extension.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: try the java.exe found on the PATH.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the executable path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

@rem The label targeted here immediately follows, so both outcomes of this test continue at the same place.
if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
@rem _SKIP is not referenced anywhere else in this script.
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega

--------------------------------------------------------------------------------
/logs/placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anHALytics/anhalytics-core/77243bc437857a0031d1b14d28b80a7a4402e00d/logs/placeholder
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
rootProject.name = "anhalytics"
include 'anhalytics-commons'
include 'anhalytics-harvest'
include 'anhalytics-annotate'
include 'anhalytics-kb'
include 'anhalytics-index'
//include 'anhalytics-er'
--------------------------------------------------------------------------------