├── src ├── test │ ├── java │ │ └── ca │ │ │ └── crim │ │ │ └── nlp │ │ │ └── pacte │ │ │ ├── UnitTestConstants.java │ │ │ ├── DemoTest.java │ │ │ ├── client │ │ │ ├── LexiconTest.java │ │ │ ├── AdminTest.java │ │ │ ├── SampleBuilder.java │ │ │ └── CorpusTest.java │ │ │ └── QuickConfigTest.java │ └── resources │ │ └── ca │ │ └── crim │ │ └── nlp │ │ └── pacte │ │ ├── config.properties │ │ └── client │ │ ├── FTB_fr.tagset │ │ ├── 20newsgroups.tagset │ │ ├── PTB_en.tagset │ │ ├── DOCUMENT_META.json │ │ └── Brown_en.tagset └── main │ ├── resources │ └── ca │ │ └── crim │ │ └── nlp │ │ └── pacte │ │ ├── config.properties │ │ └── client │ │ ├── FTB_fr.tagset │ │ ├── 20newsgroups.tagset │ │ ├── PTB_en.tagset │ │ ├── DOCUMENT_META.json │ │ └── Brown_en.tagset │ └── java │ └── ca │ └── crim │ └── nlp │ └── pacte │ ├── client │ ├── services │ │ ├── iServices.java │ │ └── NERService.java │ ├── SchemaData.java │ ├── PacteDocument.java │ ├── Project.java │ ├── Admin.java │ ├── Lexicon.java │ └── Corpus.java │ ├── Demo.java │ ├── Credential.java │ └── QuickConfig.java ├── LICENSE ├── pom.xml └── README.md /src/test/java/ca/crim/nlp/pacte/UnitTestConstants.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | public class UnitTestConstants { 4 | 5 | /* Fixed corpus name for unit testing purpose */ 6 | public static final String TESTCORPUS = "CorpusTest-999999999"; 7 | 8 | public static final String TRANSCODEGROUP = "Transcode task bucket"; 9 | } 10 | -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/config.properties: -------------------------------------------------------------------------------- 1 | server=https://staging-pacte.crim.ca 2 | PSCAdmin=test@test.com 3 | PSCAdminPwd=secret 4 | PACTEAdmin=support-pacte@crim.ca 5 | PACTEAdminPwd=secret 6 | StandardUser=testuser011@crim.ca 7 | StandardUserPwd=fortestingonly 8 | Verbose=false 9 | TokenRenewDelay=1 10 | ServiceUrl=http://patx-pacte.crim.ca:5050 -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/config.properties: -------------------------------------------------------------------------------- 1 | server=https://staging-pacte.crim.ca 2 | PSCAdmin=test@test.com 3 | PSCAdminPwd=secret 4 | PACTEAdmin=support-pacte@crim.ca 5 | PACTEAdminPwd=secret 6 | StandardUser=testuser011@crim.ca 7 | StandardUserPwd=fortestingonly 8 | Verbose=true 9 | TokenRenewDelay=1 10 | ServiceUrl=http://patx-pacte.crim.ca:5050 -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/DemoTest.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import org.junit.Test; 6 | 7 | import ca.crim.nlp.pacte.client.Admin; 8 | 9 | public class DemoTest { 10 | 11 | @Test 12 | public void runDemo() { 13 | QuickConfig loCfg = new QuickConfig(); 14 | Admin loAdmin = new Admin(loCfg); 15 | 16 | loAdmin.createUser("menardpa@crim.ca", "demo1testing", "pa", "menard"); 17 | 18 | assertNotNull(loAdmin.checkUser("menardpa@crim.ca", "demo1testing")); 19 | // Demo loDemo = new Demo(loCfg); 20 | // loDemo.giveRessources(true, true, true); 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/services/iServices.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client.services; 2 | 3 | public interface iServices { 4 | /** 5 | * Run the service with the preset parameters. 6 | * 7 | * @return Unique ID of the execution for status update. 8 | */ 9 | public String execute(); 10 | 11 | /** 12 | * Check the status of a specific execution. 13 | * 14 | * @return JSON containing the status of execution 15 | */ 16 | public String checkStatus(String tsUUID); 17 | 18 | /** 19 | * Check the status of the last execution 20 | * 21 | * @return 22 | */ 23 | public String checkStatus(); 24 | 25 | /** 26 | * Retrieve the service published documentation. 27 | * 28 | * @return Null if no info, a json doc otherwise. 29 | */ 30 | public String getInfo(); 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/SchemaData.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.json.JSONObject; 7 | 8 | public class SchemaData { 9 | List FeatureList = new ArrayList(); 10 | 11 | public SchemaData(String tsJson) { 12 | JSONObject loSchema = new JSONObject(tsJson); 13 | 14 | if (loSchema.has("schema")) 15 | loSchema = new JSONObject(loSchema.getJSONObject("schema").getString("schemaJsonContent")); 16 | 17 | loSchema = loSchema.getJSONObject("properties"); 18 | 19 | for (String lsKey : loSchema.keySet()) { 20 | if (",schematype,_corpusid,_documentid,offsets,".indexOf("," + lsKey.toLowerCase() + ",") < 0 ) 21 | FeatureList.add(lsKey); 22 | } 23 | 24 | System.out.println(loSchema.toString()); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/client/FTB_fr.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "French Treebank - POS", 3 | "description" : "Codification pour le FTB", 4 | "reference" : "http://www.llf.cnrs.fr/Gens/Abeille/French-Treebank-fr.php", 5 | "tagset": [{"tag": "A","description": "adjectif"}, 6 | {"tag": "Adv","description": "adverbe"}, 7 | {"tag": "CC","description": "conjonction de coordination"}, 8 | {"tag": "Cl","description": "pronom clitic faible"}, 9 | {"tag": "CS","description": "conjonction de subordination"}, 10 | {"tag": "D","description": "determinant"}, 11 | {"tag": "ET","description": "mot étranger"}, 12 | {"tag": "I","description": "interjection"}, 13 | {"tag": "NC","description": "nom commun"}, 14 | {"tag": "NP","description": "nom propre"}, 15 | {"tag": "P","description": "preposition"}, 16 | {"tag": "PRE","description": "prefix"}, 17 | {"tag": "PRO","description": "pronom fort"}, 18 | {"tag": "V","description": "verbe"}, 19 | {"tag": "PONCT","description": "ponctuation"}] 20 | } -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/client/FTB_fr.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "French Treebank - POS", 3 | "description" : "Codification pour le FTB", 4 | "reference" : "http://www.llf.cnrs.fr/Gens/Abeille/French-Treebank-fr.php", 5 | "tagset": [{"tag": "A","description": "adjectif"}, 6 | {"tag": "Adv","description": "adverbe"}, 7 | {"tag": "CC","description": "conjonction de coordination"}, 8 | {"tag": "Cl","description": "pronom clitic faible"}, 9 | {"tag": "CS","description": "conjonction de subordination"}, 10 | {"tag": "D","description": "determinant"}, 11 | {"tag": "ET","description": "mot étranger"}, 12 | {"tag": "I","description": "interjection"}, 13 | {"tag": "NC","description": "nom commun"}, 14 | {"tag": "NP","description": "nom propre"}, 15 | {"tag": "P","description": "preposition"}, 16 | {"tag": "PRE","description": "prefix"}, 17 | {"tag": "PRO","description": "pronom fort"}, 18 | {"tag": "V","description": "verbe"}, 19 | {"tag": "PONCT","description": "ponctuation"}] 20 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Computer Research Institute of Montreal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/PacteDocument.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | public class PacteDocument { 4 | 5 | private String psContent = null; 6 | private String psTitle = null; 7 | private String psID = null; 8 | private String psSource = null; 9 | private String psLanguages = null; 10 | private Long pnlDocSize = null; 11 | private String psDateAdded = null; 12 | private String psPath = null; 13 | 14 | public PacteDocument(String tsID, String tsTitle, String tsContent, String tsSource, String tsLanguages, 15 | Long tnlDocSize, String tsDateAdded, String tsPath) { 16 | psContent = tsContent; 17 | psTitle = tsTitle; 18 | psID = tsID; 19 | psSource = tsSource; 20 | psLanguages = tsLanguages; 21 | pnlDocSize = tnlDocSize; 22 | psDateAdded = tsDateAdded; 23 | psPath = tsPath; 24 | } 25 | 26 | public Long getDocSize() { 27 | return pnlDocSize; 28 | } 29 | 30 | public String getDateAdded() { 31 | return psDateAdded; 32 | } 33 | 34 | public String getPath() { 35 | return psPath; 36 | } 37 | 38 | public String getContent() { 39 | return psContent; 40 | } 41 | 42 | public String getTitle() { 43 | return psTitle; 44 | } 45 | 46 | public String getID() { 47 | return psID; 48 | } 49 | 50 | public String getSource() { 51 | return psSource; 52 | } 53 | 54 | public String getLanguages() { 55 | return psLanguages; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/client/20newsgroups.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "20 newsgroups", 3 | "description": "Classification des vingt groupes de nouvelles.", 4 | "reference": "https://archive.ics.uci.edu/ml/datasets/Twenty+Newsgroups", 5 | "tagset": [ 6 | {"tag": "graphics","description": "comp.graphics"}, 7 | {"tag": "os.ms-windows.misc","description": "comp.os.ms-windows.misc"}, 8 | {"tag": "os.ms-windows.misc","description": "comp.os.ms-windows.misc"}, 9 | {"tag": "sys.ibm.pc.hardware","description": "comp.sys.ibm.pc.hardware"}, 10 | {"tag": "sys.mac.hardware","description": "comp.sys.mac.hardware"}, 11 | {"tag": "windows.x","description": "comp.windows.x"}, 12 | {"tag": "autos","description": "rec.autos"}, 13 | {"tag": "motorcycles","description": "rec.motorcycles"}, 14 | {"tag": "baseball","description": "rec.sport.baseball"}, 15 | {"tag": "hockey","description": "rec.sport.hockey"}, 16 | {"tag": "crypt","description": "sci.crypt"}, 17 | {"tag": "electronics","description": "sci.electronics"}, 18 | {"tag": "med","description": "sci.med"}, 19 | {"tag": "space","description": "sci.space"}, 20 | {"tag": "forsale","description": "misc.forsale"}, 21 | {"tag": "politics.misc","description": "talk.politics.misc"}, 22 | {"tag": "politics.guns","description": "talk.politics.guns"}, 23 | {"tag": "politics.mideast","description": "talk.politics.mideast"}, 24 | {"tag": "religion.misc","description": "talk.religion.misc"}, 25 | {"tag": "atheism","description": "alt.atheism"}, 26 | {"tag": "christian","description": "coosoc.religion.christian"}] 27 | } -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/client/20newsgroups.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "20 newsgroups", 3 | "description": "Classification des vingt groupes de nouvelles.", 4 | "reference": "https://archive.ics.uci.edu/ml/datasets/Twenty+Newsgroups", 5 | "tagset": [ 6 | {"tag": "graphics","description": "comp.graphics"}, 7 | {"tag": "os.ms-windows.misc","description": "comp.os.ms-windows.misc"}, 8 | {"tag": "os.ms-windows.misc","description": "comp.os.ms-windows.misc"}, 9 | {"tag": "sys.ibm.pc.hardware","description": "comp.sys.ibm.pc.hardware"}, 10 | {"tag": "sys.mac.hardware","description": "comp.sys.mac.hardware"}, 11 | {"tag": "windows.x","description": "comp.windows.x"}, 12 | {"tag": "autos","description": "rec.autos"}, 13 | {"tag": "motorcycles","description": "rec.motorcycles"}, 14 | {"tag": "baseball","description": "rec.sport.baseball"}, 15 | {"tag": "hockey","description": "rec.sport.hockey"}, 16 | {"tag": "crypt","description": "sci.crypt"}, 17 | {"tag": "electronics","description": "sci.electronics"}, 18 | {"tag": "med","description": "sci.med"}, 19 | {"tag": "space","description": "sci.space"}, 20 | {"tag": "forsale","description": "misc.forsale"}, 21 | {"tag": "politics.misc","description": "talk.politics.misc"}, 22 | {"tag": "politics.guns","description": "talk.politics.guns"}, 23 | {"tag": "politics.mideast","description": "talk.politics.mideast"}, 24 | {"tag": "religion.misc","description": "talk.religion.misc"}, 25 | {"tag": "atheism","description": "alt.atheism"}, 26 | {"tag": "christian","description": "coosoc.religion.christian"}] 27 | } -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/Demo.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.URISyntaxException; 6 | import java.net.URL; 7 | import java.nio.charset.Charset; 8 | import java.nio.file.Files; 9 | import java.nio.file.Paths; 10 | 11 | import ca.crim.nlp.pacte.client.Corpus; 12 | 13 | public class Demo { 14 | 15 | private QuickConfig poCfg = null; 16 | 17 | public Demo(QuickConfig toConfig) { 18 | poCfg = toConfig; 19 | } 20 | 21 | /** 22 | * Add basic resources to a custom user 23 | * 24 | * @param tbTagset 25 | * @param tbSchemas 26 | * @param tbCorpus 27 | * @return 28 | */ 29 | public boolean giveRessources(boolean tbTagset, boolean tbSchemas, boolean tbCorpus) { 30 | Corpus loCorpus = new Corpus(poCfg); 31 | 32 | if (tbTagset) { 33 | for (File loF : getResources("ca/crim/nlp/pacte/client")) 34 | if (loF.getName().endsWith(".tagset")) 35 | System.out.println(loCorpus.createTagset(readFile(loF))); 36 | } 37 | 38 | return true; 39 | } 40 | 41 | private File[] getResources(String folder) { 42 | ClassLoader loader = Thread.currentThread().getContextClassLoader(); 43 | URL url = loader.getResource(folder); 44 | String path = url.getPath(); 45 | return new File(path).listFiles(); 46 | } 47 | 48 | private String readFile(File loResource) { 49 | try { 50 | return new String( 51 | Files.readAllBytes(Paths.get(ClassLoader.class 52 | .getResource("/ca/crim/nlp/pacte/client/" + loResource.getName()).toURI())), 53 | Charset.forName("UTF-8")); 54 | } catch (IOException | URISyntaxException e) { 55 | return null; 56 | } 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | ca.crim.nlp.pacte 4 | PacteClient 5 | 0.1.0 6 | 7 | Pacteclient 8 | Client java pour faciliter l'accès à l'api REST de la plateforme d'annotation Pacte du CRIM. 9 | 10 | Java client facilitating access to CRIM Pacte's REST api. 11 | 12 | 13 | UTF-8 14 | UTF-8 15 | 16 | 17 | Centre de recherche informatique de Montréal 18 | http://www.crim.ca 19 | 20 | http://pacte.crim.ca 21 | 22 | 23 | src\main\java 24 | 25 | 26 | maven-compiler-plugin 27 | 3.5.1 28 | 29 | 1.8 30 | 1.8 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.apache.httpcomponents 39 | httpclient 40 | 4.5.4 41 | 42 | 43 | org.json 44 | json 45 | 20141113 46 | 47 | 48 | 49 | junit 50 | junit 51 | 4.12 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/Credential.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | import java.util.Date; 4 | 5 | public class Credential { 6 | private String psUsername = null; 7 | private String psPassword = null; 8 | private String psPrenom = null; 9 | private String psNom = null; 10 | private String psToken = null; 11 | private String psUserId = null; 12 | private String psUserProfileId = null; 13 | private Date pdTokenCreation = null; 14 | 15 | public Credential(String tsUsername, String tsPassword, int tniRenewHour) { 16 | psUsername = tsUsername; 17 | psPassword = tsPassword; 18 | } 19 | 20 | public Credential(String tsUserId, String tsUsername, String tsPassword, String tsPrenom, String tsNom) { 21 | psUsername = tsUsername; 22 | psPassword = tsPassword; 23 | psUserId = tsUserId; 24 | psNom = tsNom; 25 | psPrenom = tsPrenom; 26 | } 27 | 28 | public Credential(String tsUserId, String tsUserProfileId, String tsUsername, String tsPassword) { 29 | psUsername = tsUsername; 30 | psPassword = tsPassword; 31 | psUserId = tsUserId; 32 | psUserProfileId = tsUserProfileId; 33 | } 34 | 35 | public String getUsername() { 36 | return psUsername; 37 | } 38 | 39 | public String getName() { 40 | return psPrenom; 41 | } 42 | 43 | public String getSurname() { 44 | return psNom; 45 | } 46 | 47 | public String getPassword() { 48 | return psPassword; 49 | } 50 | 51 | public String getUserId() { 52 | return psUserId; 53 | } 54 | 55 | public String getUserProfileId() { 56 | return psUserProfileId; 57 | } 58 | 59 | public String getToken() { 60 | return psToken; 61 | } 62 | 63 | public Date getTokenCreation() { 64 | return pdTokenCreation; 65 | } 66 | 67 | public void setToken(String tsNewToken) { 68 | psToken = ((tsNewToken == null) || (tsNewToken.isEmpty())) ? null : tsNewToken; 69 | pdTokenCreation = new Date(); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/client/PTB_en.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Penn Treebank - POS", 3 | "description": "Penn treebank part-of-speech tags", 4 | "reference": "http://www.comp.leeds.ac.uk/ccalas/tagsets/upenn.html", 5 | "tagset": [{"tag": "CC","description": "Coordinating conjunction"}, 6 | {"tag": "CD","description": "Cardinal number"}, 7 | {"tag": "DT","description": "Determiner"}, 8 | {"tag": "EX","description": "Existential there"}, 9 | {"tag": "FW","description": "Foreign word"}, 10 | {"tag": "IN","description": "Preposition or subordinating conjunction"}, 11 | {"tag": "JJ","description": "Adjective"}, 12 | {"tag": "JJR","description": "Adjective, comparative"}, 13 | {"tag": "JJS","description": "Adjective, superlative"}, 14 | {"tag": "LS","description": "List item marker"}, 15 | {"tag": "MD","description": "Modal"}, 16 | {"tag": "NN","description": "Noun, singular or mass"}, 17 | {"tag": "NNS","description": "Noun, plural"}, 18 | {"tag": "NNP","description": "Proper noun, singular"}, 19 | {"tag": "NNPS","description": "Proper noun, plural"}, 20 | {"tag": "PDT","description": "Predeterminer"}, 21 | {"tag": "POS","description": "Possessive ending"}, 22 | {"tag": "PRP","description": "Personal pronoun"}, 23 | {"tag": "PRP_","description": "Possessive pronoun"}, 24 | {"tag": "RB","description": "Adverb"}, 25 | {"tag": "RBR","description": "Adverb, comparative"}, 26 | {"tag": "RBS","description": "Adverb, superlative"}, 27 | {"tag": "RP","description": "Particle"}, 28 | {"tag": "SYM","description": "Symbol"}, 29 | {"tag": "TO","description": "to"}, 30 | {"tag": "UH","description": "Interjection"}, 31 | {"tag": "VB","description": "Verb, base form"}, 32 | {"tag": "VBD","description": "Verb, past tense"}, 33 | {"tag": "VBG","description": "Verb, gerund or present participle"}, 34 | {"tag": "VBN","description": "Verb, past participle"}, 35 | {"tag": "VBP","description": "Verb, non-3rd person singular present"}, 36 | {"tag": "VBZ","description": "Verb, 3rd person singular present"}, 37 | {"tag": "WDT","description": "Wh-determiner"}, 38 | {"tag": "WP","description": "Wh-pronoun"}, 39 | {"tag": "WP_","description": "Possessive wh-pronoun"}, 40 | {"tag": "WRB","description": "Wh-adverb"}] 41 | } -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/client/PTB_en.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Penn Treebank - POS", 3 | "description": "Penn treebank part-of-speech tags", 4 | "reference": "http://www.comp.leeds.ac.uk/ccalas/tagsets/upenn.html", 5 | "tagset": [{"tag": "CC","description": "Coordinating conjunction"}, 6 | {"tag": "CD","description": "Cardinal number"}, 7 | {"tag": "DT","description": "Determiner"}, 8 | {"tag": "EX","description": "Existential there"}, 9 | {"tag": "FW","description": "Foreign word"}, 10 | {"tag": "IN","description": "Preposition or subordinating conjunction"}, 11 | {"tag": "JJ","description": "Adjective"}, 12 | {"tag": "JJR","description": "Adjective, comparative"}, 13 | {"tag": "JJS","description": "Adjective, superlative"}, 14 | {"tag": "LS","description": "List item marker"}, 15 | {"tag": "MD","description": "Modal"}, 16 | {"tag": "NN","description": "Noun, singular or mass"}, 17 | {"tag": "NNS","description": "Noun, plural"}, 18 | {"tag": "NNP","description": "Proper noun, singular"}, 19 | {"tag": "NNPS","description": "Proper noun, plural"}, 20 | {"tag": "PDT","description": "Predeterminer"}, 21 | {"tag": "POS","description": "Possessive ending"}, 22 | {"tag": "PRP","description": "Personal pronoun"}, 23 | {"tag": "PRP_","description": "Possessive pronoun"}, 24 | {"tag": "RB","description": "Adverb"}, 25 | {"tag": "RBR","description": "Adverb, comparative"}, 26 | {"tag": "RBS","description": "Adverb, superlative"}, 27 | {"tag": "RP","description": "Particle"}, 28 | {"tag": "SYM","description": "Symbol"}, 29 | {"tag": "TO","description": "to"}, 30 | {"tag": "UH","description": "Interjection"}, 31 | {"tag": "VB","description": "Verb, base form"}, 32 | {"tag": "VBD","description": "Verb, past tense"}, 33 | {"tag": "VBG","description": "Verb, gerund or present participle"}, 34 | {"tag": "VBN","description": "Verb, past participle"}, 35 | {"tag": "VBP","description": "Verb, non-3rd person singular present"}, 36 | {"tag": "VBZ","description": "Verb, 3rd person singular present"}, 37 | {"tag": "WDT","description": "Wh-determiner"}, 38 | {"tag": "WP","description": "Wh-pronoun"}, 39 | {"tag": "WP_","description": "Possessive wh-pronoun"}, 40 | {"tag": "WRB","description": "Wh-adverb"}] 41 | } -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/client/LexiconTest.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import static org.junit.Assert.assertNotEquals; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import ca.crim.nlp.pacte.QuickConfig; 11 | 12 | public class LexiconTest { 13 | 14 | /** 15 | * Create the test user 16 | */ 17 | @Before 18 | public void checkTestSubject() { 19 | SampleBuilder.createTestingUser(); 20 | } 21 | 22 | @Test 23 | public void checkLexique() { 24 | String lsIdLexique = null; 25 | String lsIdDomain = null; 26 | String lsIdConcept = null; 27 | String lsIdTerm = null; 28 | 29 | Lexicon loLex = new Lexicon(new QuickConfig()); 30 | 31 | lsIdLexique = loLex.createLexicon("Test lexicon - " + System.currentTimeMillis()); 32 | assertNotEquals("", lsIdLexique); 33 | 34 | // ajouter un domaine 35 | Map loTitles = new HashMap(); 36 | loTitles.put("FR", "titre 1"); 37 | loTitles.put("EN", "title 1"); 38 | lsIdDomain = loLex.createDomain(lsIdLexique, "concept 1", null, loTitles); 39 | assertNotEquals("", lsIdDomain); 40 | 41 | // créer et lier un concept 42 | Map loDescs = new HashMap(); 43 | loDescs.put("FR", "desc 1"); 44 | loDescs.put("EN", "desc 1"); 45 | Map loExamples = new HashMap(); 46 | loExamples.put("FR", "concept 1"); 47 | loExamples.put("EN", "concept 1"); 48 | lsIdConcept = loLex.createConcept(lsIdLexique, "concept", loTitles, loExamples, loDescs); 49 | assertNotEquals("", lsIdConcept); 50 | 51 | loLex.linkDomainConcept(lsIdDomain, lsIdConcept); 52 | 53 | // ajouter des termes concurrents 54 | lsIdTerm = loLex.createTerm(lsIdLexique, "term1", "", "FR", null, null, null); 55 | assertNotEquals("", lsIdTerm); 56 | loLex.linkConceptTerm(lsIdConcept, lsIdTerm); 57 | 58 | // TODO : Détruire le lexique 59 | 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/QuickConfigTest.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotNull; 5 | import static org.junit.Assert.assertNull; 6 | 7 | import org.junit.Test; 8 | 9 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 10 | 11 | public class QuickConfigTest { 12 | 13 | @Test 14 | public void testEmptyURL() { 15 | QuickConfig loCfg = null; 16 | 17 | try { 18 | loCfg = new QuickConfig(null, "1", "2", false, 1); 19 | } catch (Exception e) { 20 | } 21 | assertNull(loCfg); 22 | 23 | try { 24 | loCfg = new QuickConfig("", "1", "2", false, 1); 25 | } catch (Exception e) { 26 | } 27 | assertNull(loCfg); 28 | } 29 | 30 | @Test 31 | public void testEmptyUser() { 32 | QuickConfig loCfg = null; 33 | try { 34 | loCfg = new QuickConfig("https://", null, "2", false, 1); 35 | } catch (Exception e) { 36 | } 37 | assertNull(loCfg); 38 | 39 | try { 40 | loCfg = new QuickConfig("https://", "", "2", false, 1); 41 | } catch (Exception e) { 42 | } 43 | assertNull(loCfg); 44 | } 45 | 46 | @Test 47 | public void testCredentials() { 48 | QuickConfig loCfg = null; 49 | 50 | loCfg = new QuickConfig("https://", "1", "2", "3", "4", "5", "6", false, 1, ""); 51 | 52 | assertNotNull(loCfg.poCred.get(USERTYPE.CustomUser)); 53 | assertNotNull(loCfg.poCred.get(USERTYPE.PacteAdmin)); 54 | assertNotNull(loCfg.poCred.get(USERTYPE.PSCAdmin)); 55 | 56 | assertEquals("1", loCfg.poCred.get(USERTYPE.PSCAdmin).getUsername()); 57 | assertEquals("2", loCfg.poCred.get(USERTYPE.PSCAdmin).getPassword()); 58 | 59 | assertEquals("3", loCfg.poCred.get(USERTYPE.PacteAdmin).getUsername()); 60 | assertEquals("4", loCfg.poCred.get(USERTYPE.PacteAdmin).getPassword()); 61 | 62 | assertEquals("5", loCfg.poCred.get(USERTYPE.CustomUser).getUsername()); 63 | assertEquals("6", loCfg.poCred.get(USERTYPE.CustomUser).getPassword()); 64 | } 65 | 66 | @Test 67 | public void testDefaultAdminConfig() { 68 | QuickConfig loCfg = new QuickConfig(); 69 | 70 | assertNotNull(loCfg.getToken(loCfg.getUserCredential(USERTYPE.PacteAdmin))); 71 | assertNotNull(loCfg.getToken(loCfg.getUserCredential(USERTYPE.PSCAdmin))); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PacteClient 2 | 3 | (For English, see below.) 4 | 5 | Client Java pour un accès simplifié aux données et fonctionnalités de la [plateforme PACTE](http://pacte.crim.ca). 6 | 7 | ## Utilisation 8 | 9 | Définir l'adresse du serveur et vos accès à l'instance de PACTE utilisé : 10 | 11 | ``` 12 | QuickConfig config = new QuickConfig( "https://patx-pacte.crim.ca", "", "", false, 1); 13 | ``` 14 | 15 | Vous pouvez alors instancier la classe nécessaire pour avoir accès aux fonctions (ici avec la classe `Corpus`) : 16 | 17 | ``` 18 | Corpus corpus = new Corpus(config); 19 | corpus.createCorpus("Nouveau corpus", "fr_fr,en_en"); 20 | ``` 21 | 22 | ## Contributeurs et remerciements 23 | 24 | Ce client été produit par l’équipe [Parole et Texte](http://crim.ca/fr/equipes/parole-et-texte) du CRIM dans le cadre du projet [PACTE](http://pacte.crim.ca). Le projet a bénéficié du soutien financier de CANARIE et du ministère de l’Économie, de la Science et de l’Innovation (MESI) du gouvernement du Québec. 25 | 26 | ## Références 27 | Si vous utilisez la plateforme PACTE pour vos recherches, prière d'utiliser la référence suivante : 28 | 29 | [1] Ménard, P. A. et Barrière, C. "PACTE: a collaborative platform for textual annotation" dans Proceedings of the 12th International Conference on Computational Semantics (IWCS 2017). Montpellier, France, du 19 au 22 septembre 2017. 30 | 31 | --- 32 | 33 | # PacteClient 34 | 35 | Java client for an easy access to data and functionalities of [PACTE platform](http://pacte.crim.ca). 36 | 37 | ## Usage 38 | 39 | Define the server address and your credentials for the PACTE's instance used: 40 | 41 | ``` 42 | QuickConfig config = new QuickConfig( "https://patx-pacte.crim.ca", "", "", false, 1); 43 | ``` 44 | 45 | You can then use the required class to access related functions (here with the `Corpus` class) : 46 | 47 | ``` 48 | Corpus corpus = new Corpus(config); 49 | corpus.createCorpus("New corpus", "fr_fr,en_en"); 50 | ``` 51 | 52 | ## Credits and acknowledgements 53 | 54 | This client has been produced by the [Speech and Text](http://crim.ca/en/teams/speech-and-text) team at CRIM as part of the [Pacte](http://pacte.crim.ca) project. The project was supported by CANARIE and the *ministère de l’Économie, de la Science et de l’Innovation* (MESI) of the Government of Québec. 55 | 56 | ## References 57 | If you use the PACTE platform for your research, kindly use the following reference: 58 | 59 | [1] Ménard, P. A. et Barrière, C. "PACTE: a collaborative platform for textual annotation" in Proceedings of the 12th International Conference on Computational Semantics (IWCS 2017). Montpellier, France, 19 to 22 September 2017 -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/client/AdminTest.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotNull; 5 | import static org.junit.Assert.assertNull; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import java.util.UUID; 9 | 10 | import org.junit.Before; 11 | import org.junit.Ignore; 12 | import org.junit.Test; 13 | 14 | import ca.crim.nlp.pacte.Credential; 15 | import ca.crim.nlp.pacte.QuickConfig; 16 | 17 | public class AdminTest { 18 | 19 | @Ignore 20 | // FIXME : when deleting a user is available on the rest api 21 | public void testCreateDeleteUser() { 22 | String lsUsername = "user-" + UUID.randomUUID().toString() + "@test.com"; 23 | String lsPwd = UUID.randomUUID().toString(); 24 | String lsPrenom = "User"; 25 | String lsNom = "Test"; 26 | String lsUserId = null; 27 | 28 | QuickConfig loCfg = new QuickConfig(); 29 | loCfg.setCustomUser(lsUsername, lsPwd); 30 | Admin loAdmin = new Admin(loCfg); 31 | lsUserId = loAdmin.createUser(lsUsername, lsPwd, lsPrenom, lsNom).getUserId(); 32 | 33 | assertNotNull(lsUserId); 34 | assertEquals(36, lsUserId.length()); 35 | assertNotNull(loAdmin.checkUser(lsUsername, lsPwd)); 36 | 37 | loCfg.setCustomUser(lsUsername, lsPwd); 38 | loAdmin.deleteUser(lsUserId); 39 | assertNull(loAdmin.checkUser(lsUsername, lsPwd)); 40 | } 41 | 42 | @Before 43 | public void checkCreateUsers() { 44 | String lsId1 = null; 45 | String lsId2 = null; 46 | String lsUsername1 = "testuser-unlinked1@test.com"; 47 | String lsPwd1 = "secret"; 48 | String lsUsername2 = "testuser-unlinked2@test.com"; 49 | String lsPwd2 = "secret"; 50 | 51 | Admin loAdmin = new Admin(new QuickConfig()); 52 | lsId1 = loAdmin.checkUser(lsUsername1, lsPwd1); 53 | lsId2 = loAdmin.checkUser(lsUsername1, lsPwd1); 54 | 55 | if (lsId1 == null) 56 | loAdmin.createUser(lsUsername1, lsPwd1, "testingUser1", "testingUser1"); 57 | 58 | if (lsId2 == null) 59 | loAdmin.createUser(lsUsername2, lsPwd2, "testingUser2", "testingUser2"); 60 | } 61 | 62 | @Test 63 | public void testLinkUsers() { 64 | Credential loId1 = null; 65 | Credential loId2 = null; 66 | String lsUsername1 = UUID.randomUUID().toString(); 67 | String lsPwd1 = UUID.randomUUID().toString(); 68 | String lsUsername2 = UUID.randomUUID().toString(); 69 | String lsPwd2 = UUID.randomUUID().toString(); 70 | 71 | QuickConfig loCfg = new QuickConfig(); 72 | Admin loAdmin = new Admin(loCfg); 73 | 74 | loId1 = loAdmin.createUser(lsUsername1, lsPwd1, UUID.randomUUID().toString(), UUID.randomUUID().toString()); 75 | loId2 = loAdmin.createUser(lsUsername2, lsPwd2, UUID.randomUUID().toString(), UUID.randomUUID().toString()); 76 | 77 | loCfg.setCustomUser(lsUsername1, lsPwd1); 78 | loAdmin.removeContact(loId2.getUserId()); 79 | assertTrue(loAdmin.addContact(loId2.getUserProfileId())); 80 | assertTrue(loAdmin.removeContact(loId2.getUserProfileId())); 81 | 82 | loAdmin.deleteUser(loId1.getUserId()); 83 | assertNull(loAdmin.checkUser(lsUsername1, lsPwd1)); 84 | 85 | loCfg.setCustomUser(lsUsername2, lsPwd2); 86 | loAdmin.deleteUser(loId2.getUserId()); 87 | assertNull(loAdmin.checkUser(lsUsername2, lsPwd2)); 88 | } 89 | 90 | } -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/services/NERService.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client.services; 2 | 3 | import java.security.InvalidParameterException; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import org.json.JSONArray; 9 | import org.json.JSONObject; 10 | 11 | import ca.crim.nlp.pacte.QuickConfig; 12 | 13 | public class NERService implements iServices { 14 | QuickConfig poConfig = null; 15 | String psServiceUrl = null; 16 | String psCorpusId = null; 17 | String psModel = null; 18 | String psDocUrl = null; 19 | Boolean pbDoLinking = false; 20 | String psLinkingMethod = null; 21 | String psReportUrl = null; 22 | String psSchemaUpload = null; 23 | String psAnnotationUploadUrl = null; 24 | List psLabels = null; 25 | Map poParams = new HashMap(); 26 | String psLastUUID = null; 27 | final String SERVICENAME = "pacte_semantic"; 28 | final String TOOLNAME = "ner"; 29 | 30 | public enum LINKING_METHOD { 31 | Cluster, AltNameLength, Population, Graph 32 | }; 33 | 34 | public NERService(QuickConfig toCfg) throws InvalidParameterException { 35 | if (toCfg == null) 36 | throw new InvalidParameterException("QuickConfig parameter is null."); 37 | else { 38 | poConfig = toCfg; 39 | psServiceUrl = poConfig.getServiceUrl(); 40 | } 41 | } 42 | 43 | /** 44 | * 45 | * @return 46 | */ 47 | public boolean setOptions(String tsCorpusId, String tsDocUrl, String tsModelName, boolean tbDoLinking, 48 | LINKING_METHOD tsLinkingMethod, String tsReportUrl, String tsAnnotationUploadUrl, String tsSchemaUploadUrl, 49 | List tsLabels, Map tasCustomParams) { 50 | psCorpusId = tsCorpusId; 51 | psDocUrl = tsDocUrl; 52 | psModel = tsModelName; 53 | pbDoLinking = tbDoLinking; 54 | 55 | switch (tsLinkingMethod) { 56 | case Cluster: 57 | psLinkingMethod = "cluster"; 58 | break; 59 | case Population: 60 | psLinkingMethod = "population"; 61 | break; 62 | case Graph: 63 | psLinkingMethod = "graph"; 64 | break; 65 | case AltNameLength: 66 | psLinkingMethod = "altnamelength"; 67 | break; 68 | } 69 | 70 | psReportUrl = tsReportUrl; 71 | psSchemaUpload = tsSchemaUploadUrl; 72 | psAnnotationUploadUrl = tsAnnotationUploadUrl; 73 | psLabels = tsLabels; 74 | if ((tasCustomParams != null) && (tasCustomParams.size() > 0)) 75 | poParams.putAll(tasCustomParams); 76 | 77 | return true; 78 | } 79 | 80 | String getJSONConfig() { 81 | JSONObject loJ = new JSONObject(); 82 | 83 | loJ.put("annot_out_url", psAnnotationUploadUrl); 84 | loJ.put("corpus_id", psCorpusId); 85 | JSONArray loLabel = new JSONArray(); 86 | for (String lsVal : psLabels) 87 | loLabel.put(lsVal); 88 | loJ.put("labels", loLabel); 89 | loJ.put("linking", pbDoLinking); 90 | loJ.put("linking_method", psLinkingMethod); 91 | loJ.put("model_name", psModel); 92 | loJ.put("report_out_url", psReportUrl); 93 | loJ.put("schema_upload_url", psSchemaUpload); 94 | loJ.put("tool", TOOLNAME); 95 | if (poParams.size() > 0) 96 | for (String lsKey : poParams.keySet()) 97 | loJ.put(lsKey, poParams.get(lsKey)); 98 | 99 | return loJ.toString(); 100 | } 101 | 102 | @Override 103 | public String execute() { 104 | String lsResults = null; 105 | 106 | lsResults = poConfig.postRequest(poConfig.getServiceUrl() + "pacte_semantic/process?doc_url=" + psDocUrl, 107 | getJSONConfig(), null); 108 | JSONObject loR = new JSONObject(lsResults); 109 | 110 | if (loR.has("uuid")) 111 | psLastUUID = loR.getString("uuid"); 112 | 113 | return psLastUUID; 114 | } 115 | 116 | @Override 117 | public String getInfo() { 118 | return null; 119 | } 120 | 121 | @Override 122 | public String checkStatus(String tsUUID) { 123 | String lsResponse = null; 124 | 125 | lsResponse = poConfig.getRequest(poConfig.getServiceUrl() + SERVICENAME + "/status?uuid=" + tsUUID, null, null); 126 | 127 | return lsResponse; 128 | } 129 | 130 | @Override 131 | public String checkStatus() { 132 | return checkStatus(psLastUUID); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/Project.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.json.JSONArray; 7 | import org.json.JSONObject; 8 | 9 | import ca.crim.nlp.pacte.QuickConfig; 10 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 11 | 12 | public class Project { 13 | private QuickConfig poCfg = null; 14 | 15 | public Project(QuickConfig toConfig) { 16 | poCfg = toConfig; 17 | } 18 | 19 | /** 20 | * Get the unique identifier for all your documents of the specified step. 21 | * 22 | * @param tsStepId 23 | * @param tniFrom 24 | * : Starting index 25 | * @param tniTo 26 | * : End index 27 | * @return 28 | */ 29 | public List getStepDocumentIds(String tsStepId, int tniFrom, int tniTo) { 30 | List loDocs = new ArrayList(); 31 | String lsReturn = ""; 32 | 33 | for (int lniCpt = tniFrom; lniCpt <= tniTo; lniCpt++) { 34 | 35 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() 36 | + "ProjectStepDocumentDistribution/myProjectStepDocumentByIndex/" + tsStepId + "/" + lniCpt, 37 | USERTYPE.CustomUser, null); 38 | 39 | if (lsReturn != null && !lsReturn.isEmpty()) 40 | loDocs.add(new JSONObject(lsReturn).getString("id")); 41 | 42 | } 43 | 44 | return loDocs; 45 | } 46 | 47 | /** 48 | * Retrieve a participant id from a step 49 | * 50 | * @param tsStepId 51 | * @param tsFirstName 52 | * @param tsLastName 53 | * @return 54 | */ 55 | public String getStepParticipantId(String tsStepId, String tsFirstName, String tsLastName) { 56 | String lsReturn = ""; 57 | 58 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "/ProjectSteps/projectStep/" + tsStepId, 59 | USERTYPE.CustomUser, null); 60 | 61 | if (lsReturn != null && !lsReturn.isEmpty()) { 62 | JSONArray loParts = new JSONObject(lsReturn).getJSONArray("participants"); 63 | 64 | for (int lniCpt = 0; lniCpt < loParts.length(); lniCpt++) { 65 | JSONObject loP = loParts.getJSONObject(lniCpt); 66 | if (loP.getString("firstname").equalsIgnoreCase(tsFirstName) 67 | && loP.getString("lastname").equalsIgnoreCase(tsLastName)) 68 | return loP.getString("id"); 69 | } 70 | } 71 | 72 | return null; 73 | } 74 | 75 | /** 76 | * Retrieve a project unique identifier by name. 77 | * 78 | * @param tsProjectName 79 | * @return 80 | */ 81 | public String getProjectId(String tsProjectName) { 82 | String lsId = null; 83 | String lsReturn = null; 84 | 85 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "/Projects/projects", USERTYPE.CustomUser, null); 86 | 87 | if (lsReturn != null && !lsReturn.isEmpty()) { 88 | int lniPos = lsReturn.toLowerCase().indexOf("\"title\":\"" + tsProjectName.toLowerCase() + "\""); 89 | if (lniPos >= 0) { 90 | lniPos = lsReturn.substring(0, lniPos).lastIndexOf("\"id\":\"") + 6; 91 | lsId = lsReturn.substring(lniPos, lniPos + 36); 92 | // System.out.println("Project " + tsProjectName + " (" + lsId + 93 | // ") a été trouvé!"); 94 | return lsId; 95 | } 96 | } 97 | 98 | return null; 99 | } 100 | 101 | /** 102 | * Retrieve a step unique identifier by name from a project. 103 | * 104 | * @param tsProjectName 105 | * @return 106 | */ 107 | public String getStepId(String tsProjectId, String lsStepName) { 108 | String lsId = null; 109 | String lsReturn = null; 110 | 111 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "ProjectSteps/projectSteps/" + tsProjectId, 112 | USERTYPE.CustomUser, null); 113 | 114 | if (lsReturn != null && !lsReturn.isEmpty()) { 115 | int lniPos = lsReturn.toLowerCase().indexOf("\"title\":\"" + lsStepName.toLowerCase() + "\""); 116 | if (lniPos >= 0) { 117 | lniPos = lsReturn.substring(0, lniPos).lastIndexOf("\"id\":\"") + 6; 118 | lsId = lsReturn.substring(lniPos, lniPos + 36); 119 | // System.out.println("Project " + tsProjectName + " (" + lsId + 120 | // ") a été trouvé!"); 121 | return lsId; 122 | } 123 | } 124 | 125 | return null; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/client/DOCUMENT_META.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "targetType": "document", 4 | "schemaType": "DOCUMENT_META", 5 | "title": "DOCUMENT_META", 6 | "type": "object", 7 | "required": [ 8 | "schemaType", 9 | "_corpusID", 10 | "_documentID" 11 | ], 12 | "properties": { 13 | "schemaType": { 14 | "type": "string", 15 | "description": "Constant: 'DOCUMENT_META'", 16 | "searchable": true, 17 | "searchModes": ["noop"], 18 | "default": "DOCUMENT_META", 19 | "locked": true 20 | }, 21 | "_documentID": { 22 | "type": "string", 23 | "description": "Internal document GUID", 24 | "searchable": true, 25 | "searchModes": ["noop"], 26 | "locked": true 27 | }, 28 | "_corpusID": { 29 | "type": "string", 30 | "description": "Internal Corpus GUID", 31 | "searchable": true, 32 | "searchModes": ["noop"], 33 | "locked": true 34 | }, 35 | "indexedLanguage": { 36 | "type": "string", 37 | "description": "primary language used in document (Elastic Search language name)", 38 | "searchable": true, 39 | "searchModes": ["noop"], 40 | "locked": false 41 | }, 42 | "detectedLanguage": { 43 | "type": "string", 44 | "description": "detected language of document", 45 | "searchable": true, 46 | "searchModes": ["noop"], 47 | "locked": false 48 | }, 49 | "detectedLanguageProb": { 50 | "type": "number", 51 | "minimum": 0, 52 | "maximum": 100, 53 | "description": "detected language probability of document", 54 | "searchable": true, 55 | "searchModes": ["noop"], 56 | "locked": false 57 | }, 58 | "file_name": { 59 | "type": "string", 60 | "description": "Name of the orignal document file (with extension)", 61 | "searchable": true, 62 | "searchModes": ["noop"], 63 | "locked": true 64 | }, 65 | "file_path": { 66 | "type": "string", 67 | "description": "Path of the document file within the source archive or directory", 68 | "searchable": true, 69 | "searchModes": ["path"], 70 | "locked": true 71 | }, 72 | "file_encoding": { 73 | "type": "string", 74 | "description": "Encoding of the source file (optional)", 75 | "searchable": true, 76 | "searchModes": ["basic"], 77 | "locked": true 78 | }, 79 | "file_type": { 80 | "type": "string", 81 | "description": "Source file MIME type (ex: text/plain; charset=UTF-8)", 82 | "searchable": true, 83 | "searchModes": ["basic"], 84 | "locked": true 85 | }, 86 | "file_creation_date": { 87 | "type": "string", 88 | "description": "Creation date of the document file", 89 | "searchable": true, 90 | "searchModes": ["basic"], 91 | "locked": true 92 | }, 93 | "file_edit_date": { 94 | "type": "string", 95 | "description": "Edit date of the document file", 96 | "searchable": true, 97 | "searchModes": ["basic"], 98 | "locked": true 99 | }, 100 | "document_size": { 101 | "type": "number", 102 | "minimum": 0, 103 | "description": "Size of the extracted text of the document file", 104 | "searchable": true, 105 | "searchModes": ["noop"], 106 | "locked": true 107 | }, 108 | "file_size": { 109 | "type": "number", 110 | "minimum": 0, 111 | "description": "Size of the document file", 112 | "searchable": true, 113 | "searchModes": ["noop"], 114 | "locked": true 115 | }, 116 | "file_extension": { 117 | "type": "string", 118 | "description": "Extension of the document file", 119 | "searchable": true, 120 | "searchModes": ["noop"], 121 | "locked": true 122 | }, 123 | "source": { 124 | "type": "string", 125 | "description": "Name of source archive", 126 | "searchable": true, 127 | "searchModes": ["basic"], 128 | "locked": true 129 | } 130 | } 131 | } -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/client/DOCUMENT_META.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "targetType": "document", 4 | "schemaType": "DOCUMENT_META", 5 | "title": "DOCUMENT_META", 6 | "type": "object", 7 | "required": [ 8 | "schemaType", 9 | "_corpusID", 10 | "_documentID" 11 | ], 12 | "properties": { 13 | "schemaType": { 14 | "type": "string", 15 | "description": "Constant: 'DOCUMENT_META'", 16 | "searchable": true, 17 | "searchModes": ["noop"], 18 | "default": "DOCUMENT_META", 19 | "locked": true 20 | }, 21 | "_documentID": { 22 | "type": "string", 23 | "description": "Internal document GUID", 24 | "searchable": true, 25 | "searchModes": ["noop"], 26 | "locked": true 27 | }, 28 | "_corpusID": { 29 | "type": "string", 30 | "description": "Internal Corpus GUID", 31 | "searchable": true, 32 | "searchModes": ["noop"], 33 | "locked": true 34 | }, 35 | "indexedLanguage": { 36 | "type": "string", 37 | "description": "primary language used in document (Elastic Search language name)", 38 | "searchable": true, 39 | "searchModes": ["noop"], 40 | "locked": false 41 | }, 42 | "detectedLanguage": { 43 | "type": "string", 44 | "description": "detected language of document", 45 | "searchable": true, 46 | "searchModes": ["noop"], 47 | "locked": false 48 | }, 49 | "detectedLanguageProb": { 50 | "type": "number", 51 | "minimum": 0, 52 | "maximum": 100, 53 | "description": "detected language probability of document", 54 | "searchable": true, 55 | "searchModes": ["noop"], 56 | "locked": false 57 | }, 58 | "file_name": { 59 | "type": "string", 60 | "description": "Name of the orignal document file (with extension)", 61 | "searchable": true, 62 | "searchModes": ["noop"], 63 | "locked": true 64 | }, 65 | "file_path": { 66 | "type": "string", 67 | "description": "Path of the document file within the source archive or directory", 68 | "searchable": true, 69 | "searchModes": ["path"], 70 | "locked": true 71 | }, 72 | "file_encoding": { 73 | "type": "string", 74 | "description": "Encoding of the source file (optional)", 75 | "searchable": true, 76 | "searchModes": ["basic"], 77 | "locked": true 78 | }, 79 | "file_type": { 80 | "type": "string", 81 | "description": "Source file MIME type (ex: text/plain; charset=UTF-8)", 82 | "searchable": true, 83 | "searchModes": ["basic"], 84 | "locked": true 85 | }, 86 | "file_creation_date": { 87 | "type": "string", 88 | "description": "Creation date of the document file", 89 | "searchable": true, 90 | "searchModes": ["basic"], 91 | "locked": true 92 | }, 93 | "file_edit_date": { 94 | "type": "string", 95 | "description": "Edit date of the document file", 96 | "searchable": true, 97 | "searchModes": ["basic"], 98 | "locked": true 99 | }, 100 | "document_size": { 101 | "type": "number", 102 | "minimum": 0, 103 | "description": "Size of the extracted text of the document file", 104 | "searchable": true, 105 | "searchModes": ["noop"], 106 | "locked": true 107 | }, 108 | "file_size": { 109 | "type": "number", 110 | "minimum": 0, 111 | "description": "Size of the document file", 112 | "searchable": true, 113 | "searchModes": ["noop"], 114 | "locked": true 115 | }, 116 | "file_extension": { 117 | "type": "string", 118 | "description": "Extension of the document file", 119 | "searchable": true, 120 | "searchModes": ["noop"], 121 | "locked": true 122 | }, 123 | "source": { 124 | "type": "string", 125 | "description": "Name of source archive", 126 | "searchable": true, 127 | "searchModes": ["basic"], 128 | "locked": true 129 | } 130 | } 131 | } -------------------------------------------------------------------------------- /src/main/resources/ca/crim/nlp/pacte/client/Brown_en.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Brown corpus - POS", 3 | "description" : "Brown corpus part--of-speech tag", 4 | "reference" : "http://clu.uni.no/icame/manuals/", 5 | "tagset": [{"tag": ".","description": "sentence (. ; ? *)"}, 6 | {"tag": "(","description": "left paren"}, 7 | {"tag": ")","description": "right paren"}, 8 | {"tag": "_ast_","description": "not, n't"}, 9 | {"tag": "--","description": "dash"}, 10 | {"tag": ",","description": "comma"}, 11 | {"tag": "_colon_","description": "colon"}, 12 | {"tag": "ABL","description": "pre-qualifier (quite, rather)"}, 13 | {"tag": "ABN","description": "pre-quantifier (half, all)"}, 14 | {"tag": "ABX","description": "pre-quantifier (both)"}, 15 | {"tag": "AP","description": "post-determiner (many, several, next)"}, 16 | {"tag": "AT","description": "article (a, the, no)"}, 17 | {"tag": "BE","description": "be"}, 18 | {"tag": "BED","description": "were"}, 19 | {"tag": "BEDZ","description": "was"}, 20 | {"tag": "BEG","description": "being"}, 21 | {"tag": "BEM","description": "am"}, 22 | {"tag": "BEN","description": "been"}, 23 | {"tag": "BER","description": "are, art"}, 24 | {"tag": "BEZ","description": "is"}, 25 | {"tag": "CC","description": "coordinating conjunction (and, or)"}, 26 | {"tag": "CD","description": "cardinal numeral (one, two, 2, etc.)"}, 27 | {"tag": "CS","description": "subordinating conjunction (if, although)"}, 28 | {"tag": "DO","description": "do"}, 29 | {"tag": "DOD","description": "did"}, 30 | {"tag": "DOZ","description": "does"}, 31 | {"tag": "DT","description": "singular determiner/quantifier (this, that)"}, 32 | {"tag": "DTI","description": "singular or plural determiner/quantifier (some, any)"}, 33 | {"tag": "DTS","description": "plural determiner (these, those)"}, 34 | {"tag": "DTX","description": "determiner/double conjunction (either)"}, 35 | {"tag": "EX","description": "existential there"}, 36 | {"tag": "FW","description": "foreign word (hyphenated before regular tag)"}, 37 | {"tag": "HV","description": "have"}, 38 | {"tag": "HVD","description": "had (past tense)"}, 39 | {"tag": "HVG","description": "having"}, 40 | {"tag": "HVN","description": "had (past participle)"}, 41 | {"tag": "IN","description": "preposition"}, 42 | {"tag": "JJ","description": "adjective"}, 43 | {"tag": "JJR","description": "comparative adjective"}, 44 | {"tag": "JJS","description": "semantically superlative adjective (chief, top)"}, 45 | {"tag": "JJT","description": "morphologically superlative adjective (biggest)"}, 46 | {"tag": "MD","description": "modal auxiliary (can, should, will)"}, 47 | {"tag": "NC","description": "cited word (hyphenated after regular tag)"}, 48 | {"tag": "NN","description": "singular or mass noun"}, 49 | {"tag": "NN_","description": "possessive singular noun"}, 50 | {"tag": "NNS","description": "plural noun"}, 51 | {"tag": "NNS_","description": "possessive plural noun"}, 52 | {"tag": "NP","description": "proper noun or part of name phrase"}, 53 | {"tag": "NP_","description": "possessive proper noun"}, 54 | {"tag": "NPS","description": "plural proper noun"}, 55 | {"tag": "NPS_","description": "possessive plural proper noun"}, 56 | {"tag": "NR","description": "adverbial noun (home, today, west)"}, 57 | {"tag": "OD","description": "ordinal numeral (first, 2nd)"}, 58 | {"tag": "PN","description": "nominal pronoun (everybody, nothing)"}, 59 | {"tag": "PN_","description": "possessive nominal pronoun"}, 60 | {"tag": "PP_","description": "possessive personal pronoun (my, our)"}, 61 | {"tag": "PP__","description": "second (nominal) possessive pronoun (mine, ours)"}, 62 | {"tag": "PPL","description": "singular reflexive/intensive personal pronoun (myself)"}, 63 | {"tag": "PPLS","description": "plural reflexive/intensive personal pronoun (ourselves)"}, 64 | {"tag": "PPO","description": "objective personal pronoun (me, him, it, them)"}, 65 | {"tag": "PPS","description": "3rd. singular nominative pronoun (he, she, it, one)"}, 66 | {"tag": "PPSS","description": "other nominative personal pronoun (I, we, they, you)"}, 67 | {"tag": "PRP","description": "Personal pronoun"}, 68 | {"tag": "PRP_","description": "Possessive pronoun"}, 69 | {"tag": "QL","description": "qualifier (very, fairly)"}, 70 | {"tag": "QLP","description": "post-qualifier (enough, indeed)"}, 71 | {"tag": "RB","description": "adverb"}, 72 | {"tag": "RBR","description": "comparative adverb"}, 73 | {"tag": "RBT","description": "superlative adverb"}, 74 | {"tag": "RN","description": "nominal adverb (here, then, indoors)"}, 75 | {"tag": "RP","description": "adverb/particle (about, off, up)"}, 76 | {"tag": "TO","description": "infinitive marker to"}, 77 | {"tag": "UH","description": "interjection, exclamation"}, 78 | {"tag": "VB","description": "verb, base form"}, 79 | {"tag": "VBD","description": "verb, past tense"}, 80 | {"tag": "VBG","description": "verb, present participle/gerund"}, 81 | {"tag": "VBN","description": "verb, past participle"}, 82 | {"tag": "VBP","description": "verb, non 3rd person, singular, present"}, 83 | {"tag": "VBZ","description": "verb, 3rd. singular present"}, 84 | {"tag": "WDT","description": "wh- determiner (what, which)"}, 85 | {"tag": "WP_","description": "possessive wh- pronoun (whose)"}, 86 | {"tag": "WPO","description": "objective wh- pronoun (whom, which, that)"}, 87 | {"tag": "WPS","description": "nominative wh- pronoun (who, which, that)"}, 88 | {"tag": "WQL","description": "wh- qualifier (how)"}, 89 | {"tag": "WRB","description": "wh- adverb (how, where, when)"}] 90 | } -------------------------------------------------------------------------------- /src/test/resources/ca/crim/nlp/pacte/client/Brown_en.tagset: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Brown corpus - POS", 3 | "description" : "Brown corpus part--of-speech tag", 4 | "reference" : "http://clu.uni.no/icame/manuals/", 5 | "tagset": [{"tag": ".","description": "sentence (. ; ? *)"}, 6 | {"tag": "(","description": "left paren"}, 7 | {"tag": ")","description": "right paren"}, 8 | {"tag": "_ast_","description": "not, n't"}, 9 | {"tag": "--","description": "dash"}, 10 | {"tag": ",","description": "comma"}, 11 | {"tag": "_colon_","description": "colon"}, 12 | {"tag": "ABL","description": "pre-qualifier (quite, rather)"}, 13 | {"tag": "ABN","description": "pre-quantifier (half, all)"}, 14 | {"tag": "ABX","description": "pre-quantifier (both)"}, 15 | {"tag": "AP","description": "post-determiner (many, several, next)"}, 16 | {"tag": "AT","description": "article (a, the, no)"}, 17 | {"tag": "BE","description": "be"}, 18 | {"tag": "BED","description": "were"}, 19 | {"tag": "BEDZ","description": "was"}, 20 | {"tag": "BEG","description": "being"}, 21 | {"tag": "BEM","description": "am"}, 22 | {"tag": "BEN","description": "been"}, 23 | {"tag": "BER","description": "are, art"}, 24 | {"tag": "BEZ","description": "is"}, 25 | {"tag": "CC","description": "coordinating conjunction (and, or)"}, 26 | {"tag": "CD","description": "cardinal numeral (one, two, 2, etc.)"}, 27 | {"tag": "CS","description": "subordinating conjunction (if, although)"}, 28 | {"tag": "DO","description": "do"}, 29 | {"tag": "DOD","description": "did"}, 30 | {"tag": "DOZ","description": "does"}, 31 | {"tag": "DT","description": "singular determiner/quantifier (this, that)"}, 32 | {"tag": "DTI","description": "singular or plural determiner/quantifier (some, any)"}, 33 | {"tag": "DTS","description": "plural determiner (these, those)"}, 34 | {"tag": "DTX","description": "determiner/double conjunction (either)"}, 35 | {"tag": "EX","description": "existential there"}, 36 | {"tag": "FW","description": "foreign word (hyphenated before regular tag)"}, 37 | {"tag": "HV","description": "have"}, 38 | {"tag": "HVD","description": "had (past tense)"}, 39 | {"tag": "HVG","description": "having"}, 40 | {"tag": "HVN","description": "had (past participle)"}, 41 | {"tag": "IN","description": "preposition"}, 42 | {"tag": "JJ","description": "adjective"}, 43 | {"tag": "JJR","description": "comparative adjective"}, 44 | {"tag": "JJS","description": "semantically superlative adjective (chief, top)"}, 45 | {"tag": "JJT","description": "morphologically superlative adjective (biggest)"}, 46 | {"tag": "MD","description": "modal auxiliary (can, should, will)"}, 47 | {"tag": "NC","description": "cited word (hyphenated after regular tag)"}, 48 | {"tag": "NN","description": "singular or mass noun"}, 49 | {"tag": "NN_","description": "possessive singular noun"}, 50 | {"tag": "NNS","description": "plural noun"}, 51 | {"tag": "NNS_","description": "possessive plural noun"}, 52 | {"tag": "NP","description": "proper noun or part of name phrase"}, 53 | {"tag": "NP_","description": "possessive proper noun"}, 54 | {"tag": "NPS","description": "plural proper noun"}, 55 | {"tag": "NPS_","description": "possessive plural proper noun"}, 56 | {"tag": "NR","description": "adverbial noun (home, today, west)"}, 57 | {"tag": "OD","description": "ordinal numeral (first, 2nd)"}, 58 | {"tag": "PN","description": "nominal pronoun (everybody, nothing)"}, 59 | {"tag": "PN_","description": "possessive nominal pronoun"}, 60 | {"tag": "PP_","description": "possessive personal pronoun (my, our)"}, 61 | {"tag": "PP__","description": "second (nominal) possessive pronoun (mine, ours)"}, 62 | {"tag": "PPL","description": "singular reflexive/intensive personal pronoun (myself)"}, 63 | {"tag": "PPLS","description": "plural reflexive/intensive personal pronoun (ourselves)"}, 64 | {"tag": "PPO","description": "objective personal pronoun (me, him, it, them)"}, 65 | {"tag": "PPS","description": "3rd. singular nominative pronoun (he, she, it, one)"}, 66 | {"tag": "PPSS","description": "other nominative personal pronoun (I, we, they, you)"}, 67 | {"tag": "PRP","description": "Personal pronoun"}, 68 | {"tag": "PRP_","description": "Possessive pronoun"}, 69 | {"tag": "QL","description": "qualifier (very, fairly)"}, 70 | {"tag": "QLP","description": "post-qualifier (enough, indeed)"}, 71 | {"tag": "RB","description": "adverb"}, 72 | {"tag": "RBR","description": "comparative adverb"}, 73 | {"tag": "RBT","description": "superlative adverb"}, 74 | {"tag": "RN","description": "nominal adverb (here, then, indoors)"}, 75 | {"tag": "RP","description": "adverb/particle (about, off, up)"}, 76 | {"tag": "TO","description": "infinitive marker to"}, 77 | {"tag": "UH","description": "interjection, exclamation"}, 78 | {"tag": "VB","description": "verb, base form"}, 79 | {"tag": "VBD","description": "verb, past tense"}, 80 | {"tag": "VBG","description": "verb, present participle/gerund"}, 81 | {"tag": "VBN","description": "verb, past participle"}, 82 | {"tag": "VBP","description": "verb, non 3rd person, singular, present"}, 83 | {"tag": "VBZ","description": "verb, 3rd. singular present"}, 84 | {"tag": "WDT","description": "wh- determiner (what, which)"}, 85 | {"tag": "WP_","description": "possessive wh- pronoun (whose)"}, 86 | {"tag": "WPO","description": "objective wh- pronoun (whom, which, that)"}, 87 | {"tag": "WPS","description": "nominative wh- pronoun (who, which, that)"}, 88 | {"tag": "WQL","description": "wh- qualifier (how)"}, 89 | {"tag": "WRB","description": "wh- adverb (how, where, when)"}] 90 | } -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/Admin.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import org.json.JSONObject; 4 | 5 | import ca.crim.nlp.pacte.QuickConfig; 6 | import ca.crim.nlp.pacte.Credential; 7 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 8 | 9 | public class Admin { 10 | private QuickConfig poCfg = null; 11 | 12 | public Admin(QuickConfig toConfig) { 13 | poCfg = toConfig; 14 | } 15 | 16 | public String listAllUsers() { 17 | String lsReturn = null; 18 | 19 | lsReturn = poCfg.getRequest(poCfg.getAuthenUrl() + "psc-users-permissions-management/Users/users", 20 | USERTYPE.PSCAdmin, null); 21 | 22 | return lsReturn; 23 | } 24 | 25 | /** 26 | * Reset the password of a user 27 | * 28 | * @param tsUsername 29 | * @param tsOldPassword 30 | * @param tsNewPassword 31 | * @return 32 | */ 33 | public boolean resetPassword(String tsUsername, String tsOldPassword, String tsNewPassword) { 34 | String lsReturn = null; 35 | 36 | poCfg.setCustomUser(tsUsername, tsOldPassword); 37 | 38 | lsReturn = poCfg.putRequest(poCfg.getAuthenUrl() + "/psc-users-permissions-management/Users/myPassword", 39 | "{\"password\":\"" + tsNewPassword + "\"}", USERTYPE.CustomUser); 40 | 41 | if (lsReturn != null && lsReturn != "") { 42 | if (lsReturn.contains("\"id\":")) { 43 | System.out.println(lsReturn); 44 | } 45 | } 46 | 47 | return false; 48 | } 49 | 50 | /** 51 | * 52 | * @param tsUsername 53 | * @param tsPassword 54 | * @param tsPrenom 55 | * @param tsNom 56 | * @return 57 | */ 58 | public Credential createUser(String tsUsername, String tsPassword, String tsPrenom, String tsNom) { 59 | String lsReturn = ""; 60 | Credential loCred = null; 61 | 62 | // Ajouter un nouvel utilisateur 63 | lsReturn = poCfg 64 | .postRequest(poCfg.getPacteBackend() + "PlatformUsers/platformUser", 65 | "{\"password\": \"" + tsPassword + "\",\"firstName\":\"" + tsPrenom + "\",\"lastName\":\"" 66 | + tsNom + "\",\"email\":\"" + tsUsername + "\", \"jwtAudience\": [\"Pacte\"]}", 67 | USERTYPE.PacteAdmin); 68 | 69 | if (lsReturn != null && !lsReturn.isEmpty() && lsReturn.toLowerCase().contains("userprofileid")) { 70 | if (poCfg.getVerbose()) { 71 | System.out.println("Utilisateur " + tsUsername + " a été créé!"); 72 | System.out.println(lsReturn); 73 | } 74 | JSONObject loObj = new JSONObject(lsReturn); 75 | loCred = new Credential( loObj.getString("userId"), loObj.getString("userProfileId"), tsUsername, tsPassword); 76 | 77 | } else if (poCfg.getVerbose()) { 78 | if (lsReturn.toLowerCase().contains("conflict")) 79 | System.err.println("Utilisateur " + tsUsername + " existant! (possiblement avec d'autres accès)"); 80 | 81 | else if (lsReturn.toLowerCase().contains("Unauthorized")) 82 | System.out.println("Accès administrateur invalides!"); 83 | } 84 | 85 | return loCred; 86 | } 87 | 88 | /** 89 | * Delete the configured custom account 90 | * 91 | * @param tsUserID 92 | * @return 93 | */ 94 | public boolean deleteUser(String tsUserId) { 95 | String lsUsername = null; 96 | 97 | // Delete the user 98 | poCfg.deleteRequest(poCfg.getPSCUserBackend() + "Users/user/" + tsUserId, USERTYPE.PSCAdmin, null); 99 | 100 | lsUsername = poCfg.getRequest(poCfg.getPSCUserBackend() + "Users/user/" + tsUserId, USERTYPE.PSCAdmin, null); 101 | if (new JSONObject(lsUsername).has("username")) 102 | lsUsername = new JSONObject(lsUsername).getString("username"); 103 | else if (lsUsername.toLowerCase().indexOf("not found") >= 0) 104 | return true; 105 | 106 | return (lsUsername == null); 107 | } 108 | 109 | /** 110 | * Verify if a user exists 111 | * 112 | * @param tsUsername 113 | * @param tsPassword 114 | * @return Unique ID of user, Null is non-existant 115 | */ 116 | public String checkUser(String tsUsername, String tsPassword) { 117 | String lsReturn = ""; 118 | 119 | // Se logger et obtenir un token 120 | poCfg.setCustomUser(tsUsername, tsPassword); 121 | 122 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "PlatformUsers/myPlatformUserContacts", 123 | USERTYPE.CustomUser, null); 124 | 125 | if (lsReturn != null && !lsReturn.isEmpty() && !lsReturn.contains("Forbidden") 126 | && !lsReturn.contains("Unauthorized")) { 127 | JSONObject loJson = new JSONObject(lsReturn); 128 | if (poCfg.getVerbose()) 129 | System.out.println("Utilisateur " + loJson.getJSONObject("user").getString("userProfileId") + " existant."); 130 | return loJson.getJSONObject("user").getString("userProfileId"); 131 | } else 132 | return null; 133 | 134 | } 135 | 136 | /** 137 | * 138 | * @param tsUserID1 139 | * @param tsUserID2 140 | * @return 141 | */ 142 | public boolean addContact(String tsUserID) { 143 | String lsReturn = null; 144 | 145 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "PlatformUsers/myPlatformUserContact", 146 | "{\"contactUserProfileId\": \"" + tsUserID + "\"}", USERTYPE.CustomUser); 147 | 148 | if (lsReturn.contains("{\"contactStatus\":\"")) 149 | return true; 150 | 151 | return false; 152 | } 153 | 154 | public boolean removeContact(String tsUserID) { 155 | String lsReturn = null; 156 | 157 | if ((tsUserID == null) || tsUserID.isEmpty()) 158 | return false; 159 | 160 | lsReturn = poCfg.deleteRequest(poCfg.getPacteBackend() + "PlatformUsers/myPlatformUserContact/" + tsUserID, 161 | USERTYPE.CustomUser, null); 162 | 163 | if (!lsReturn.contains("\"Unauthorized\"")) 164 | return true; 165 | 166 | return false; 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/client/SampleBuilder.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import org.json.JSONObject; 4 | 5 | import java.io.IOException; 6 | import java.net.URISyntaxException; 7 | import java.nio.charset.Charset; 8 | import java.nio.file.Files; 9 | import java.nio.file.Paths; 10 | import java.time.*; 11 | import java.time.format.DateTimeFormatter; 12 | 13 | import ca.crim.nlp.pacte.QuickConfig; 14 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 15 | import ca.crim.nlp.pacte.UnitTestConstants; 16 | 17 | public class SampleBuilder { 18 | public static final Integer SmallCorpusSize = 2; 19 | 20 | /** 21 | * Create a small test corpus if it does not already exists. 22 | * 23 | * @param toCorpus 24 | * A preconfigured corpus instance 25 | * @return The new or existing corpus id 26 | * @throws URISyntaxException 27 | * @throws IOException 28 | */ 29 | public static String smallCorpus(Corpus toCorpus) { 30 | String lsCorpusId = null; 31 | String lsTranscodeGroup = null; 32 | String lsTrancodeSchema = null; 33 | String lsDocId = null; 34 | String lsAnnotationId = null; 35 | int lniCptFail = 0; 36 | lsCorpusId = toCorpus.getCorpusId(UnitTestConstants.TESTCORPUS); 37 | String lsCurrentTime = LocalDateTime.now().format(DateTimeFormatter.ISO_DATE_TIME); 38 | 39 | // Check for corpus integrity before returning existing id 40 | if (lsCorpusId != null) 41 | if (toCorpus.getSize(lsCorpusId) == SmallCorpusSize) 42 | return lsCorpusId; 43 | else { 44 | System.err.println("Deleting corrupted small sample corpus..."); 45 | toCorpus.deleteCorpus(lsCorpusId); 46 | } 47 | 48 | // Create the new corpus 49 | lsCorpusId = toCorpus.createCorpus(UnitTestConstants.TESTCORPUS, "fr-fr,en-en"); 50 | 51 | if (lsCorpusId != null) { 52 | lsTranscodeGroup = toCorpus.getGroupId(UnitTestConstants.TRANSCODEGROUP, lsCorpusId); 53 | while (lsTranscodeGroup == null && lniCptFail < 10) { 54 | pleaseWait(); 55 | lsTranscodeGroup = toCorpus.getGroupId(UnitTestConstants.TRANSCODEGROUP, lsCorpusId); 56 | lniCptFail++; 57 | } 58 | 59 | if (lsTranscodeGroup == null) { 60 | System.err.println("Cannot find transcoder group id"); 61 | return null; 62 | } 63 | 64 | // Register schemas 65 | try { 66 | lsTrancodeSchema = new String( 67 | Files.readAllBytes(Paths.get( 68 | ClassLoader.class.getResource("/ca/crim/nlp/pacte/client/" + Corpus.DOCMETA).toURI())), 69 | Charset.forName("UTF-8")); 70 | } catch (IOException | URISyntaxException e) { 71 | e.printStackTrace(); 72 | return null; 73 | } 74 | 75 | String lsSchemaId = toCorpus.getSchemaId(new JSONObject(lsTrancodeSchema).getString("schemaType"), "", ""); 76 | if (lsSchemaId == null) 77 | lsSchemaId = toCorpus.registerSchema(lsTrancodeSchema); 78 | toCorpus.copySchemaToGroup(lsSchemaId, lsCorpusId, lsTranscodeGroup); 79 | 80 | // Documents and their metadata 81 | lsDocId = toCorpus.addDocument(lsCorpusId, "bla bla bla", "testExport1", "yep1", "fr-fr"); 82 | pleaseWait(); 83 | lsAnnotationId = toCorpus.addAnnotation(lsCorpusId, lsTranscodeGroup, 84 | "{\"document_size\":11,\"source\":\"tamere.zip\",\"file_edit_date\":\"" + lsCurrentTime 85 | + "\",\"detectedLanguageProb\":99.99972436012376," 86 | + "\"file_type\":\"text/plain; charset=UTF-8\"," + "\"_documentID\":\"" + lsDocId + "\"," 87 | + "\"file_path\":\"/\",\"indexedLanguage\":\"fr-fr\",\"schemaType\":\"DOCUMENT_META\"," 88 | + "\"file_name\":\"1.txt\",\"file_encoding\":\"UTF-8\",\"_corpusID\":\"" + lsCorpusId 89 | + "\",\"detectedLanguage\":\"fr-fr\"," + "\"file_size\":12,\"file_creation_date\":\"" 90 | + lsCurrentTime + "\",\"file_extension\":\".txt\"}"); 91 | if (lsAnnotationId == null) 92 | System.err.println("Empty annotation 1 "); 93 | 94 | lsDocId = toCorpus.addDocument(lsCorpusId, "bli bli bli bli", "testExport2", "yep2", "fr-fr"); 95 | pleaseWait(); 96 | lsAnnotationId = toCorpus.addAnnotation(lsCorpusId, lsTranscodeGroup, 97 | "{\"document_size\":15,\"source\":\"tamere.zip\",\"file_edit_date\":\"" + lsCurrentTime 98 | + "\",\"detectedLanguageProb\":99.99972436012376," 99 | + "\"file_type\":\"text/plain; charset=UTF-8\"," + "\"_documentID\":\"" + lsDocId + "\"," 100 | + "\"file_path\":\"/\",\"indexedLanguage\":\"fr-fr\",\"schemaType\":\"DOCUMENT_META\"," 101 | + "\"file_name\":\"2.txt\",\"file_encoding\":\"UTF-8\",\"_corpusID\":\"" + lsCorpusId 102 | + "\",\"detectedLanguage\":\"fr-fr\"," + "\"file_size\":16,\"file_creation_date\":\"" 103 | + lsCurrentTime + "\",\"file_extension\":\".txt\"}"); 104 | if (lsAnnotationId == null) 105 | System.err.println("Empty annotation 2 "); 106 | 107 | // Groups 108 | toCorpus.createBucket(lsCorpusId, "group1"); 109 | toCorpus.createBucket(lsCorpusId, "group2"); 110 | toCorpus.createBucket(lsCorpusId, "group3"); 111 | 112 | // TODO Ajouter des schémas + annotations 113 | } 114 | 115 | return lsCorpusId; 116 | } 117 | 118 | /** 119 | * Create the testing user on the PACTE platform defined in the 120 | * configuration file 121 | * 122 | * @return True if the user exists 123 | */ 124 | public static boolean createTestingUser() { 125 | QuickConfig loCfg = new QuickConfig(); 126 | Admin loAdmin = new Admin(loCfg); 127 | ca.crim.nlp.pacte.Credential loUser = loCfg.getUserCredential(USERTYPE.CustomUser); 128 | String lsUserId = null; 129 | 130 | lsUserId = loAdmin.checkUser(loUser.getUsername(), loUser.getPassword()); 131 | 132 | if (lsUserId == null) { 133 | loAdmin.createUser(loUser.getUsername(), loUser.getPassword(), "TestUser", "011"); 134 | lsUserId = loAdmin.checkUser(loUser.getUsername(), loUser.getPassword()); 135 | } 136 | 137 | return lsUserId != null; 138 | } 139 | 140 | private static void pleaseWait() { 141 | try { 142 | Thread.sleep(300); 143 | } catch (InterruptedException e) { 144 | e.printStackTrace(); 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/test/java/ca/crim/nlp/pacte/client/CorpusTest.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotNull; 5 | import static org.junit.Assert.assertNull; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.UUID; 13 | 14 | import org.junit.Before; 15 | import org.junit.Rule; 16 | import org.junit.Test; 17 | import org.junit.rules.TemporaryFolder; 18 | 19 | import ca.crim.nlp.pacte.QuickConfig; 20 | 21 | public class CorpusTest { 22 | @Rule 23 | public TemporaryFolder poTestFolder = new TemporaryFolder(); 24 | 25 | /** 26 | * Create the test user 27 | */ 28 | @Before 29 | public void checkTestSubject() { 30 | SampleBuilder.createTestingUser(); 31 | } 32 | 33 | /** 34 | * Create, populate and delete a corpus. 35 | */ 36 | @Test 37 | public void corpusLifeCycle() throws InterruptedException { 38 | String lsNewCorpusName = UUID.randomUUID().toString() + UUID.randomUUID().toString(); 39 | String lsCorpusID = null; 40 | String lsReturn = null; 41 | String lsDociID = null; 42 | String lsGroupID = null; 43 | 44 | Corpus loCorpus = new Corpus(new QuickConfig()); 45 | 46 | // Create the corpus 47 | System.out.print("Creating new corpus... "); 48 | lsReturn = loCorpus.createCorpus(lsNewCorpusName, "fr-fr"); 49 | if (lsReturn != null && !lsReturn.isEmpty()) 50 | lsCorpusID = lsReturn; 51 | assertNotNull(lsCorpusID); 52 | System.out.println("Created!"); 53 | 54 | // Populate 55 | System.out.print("Adding document... "); 56 | lsDociID = loCorpus.addDocument(lsCorpusID, "bla bla bla", "bla", "none", "fr-fr"); 57 | assertNotNull(lsDociID); 58 | System.out.println("Added!"); 59 | 60 | // Create annotation group 61 | System.out.print("Creating annotation group... "); 62 | lsGroupID = loCorpus.createBucket(lsCorpusID, UUID.randomUUID().toString()); 63 | assertNotNull(lsGroupID); 64 | System.out.println("Created!"); 65 | 66 | // TODO: Removing group 67 | 68 | // Remove document 69 | Thread.sleep(500); // 70 | System.out.print("Deleting document... "); 71 | assertTrue(loCorpus.getDocument(lsCorpusID, lsDociID).getTitle().equals("bla")); 72 | System.out.println("Deleted!"); 73 | 74 | // Delete 75 | System.out.print("Deleting corpus..."); 76 | assertTrue(loCorpus.deleteCorpus(lsCorpusID)); 77 | lsReturn = loCorpus.getCorpusId(lsNewCorpusName); 78 | assertNull(lsReturn); 79 | assertNull(loCorpus.getCorpusId(lsNewCorpusName)); 80 | System.out.println("Deleted!"); 81 | System.out.println("Done!"); 82 | } 83 | 84 | /** 85 | * Export the sample corpus in a temporary folder. 86 | * 87 | * @return The path for the exported corpus 88 | * @throws IOException 89 | */ 90 | @Test 91 | public void testExportCorpus() throws IOException { 92 | String lsCorpusId = null; 93 | String lsExportPath = null; 94 | Corpus loCorpus = new Corpus(new QuickConfig()); 95 | 96 | lsCorpusId = SampleBuilder.smallCorpus(loCorpus); 97 | assertNotNull(lsCorpusId); 98 | 99 | System.out.println(lsCorpusId); 100 | System.out.println(loCorpus.getSize(lsCorpusId)); 101 | 102 | lsExportPath = exportCorpus(loCorpus, lsCorpusId); 103 | System.out.println(lsExportPath); 104 | 105 | // Stuff exported? 106 | assertNotNull(lsExportPath); 107 | assertTrue(new File(lsExportPath).list().length > 0); 108 | 109 | // Only four groups? 110 | 111 | // All documents exported? 112 | assertEquals("", 2, new File(lsExportPath, "documents").list().length); 113 | } 114 | 115 | private String exportCorpus(Corpus toCorpus, String tsCorpusId) { 116 | List lasGroupList = new ArrayList(); 117 | File loExportPath = null; 118 | 119 | try { 120 | loExportPath = poTestFolder.newFolder(); 121 | } catch (IOException e) { 122 | e.printStackTrace(); 123 | return null; 124 | } 125 | 126 | return toCorpus.exportToDisk(tsCorpusId, loExportPath.getAbsolutePath(), lasGroupList) 127 | ? loExportPath.getAbsolutePath() : null; 128 | } 129 | 130 | @Test 131 | public void testImportCorpus() throws IOException, InterruptedException { 132 | String lsCorpusId = null; 133 | String lsNewCorpusId = null; 134 | Corpus loCorpus = new Corpus(new QuickConfig()); 135 | String lsSourcePath = null; 136 | 137 | // Export the corpus before running the test 138 | System.out.println("Uploading a corpus sample for the test"); 139 | lsCorpusId = SampleBuilder.smallCorpus(loCorpus); 140 | System.out.println("Upload completed, corpus id : " + lsCorpusId); 141 | lsSourcePath = exportCorpus(loCorpus, lsCorpusId); 142 | System.out.println("Exported the corpus to local path : " + lsSourcePath); 143 | 144 | assertNotNull(lsSourcePath); 145 | assertTrue(new File(lsSourcePath).exists()); 146 | 147 | // The real test 148 | System.out.println("Starting corpus importation from disk..."); 149 | lsNewCorpusId = loCorpus.importCorpus(lsSourcePath); 150 | System.out.println("Corpus importation completed, new corpus id : " + lsNewCorpusId); 151 | 152 | assertNotNull(lsNewCorpusId); 153 | assertNotNull(loCorpus.getCorpusMetadata(lsNewCorpusId)); 154 | Thread.sleep(1000); // 155 | assertEquals(SampleBuilder.SmallCorpusSize, loCorpus.getSize(lsNewCorpusId)); 156 | 157 | // Delete imported corpus after successful test 158 | System.out.println("Deleting created corpora.."); 159 | loCorpus.deleteCorpus(lsNewCorpusId); 160 | loCorpus.deleteCorpus(lsCorpusId); 161 | System.out.println("Deletion completed."); 162 | } 163 | 164 | @Test 165 | public void testTagset() { 166 | String lsTagsetId = null; 167 | String lsTagsetName = UUID.randomUUID().toString(); 168 | String lsTagset = "{\"title\":\"" + lsTagsetName + "\", \"reference\":\"rr\", \"description\":\"desc\",\"tagset\":[{\"tag\":\"cc\",\"description\":\"dd\"}]}"; 169 | Corpus loCorpus = new Corpus(new QuickConfig()); 170 | 171 | // Create tagset 172 | loCorpus.createTagset(lsTagset); 173 | 174 | // Retreive it 175 | lsTagsetId = loCorpus.getTagsetId(lsTagsetName); 176 | assertNotNull(lsTagsetId); 177 | 178 | // Delete it 179 | assertTrue(loCorpus.deleteTagset(lsTagsetId)); 180 | assertNull(loCorpus.getTagsetId(lsTagsetName)); 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/Lexicon.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import ca.crim.nlp.pacte.QuickConfig; 4 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 5 | 6 | import java.util.Map; 7 | 8 | import org.json.JSONObject; 9 | 10 | public class Lexicon { 11 | 12 | private QuickConfig poCfg = null; 13 | 14 | public Lexicon(QuickConfig toConfig) { 15 | poCfg = toConfig; 16 | } 17 | 18 | /** 19 | * Vérifie si le lexique existe et le crée au besoin. 20 | * 21 | * @param tsNomCorpus 22 | * @param tsToken 23 | * @param tsLangage 24 | * @return 25 | */ 26 | public String createLexicon(String tsLexiconName) { 27 | String lsReturn = ""; 28 | String lsIdLexicon = null; 29 | JSONObject loNewLex = null; 30 | 31 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Lexicons/lexicon", 32 | "{\"title\": \"" + tsLexiconName.replace("\"", "\\\"") + "\"," 33 | + "\"description\": \"\",\"licence\": \"\",\"version\":\"\",\"source\":\"\",\"tagsetId\": {}}", 34 | USERTYPE.CustomUser); 35 | 36 | loNewLex = new JSONObject(lsReturn); 37 | if (lsReturn != null && !lsReturn.isEmpty() && loNewLex.has("id")) { 38 | lsIdLexicon = loNewLex.getString("id"); 39 | } else 40 | System.err.println("Create lexicon response : " 41 | + (loNewLex.has("message") ? loNewLex.getString("message") : "unknown")); 42 | 43 | return lsIdLexicon; 44 | } 45 | 46 | /** 47 | * V�rifie si un lexique existe 48 | * 49 | * @param tsLexiconName 50 | * @return GUID du lexique s'il existe, null sinon. 51 | */ 52 | public String checkLexicon(String tsLexiconName) { 53 | String lsReturn = ""; 54 | String lsIdLexicon = ""; 55 | 56 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "Lexicons/lexicons", USERTYPE.CustomUser, null).trim(); 57 | 58 | if (lsReturn != null && !lsReturn.isEmpty()) { 59 | if (lsReturn.toLowerCase().contains("\"title\":")) { 60 | 61 | String[] lasLex = lsReturn.substring(1, lsReturn.length() - 2).split("},{"); 62 | 63 | for (String lsL : lasLex) 64 | if (lsL.contains("\"" + tsLexiconName + "\"")) 65 | 66 | return lsIdLexicon; 67 | } 68 | } 69 | 70 | return null; 71 | } 72 | 73 | /** 74 | * Create a new domain with a new id 75 | * 76 | * @param tsLexiconId 77 | * @param tsPreferredName 78 | * @param tsParentId 79 | * @param tasDomainNames 80 | * : Language/Name 81 | * @return the unique ID of the new domain 82 | */ 83 | public String createDomain(String tsLexiconId, String tsPreferredName, String tsParentId, 84 | Map tasDomainNames) { 85 | String lsDom = null; 86 | String lsReturn = null; 87 | 88 | lsDom = "{\"userDefinedId\": \"\", \"lexiconId\": \"" + tsLexiconId + "\"," + " \"parentDomainId\": null ," 89 | + " \"parentDomain\": " + (tsParentId == null ? null : "\"" + tsParentId + "\"") + "," 90 | + " \"name\": \"" + tsPreferredName.replace("\"", "\\\"") + "\", \"domainsTitleLocalized\": [" 91 | + getLocalizedString("title", tasDomainNames) + "]}"; 92 | 93 | // Lancer l'appel 94 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Domains/domain", lsDom, USERTYPE.CustomUser); 95 | 96 | // Récupérer l'id 97 | JSONObject loRet = new JSONObject(lsReturn); 98 | if (loRet.has("domainId")) 99 | return loRet.getString("domainId"); 100 | else 101 | return null; 102 | } 103 | 104 | public String createConcept(String tsLexiconId, String tsPreferredName, Map tasConceptNames, 105 | Map tasExampleNames, Map tasDescriptionNames) { 106 | String lsCon = null; 107 | String lsReturn = null; 108 | 109 | lsCon = "{\"userDefinedId\": \"\", \"lexiconId\": \"" + tsLexiconId + "\"," + " \"name\": \"" 110 | + tsPreferredName.replace("\"", "\\\"") + "\", \"conceptsTitleLocalized\": [" 111 | + getLocalizedString("title", tasConceptNames) + "], \"conceptsExampleLocalized\": [" 112 | + getLocalizedString("example", tasExampleNames) + "], \"conceptsDescriptionLocalized\": [" 113 | + getLocalizedString("description", tasDescriptionNames) + "]" + "}"; 114 | 115 | // Lancer l'appel 116 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Concepts/concept", lsCon, USERTYPE.CustomUser); 117 | 118 | // Récupérer l'id 119 | JSONObject loRet = new JSONObject(lsReturn); 120 | if (loRet.has("conceptId")) 121 | return loRet.getString("conceptId"); 122 | else 123 | return null; 124 | } 125 | 126 | public void linkDomainConcept(String tsDomainId, String tsConceptId) { 127 | String lsCon = null; 128 | 129 | lsCon = "{\"conceptId\": \"" + tsConceptId + "\", \"domainId\": \"" + tsDomainId + "\"}"; 130 | 131 | // Lancer l'appel 132 | poCfg.postRequest(poCfg.getPacteBackend() + "DomainsToConcepts/domainToConcept", lsCon, USERTYPE.CustomUser); 133 | 134 | } 135 | 136 | public void linkConceptTerm(String tsConceptId, String tsTermId) { 137 | String lsCon = null; 138 | 139 | lsCon = "{\"conceptId\": \"" + tsConceptId + "\", \"termId\": \"" + tsTermId + "\"}"; 140 | 141 | // Lancer l'appel 142 | poCfg.postRequest(poCfg.getPacteBackend() + "TermsToConcepts/termToConcept", lsCon, USERTYPE.CustomUser); 143 | } 144 | 145 | public String createTerm(String tsLexiconId, String tsName, String tsUserId, String tsLangue, String tsPostag, 146 | String tsGenre, String tsNombre) { 147 | String lsTerm = null; 148 | String lsReturn = null; 149 | 150 | lsTerm = "{\"userDefinedId\": \"" + tsUserId + "\", " + " \"lexiconId\": \"" + tsLexiconId + "\" ," 151 | + " \"language\": " + (tsLangue == null ? "\"\"" : "\"" + tsLangue + "\"") + "," + " \"posTag\": " 152 | + (tsPostag == null ? "\"\"" : "\"" + tsPostag + "\"") + "," + " \"genre\": " 153 | + (tsGenre == null ? "\"\"" : "\"" + tsGenre + "\"") + "," + " \"number\": " 154 | + (tsNombre == null ? "\"\"" : "\"" + tsNombre + "\"") + "," + " \"name\": \"" 155 | + tsName.replace("\"", "\\\"") + "\"}"; 156 | 157 | // Lancer l'appel 158 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Terms/term", lsTerm, USERTYPE.CustomUser); 159 | 160 | // Récupérer l'id 161 | JSONObject loRet = new JSONObject(lsReturn); 162 | if (loRet.has("id")) 163 | return loRet.getString("id"); 164 | else 165 | return null; 166 | } 167 | 168 | /** 169 | * 170 | * @param tsLangs 171 | * @return 172 | */ 173 | private String getLocalizedString(String tsHeader, Map tsLangs) { 174 | String lsLang = ""; 175 | 176 | // Loop les langues 177 | if (tsLangs != null) 178 | for (String lsKey : tsLangs.keySet()) { 179 | lsLang += "{\"" + tsHeader + "\":\"" + tsLangs.get(lsKey).replace("\"", "\\\"") + "\",\"language\":\"" 180 | + lsKey + "\"},"; 181 | } 182 | 183 | if (tsLangs != null && !tsLangs.isEmpty() && lsLang.length() > 0) 184 | return lsLang.substring(0, lsLang.length() - 1); 185 | else 186 | return lsLang; 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/QuickConfig.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.net.URISyntaxException; 8 | import java.util.Calendar; 9 | import java.util.HashMap; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.Properties; 13 | 14 | import org.apache.http.NameValuePair; 15 | import org.apache.http.client.ClientProtocolException; 16 | import org.apache.http.client.methods.CloseableHttpResponse; 17 | import org.apache.http.client.methods.HttpDelete; 18 | import org.apache.http.client.methods.HttpGet; 19 | import org.apache.http.client.methods.HttpPost; 20 | import org.apache.http.client.methods.HttpPut; 21 | import org.apache.http.client.utils.URIBuilder; 22 | import org.apache.http.entity.StringEntity; 23 | import org.apache.http.impl.client.CloseableHttpClient; 24 | import org.apache.http.impl.client.HttpClients; 25 | import org.json.JSONObject; 26 | 27 | public class QuickConfig { 28 | String psBaseURLAuthen = ""; 29 | String psBaseURLPacteBE = ""; 30 | String psBaseURLPSCUser = ""; 31 | String psBaseURLService = ""; 32 | Integer pniTokenRenewDelay = -12; 33 | 34 | private boolean pbVerbose = true; 35 | 36 | public enum USERTYPE { 37 | PSCAdmin, PacteAdmin, CustomUser 38 | }; 39 | 40 | Map poCred = new HashMap(); 41 | 42 | // Creating an instance of HttpClient. 43 | CloseableHttpClient httpclient = HttpClients.createDefault(); 44 | 45 | public QuickConfig() { 46 | String[] lasConfig = readConfiguration(); 47 | setConfig(lasConfig[0], lasConfig[1], lasConfig[2], lasConfig[3], lasConfig[4], lasConfig[5], lasConfig[6], 48 | Boolean.parseBoolean(lasConfig[7]), Integer.parseInt(lasConfig[8]), lasConfig[9]); 49 | } 50 | 51 | private void setConfig(String tsBasePacteUrl, String tsAdminPSCUsername, String tsAdminPSCPassword, 52 | String tsAdminPacteUsername, String tsAdminPactePassword, String tsCustomUser, String tsCustomPassword, 53 | boolean tbVerbose, int tniTokenRenewDelay, String tsServiceUrl) { 54 | psBaseURLAuthen = tsBasePacteUrl.endsWith("/") ? tsBasePacteUrl : tsBasePacteUrl + "/"; 55 | psBaseURLPacteBE = psBaseURLAuthen + "pacte-backend/"; 56 | psBaseURLPSCUser = psBaseURLAuthen + "psc-users-permissions-management/"; 57 | psBaseURLService = tsServiceUrl.endsWith("/") ? tsServiceUrl : tsServiceUrl + "/"; 58 | pniTokenRenewDelay = tniTokenRenewDelay; 59 | pbVerbose = tbVerbose; 60 | 61 | // PSC admin 62 | if (tsAdminPSCUsername != null && tsAdminPSCPassword != null) 63 | poCred.put(USERTYPE.PSCAdmin, new Credential(tsAdminPSCUsername, tsAdminPSCPassword, pniTokenRenewDelay)); 64 | 65 | // Pacte admin 66 | if (tsAdminPacteUsername != null && tsAdminPactePassword != null) 67 | poCred.put(USERTYPE.PacteAdmin, 68 | new Credential(tsAdminPacteUsername, tsAdminPactePassword, pniTokenRenewDelay)); 69 | 70 | // Pacte custom user 71 | if (tsCustomUser != null && tsCustomPassword != null) 72 | poCred.put(USERTYPE.CustomUser, new Credential(tsCustomUser, tsCustomPassword, pniTokenRenewDelay)); 73 | } 74 | 75 | /** 76 | * New configuration with admin and user level credentials 77 | * 78 | * @param tsBasePacteUrl 79 | * @param tsAdminPSCUsername 80 | * @param tsAdminPSCPassword 81 | * @param tsAdminPacteUsername 82 | * @param tsAdminPactePassword 83 | * @param tsCustomUser 84 | * @param tsCustomPassword 85 | * @param tbVerbose 86 | */ 87 | public QuickConfig(String tsBasePacteUrl, String tsAdminPSCUsername, String tsAdminPSCPassword, 88 | String tsAdminPacteUsername, String tsAdminPactePassword, String tsCustomUser, String tsCustomPassword, 89 | boolean tbVerbose, int tniTokenRenewDelay, String tsServiceUrl) { 90 | if (tsBasePacteUrl == null || tsBasePacteUrl.isEmpty()) 91 | throw new IllegalArgumentException("PACTE url should not be null"); 92 | 93 | setConfig(tsBasePacteUrl, tsAdminPSCUsername, tsAdminPSCPassword, tsAdminPacteUsername, tsAdminPactePassword, 94 | tsCustomUser, tsCustomPassword, tbVerbose, tniTokenRenewDelay, tsServiceUrl); 95 | } 96 | 97 | /** 98 | * New configuration with user level credentials 99 | * 100 | * @param tsBasePacteUrl 101 | * Mandatory url to acessible PACTE platform. 102 | * @param tsCustomUser 103 | * Mandatory username to access the platform. 104 | * @param tsCustomPassword 105 | * User's password. 106 | * @param tbVerbose 107 | * True if you want detailed processing messages. 108 | */ 109 | public QuickConfig(String tsBasePacteUrl, String tsCustomUser, String tsCustomPassword, boolean tbVerbose, 110 | int tniTokenRenewDelay) { 111 | if (tsBasePacteUrl == null || tsBasePacteUrl.isEmpty()) 112 | throw new IllegalArgumentException("PACTE url should not be null"); 113 | 114 | if (tsCustomUser == null || tsCustomUser.isEmpty()) 115 | throw new IllegalArgumentException("Username should not be null"); 116 | 117 | String[] lasConfig = readConfiguration(); 118 | 119 | setConfig(tsBasePacteUrl, null, null, null, null, tsCustomUser, tsCustomPassword, tbVerbose, tniTokenRenewDelay, 120 | lasConfig[9]); 121 | } 122 | 123 | public void setCustomUser(String tsUsername, String tsPassword) { 124 | poCred.put(USERTYPE.CustomUser, new Credential(tsUsername, tsPassword, pniTokenRenewDelay)); 125 | } 126 | 127 | public Credential getUserCredential(USERTYPE toType) { 128 | if (poCred.keySet().contains(toType)) 129 | return poCred.get(toType); 130 | else 131 | return null; 132 | } 133 | 134 | /** 135 | * Set a different route for credentials (for unusual backend configuration) 136 | * 137 | * @param tsUrl 138 | * : Complete url to the authentication backend for tokens 139 | */ 140 | public void setAuthenUrl(String tsUrl) { 141 | psBaseURLAuthen = tsUrl.endsWith("/") ? tsUrl : tsUrl + "/"; 142 | } 143 | 144 | public void setServiceUrl(String tsServiceUrl) { 145 | psBaseURLService = tsServiceUrl.endsWith("/") ? tsServiceUrl : tsServiceUrl + "/"; 146 | } 147 | 148 | public String getServiceUrl() { 149 | return psBaseURLService; 150 | } 151 | 152 | public String getAuthenUrl() { 153 | return psBaseURLAuthen; 154 | } 155 | 156 | public String getPacteBackend() { 157 | return psBaseURLPacteBE; 158 | } 159 | 160 | public String getPSCUserBackend() { 161 | return psBaseURLPSCUser; 162 | } 163 | 164 | /** 165 | * Call a GET request with preconfigured user credentials. 166 | * 167 | * @param tsTargetEndpoint 168 | * @param toUsertype 169 | * @param toParams 170 | * @return 171 | */ 172 | public String getRequest(String tsTargetEndpoint, USERTYPE toUsertype, List toParams) { 173 | String lsReturn = ""; 174 | URIBuilder loUriBuilder = null; 175 | 176 | try { 177 | loUriBuilder = new URIBuilder(tsTargetEndpoint); 178 | } catch (URISyntaxException e1) { 179 | if (pbVerbose) 180 | e1.printStackTrace(); 181 | return null; 182 | } 183 | 184 | if (toParams != null) 185 | loUriBuilder.addParameters(toParams); 186 | 187 | HttpGet loGet = new HttpGet(loUriBuilder.toString()); 188 | 189 | if (toUsertype != null) { 190 | loGet.addHeader("Authorization", "Bearer " + getToken(poCred.get(toUsertype))); 191 | loGet.addHeader("AuthorizationAudience", "Pacte"); 192 | } 193 | 194 | try { 195 | CloseableHttpResponse response = httpclient.execute(loGet); 196 | lsReturn = readInput(response.getEntity().getContent()); 197 | 198 | if (pbVerbose || ((response.getStatusLine().getStatusCode() != 200) 199 | && (response.getStatusLine().getStatusCode() != 204))) 200 | System.out.println("Response Status line :" + response.getStatusLine()); 201 | 202 | response.close(); 203 | 204 | } catch (IOException e) { 205 | e.printStackTrace(); 206 | } 207 | 208 | return lsReturn; 209 | } 210 | 211 | /** 212 | * Call a DELETE request with preconfigured user credentials. 213 | * 214 | * @param tsTargetEndpoint 215 | * @param tsToken 216 | * @param toParams 217 | * @return 218 | */ 219 | public String deleteRequest(String tsTargetEndpoint, USERTYPE toUsertype, List toParams) { 220 | String lsReturn = ""; 221 | URIBuilder loUriBuilder = null; 222 | 223 | try { 224 | loUriBuilder = new URIBuilder(tsTargetEndpoint); 225 | } catch (URISyntaxException e1) { 226 | e1.printStackTrace(); 227 | } 228 | 229 | if (toParams != null) 230 | loUriBuilder.addParameters(toParams); 231 | 232 | HttpDelete loDel = new HttpDelete(tsTargetEndpoint); 233 | 234 | if (toUsertype != null) { 235 | loDel.addHeader("Authorization", "Bearer " + getToken(poCred.get(toUsertype))); 236 | loDel.addHeader("AuthorizationAudience", "Pacte"); 237 | } 238 | 239 | try { 240 | CloseableHttpResponse response = httpclient.execute(loDel); 241 | if (response.getEntity() != null) 242 | lsReturn = readInput(response.getEntity().getContent()); 243 | 244 | if (pbVerbose || ((response.getStatusLine().getStatusCode() != 200) 245 | && (response.getStatusLine().getStatusCode() != 204))) 246 | System.out.println("Response Status line :" + response.getStatusLine()); 247 | 248 | response.close(); 249 | 250 | } catch (IOException e) { 251 | e.printStackTrace(); 252 | } 253 | 254 | return lsReturn; 255 | } 256 | 257 | /** 258 | * Call a POST request with preconfigured user credentials. 259 | * 260 | * @param tsTargetEndpoint 261 | * @param tsJson2Post 262 | * @param toUsertype 263 | * @return 264 | */ 265 | public String postRequest(String tsTargetEndpoint, String tsJson2Post, USERTYPE toUsertype) { 266 | HttpPost httpost = new HttpPost(tsTargetEndpoint); 267 | // EntityBuilder loBuilder = EntityBuilder.create(); 268 | // HttpEntity entity = null; 269 | String lsResponse = ""; 270 | 271 | // Ajouter le json 272 | if (tsJson2Post != null) { 273 | StringEntity postingString = new StringEntity(tsJson2Post, "UTF-8"); 274 | httpost.setEntity(postingString); 275 | httpost.setHeader("Content-type", "application/json"); 276 | httpost.setHeader("Accept", "application/json"); 277 | } 278 | 279 | if (toUsertype != null) { 280 | httpost.setHeader("Authorization", "Bearer " + getToken(poCred.get(toUsertype))); 281 | httpost.setHeader("AuthorizationAudience", "Pacte"); 282 | } 283 | 284 | // Executing the request. 285 | try { 286 | CloseableHttpResponse response = httpclient.execute(httpost); 287 | if (response.getEntity() != null) 288 | lsResponse = readInput(response.getEntity().getContent()); 289 | 290 | if (pbVerbose || ((response.getStatusLine().getStatusCode() != 200) 291 | && (response.getStatusLine().getStatusCode() != 204))) 292 | System.out.println("Response Status line :" + response.getStatusLine()); 293 | 294 | response.close(); 295 | 296 | } catch (ClientProtocolException e) { 297 | e.printStackTrace(); 298 | 299 | } catch (IOException e) { 300 | e.printStackTrace(); 301 | } 302 | 303 | return lsResponse; 304 | } 305 | 306 | /** 307 | * Call a PUT request with preconfigured user credentials. 308 | * 309 | * @param tsTargetEndpoint 310 | * @param tsJson2Post 311 | * @param toUsertype 312 | * @return 313 | */ 314 | public String putRequest(String tsTargetEndpoint, String tsJson2Post, USERTYPE toUsertype) { 315 | HttpPut httput = new HttpPut(tsTargetEndpoint); 316 | 317 | String lsResponse = ""; 318 | 319 | // Ajouter le json 320 | if (tsJson2Post != null) { 321 | StringEntity postingString = new StringEntity(tsJson2Post, "UTF-8"); 322 | httput.setEntity(postingString); 323 | httput.setHeader("Content-type", "application/json"); 324 | httput.setHeader("Accept", "application/json"); 325 | } 326 | 327 | if (toUsertype != null) { 328 | httput.setHeader("Authorization", "Bearer " + getToken(poCred.get(toUsertype))); 329 | httput.setHeader("AuthorizationAudience", "Pacte"); 330 | } 331 | 332 | // Executing the request. 333 | try { 334 | CloseableHttpResponse response = httpclient.execute(httput); 335 | if (response.getEntity() != null) 336 | lsResponse = readInput(response.getEntity().getContent()); 337 | 338 | if (pbVerbose || ((response.getStatusLine().getStatusCode() != 200) 339 | && (response.getStatusLine().getStatusCode() != 204))) 340 | System.out.println("Response Status line :" + response.getStatusLine()); 341 | 342 | response.close(); 343 | 344 | } catch (ClientProtocolException e) { 345 | e.printStackTrace(); 346 | 347 | } catch (IOException e) { 348 | e.printStackTrace(); 349 | } 350 | 351 | return lsResponse; 352 | } 353 | 354 | public void setVerbose(boolean tbVerbose) { 355 | pbVerbose = tbVerbose; 356 | } 357 | 358 | public boolean getVerbose() { 359 | return pbVerbose; 360 | } 361 | 362 | /** 363 | * Get the authentication token, renewing it after the delay. 364 | * 365 | * @return User's token 366 | */ 367 | public String getToken(Credential toUserCredentials) { 368 | String lsReturn = null; 369 | Calendar ldElapsed = Calendar.getInstance(); 370 | ldElapsed.add(Calendar.HOUR, pniTokenRenewDelay); 371 | 372 | if (toUserCredentials.getToken() == null || toUserCredentials.getTokenCreation().before(ldElapsed.getTime())) { 373 | toUserCredentials.setToken(null); 374 | lsReturn = postRequest(psBaseURLAuthen + "psc-authentication-service/FormLogin/login", 375 | "{\"username\": \"" + toUserCredentials.getUsername() + "\",\"password\": \"" 376 | + toUserCredentials.getPassword() + "\",\"jwtAudience\": [\"Pacte\"]}", 377 | null); 378 | 379 | if (lsReturn != null && !lsReturn.isEmpty() && !lsReturn.toLowerCase().contains("unauthorized")) 380 | toUserCredentials.setToken(new JSONObject(lsReturn).getString("token")); 381 | } 382 | 383 | return toUserCredentials.getToken(); 384 | } 385 | 386 | /** 387 | * Read the input stream from http socket 388 | * 389 | * @param in 390 | * @return 391 | */ 392 | private String readInput(InputStream in) { 393 | BufferedReader reader = new BufferedReader(new InputStreamReader(in)); 394 | StringBuilder result = new StringBuilder(); 395 | String line = ""; 396 | 397 | try { 398 | while ((line = reader.readLine()) != null) { 399 | result.append(line); 400 | } 401 | } catch (IOException e) { 402 | e.printStackTrace(); 403 | } 404 | 405 | return result.toString(); 406 | } 407 | 408 | /** 409 | * 410 | */ 411 | private String[] readConfiguration() { 412 | Properties prop = new Properties(); 413 | InputStream input = null; 414 | String[] lasConfig = new String[10]; 415 | 416 | try { 417 | input = ClassLoader.class.getResourceAsStream("/ca/crim/nlp/pacte/config.properties"); 418 | 419 | // load a properties file 420 | prop.load(input); 421 | 422 | // get the property value and print it out 423 | lasConfig[0] = prop.getProperty("server"); 424 | lasConfig[1] = prop.getProperty("PSCAdmin"); 425 | lasConfig[2] = prop.getProperty("PSCAdminPwd"); 426 | lasConfig[3] = prop.getProperty("PACTEAdmin"); 427 | lasConfig[4] = prop.getProperty("PACTEAdminPwd"); 428 | lasConfig[5] = prop.getProperty("StandardUser"); 429 | lasConfig[6] = prop.getProperty("StandardUserPwd"); 430 | lasConfig[7] = prop.getProperty("Verbose"); 431 | lasConfig[8] = prop.getProperty("TokenRenewDelay"); 432 | lasConfig[9] = prop.getProperty("ServiceUrl"); 433 | 434 | } catch (IOException ex) { 435 | ex.printStackTrace(); 436 | } finally { 437 | if (input != null) { 438 | try { 439 | input.close(); 440 | } catch (IOException e) { 441 | e.printStackTrace(); 442 | } 443 | } 444 | } 445 | return lasConfig; 446 | } 447 | 448 | } 449 | -------------------------------------------------------------------------------- /src/main/java/ca/crim/nlp/pacte/client/Corpus.java: -------------------------------------------------------------------------------- 1 | package ca.crim.nlp.pacte.client; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.URISyntaxException; 6 | import java.nio.charset.Charset; 7 | import java.nio.file.DirectoryStream; 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.nio.file.Paths; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.HashMap; 14 | import java.util.List; 15 | import java.util.Map; 16 | 17 | import org.apache.http.NameValuePair; 18 | import org.apache.http.message.BasicNameValuePair; 19 | import org.json.JSONArray; 20 | import org.json.JSONObject; 21 | 22 | import ca.crim.nlp.pacte.QuickConfig; 23 | import ca.crim.nlp.pacte.QuickConfig.USERTYPE; 24 | 25 | public class Corpus { 26 | private QuickConfig poCfg = null; 27 | final static String CORPUS_STRUCT_FILE = "CorpusStructure.json"; 28 | final static String DOCMETA = "DOCUMENT_META.json"; 29 | final static String DOCMETASchema = "DOCUMENT_META.schema"; 30 | 31 | public Corpus(QuickConfig toConfig) { 32 | poCfg = toConfig; 33 | } 34 | 35 | /** 36 | * Import a corpus from exported files 37 | * 38 | * @param tsCorpusPath 39 | * Path to the corpus exported with the {@link exportToDisk} 40 | * function 41 | * @return The new corpus unique identification 42 | */ 43 | public String importCorpus(String tsCorpusPath) { 44 | String lsReturn = null; 45 | String lsCorpusNewId = null; 46 | Map laoGroups = new HashMap(); 47 | String lsCorpusOldId = null; 48 | String lsLang = ""; 49 | 50 | if (!new File(tsCorpusPath).exists()) 51 | return null; 52 | 53 | // Lire les métadata et recréer le corpus 54 | lsReturn = readFile(new File(tsCorpusPath, "corpus.json").getAbsolutePath()); 55 | if (lsReturn == null) { 56 | System.err.println("Missing corpus metadata"); 57 | return null; 58 | } 59 | 60 | JSONObject loCorpMeta = new JSONObject(lsReturn); 61 | lsCorpusOldId = loCorpMeta.getString("id"); 62 | for (int lniCpt = 0; lniCpt < loCorpMeta.getJSONArray("languages").length(); lniCpt++) 63 | lsLang += loCorpMeta.getJSONArray("languages").get(lniCpt) + ","; 64 | lsCorpusNewId = createCorpus(loCorpMeta.getString("title") + " - Import", 65 | lsLang.substring(0, lsLang.length() - 1)); 66 | try { 67 | Thread.sleep(1000); 68 | } catch (InterruptedException e1) { 69 | } 70 | 71 | // Recréer les groupes et ajouter les schémas 72 | if (!(new File(tsCorpusPath, CORPUS_STRUCT_FILE)).exists()) { 73 | System.err.println("Corpus structure is missing from exported"); 74 | return null; 75 | } 76 | lsReturn = readFile(new File(tsCorpusPath, CORPUS_STRUCT_FILE).getAbsolutePath()); 77 | JSONArray lasGroups = new JSONObject(lsReturn).getJSONArray("buckets"); 78 | 79 | for (int lniCpt = 0; lniCpt < lasGroups.length(); lniCpt++) { 80 | String lsGroupName = ((JSONObject) lasGroups.get(lniCpt)).getString("name"); 81 | String lsOldGroupId = ((JSONObject) lasGroups.get(lniCpt)).getString("id"); 82 | String lsGroupId = getGroupId(lsGroupName, lsCorpusNewId); 83 | if (lsGroupId == null) 84 | lsGroupId = createBucket(lsCorpusNewId, lsGroupName); 85 | laoGroups.put(lsOldGroupId, lsGroupId); 86 | 87 | // Ajouter les schémas disponibles 88 | JSONArray lasSchemas = ((JSONObject) lasGroups.get(lniCpt)).getJSONArray("schemas"); 89 | for (int lniCptSchema = 0; lniCptSchema < lasSchemas.length(); lniCptSchema++) { 90 | String lsSchema = null; 91 | File loFile = new File(tsCorpusPath, "groups/" + lsOldGroupId + "/" 92 | + ((JSONObject) lasSchemas.get(lniCptSchema)).getString("schemaType") + ".schema"); 93 | if (loFile.exists()) 94 | lsSchema = new JSONObject(readFile(loFile.getAbsolutePath())).getJSONObject("schema") 95 | .getString("schemaJsonContent"); 96 | else if (loFile.getName().equalsIgnoreCase(DOCMETASchema)) 97 | try { 98 | lsSchema = new String( 99 | Files.readAllBytes(Paths.get( 100 | ClassLoader.class.getResource("/ca/crim/nlp/pacte/client/" + DOCMETA).toURI())), 101 | Charset.forName("UTF-8")); 102 | } catch (IOException | URISyntaxException e) { 103 | e.printStackTrace(); 104 | } 105 | 106 | String lsSchemaId = getSchemaId(new JSONObject(lsSchema).getString("schemaType"), "", ""); 107 | if (lsSchemaId == null) 108 | lsSchemaId = registerSchema(lsSchema); 109 | copySchemaToGroup(lsSchemaId, lsCorpusNewId, lsGroupId); 110 | } 111 | } 112 | 113 | /* 114 | * Uploader les documents et créer la table de correspondance pour les 115 | * identifiants 116 | */ 117 | try (DirectoryStream directoryStream = Files 118 | .newDirectoryStream(Paths.get(new File(tsCorpusPath, "documents").getAbsolutePath()))) { 119 | for (Path path : directoryStream) { 120 | String lsDocEx = readFile(path.toAbsolutePath().toString()); 121 | JSONObject loDoc = new JSONObject(lsDocEx); 122 | String lsDocOldId = loDoc.getString("id"); 123 | String lsDocId = addDocument(lsCorpusNewId, loDoc.getString("text"), loDoc.getString("title"), 124 | loDoc.getString("source"), loDoc.getString("language")); 125 | // Ajouter les annotations 126 | for (String lsGroup : laoGroups.keySet()) { 127 | File loAnnotFile = new File(tsCorpusPath, "groups/" + lsGroup + "/" + lsDocOldId + ".json"); 128 | if (!loAnnotFile.exists()) 129 | continue; 130 | String lsAnnot = readFile(loAnnotFile.getAbsolutePath()); 131 | JSONObject loAnnots = new JSONObject(lsAnnot); 132 | if (loAnnots.isNull(lsCorpusOldId)) 133 | continue; 134 | loAnnots = loAnnots.getJSONObject(lsCorpusOldId).getJSONObject(lsGroup); 135 | 136 | for (int lniCpt = 0; lniCpt < loAnnots.names().length(); lniCpt++) { 137 | JSONArray loAnnotations = loAnnots.getJSONArray(loAnnots.names().get(lniCpt).toString()); 138 | for (int lniCptAnn = 0; lniCptAnn < loAnnotations.length(); lniCptAnn++) { 139 | JSONObject loAnn = loAnnotations.getJSONObject(lniCptAnn); 140 | loAnn.remove("annotationId"); 141 | loAnn.remove("_corpusID"); 142 | loAnn.put("_corpusID", lsCorpusNewId); 143 | loAnn.remove("_documentID"); 144 | loAnn.put("_documentID", lsDocId); 145 | addAnnotation(lsCorpusNewId, laoGroups.get(lsGroup), loAnn.toString()); 146 | } 147 | } 148 | } 149 | } 150 | } catch (IOException ex) { 151 | System.err.println("Corpus upload failed : " + ex.getMessage()); 152 | return null; 153 | } 154 | 155 | return lsCorpusNewId; 156 | } 157 | 158 | /** 159 | * Save a corpus' documents, groups and annotations to disk in subfolders. 160 | * Will not retain user rights. For large corpus, please use the batch 161 | * functionality of the back-end. 162 | * 163 | * @param tsCorpusId 164 | * Corpus unique id to export. 165 | * @param tsOuputPath 166 | * The local directory to store the exported corpus. 167 | * @param tsExportGroupId 168 | * : Ids of accessible groups to export. If none listed, all 169 | * accessible groups are exported. 170 | * @return True if exported with success, false if error during export. 171 | */ 172 | public boolean exportToDisk(String tsCorpusId, String tsOutputPath, List tasExportGroupId) { 173 | String lsReturn = ""; 174 | Map> lasBuckets = new HashMap>(); 175 | File loDocsFolder = null; 176 | File loGroupsFolder = null; 177 | 178 | // Prepare the subfolders 179 | if (!(new File(tsOutputPath)).exists()) 180 | return false; 181 | 182 | loDocsFolder = new File(tsOutputPath, "documents"); 183 | loDocsFolder.mkdirs(); 184 | loGroupsFolder = new File(tsOutputPath, "groups"); 185 | loGroupsFolder.mkdirs(); 186 | 187 | // Download corpus specs and save them 188 | writeFile(new JSONObject(getCorpusMetadata(tsCorpusId)).toString(4), "corpus.json", tsOutputPath); 189 | 190 | // Download the corpus structure and replicate it 191 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/structure", 192 | USERTYPE.CustomUser, null); 193 | if (lsReturn != null && !lsReturn.isEmpty()) { 194 | // Save to keep track or group names and schemas 195 | writeFile(lsReturn, CORPUS_STRUCT_FILE, tsOutputPath); 196 | 197 | JSONObject loRet = new JSONObject(lsReturn); 198 | for (int lniCpt = 0; lniCpt < loRet.getJSONArray("buckets").length(); lniCpt++) { 199 | String lsId = ((JSONObject) loRet.getJSONArray("buckets").get(lniCpt)).getString("id"); 200 | 201 | if ((tasExportGroupId == null) || tasExportGroupId.isEmpty() || tasExportGroupId.contains(lsId)) { 202 | new File(loGroupsFolder, lsId).mkdirs(); 203 | 204 | lasBuckets.put(lsId, new ArrayList()); 205 | JSONArray loSchemas = ((JSONObject) loRet.getJSONArray("buckets").get(lniCpt)) 206 | .getJSONArray("schemas"); 207 | for (int lniCptSchema = 0; lniCptSchema < loSchemas.length(); lniCptSchema++) { 208 | String lsName = ((JSONObject) loSchemas.get(lniCptSchema)).getString("schemaType"); 209 | lasBuckets.get(lsId).add(((JSONObject) loSchemas.get(lniCptSchema)).getString("schemaType")); 210 | String lsSchemaId = getSchemaId(lsName, tsCorpusId, lsId); 211 | if (lsSchemaId != null) 212 | writeFile(getSchema(lsSchemaId), lsName + ".schema", 213 | new File(loGroupsFolder, lsId).getAbsolutePath()); 214 | } 215 | } 216 | } 217 | } 218 | 219 | // Check if all required groups are in the structure, if not, exit. 220 | if (tasExportGroupId != null && !tasExportGroupId.isEmpty()) 221 | for (String lsId : tasExportGroupId) 222 | if (!lasBuckets.keySet().contains(lsId)) { 223 | System.err.println("Missing group : " + lsId); 224 | return false; 225 | } 226 | 227 | // List documents 228 | List loDocs = getDocuments(tsCorpusId); 229 | // and download them 230 | for (PacteDocument loDoc : loDocs) { 231 | writeFile(poCfg.getRequest( 232 | poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/documents/" + loDoc.getID(), 233 | USERTYPE.CustomUser, null), loDoc.getID() + ".json", loDocsFolder.getAbsolutePath()); 234 | 235 | // List annotations per group and store them 236 | for (String lsGroupId : lasBuckets.keySet()) { 237 | String lsSchemas = ""; 238 | for (String lsType : lasBuckets.get(lsGroupId)) 239 | lsSchemas += lsGroupId + ":" + lsType + ","; 240 | // Get each docs/groups annotations 241 | if (!lsSchemas.isEmpty()) { 242 | String lsAnnots = getAnnotations(tsCorpusId, loDoc.getID(), 243 | lsSchemas.substring(0, lsSchemas.length() - 1)); 244 | writeFile(lsAnnots, loDoc.getID() + ".json", 245 | new File(loGroupsFolder.getAbsolutePath(), lsGroupId).getAbsolutePath()); 246 | } 247 | } 248 | } 249 | 250 | // Save schemas 251 | 252 | return true; 253 | } 254 | 255 | /** 256 | * Save a corpus' annotations to disk in a tab-separated file. Each 257 | * attribute will be exported as separated column. 258 | * 259 | * @param tsCorpusId 260 | * Corpus unique id to export. 261 | * 262 | * @param tsOutputPath 263 | * The local directory to store the exported corpus. 264 | * @param tsGroupId 265 | * : Ids of accessible groups to export. If none listed, all 266 | * accessible groups are exported. 267 | * @param tsSchemaName 268 | * : Schema name to export 269 | * @param tbIncludeSourceText 270 | * : If True, add a column called "source text" covered for a 271 | * surface annotation. No effect if false or is schema does not 272 | * target surface annotation. 273 | * @return True if exported with success, false if error during export. 274 | */ 275 | public boolean exportToTSV(String tsCorpusId, String tsOutputPath, String tsOutputFilename, String tsGroupId, 276 | String tsSchemaName, boolean tbIncludeSourceText) { 277 | File loTsvPath = new File(tsOutputPath); 278 | StringBuilder lsFile = new StringBuilder(); 279 | 280 | if ((tsSchemaName == null) || tsSchemaName.isEmpty() || (tsGroupId == null) || tsGroupId.isEmpty()) 281 | return false; 282 | 283 | // Prepare the subfolders 284 | if (!loTsvPath.exists()) { 285 | loTsvPath.mkdirs(); 286 | if (!loTsvPath.exists()) 287 | return false; 288 | } 289 | 290 | // Parse schema structure to order attributes 291 | SchemaData loSchema = new SchemaData(getSchema(getSchemaId(tsSchemaName, tsCorpusId, tsGroupId))); 292 | 293 | // Add the header 294 | lsFile.append("DocumentId\t"); 295 | for (String lsFeature : loSchema.FeatureList) 296 | lsFile.append(lsFeature + "\t"); 297 | lsFile.append((tbIncludeSourceText ? "source text" : "") + System.lineSeparator()); 298 | 299 | // List documents 300 | List loDocs = getDocuments(tsCorpusId); 301 | 302 | // and download them 303 | for (PacteDocument loDoc : loDocs) { 304 | String lsDoc = ""; 305 | 306 | if (tbIncludeSourceText) 307 | lsDoc = new JSONObject(poCfg.getRequest( 308 | poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/documents/" + loDoc.getID(), 309 | USERTYPE.CustomUser, null)).getString("text"); 310 | 311 | // Get each docs/groups annotations 312 | String lsReturn = getAnnotations(tsCorpusId, loDoc.getID(), tsGroupId + ":" + tsSchemaName); 313 | 314 | if (!lsReturn.equals("{}")) { 315 | System.out.println(lsReturn); 316 | // Parse the features for each lines 317 | JSONArray loAnnots = (new JSONObject(lsReturn)).getJSONObject(tsCorpusId).getJSONObject(tsGroupId) 318 | .getJSONArray(tsSchemaName); 319 | 320 | for (int lniCpt = 0; lniCpt < loAnnots.length(); lniCpt++) { 321 | JSONObject loAnnot = loAnnots.getJSONObject(lniCpt); 322 | StringBuilder lsLine = new StringBuilder(); 323 | 324 | // Add all features 325 | lsLine.append(loAnnot.getString("_documentID") + "\t"); 326 | for (String lsFeature : loSchema.FeatureList) { 327 | if (loAnnot.has(lsFeature)) 328 | lsLine.append(loAnnot.getString(lsFeature)); 329 | lsLine.append("\t"); 330 | } 331 | 332 | // Add the source text 333 | if (tbIncludeSourceText) { 334 | // Get all offsets 335 | JSONArray loOffsets = loAnnot.getJSONArray("offsets"); 336 | for (int lniCptPos = 0; lniCptPos < loOffsets.length(); lniCptPos++) 337 | lsLine.append(lsDoc 338 | .substring(loOffsets.getJSONObject(lniCptPos).getInt("begin"), 339 | loOffsets.getJSONObject(lniCptPos).getInt("end")) 340 | .replace("\r\n", " ### ").replace("\t", " ### ")); 341 | lsLine.append("\t"); 342 | } 343 | 344 | lsFile.append(lsLine.toString() + System.lineSeparator()); 345 | } 346 | } 347 | 348 | } 349 | 350 | writeFile(lsFile.toString(), tsOutputFilename, tsOutputPath); 351 | 352 | return true; 353 | 354 | } 355 | 356 | /** 357 | * Get the definition of a corpus 358 | * 359 | * @param tsCorpusId 360 | * Unique identification of the targeted corpus 361 | * @return Json definition, null if not found. 362 | */ 363 | public String getCorpusMetadata(String tsCorpusId) { 364 | return poCfg.getRequest(poCfg.getPacteBackend() + "Corpora/corpus/" + tsCorpusId, USERTYPE.CustomUser, null); 365 | } 366 | 367 | /** 368 | * Return corpus unique identification from the name. In case there are 369 | * several corpora with the same name, the first is returned. 370 | * 371 | * @param tsNomCorpus 372 | * @return 373 | */ 374 | public String getCorpusId(String tsNomCorpus) { 375 | String lsIdCorpus = ""; 376 | String lsReturn = ""; 377 | 378 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "Corpora/corpora", USERTYPE.CustomUser, null); 379 | if (lsReturn != null && !lsReturn.isEmpty()) { 380 | int lniPos = lsReturn.toLowerCase().indexOf("\"title\":\"" + tsNomCorpus.toLowerCase() + "\""); 381 | if (lniPos >= 0) { 382 | lniPos = lsReturn.substring(0, lniPos).lastIndexOf("\"id\":\"") + 6; 383 | lsIdCorpus = lsReturn.substring(lniPos, lniPos + 36); 384 | System.out.println("Corpus " + tsNomCorpus + " (" + lsIdCorpus + ") a été trouvé!"); 385 | return lsIdCorpus; 386 | } 387 | } 388 | return null; 389 | } 390 | 391 | /** 392 | * Create a new corpus 393 | * 394 | * @param tsNomCorpus 395 | * Corpus name 396 | * @param tsLangage 397 | * List of comma separated values. ex: FR_fr, EN_en, ES_es 398 | * @return Corpus ID if created successfully, null if not created. 399 | */ 400 | public String createCorpus(String tsNomCorpus, String tsLangage) { 401 | String lsReturn = ""; 402 | String lsIdCorpus = null; 403 | JSONObject loResponse = null; 404 | 405 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Corpora/corpus", "{\"title\": \"" + tsNomCorpus 406 | + "\",\"description\":\"" 407 | + "\",\"version\":\"\",\"source\":\"\", \"addAllPermissionsOnTranscoderBucketToOwner\":true, \"reference\":\"\",\"languages\":[\"" 408 | + tsLangage + "\"]}", USERTYPE.CustomUser); 409 | 410 | loResponse = new JSONObject(lsReturn); 411 | 412 | if (loResponse.has("id")) { 413 | lsIdCorpus = loResponse.getString("id"); 414 | if (poCfg.getVerbose()) 415 | System.out.println("Corpus " + tsNomCorpus + " (" + lsIdCorpus + ") a été créé!"); 416 | } else if (loResponse.has("message")) 417 | System.err.println("Cannot create corpus : " + loResponse.getString("message")); 418 | 419 | return lsIdCorpus; 420 | } 421 | 422 | /** 423 | * 424 | * @param tsCorpusId 425 | * @return 426 | */ 427 | public List getDocuments(String tsCorpusId) { 428 | String lsResponse = null; 429 | List loValues = new ArrayList(); 430 | List loDocs = new ArrayList(); 431 | Integer lniMaxDoc = Integer.MAX_VALUE; 432 | Integer lniCptPage = 0; 433 | 434 | if (tsCorpusId == null || tsCorpusId.trim().isEmpty()) 435 | return null; 436 | 437 | loValues.add(new BasicNameValuePair("entriesperpage", "2")); 438 | loValues.add(new BasicNameValuePair("page", lniCptPage.toString())); 439 | 440 | while (loDocs.size() < lniMaxDoc) { 441 | lsResponse = null; 442 | 443 | // Aller chercher la prochaine page 444 | loValues.remove(loValues.size() - 1); 445 | loValues.add(new BasicNameValuePair("page", (++lniCptPage).toString())); 446 | 447 | lsResponse = poCfg.getRequest(poCfg.getPacteBackend() + "Corpora/documentsCorpus/" + tsCorpusId, 448 | USERTYPE.CustomUser, loValues); 449 | // System.out.println(lsResponse); 450 | 451 | if (lsResponse == null || lsResponse.contains("documents\":[]")) 452 | return loDocs; 453 | lniMaxDoc = new JSONObject(lsResponse).getInt("documentCount"); 454 | 455 | JSONArray loJson = new JSONObject(lsResponse).getJSONArray("documents"); 456 | for (int lniCpt = 0; lniCpt < loJson.length(); lniCpt++) { 457 | JSONObject loDoc = (JSONObject) loJson.get(lniCpt); 458 | loDocs.add(new PacteDocument(loDoc.getString("id"), loDoc.getString("title"), null, null, 459 | loDoc.getString("language"), loDoc.getLong("docByteSize"), loDoc.getString("dateAdded"), 460 | loDoc.getString("path"))); 461 | } 462 | } 463 | 464 | return loDocs; 465 | } 466 | 467 | /** 468 | * Destroy a corpus and everything contained within (documents, groups, 469 | * annotations, etc). 470 | * 471 | * @param tsIdCorpus 472 | * @return 473 | */ 474 | public boolean deleteCorpus(String tsIdCorpus) { 475 | String lsReturn = ""; 476 | 477 | if (tsIdCorpus == null || tsIdCorpus.isEmpty()) 478 | return false; 479 | 480 | lsReturn = poCfg.deleteRequest(poCfg.getPacteBackend() + "Corpora/corpus/" + tsIdCorpus, USERTYPE.CustomUser, 481 | null); 482 | if (lsReturn != null && lsReturn == "") { 483 | return true; 484 | } 485 | return false; 486 | } 487 | 488 | public String createBucket(String tsIDCorpus, String tsNomBucket) { 489 | String lsReturn = ""; 490 | 491 | // Ajouter un groupe pertinent 492 | // String lsIdBucket1 = UUID.randomUUID().toString(); 493 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Corpora/corpusBucket/" + tsIDCorpus, 494 | "{\"id\":\"\",\"name\":\"" + tsNomBucket + "\"}", USERTYPE.CustomUser); 495 | 496 | if (lsReturn != null && !lsReturn.isEmpty()) { 497 | JSONObject loJson = new JSONObject(lsReturn); 498 | return loJson.getString("bucketId"); 499 | } 500 | 501 | return null; 502 | } 503 | 504 | /** 505 | * 506 | * @param tsTagsetDefinition 507 | * @return 508 | */ 509 | public String createTagset(String tsTagsetDefinition) { 510 | String lsReturn = ""; 511 | String lsTagset = "{\"tagsetJsonContent\": \"" + tsTagsetDefinition.replaceAll("\"", "\\\\\"").replaceAll("\r", "").replaceAll("\n", "") + "\"}"; 512 | 513 | // Ajouter un groupe pertinent 514 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Tagsets/tagset", lsTagset, 515 | USERTYPE.CustomUser); 516 | 517 | if (lsReturn != null && new JSONObject(lsReturn).has("id")) { 518 | JSONObject loJson = new JSONObject(lsReturn); 519 | return loJson.getString("id"); 520 | } 521 | 522 | return null; 523 | } 524 | 525 | /** 526 | * Get the JSON definition for a stored tagset. 527 | * 528 | * @param tsTagsetId 529 | * : ID of the required tagset 530 | * @return JSON definition 531 | */ 532 | public String getTagset(String tsTagsetId) { 533 | String lsTagset = null; 534 | 535 | // Aller chercher le schéma 536 | lsTagset = poCfg.getRequest(poCfg.getPacteBackend() + "Tagsets/tagset/" + tsTagsetId, USERTYPE.CustomUser, 537 | null); 538 | 539 | if (lsTagset == null || lsTagset.isEmpty()) 540 | return null; 541 | else 542 | return lsTagset; 543 | } 544 | 545 | /** 546 | * Get schema id from name, filtered by corpus and group 547 | * 548 | * @param tsSchemaName 549 | * @param tsCorpusId 550 | * @param tsBucketId 551 | * @return 552 | */ 553 | public String getTagsetId(String tsTagsetName) { 554 | String lsTagsetList = null; 555 | JSONArray loTagsets = null; 556 | 557 | // Aller chercher tous les schémas 558 | lsTagsetList = poCfg.getRequest(poCfg.getPacteBackend() + "Tagsets/tagsets", USERTYPE.CustomUser, null); 559 | loTagsets = new JSONArray(lsTagsetList); 560 | 561 | for (int lniCpt = 0; lniCpt < loTagsets.length(); lniCpt++) { 562 | JSONObject loObj = loTagsets.getJSONObject(lniCpt); 563 | 564 | if (new JSONObject(loObj.getString("tagsetJsonContent")).getString("title").equalsIgnoreCase(tsTagsetName)) 565 | return loObj.getString("id"); 566 | } 567 | 568 | return null; 569 | } 570 | 571 | /** 572 | * Destroy a corpus and everything contained within (documents, groups, 573 | * annotations, etc). 574 | * 575 | * @param tsIdTagset 576 | * @return 577 | */ 578 | public boolean deleteTagset(String tsIdTagset) { 579 | String lsReturn = ""; 580 | 581 | if (tsIdTagset == null || tsIdTagset.isEmpty()) 582 | return false; 583 | 584 | lsReturn = poCfg.deleteRequest(poCfg.getPacteBackend() + "Tagsets/tagset/" + tsIdTagset, USERTYPE.CustomUser, 585 | null); 586 | if (lsReturn != null && lsReturn == "") { 587 | return true; 588 | } 589 | return false; 590 | } 591 | 592 | public String registerSchema(String tsSchema) { 593 | String lsReturn = ""; 594 | 595 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "Schemas/schema", 596 | "{\"schemaJsonContent\": \"" + tsSchema.replace("\"", "\\\"") + "\"}", USERTYPE.CustomUser); 597 | 598 | if (lsReturn.contains("{\"id\":\"")) { 599 | JSONObject loJson = new JSONObject(lsReturn); 600 | 601 | return loJson.getString("id"); 602 | } else 603 | return null; 604 | } 605 | 606 | public boolean deleteSchema(String tsIdSchema) { 607 | 608 | poCfg.deleteRequest(poCfg.getPacteBackend() + "Schemas/schema/" + tsIdSchema, USERTYPE.CustomUser, null); 609 | 610 | return true; 611 | } 612 | 613 | /** 614 | * 615 | * @param tsSchemaId 616 | * @return 617 | */ 618 | public String getSchema(String tsSchemaId) { 619 | String lsSchema = null; 620 | 621 | // Aller chercher le schéma 622 | lsSchema = poCfg.getRequest(poCfg.getPacteBackend() + "Schemas/schema/" + tsSchemaId, USERTYPE.CustomUser, 623 | null); 624 | 625 | if (lsSchema == null || lsSchema.isEmpty()) 626 | return null; 627 | else 628 | return lsSchema; 629 | } 630 | 631 | /** 632 | * Get schema id from name, filtered by corpus and group 633 | * 634 | * @param tsSchemaName 635 | * @param tsCorpusId 636 | * @param tsBucketId 637 | * @return 638 | */ 639 | public String getSchemaId(String tsSchemaName, String tsCorpusId, String tsBucketId) { 640 | String lsSchemaList = null; 641 | JSONArray loSchemas = null; 642 | String lsSchemaId = null; 643 | 644 | // Aller chercher tous les schémas 645 | lsSchemaList = poCfg.getRequest(poCfg.getPacteBackend() + "Schemas/schemas", USERTYPE.CustomUser, null); 646 | loSchemas = new JSONArray(lsSchemaList); 647 | 648 | for (int lniCpt = 0; lniCpt < loSchemas.length(); lniCpt++) { 649 | JSONObject loObj = loSchemas.getJSONObject(lniCpt); 650 | 651 | if (((String) ((JSONObject) loObj.get("schema")).get("schemaType")).equalsIgnoreCase(tsSchemaName)) { 652 | lsSchemaId = ((String) ((JSONObject) loObj.get("schema")).get("id")); 653 | JSONArray loaCorpus = loObj.getJSONArray("relatedCorpusBuckets"); 654 | 655 | // Schema pas dans un groupe 656 | if ((tsBucketId == null || tsBucketId == "") && (tsCorpusId == null || tsCorpusId.isEmpty()) 657 | && loaCorpus.length() == 0) 658 | return lsSchemaId; 659 | 660 | else if (((tsBucketId != null && !tsBucketId.isEmpty()) 661 | || (tsCorpusId != null || !tsCorpusId.isEmpty())) && loaCorpus.length() > 0) { 662 | // Vérifier que la bucket en bien enregistrée 663 | String lsCorp = ((JSONObject) loaCorpus.get(0)).getString("corpusId"); 664 | String lsBuck = ((JSONObject) loaCorpus.get(0)).getString("bucketId"); 665 | 666 | if (lsCorp.isEmpty() ? true 667 | : lsCorp.equals(tsCorpusId) && lsBuck.isEmpty() ? true : lsBuck.equals(tsBucketId)) 668 | return lsSchemaId; 669 | } 670 | } 671 | } 672 | 673 | return null; 674 | } 675 | 676 | /** 677 | * Enregistrer un schéma dans un groupe d'annotation à partir d'un schéma 678 | * existant. 679 | * 680 | * @param tsIdSchema 681 | * @param tsIdCorpus 682 | * @param tsIdBucket 683 | * @return 684 | */ 685 | public boolean copySchemaToGroup(String tsIdSchema, String tsIdCorpus, String tsIdBucket) { 686 | 687 | poCfg.putRequest(poCfg.getPacteBackend() + "Schemas/schemaToCorpusBucket/" + tsIdSchema, 688 | "{\"corpusId\": \"" + tsIdCorpus + "\", \"bucketId\": \"" + tsIdBucket + "\"}", USERTYPE.CustomUser); 689 | 690 | return true; 691 | } 692 | 693 | /** 694 | * Ajouter un nouveau document 695 | * 696 | * @param tsCorpusId 697 | * @param tsContent 698 | * @param tsToken 699 | * @param tsLangage 700 | * @return 701 | */ 702 | public String addDocument(String tsCorpusId, String tsContent, String tsTitle, String tsSource, String tsLangage) { 703 | String lsReturn = ""; 704 | String lsIdDoc = null; 705 | 706 | lsReturn = poCfg.postRequest(poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/documents", 707 | "{\"title\": \"" + tsTitle + "\",\"source\": \"" + tsSource + "\",\"text\": \"" 708 | + tsContent.replace("\"", "\\\"") + "\",\"language\": \"" + tsLangage + "\"}", 709 | USERTYPE.CustomUser); 710 | 711 | if (lsReturn != null && !lsReturn.isEmpty()) 712 | lsIdDoc = new JSONObject(lsReturn).getString("id"); 713 | else 714 | System.err.println(lsReturn); 715 | 716 | return lsIdDoc; 717 | } 718 | 719 | public PacteDocument getDocument(String tsCorpusID, String tsDocumentID) { 720 | String lsContent = null; 721 | String lsTitle = null; 722 | String lsSource = null; 723 | String lsLanguages = null; 724 | String lsReturn = ""; 725 | 726 | lsReturn = poCfg.getRequest( 727 | poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusID + "/documents/" + tsDocumentID, 728 | USERTYPE.CustomUser, null); 729 | 730 | if (lsReturn != null && !lsReturn.isEmpty()) { 731 | lsContent = new JSONObject(lsReturn).getString("text"); 732 | lsTitle = new JSONObject(lsReturn).getString("title"); 733 | lsSource = new JSONObject(lsReturn).getString("source"); 734 | lsLanguages = new JSONObject(lsReturn).getString("language"); 735 | 736 | return new PacteDocument(tsDocumentID, lsTitle, lsContent, lsSource, lsLanguages, null, null, null); 737 | } 738 | 739 | return null; 740 | } 741 | 742 | /** 743 | * Get the number of documents in the corpus 744 | * 745 | * @param tsCorpusId 746 | * @return 747 | */ 748 | public Integer getSize(String tsCorpusId) { 749 | String lsResponse = null; 750 | lsResponse = poCfg.getRequest(poCfg.getPacteBackend() + "Corpora/corpus/" + tsCorpusId, USERTYPE.CustomUser, 751 | null); 752 | 753 | if (lsResponse != null && !lsResponse.isEmpty()) { 754 | JSONObject loJson = new JSONObject(lsResponse); 755 | if (loJson.has("documentCount")) 756 | return loJson.getInt("documentCount"); 757 | else 758 | System.err.println("No document count returned : " + lsResponse); 759 | } 760 | 761 | return null; 762 | } 763 | 764 | /** 765 | * Add a new annotation to a group. 766 | * 767 | * @param tsCorpusId 768 | * @param tsGroupId 769 | * @param tsAnnotation 770 | * @return 771 | */ 772 | public String addAnnotation(String tsCorpusId, String tsGroupId, String tsAnnotation) { 773 | String lsAnnotId = null; 774 | String lsReturn = ""; 775 | lsReturn = poCfg.postRequest( 776 | poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/buckets/" + tsGroupId + "/annotations", 777 | tsAnnotation, USERTYPE.CustomUser); 778 | 779 | if (lsReturn != null && !lsReturn.isEmpty() && !lsReturn.contains("Not Found:") 780 | && !lsReturn.contains("illegal")) 781 | lsAnnotId = new JSONObject(lsReturn).getString("id"); 782 | else 783 | System.err.println(lsReturn); 784 | 785 | return lsAnnotId; 786 | } 787 | 788 | /** 789 | * Add new contact to the configured custom user 790 | * 791 | * @param tsBucketName 792 | * @param tsToken 793 | * @return 794 | */ 795 | public String getGroupId(String tsBucketName, String tsCorpusId) { 796 | String lsReturn = ""; 797 | List lasParam = new ArrayList(); 798 | 799 | lasParam.add(new BasicNameValuePair("includeSchemaJson", "false")); 800 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/structure", 801 | USERTYPE.CustomUser, lasParam); 802 | 803 | if (lsReturn != null && !lsReturn.isEmpty()) { 804 | JSONArray loRet = new JSONObject(lsReturn).getJSONArray("buckets"); 805 | for (int lniCpt = 0; lniCpt < loRet.length(); lniCpt++) { 806 | if (loRet.getJSONObject(lniCpt).getString("name").equals(tsBucketName)) 807 | return loRet.getJSONObject(lniCpt).getString("id"); 808 | } 809 | } 810 | return null; 811 | } 812 | 813 | /** 814 | * Return the id and name of each annotation group of a corpus 815 | * @param tsCorpusId 816 | * @return 817 | */ 818 | public Map getGroups(String tsCorpusId) { 819 | String lsReturn = null; 820 | Map loGroups = new HashMap(); 821 | 822 | // Get structure 823 | lsReturn = poCfg.getRequest(poCfg.getPacteBackend() + "RACSProxy/corpora/" + tsCorpusId + "/structure", 824 | USERTYPE.CustomUser, null); 825 | 826 | // parse json 827 | JSONArray loGrps = new JSONObject(lsReturn).getJSONArray("buckets"); 828 | for (int lniCpt = 0; lniCpt < loGrps.length(); lniCpt++) { 829 | JSONObject loObj = loGrps.getJSONObject(lniCpt); 830 | loGroups.put(loObj.getString("id"), loObj.getString("name")); 831 | } 832 | 833 | return loGroups; 834 | } 835 | 836 | public String getAnnotations(String tsCorpusId, String tsDocId, String tsSchemaTypes) { 837 | String lsReturn = ""; 838 | List lasParam = new ArrayList(); 839 | 840 | lasParam.add(new BasicNameValuePair("schemaTypes", tsSchemaTypes)); 841 | 842 | lsReturn = poCfg.getRequest( 843 | poCfg.getPacteBackend() + "RACSProxy/annosearch/corpora/" + tsCorpusId + "/documents/" + tsDocId, 844 | USERTYPE.CustomUser, lasParam); 845 | 846 | if (lsReturn != null && !lsReturn.isEmpty()) { 847 | return lsReturn; 848 | } 849 | return null; 850 | } 851 | 852 | public boolean copyAnnotationGroup(String tsCorpusId, String tsGroupFromId, String tsGroupToId) { 853 | return false; 854 | } 855 | 856 | private boolean writeFile(String tsContent, String tsFileName, String tsPath) { 857 | try { 858 | Files.write((new File(tsPath, tsFileName)).toPath(), Arrays.asList(tsContent.split("\r\n")), 859 | Charset.forName("UTF-8")); 860 | 861 | } catch (IOException e) { 862 | e.printStackTrace(); 863 | return false; 864 | } 865 | 866 | return true; 867 | } 868 | 869 | private String readFile(String path) { 870 | byte[] encoded; 871 | try { 872 | encoded = Files.readAllBytes(Paths.get(path)); 873 | } catch (IOException e) { 874 | return null; 875 | } 876 | return new String(encoded, Charset.forName("UTF-8")); 877 | } 878 | } 879 | --------------------------------------------------------------------------------