├── .gitignore
├── LICENSE
├── README.md
├── src
    ├── .gitignore
    ├── TranskribusCommands
    │   ├── TranskribusDU_transcriptUploader.py
    │   ├── Transkribus_downloader.py
    │   ├── Transkribus_uploader.py
    │   ├── __init__.py
    │   ├── do_addDocToCollec.py
    │   ├── do_analyzeLayout.py
    │   ├── do_analyzeLayoutBatch.py
    │   ├── do_createCollec.py
    │   ├── do_deleteCollec.py
    │   ├── do_deleteJob.py
    │   ├── do_duplicateDoc.py
    │   ├── do_export.py
    │   ├── do_getDocTrp.py
    │   ├── do_getJobStatus.py
    │   ├── do_getJobs.py
    │   ├── do_getRnnTrainingJobStatus.py
    │   ├── do_htrHmm.py
    │   ├── do_htrRnn.py
    │   ├── do_htrRnnPerRegion.py
    │   ├── do_htrTrainRnn.py
    │   ├── do_listCollec.py
    │   ├── do_listHtrHmm.py
    │   ├── do_listHtrRnn.py
    │   ├── do_listPageLocks.py
    │   ├── do_login.py
    │   ├── do_logout.py
    │   ├── do_tableTemplate.py
    │   ├── do_transcript.py
    │   └── do_uploadDictionary.py
    ├── TranskribusDU
    │   └── xml_formats
    │   │   ├── DS2PageXml.py
    │   │   ├── Page2DS.py
    │   │   ├── PageXml.py
    │   │   ├── PageXmlExtractor.py
    │   │   ├── __init__.py
    │   │   ├── mpxml2pxml.py
    │   │   ├── multipagecontent.xsd
    │   │   ├── pagecontent.xsd
    │   │   └── tests
    │   │       ├── testDS2PageXml
    │   │           ├── .gitignore
    │   │           └── RRB_MM_01_033_Jahr_1810.ds.xml
    │   │       ├── test_DS2PageXml.py
    │   │       └── test_PageXml.py
    ├── TranskribusPyClient
    │   ├── TRP_FullDoc.py
    │   ├── __init__.py
    │   ├── application.wadl
    │   ├── client.html
    │   ├── client.py
    │   ├── common
    │   │   ├── DateTimeRange.py
    │   │   ├── IntegerRange.py
    │   │   ├── IntegerRangeHalfBounded.py
    │   │   ├── __init__.py
    │   │   └── trace.py
    │   └── test
    │   │   ├── __init__.py
    │   │   ├── test_collections_addDocToCollection.py
    │   │   ├── test_collections_copyDocToCollection.py
    │   │   ├── test_collections_fulldoc.py
    │   │   ├── test_collections_fulldoc_xml.py
    │   │   ├── test_collections_list.py
    │   │   ├── test_collections_listEditDeclFeatures.py
    │   │   └── test_collections_postPageTranscript.py
    ├── TranskribusPyClient_version.py
    └── Transkribus_credential.py
└── tests
    ├── .gitignore
    └── test_commands.sh


/.gitignore:
--------------------------------------------------------------------------------
  1 | #################
  2 | ## READ
  3 | #################
  4 | .trnskrbs
  5 | 
  6 | #################
  7 | ## Eclipse
  8 | #################
  9 | .cache
 10 | *.pydevproject
 11 | .project
 12 | .metadata
 13 | bin/
 14 | tmp/
 15 | *.tmp
 16 | *.bak
 17 | *.swp
 18 | *~.nib
 19 | local.properties
 20 | .classpath
 21 | .settings/
 22 | .loadpath
 23 | 
 24 | # External tool builders
 25 | .externalToolBuilders/
 26 | 
 27 | # Locally stored "Eclipse launch configurations"
 28 | *.launch
 29 | 
 30 | # CDT-specific
 31 | .cproject
 32 | 
 33 | # PDT-specific
 34 | .buildpath
 35 | 
 36 | 
 37 | #################
 38 | ## Visual Studio
 39 | #################
 40 | 
 41 | ## Ignore Visual Studio temporary files, build results, and
 42 | ## files generated by popular Visual Studio add-ons.
 43 | 
 44 | # User-specific files
 45 | *.suo
 46 | *.user
 47 | *.sln.docstates
 48 | 
 49 | # Build results
 50 | 
 51 | [Dd]ebug/
 52 | [Rr]elease/
 53 | x64/
 54 | build/
 55 | [Bb]in/
 56 | [Oo]bj/
 57 | 
 58 | # MSTest test Results
 59 | [Tt]est[Rr]esult*/
 60 | [Bb]uild[Ll]og.*
 61 | 
 62 | *_i.c
 63 | *_p.c
 64 | *.ilk
 65 | *.meta
 66 | *.obj
 67 | *.pch
 68 | *.pdb
 69 | *.pgc
 70 | *.pgd
 71 | *.rsp
 72 | *.sbr
 73 | *.tlb
 74 | *.tli
 75 | *.tlh
 76 | *.tmp
 77 | *.tmp_proj
 78 | *.log
 79 | *.vspscc
 80 | *.vssscc
 81 | .builds
 82 | *.pidb
 83 | *.log
 84 | *.scc
 85 | 
 86 | # Visual C++ cache files
 87 | ipch/
 88 | *.aps
 89 | *.ncb
 90 | *.opensdf
 91 | *.sdf
 92 | *.cachefile
 93 | 
 94 | # Visual Studio profiler
 95 | *.psess
 96 | *.vsp
 97 | *.vspx
 98 | 
 99 | # Guidance Automation Toolkit
100 | *.gpState
101 | 
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 | 
106 | # TeamCity is a build add-in
107 | _TeamCity*
108 | 
109 | # DotCover is a Code Coverage Tool
110 | *.dotCover
111 | 
112 | # NCrunch
113 | *.ncrunch*
114 | .*crunch*.local.xml
115 | 
116 | # Installshield output folder
117 | [Ee]xpress/
118 | 
119 | # DocProject is a documentation generator add-in
120 | DocProject/buildhelp/
121 | DocProject/Help/*.HxT
122 | DocProject/Help/*.HxC
123 | DocProject/Help/*.hhc
124 | DocProject/Help/*.hhk
125 | DocProject/Help/*.hhp
126 | DocProject/Help/Html2
127 | DocProject/Help/html
128 | 
129 | # Click-Once directory
130 | publish/
131 | 
132 | # Publish Web Output
133 | *.Publish.xml
134 | *.pubxml
135 | *.publishproj
136 | 
137 | # NuGet Packages Directory
138 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
139 | #packages/
140 | 
141 | # Windows Azure Build Output
142 | csx
143 | *.build.csdef
144 | 
145 | # Windows Store app package directory
146 | AppPackages/
147 | 
148 | # Others
149 | sql/
150 | *.Cache
151 | ClientBin/
152 | [Ss]tyle[Cc]op.*
153 | ~$*
154 | *~
155 | *.dbmdl
156 | *.[Pp]ublish.xml
157 | *.pfx
158 | *.publishsettings
159 | 
160 | # RIA/Silverlight projects
161 | Generated_Code/
162 | 
163 | # Backup & report files from converting an old project file to a newer
164 | # Visual Studio version. Backup files are not needed, because we have git ;-)
165 | _UpgradeReport_Files/
166 | Backup*/
167 | UpgradeLog*.XML
168 | UpgradeLog*.htm
169 | 
170 | # SQL Server files
171 | App_Data/*.mdf
172 | App_Data/*.ldf
173 | 
174 | #############
175 | ## Windows detritus
176 | #############
177 | 
178 | # Windows image file caches
179 | Thumbs.db
180 | ehthumbs.db
181 | 
182 | # Folder config file
183 | Desktop.ini
184 | 
185 | # Recycle Bin used on file shares
186 | $RECYCLE.BIN/
187 | 
188 | # Mac crap
189 | .DS_Store
190 | 
191 | 
192 | #############
193 | ## Python
194 | #############
195 | 
196 | *.py[cod]
197 | 
198 | # Packages
199 | *.egg
200 | *.egg-info
201 | dist/
202 | build/
203 | eggs/
204 | parts/
205 | var/
206 | sdist/
207 | develop-eggs/
208 | .installed.cfg
209 | 
210 | # Installer logs
211 | pip-log.txt
212 | 
213 | # Unit test / coverage reports
214 | .coverage
215 | .tox
216 | 
217 | #Translations
218 | *.mo
219 | 
220 | #Mr Developer
221 | .mr.developer.cfg
222 | src/Transkribus_credential.py
223 | *.keep
224 | src/Transkribus_credential.py
225 | /trnskrbs_3820/
226 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TranskribusPyClient
 2 | 
 3 | A Pythonic API and some command line tools to access the Transkribus server via its REST API
 4 | 
 5 | ### Requirements, installation & testing
 6 | 
 7 | #### Python
 8 | 
 9 | * Install the latest release of [Python] 2.7.x, 3.5.x or 3.6.x
10 | 
11 | ### Additional Libraries
12 | 
13 | * python-dateutil
14 | 
15 | ### [Wiki documentation](https://github.com/Transkribus/TranskribusPyClient/wiki)
16 | 
17 | ### Commands ###
18 | 
19 | * do_addDocToCollec.py
20 | * do_createCollec.py
21 | * do_deleteCollec.py
22 | * do_deleteJob.py
23 | * do_duplicateDoc.py
24 | * do_getJobStatus.py
25 | * do_listCollec.py
26 | * do_listPageLocks.py
27 | * do_transcript.py
28 | 
29 | * do_analyzeLayout.py
30 | * do_tableTemplate.py
31 | * do_htrHmm.py
32 | * do_htrRnn.py
33 | * do_listHtrHmm.py
34 | * do_listHtrRnn.py
35 | 
36 | * do_login.py
37 | * do_logout.py
38 | 
39 | * Transkribus_downloader.py
40 | * TranskribusDU_transcriptUploader.py
41 | 
42 | **Help on module client:**
43 | 
44 | See in [TranskribusPyClient/client.html](http://htmlpreview.github.com/?https://github.com/Transkribus/TranskribusPyClient/blob/master/src/TranskribusPyClient/client.html)
45 | 
46 | 
47 | 
48 | [Python]: <https://www.python.org>
49 | [Pip]: <https://pip.pypa.io/en/stable/installing/>
50 | [LIBXML2]: <http://www.lfd.uci.edu/~gohlke/pythonlibs/#libxml-python>
51 | [TranskribusDU]: <https://github.com/Transkribus/TranskribusDU>
52 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | /Transkribus_credential.py
2 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/Transkribus_downloader.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Utility to extract collection or documents from Transkribus and create DS test structures
  6 |     
  7 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
  8 | 
  9 |     This program is free software: you can redistribute it and/or modify
 10 |     it under the terms of the GNU General Public License as published by
 11 |     the Free Software Foundation, either version 3 of the License, or
 12 |     (at your option) any later version.
 13 | 
 14 |     This program is distributed in the hope that it will be useful,
 15 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 |     GNU General Public License for more details.
 18 | 
 19 |     You should have received a copy of the GNU General Public License
 20 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 |     
 22 |     
 23 |     Developed  for the EU project READ. The READ project has received funding 
 24 |     from the European Union's Horizon 2020 research and innovation programme 
 25 |     under grant agreement No 674943.
 26 | 
 27 | Created on 15 Nov 2016
 28 | 
 29 | @author: meunier    
 30 | """
 31 | 
 32 | from __future__ import absolute_import
 33 | from __future__ import  print_function
 34 | from __future__ import unicode_literals
 35 | 
 36 | DEBUG = 0
 37 | 
 38 | 
 39 | import sys, os, logging
 40 | 
 41 | from optparse import OptionParser
 42 | import json
 43 | from io import open
 44 | 
 45 | 
 46 | try: #to ease the use without proper Python installation
 47 |     import TranskribusPyClient_version
 48 | except ImportError:
 49 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 50 |     import TranskribusPyClient_version
 51 | 
 52 | from TranskribusPyClient.common.trace import traceln, trace
 53 | 
 54 | from TranskribusCommands import sCOL, sMPXMLExtension, _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 55 | from TranskribusPyClient.client import TranskribusClient
 56 | from TranskribusDU.xml_formats import  PageXml
 57 |     
 58 |     
 59 | 
 60 | 
 61 | class TranskribusDownloader(TranskribusClient):
 62 |     """
 63 |     Download a Transkribus collection (or one of its documents) to disk as a DS structured dataset
 64 |     """
 65 |     sDefaultServerUrl = _Trnskrbs_default_url
 66 |     
 67 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 68 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 69 |         TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 70 |         
 71 |     def createStandardFolders(self, colId, destDir):
 72 |         """
 73 |         Create the standard DU folder structure under destDir and return the collection folder (ValueError if destDir is missing or a path is not a folder)
 74 |         """
 75 |         if not( os.path.exists(destDir) and os.path.isdir(destDir) ):
 76 |             raise ValueError("Non-existing destination folder %s" % destDir)
 77 |         
 78 |         colDir = os.path.join(destDir, "trnskrbs_%s"%colId)
 79 |             
 80 |         # Create the collection folder, unless it already exists as a folder
 81 |         if os.path.exists(colDir): 
 82 |             if not os.path.isdir(colDir): raise ValueError("%s exists and is not a folder."%colDir)
 83 |         else:
 84 |             traceln('- creating folder: %s'%colDir)
 85 |             os.mkdir(colDir)
 86 |         # Then each standard sub-folder, with the same exists-but-not-a-folder check
 87 |         for sSubDir in [sCOL, "xml", "ref", "run", "out"]:
 88 |             sDir = os.path.join(colDir, sSubDir)
 89 |             if os.path.exists(sDir):
 90 |                 if not os.path.isdir(sDir): raise ValueError("%s exists and is not a folder."%sDir)
 91 |             else:
 92 |                 os.mkdir(sDir)
 93 |         
 94 |         return colDir
 95 |     
 96 |     def downloadCollection(self, colId, destDir, bForce=False, bNoImage=False,sDocId=None):
 97 |         """
 98 |         Create the standard folder structure, then fetch either the whole collection or one document (sDocId) into it
 99 |         Return (col_max_ts, colDir, ldocids, dFileListPerDoc)
100 |         if bForce==True, data on disk is overwritten, otherwise an exception is raised if some data is already there
101 |         if bNoImage==True, do not download the images (both flags are handed over to download_collection)
102 |         """
103 |         colDir = self.createStandardFolders(colId, destDir)
104 | 
105 |         col_max_ts,ldocids, dFileListPerDoc = self.download_collection(colId, os.path.join(colDir,sCOL), bForce, bNoImage,sDocId)
106 |         with open(destDir+os.sep+sCOL+TranskribusClient._POSTFIX_MAX_TX, "w") as fd: fd.write("%s"%col_max_ts) #"col_max.ts" file -- NOTE(review): written under destDir, not colDir; confirm this is intended
107 | 
108 |         return col_max_ts, colDir, ldocids, dFileListPerDoc
109 |     
110 |     def download_document_by_trp(self, colId, docId, destDir, trp_spec, bOverwrite=False, bNoImage=False):       
111 |         """
112 |         Given a trp specification, download what it specifies for document docId; return (doc_max_ts, docFolder, lFileList)
113 |         """ 
114 |         colDir = self.createStandardFolders(colId, destDir)
115 |         
116 |         docFolder = os.path.join(colDir, sCOL, str(docId))
117 |         
118 |         doc_max_ts, lFileList = self.download_document(colId, docId, docFolder
119 |                                                        , bForce=False, bOverwrite=bOverwrite, bNoImage=bNoImage
120 |                                                        , trp_spec=trp_spec)        
121 |         return doc_max_ts, docFolder, lFileList
122 |         
123 |     def generateCollectionMultiPageXml(self, colDir, dFileListPerDoc, bStrict):
124 |         """
125 |         Concatenate all pages into a "multi-page PageXml" for each document of the collection, write it next to the document folder and validate it
126 |         Return the list of XML filenames; an invalid file raises ValueError if bStrict, otherwise only a warning is traced
127 |         """
128 |         lsXmlFilename = list()
129 |         traceln("- Generating multi_page PageXml")
130 | #         lsDocMaxTSFilename = sorted(glob.iglob(os.path.join(colDir, "*%s"%TranskribusClient._POSTFIX_MAX_TX)), reverse=True)  # *_max.ts files
131 |         for docId in dFileListPerDoc.keys():
132 |             if dFileListPerDoc[docId] is not None:
133 |                 lFiles= list(map(lambda x:os.path.join(colDir,docId,x+".pxml"),dFileListPerDoc[docId] ))
134 |                 docDir = os.path.join(colDir,docId)
135 |                 traceln("\t- %s"%docDir)
136 |                 
137 |                 doc = self.makeMultiPageXml(lFiles)
138 |     
139 |                 sXmlFilename = docDir+sMPXMLExtension
140 |                 self.writeDom(doc, sXmlFilename, True)
141 |                 lsXmlFilename.append(sXmlFilename)
142 |     
143 |                 trace("\t\t- validating the MultiPageXml ...")
144 |                 if not PageXml.MultiPageXml.validate(doc): 
145 |                     if bStrict:
146 |                         raise ValueError("Invalid XML generated in '%s'"%sXmlFilename)
147 |                     else:
148 |                         traceln("   *** WARNING: XML file is invalid against the schema: '%s'"%sXmlFilename)
149 |                 traceln(" Ok!")  # NOTE(review): also traced right after the non-strict warning above
150 |                     
151 |                 if DEBUG>1:
152 |                     PageXml.MultiPageXml.splitMultiPageXml(doc, docDir, "debug_%d.xml", bIndent=True)
153 |                 
154 | #                 doc.freeDoc()
155 |                 traceln('\t- %s'%sXmlFilename)
156 | 
157 |         
158 |         return lsXmlFilename
159 |             
160 |     def makeMultiPageXml(self, slFilenames):
161 |         """
162 |         Concatenate the pages listed in slFilenames into one "multi-page PageXml"
163 |         and return the resulting DOM
164 |         """
165 |         doc = PageXml.MultiPageXml.makeMultiPageXml(slFilenames)
166 |         
167 |         return doc
168 |                 
169 |     def writeDom(self, doc, filename, bIndent=False):  # write the DOM to filename as UTF-8 XML with an XML declaration
170 |         doc.write(filename,xml_declaration=True,encoding='utf-8',pretty_print=True)  # NOTE(review): bIndent is ignored, pretty_print is always True
171 | #         doc.saveFormatFileEnc(filename, "UTF-8", bIndent)
172 |         
173 | #         if self.bZLib:
174 | #             #traceln("ZLIB WRITE")
175 | #             try:
176 | #                 FIX_docSetCompressMode(doc, self.iZLibRatio)
177 | #             except Exception, e:
178 | #                 traceln("WARNING: ZLib error in Component.py: cannot set the libxml2 in compression mode. Was libxml2 compiled with zlib? :", e)
179 | #         if bIndent:
180 | #             doc.saveFormatFileEnc(self.getOutputFileName(), "UTF-8",bIndent)
181 | #         else: 
182 | #             #JLM - April 2009 - dump does not support the compressiondoc.dump(self.getOutputFile())
183 | #             doc.saveFileEnc(self.getOutputFileName(),"UTF-8")
184 |      
185 | if __name__ == '__main__':
186 |     usage = "%s [-f|--force] [--strict] [--docid <id>] [--trp <trp_file>] [--noImage] <colid> [<directory>]"%sys.argv[0]
187 |     version = "v.03"
188 |     description = "Extract a collection from transkribus and create a DS test structure containing that collection. \n" + _Trnskrbs_description
189 | 
190 |     #prepare for the parsing of the command line
191 |     parser = OptionParser(usage=usage, version=version)
192 |     parser.description = description
193 |     
194 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
195 |     __Trnskrbs_basic_options(parser, TranskribusDownloader.sDefaultServerUrl)
196 |         
197 |     parser.add_option("-f", "--force"   , dest='bForce' ,  action="store_true", default=False, help="Force rewrite if disk data is obsolete, or force overwrite in --trp mode")    
198 |     parser.add_option("--strict"        , dest='bStrict',  action="store_true", default=False, help="Failed schema validation stops the processus.")    
199 |     parser.add_option("--noimage", "--noImage", dest='bNoImage', action="store_true", default=False, help="Do not download images.")    
200 |     parser.add_option("--docid",  dest='docid', action="store", type="int", help="download specific document")    
201 |     parser.add_option("--trp"  ,  dest='trp'  , action="store", type="string", help="download the content specified by the trp file.")    
202 | 
203 |     # --- 
204 |     #parse the command line
205 |     (options, args) = parser.parse_args()
206 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
207 | 
208 |     # --- positional arguments: mandatory <colid>, optional destination <directory>
209 |     
210 |     try:
211 |         colid = args.pop(0)
212 |     except:  # NOTE(review): bare except; args.pop(0) raises IndexError when no <colid> is given
213 |         _exit(usage, 1)
214 | 
215 |     if args:
216 |         destDir = args[0]
217 |     else:
218 |         destDir = "."  # default to the current directory
219 | 
220 |     # --- 
221 |     trnkbs2ds = TranskribusDownloader(options.server, proxies, loggingLevel=logging.WARN)
222 |     __Trnskrbs_do_login_stuff(trnkbs2ds, options, trace=trace, traceln=traceln)
223 |     
224 |     if options.trp:
225 |         traceln("- Loading trp data from %s" % options.trp)
226 | #         trp = json.load(open(options.trp, "rb",encoding='utf-8'))
227 |         trp = json.load(open(options.trp, "rt",encoding='utf-8'))  # NOTE(review): the file object is never explicitly closed
228 | 
229 |         traceln("- Downloading collection %s to folder %s, as specified by trp data"%(colid, os.path.abspath(destDir)))
230 |         if not options.docid:
231 |             options.docid = trp["md"]["docId"]
232 |             traceln(" read docId from TRP: docId = %s"%options.docid) 
233 |         logging.basicConfig(level=logging.INFO)
234 |         col_ts, docFolder, lFileList = trnkbs2ds.download_document_by_trp(colid, options.docid, destDir, trp, bOverwrite=options.bForce, bNoImage=options.bNoImage)
235 |         traceln(list(map(lambda x: x.encode('utf-8'), lFileList)))
236 |         colFolder = docFolder #inaccurate, but fine for rest of code 
237 |     else:
238 |         traceln("- Downloading collection %s to folder %s"%(colid, os.path.abspath(destDir)))
239 |         col_ts, colFolder, ldocids, dFileListPerDoc = trnkbs2ds.downloadCollection(colid, destDir, bForce=options.bForce, bNoImage=options.bNoImage,sDocId=options.docid)
240 |         trnkbs2ds.generateCollectionMultiPageXml(os.path.join(colFolder, sCOL), dFileListPerDoc,options.bStrict)
241 |     traceln("- Done")
242 |     
243 |     with open(os.path.join(colFolder, "config.txt"), "w") as fd:  # record the options used for this download
244 |         fd.write("server=%s\nforce=%s\nstrict=%s\ntrp=%s\n"%(options.server, options.bForce, options.bStrict, options.trp))
245 |     
246 |     
247 |     traceln('- Done, see in %s'%colFolder)
248 |     
249 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/Transkribus_uploader.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Utility to upload to Transkribus from a DS test structure
  6 |     
  7 |     Copyright Naver Labs Europe(C) 2017  JL. Meunier
  8 | 
  9 |     This program is free software: you can redistribute it and/or modify
 10 |     it under the terms of the GNU General Public License as published by
 11 |     the Free Software Foundation, either version 3 of the License, or
 12 |     (at your option) any later version.
 13 | 
 14 |     This program is distributed in the hope that it will be useful,
 15 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 |     GNU General Public License for more details.
 18 | 
 19 |     You should have received a copy of the GNU General Public License
 20 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 |     
 22 |     
 23 |     Developed  for the EU project READ. The READ project has received funding 
 24 |     from the European Union's Horizon 2020 research and innovation programme 
 25 |     under grant agreement No 674943.
 26 | 
 27 | Created on 11 October 2017
 28 | 
 29 | @author: meunier    
 30 | """
 31 | 
 32 | from __future__ import absolute_import
 33 | from __future__ import  print_function
 34 | from __future__ import unicode_literals
 35 | DEBUG = 0
 36 | 
 37 | import sys, os, logging
 38 | from optparse import OptionParser
 39 | import json
 40 | from io import open
 41 | 
 42 | 
 43 | try: #to ease the use without proper Python installation
 44 |     import TranskribusPyClient_version
 45 | except ImportError:
 46 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 47 |     import TranskribusPyClient_version
 48 | 
 49 | from TranskribusPyClient.common.trace import traceln, trace 
 50 | 
 51 | from TranskribusCommands import  sCOL, _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 52 | from TranskribusPyClient.client import TranskribusClient
 53 | 
 54 | from TranskribusDU.xml_formats import  PageXml
 55 | 
 56 | 
 57 | 
 58 | class TranskribusTranscriptUploader(TranskribusClient):
 59 |     """
 60 |     Upload transcripts from the disk or memory to Transkribus 
 61 |     """
 62 |     sDefaultServerUrl = _Trnskrbs_default_url
 63 |     
 64 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 65 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 66 |         TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 67 |         
 68 |     def uploadCollectionTranscript(self, colid, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
 69 |         """
 70 |         Upload the transcripts of all document in that collection into Transkribus
 71 |         return nothing
 72 |         """
 73 |         if iVerbose: 
 74 |             traceln("- Uploading all transcripts from folder %s to collection %s"%(sColDSDir, colid))
 75 | 
 76 |         trpFilename = os.path.join(sColDSDir, "trp.json")
 77 |         traceln(" - reading %s"%trpFilename)
 78 |         if not os.path.exists(trpFilename):
 79 |             raise Exception("File not found %s. \nData probably created in --trp mode, so upload must be done in --trp mode."%trpFilename)
 80 |         trp = json.load(open(trpFilename, "r",encoding='utf-8'))
 81 |         
 82 |         for docid in [d["docId"] for d in trp]:
 83 |             self.uploadDocumentTranscript(colid, docid, sColDSDir, sNote=sNote, sToolName=sToolName, iVerbose=iVerbose, status=status)
 84 |         
 85 |         if iVerbose: 
 86 |             traceln("  Done (collection %s)"%colid)
 87 |         return
 88 | 
 89 |     def uploadDocumentTranscript(self, colid, docid, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
 90 |         """
 91 |         Upload the transcripts of all document in that collection into Transkribus
 92 |         return nothing
 93 |         """
 94 |         trpFilename = os.path.join(sColDSDir, str(docid), "trp.json")
 95 |         traceln(" - reading %s"%trpFilename)
 96 |         if not os.path.exists(trpFilename):
 97 |             raise Exception("File not found %s. \nData probably created in --trp mode, so upload must be done in --trp mode."%trpFilename)
 98 |         trp = json.load(open(trpFilename, "r",encoding='utf-8'))
 99 |         self.uploadDocumentTranscript_by_trp(colid, docid, trp, sColDSDir, sNote=sNote, sToolName=sToolName, iVerbose=iVerbose, status=status)
100 |         return
101 |     
102 |     def uploadDocumentTranscript_by_trp(self, colid, docid, trp, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
103 |         """
104 |         Upload the transcripts of one document in that collection into Transkribus, as specified by the TRP data
105 |             status = None     ==> we get the status from the TRP
106 |             otherwise         ==> we set the given status
107 |         return nothing
108 |         """
109 |         if iVerbose:
110 |             traceln("- Uploading as listed in TRP, the transcript(s) of document %s from folder %s to collection %s "%(docid, sColDSDir, colid))
111 | 
112 |         if docid:
113 |             if str(trp["md"]["docId"]) != str(docid):
114 |                 raise ValueError("Document ID does not match docId of TRP data.")
115 |         else:
116 |             docid = trp["md"]["docId"]  
117 | 
118 |         pageList = trp["pageList"]
119 | 
120 |         docDir = os.path.join(sColDSDir, str(docid))
121 |                               
122 |         if not os.path.exists(docDir): raise ValueError("Document directory not found: %s" % docDir)
123 |         
124 |         lFileList= []
125 |         for dPage in pageList['pages']:
126 |             pagenum= dPage['pageNr']
127 |             logging.info("\t\t- page %s"%pagenum)
128 |             
129 |             imgFileName = dPage['imgFileName']
130 |             base,_= os.path.splitext(imgFileName)
131 |             lFileList.append(base)
132 |             
133 |             _trpTranscript0 = dPage['tsList']["transcripts"][0]
134 |             tsId = _trpTranscript0['tsId']
135 |             sBaseName, _ = os.path.splitext(imgFileName)
136 |             xmlFilename = docDir + os.sep + sBaseName + ".pxml"
137 |             logging.info("\t\t\t%s"%xmlFilename)
138 |             assert os.path.exists(xmlFilename)
139 |             with open(xmlFilename, "r",encoding='utf-8') as fd: sXMlTranscript = fd.read()
140 |             cur_status = _trpTranscript0["status"] if status == None else status
141 |             traceln("page %5d : %s : %s : %s : %s : %s"%(pagenum, cur_status, sToolName, tsId, sNote, xmlFilename))
142 |             self.postPageTranscript(colid, docid, pagenum, sXMlTranscript, parentId=tsId, bEncoded=False, sNote=sNote, sToolName=sToolName, status=cur_status)
143 |                 
144 |             
145 |         if iVerbose:
146 |             traceln("   Done (collection %s, document %s as per TRP)"%(colid, docid))
147 |             
148 |         return lFileList
149 | 
def main():        
    """
    Command line entry point: upload the transcripts of a whole collection, 
    of one document, or of the document described by a TRP file (--trp).
    
    Positional arguments: <directory> <colId> [<docId>]
    Exit with status 1 if <directory> or <colId> is missing.
    Raise a ValueError if the 'col' folder cannot be found.
    """
    usage = "%s <directory> <colId> [<docId>]"%sys.argv[0]
    version = "v.01"
    description = """Upload the transcript(s) from the DS structure to Transkribus, either of the collection or one of its document(s). 
The <directory> must have been created by transkribus_downloader.py and should contain the 'col' directory and a trp.json file for the collection, and one per document (the 'out', 'ref', 'run', 'xml' folders are not used).
The page transcript from the single page PageXml files are uploaded. (The multi-page xml file(s) are ignored))    
""" + _Trnskrbs_description

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    
    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, TranskribusTranscriptUploader.sDefaultServerUrl)
        
    parser.add_option("-q", "--quiet"  , dest='bQuiet',  action="store_true", default=False, help="Quiet mode")    
    parser.add_option("--trp"  ,  dest='trp'  , action="store", type="string", help="upload the content specified by the trp file.")    
    parser.add_option("--toolname",  dest='tool'  , action="store", type="string", default="", help="Set the Toolname metadata in Transkribus.")    
    parser.add_option("--message",  dest='message', action="store", type="string", default="", help="Set the message metadata in Transkribus.")    
    parser.add_option("--set_status",  dest='set_status', action="store", type="string", default=None, help="Set the status of the uploaded transcript.")    

    # --- 
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    iVerbose = 0 if options.bQuiet else 2
    # --- 
    # only a missing argument is expected here, so catch IndexError only
    try:                sDSDir = args.pop(0)
    except IndexError:  _exit(usage, 1)
    if not(sDSDir.endswith(sCOL) or sDSDir.endswith(sCOL+os.path.sep)): 
        sColDSDir = os.path.abspath(os.path.join(sDSDir, sCOL))
    else:
        sColDSDir = os.path.abspath(sDSDir)
    if not( os.path.exists(sColDSDir) and os.path.isdir(sColDSDir) ):
        raise ValueError("Non-existing folder: %s "%sColDSDir)
        
    try:                colid = args.pop(0)
    except IndexError:  _exit(usage, 1)
    
    # the document id is optional
    docid = args.pop(0) if args else None
    
    # --- 
    doer = TranskribusTranscriptUploader(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    
    if options.trp:
        with open(options.trp, "r",encoding='utf-8') as fd:
            trp = json.load(fd)
        traceln("- Uploading to collection %s, as specified by trp data"%(colid))
        if not docid:
            docid = trp["md"]["docId"]
            traceln(" read docId from TRP: docId = %s"%docid) 
        sToolname = options.tool if options.tool else "Transkribus_uploader (--trp)"
        doer.uploadDocumentTranscript_by_trp(colid, docid, trp, sColDSDir
                                , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                                , status=options.set_status)
    elif docid is None:
        sToolname = options.tool if options.tool else "Transkribus_uploader"
        doer.uploadCollectionTranscript(colid, sColDSDir
                            , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                            , status=options.set_status)
    else:
        sToolname = options.tool if options.tool else "Transkribus_uploader (docid)"
        doer.uploadDocumentTranscript(colid, docid, sColDSDir
                            , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                            , status=options.set_status)
        
    traceln('- DONE, all transcripts were uploaded. See in collection %s'%colid)
    
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/src/TranskribusCommands/__init__.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | #REMOVE THIS annoying warning saying:
  4 | #  /usr/lib/python2.7/site-packages/requests-2.12.1-py2.7.egg/requests/packages/urllib3/connectionpool.py:843: InsecureRequestWarning: Unverified HTTPS request is being made. 
  5 | #  Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings  InsecureRequestWarning)
  6 | from __future__ import absolute_import
  7 | from __future__ import  print_function
  8 | from __future__ import unicode_literals
  9 | 
 10 | import sys
 11 | 
 12 | import requests.packages.urllib3
 13 | 
 14 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
 15 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
 16 | DEBUG=0
 17 | 
 18 | _Trnskrbs_default_url = "https://transkribus.eu/TrpServer"
 19 | 
 20 | _Trnskrbs_description = u"""Pass your login/password as options otherwise consider having a Transkribus_credential.py file, which defines a 'login' and a 'pwd' variables.
 21 |  If you need to use a proxy, use the --https_proxy option or set the environment variables HTTPS_PROXY. 
 22 |  To use HTTP Basic Auth with your proxy, use the http://user:password@host/ syntax.
 23 |  """
 24 | 
 25 | sCOL = "col"
 26 | sMPXMLExtension = ".mpxml"
 27 | 
 28 | NS_PAGE_XML         = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15"
 29 | 
 30 | def __Trnskrbs_basic_options(parser, sDefaultServerUrl):
 31 |     """
 32 |     UTILITY
 33 |     add the usual options for Transkribus to a command line option parser
 34 |     """
 35 |     #prepare for the parsing of the command line
 36 |     #parser = OptionParser(usage=usage, version=version)
 37 |     
 38 |     parser.add_option("-s", "--server"  , dest='server', action="store", type="string", default=sDefaultServerUrl, help="Transkribus server URL")
 39 |     
 40 |     parser.add_option("-l", "--login"   , dest='login' , action="store", type="string", help="Transkribus login (consider storing your credentials in 'transkribus_credentials.py')")    
 41 |     parser.add_option("-p", "--pwd"     , dest='pwd'   , action="store", type="string", help="Transkribus password")
 42 | 
 43 |     parser.add_option("--persist"       , dest='persist', action="store_true", help="Try using an existing persistent session, or log-in and persists the session.")
 44 |     
 45 |     parser.add_option("--https_proxy"   , dest='https_proxy'  , action="store", type="string", help="proxy, e.g. http://cornillon:8000")
 46 | 
 47 | 
 48 | def __Trnskrbs_do_login_stuff(trnskrbs_client, options, trace=None, traceln=None):
 49 |     """
 50 |     deal with the complicated login variants...
 51 |         -trace and traceln are optional print methods 
 52 |     return True or raises an exception
 53 |     """  
 54 |     bOk = False
 55 |     
 56 |     if options.persist:
 57 |         #try getting some persistent session token
 58 |         if DEBUG and trace: trace("  ---login--- Try reusing persistent session ... ")
 59 |         try:
 60 |             bOk = trnskrbs_client.reusePersistentSession()
 61 |             if DEBUG and traceln: traceln("OK!")
 62 |         except:
 63 |             if DEBUG and traceln: traceln("Failed")
 64 |           
 65 |     if not bOk:
 66 |         if options.login:
 67 |             login, pwd = options.login, options.pwd
 68 |         else:
 69 |             if trace: DEBUG and trace("  ---login--- no login provided, looking for stored credentials... ")
 70 |             login, pwd = trnskrbs_client.getStoredCredentials(bAsk=False)
 71 |             if DEBUG and traceln: traceln("OK")    
 72 | 
 73 |         if DEBUG and traceln: trace("  ---login--- logging onto Transkribus as %s "%login)
 74 |         trnskrbs_client.auth_login(login, pwd)
 75 |         if DEBUG and traceln: traceln("OK")
 76 |         bOk = True
 77 | 
 78 |     return bOk
 79 | 
 80 | def _exit(usage, status, exc=None):
 81 |     if usage: sys.stderr.write("ERROR: usage : %s\n"%usage)
 82 |     if exc != None: sys.stderr.write(str(exc))  #any exception?
 83 |     sys.exit(status)    
 84 |     
 85 |     
 86 | def strTabularFormat(lDic, lsKey, sSortKey=None):
 87 |     """
 88 |     Format as a table a list of dictionary like:
 89 |         [
 90 |             {
 91 |                 "modelName": "Marine_Lives",
 92 |                 "nrOfTokens": 0,
 93 |                 "isUsableInTranskribus": 1,
 94 |                 "nrOfDictTokens": 0,
 95 |                 "nrOfLines": 0,
 96 |                 "modelId": 45
 97 |             },
 98 |          ...       
 99 |     Show only keys listed in lsKey
100 |     if given, sSortKey is used to sort the lines of the table.
101 |     return a string
102 |     """
103 |     if sSortKey: lDic.sort(key=lambda x: x[sSortKey])
104 |     #computing column width
105 |     lWidth = [1] * len(lsKey)
106 |     for i, k in enumerate(lsKey): lWidth[i] = max(len(k), *[len(str(v[k])) for v in lDic])
107 |     sFmt = "|".join(["%%(%s)%ds"%(name,k) for name, k in zip(lsKey, lWidth)])  #something like "%(modelName)25s %(modelId)13s ..."
108 |     sFmt = sFmt + "\n"
109 |     sRet = sFmt%{k:k for k in lsKey}    #table header
110 |     sRet += sFmt % {s:("-"*n) for s,n in zip(lsKey, lWidth)}
111 |     for record in lDic: sRet += sFmt % record
112 |     return sRet
113 |  


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_addDocToCollec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Utility to add Transkribus documents to another collection
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | 
 35 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 36 | import sys, os, logging
 37 | from optparse import OptionParser
 38 | 
 39 | try: #to ease the use without proper Python installation
 40 |     import TranskribusPyClient_version
 41 | except ImportError:
 42 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 43 |     import TranskribusPyClient_version
 44 | 
 45 | from TranskribusPyClient.common.trace import traceln, trace
 46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 47 | from TranskribusPyClient.client import TranskribusClient
 48 | 
 49 | DEBUG = 0
 50 | 
 51 | description = """Add one or several documents stored in Transkribus to another Transkribus collection.
 52 | Document(s) and collection are specified by their unique identifier (a number).
 53 | """ + _Trnskrbs_description
 54 | 
 55 | usage = """%s <colId>  [ <docId> | <docIdFrom>-<docIdTo> ]+
 56 | Documents are specified by a space-separated list of numbers, or number ranges, e.g. 3-36.
 57 | """%sys.argv[0]
 58 | 
 59 | class DoAddDocToCollec(TranskribusClient):
 60 |     """
 61 |     Add a document to another collection.
 62 |     """
 63 |     
 64 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 65 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 66 |         TranskribusClient.__init__(self, sServerUrl=_Trnskrbs_default_url, proxies=sHttpProxy, loggingLevel=loggingLevel)
 67 |         
 68 | 
 69 | if __name__ == '__main__':
 70 |     version = "v.01"
 71 | 
 72 |     #prepare for the parsing of the command line
 73 |     parser = OptionParser(usage=usage, version=version)
 74 |     parser.description = description
 75 |     
 76 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 77 |     __Trnskrbs_basic_options(parser, _Trnskrbs_default_url)
 78 |         
 79 |     #parse the command line
 80 |     (options, args) = parser.parse_args()
 81 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 82 |     # ------------------------------------------------------------------------------------------------
 83 |     doer = DoAddDocToCollec(options.server, proxies, loggingLevel=logging.INFO)
 84 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 85 |     
 86 |     # --- 
 87 |     #target collection
 88 |     try:                    colId = int(args.pop(0))
 89 |     except Exception as e:  _exit(usage, 1, e)
 90 | 
 91 |     # --- 
 92 |     # document list
 93 |     try:
 94 |         lDocId = []
 95 |         while args:
 96 |             chunk = args.pop(0).strip()
 97 |             li = chunk.split('-')
 98 |             if li and len(li) == 2:
 99 |                 docId1, docId2 = [int(i) for i in li]
100 |                 lDocId.extend( range(docId1,docId2+1) )
101 |             else:
102 |                 docId = int(chunk)
103 |                 lDocId.append(docId)
104 |     except Exception as e:
105 |         _exit(usage, 2, e)
106 | 
107 |     # ---   
108 |     #credentials and proxy
109 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
110 | 
111 | 
112 | 
113 |     # ------------------------------------------------------------------------------------------------
114 |     doer = DoAddDocToCollec(options.server, proxies, loggingLevel=logging.INFO)
115 | 
116 |     __Trnskrbs_do_login_stuff(doer, options, trace, traceln)
117 |     
118 |     trace("- adding to collection '%d' the %d documents: "%(colId, len(lDocId)))
119 |     for docId in lDocId:
120 |         trace(" %d"%docId)
121 |         try:
122 |             doer.addDocToCollection(colId, docId)
123 |         except Exception as e:
124 |             traceln()
125 |             traceln("ERROR: could not add document '%d' to collection '%d'"%(docId, colId))
126 |             raise e
127 |     traceln()      
128 |     traceln("- Done for %d documents"%len(lDocId))
129 |     
130 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_analyzeLayoutBatch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     H. Déjean - Dec 2016
  7 | 
  8 | 
  9 |     Copyright Xerox(C) 2016 H. Déjean
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 | 
 29 | """
 30 | 
 31 | #    TranskribusCommands/do_LAbatch.py 3571 3820 8251 8252
 32 | 
 33 | 
 34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 35 | import sys, os, logging
 36 | from optparse import OptionParser
 37 | import json
 38 | import codecs
 39 | 
 40 | try: #to ease the use without proper Python installation
 41 |     import TranskribusPyClient_version
 42 | except ImportError:
 43 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 44 |     import TranskribusPyClient_version
 45 | 
 46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 47 | from TranskribusPyClient.client import TranskribusClient
 48 | from do_transcript import DoTranscript
 49 | from TranskribusPyClient.common.IntegerRange import IntegerRange
 50 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
 51 | 
 52 | 
 53 | from TranskribusPyClient.common.trace import traceln, trace
 54 | 
 55 | DEBUG = 0
 56 | 
 57 | description = """Apply Layout Analysis (LA) with batch model.
 58 | 
 59 | The syntax for specifying the page range is:
 60 | - one or several specifiers separated by a comma
 61 | - one separator is a page number, or a range of page number, e.g. 3-8
 62 | - Examples: 1   1,3,5   1-3    1,3,5-99,100
 63 | 
 64 | """ + _Trnskrbs_description
 65 | 
 66 | usage = """%s <colId> <docId> [<pages>] <doNotBlockSeg> <doNotLineSeg>
 67 | """%sys.argv[0]
 68 | 
 69 | class DoLAbatch(TranskribusClient):
 70 |     """
 71 |             Hi Hervé,
 72 |         
 73 |         Sebastian has done the integration of the tools and can answer more indepth questions.
 74 |         
 75 |         Please take a look at:
 76 |         https://transkribus.eu/TrpServer/Swadl/wadl.html
 77 |         
 78 |         or
 79 |         
 80 |         https://transkribus.eu/TrpServer/rest/application.wadl
 81 |         
 82 |         The new methods are at:
 83 |         /LA/analyze
 84 |         
 85 |         Valid values for the jobImpl parameter are:
 86 |         NcsrLaJob
 87 |         CvlLaJob
 88 |         CITlabAdvancedLaJob
 89 |         
 90 |         You have to post a list of descriptor objects either as XML or JSON to the service, specifying the pages that have to be analyzed. A single page descriptor would look like this (regionId optional):
 91 |         <documentSelectionDescriptor>
 92 |             <docId>1</docId>
 93 |             <pageList>
 94 |                 <pages>
 95 |                     <pageId>2</pageId>
 96 |                     <tsId>3</tsId>
 97 |                     <regionIds>aRegionId</regionIds>
 98 |                 </pages>
 99 |             </pageList>
100 |         </documentSelectionDescriptor>
101 |         
102 |         Do let us know if there are any problems with the new method.
103 |         
104 |         Best regards and have a nice weekend,
105 |         Philip    
106 |     
107 |     
108 |     """
109 |     sDefaultServerUrl = _Trnskrbs_default_url
110 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
111 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
112 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
113 |                 
114 |         self._trpMng = DoTranscript(self.sDefaultServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)
115 | 
116 | 
117 |     def buildDescription(self,colId,docpage,trp=None):
118 |         """
119 |             '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'
120 | <documentSelectionDescriptor>
121 |             <docId>1</docId>
122 |             <pageList>
123 |                 <pages>
124 |                     <pageId>2</pageId>
125 |                     <tsId>3</tsId>
126 |                     <regionIds>aRegionId</regionIds>
127 |                 </pages>
128 |             </pageList>
129 |         </documentSelectionDescriptor>            
130 |             
131 |         """
132 |         jsonDesc = {}
133 |         
134 |         if trp is None:
135 |             docId,pageRange= docpage.split('/')
136 |             jsonDesc["docId"]=docId
137 |             oPageRange = IntegerRange(pageRange)                 
138 |             trpObj = self._trpMng.filter(colId,docId,page_filter=oPageRange,bLast=True)
139 |         else:
140 |             trpObj = TRP_FullDoc(trp)
141 |         jsonDesc["pageList"]={}
142 | #         pList= trpObj.getTranscriptList()
143 |         jsonDesc["pageList"]['pages']= []
144 |         for page in trpObj.getPageList():
145 |             docId = page['docId']
146 |             jsonDesc["docId"]=page['docId']
147 |             jsonDesc["pageList"]['pages'].append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":[]})        
148 |         
149 |         
150 |         return jsonDesc["docId"], json.dumps(jsonDesc,encoding='utf-8')
151 | 
152 |     
153 |     def run(self, colId, sDescription, sJobImpl='CITlabAdvancedLaJob',bBlockSeg,bLineSeq):
154 |         ret = self.analyzeLayoutNew(colId, sDescription,sJobImpl,bBlockSeg,bLineSeq)
155 |         return ret
156 | 
157 | 
158 | 
159 | 
# Command line: <colId> <docId> <pages> [<doNotBlockSeg> [<doNotLineSeg>]]
# Exit with status 1 on a missing/bad mandatory argument, 2 on extra arguments.
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    
    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoLAbatch.sDefaultServerUrl)
        
    # NOTE(review): --region is never read below and its default is the server URL, which looks like a copy-paste - confirm
    parser.add_option("-r", "--region"  , dest='region', action="store", type="string", default=DoLAbatch.sDefaultServerUrl, help="apply Layout Analysis (textLine)")
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid"  , dest='docid'   , action="store", type="string", default=None, help="document/pages to be htr'd")        
    # ---   
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # --- 
    doer = DoLAbatch(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # the helper client shares the session of the main one
    doer._trpMng.setSessionId(doer._sessionID)
    
    # --- 
    try:                        colId = int(args.pop(0))
    except Exception as e:      _exit(usage, 1, e)
    try:                        docId   = int(args.pop(0))
    except Exception as e:      _exit(usage, 1, e)
    try:                        sPages = args.pop(0)
    except Exception as e:      _exit(usage, 1, e)
    # the two flags are optional; a segmentation is skipped only when its flag is given as 0
    # NOTE(review): given the flag names, one would rather expect 1 to mean "skip" - confirm the intent
    try:                        doNotBlockSeg = int(args.pop(0)) == 0
    except Exception as e:      doNotBlockSeg = False
    try:                        doNotLineSeg = int(args.pop(0)) == 0
    except Exception as e:      doNotLineSeg= False    
    if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))

    # --- 
    # do the job...
    # --docid, if given, takes over; otherwise use the positional <docId> and <pages>
    sDocPage = options.docid if options.docid else "%s/%s"%(docId, sPages)
    if options.trp_doc:
        with codecs.open(options.trp_doc, "rb",'utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,sDocPage,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,sDocPage)    
    
    # keyword arguments, so that the flags cannot be taken for the job implementation name
    jobid = doer.run(colId, sPageDesc, bBlockSeg=not(doNotBlockSeg), bLineSeq=not(doNotLineSeg))
    traceln(jobid)
        
    traceln()      
    traceln("- Done")
209 |     
210 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_createCollec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Create a collection
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #    TranskribusCommands/do_createCollec.py <colName>
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
# Debug switch; not referenced anywhere else in this script.
DEBUG = 0

# Text assigned to parser.description in the __main__ section:
# the command summary followed by the shared Transkribus description.
description = """create a Transkribus collection.
""" + _Trnskrbs_description

# Usage text, prefixed with the name the script was invoked with (sys.argv[0]).
# It is given to OptionParser and to _exit() when the collection name is missing.
usage = """%s <colName> 
"""%sys.argv[0]
 58 | 
 59 | class DoCreateCollec(TranskribusClient):
 60 |     
 61 |     sDefaultServerUrl = _Trnskrbs_default_url
 62 |     
 63 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 64 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 65 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 66 |         
 67 |         
 68 | 
 69 | if __name__ == '__main__':
 70 |     version = "v.01"
 71 | 
 72 |     #prepare for the parsing of the command line
 73 |     parser = OptionParser(usage=usage, version=version)
 74 |     parser.description = description
 75 |     
 76 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 77 |     __Trnskrbs_basic_options(parser, DoCreateCollec.sDefaultServerUrl)
 78 |         
 79 |     # ---   
 80 |     #parse the command line
 81 |     (options, args) = parser.parse_args()
 82 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 83 | 
 84 |     # --- 
 85 |     #source collection(s)
 86 |     try:
 87 |         sColName = args[0]
 88 |     except Exception as e:
 89 |         _exit(usage, 1, e)
 90 | 
 91 |     # --- 
 92 |     doer = DoCreateCollec(options.server, proxies, loggingLevel=logging.INFO)
 93 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 94 | 
 95 |     # --- 
 96 |     # do the job...
 97 |     try:
 98 |         resp = doer.createCollection(sColName)
 99 |     except Exception as e:  _exit("", 1, e)
100 |         
101 |         
102 |     traceln("- Done: --> %s"%resp)
103 |     
104 |     print (resp)
105 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_deleteCollec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Delete a collection
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #    TranskribusCommands/do_deleteCollec.py <colId>
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
# Debug switch; not referenced anywhere else in this script.
DEBUG = 0

# Text assigned to parser.description in the __main__ section:
# the command summary followed by the shared Transkribus description.
description = """delete a Transkribus collection.
""" + _Trnskrbs_description

# Usage text, prefixed with the name the script was invoked with (sys.argv[0]).
# It is given to OptionParser and to _exit() when the collection id is missing or not an integer.
usage = """%s <colId> 
"""%sys.argv[0]
 58 | 
 59 | class DoDeleteCollec(TranskribusClient):
 60 |     
 61 |     sDefaultServerUrl = _Trnskrbs_default_url
 62 |     
 63 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 64 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 65 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 66 |         
 67 |         
 68 | 
 69 | if __name__ == '__main__':
 70 |     version = "v.01"
 71 | 
 72 |     #prepare for the parsing of the command line
 73 |     parser = OptionParser(usage=usage, version=version)
 74 |     parser.description = description
 75 |     
 76 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 77 |     __Trnskrbs_basic_options(parser, DoDeleteCollec.sDefaultServerUrl)
 78 |         
 79 |     # ---   
 80 |     #parse the command line
 81 |     (options, args) = parser.parse_args()
 82 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 83 | 
 84 |     # --- 
 85 |     #source collection(s)
 86 |     try:
 87 |         colId = int(args[0])
 88 |     except Exception as e:
 89 |         _exit(usage, 1, e)
 90 | 
 91 |     # --- 
 92 |     doer = DoDeleteCollec(options.server, proxies, loggingLevel=logging.INFO)
 93 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 94 | 
 95 |     # --- 
 96 |     # do the job...
 97 |     try:
 98 |         resp = doer.deleteCollection(colId)
 99 |     except Exception as e:  _exit("", 1, e)
100 |         
101 |     traceln("- Done")
102 |     
103 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_deleteJob.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Delete a job
  6 | 
  7 |     H. Déjean - Dec 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #    TranskribusCommands/do_deleteJob.py <JOBID>
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
# Debug switch; not referenced anywhere else in this script.
DEBUG = 0

# Text assigned to parser.description in the __main__ section:
# the command summary followed by the shared Transkribus description.
description = """delete a Transkribus job.
""" + _Trnskrbs_description

# Usage text, prefixed with the name the script was invoked with (sys.argv[0]).
# It is given to OptionParser and to _exit() when the job id is missing or not an integer.
usage = """%s <jobId> 
"""%sys.argv[0]
 58 | 
 59 | class DoDeleteJob(TranskribusClient):
 60 |     
 61 |     sDefaultServerUrl = _Trnskrbs_default_url
 62 |     
 63 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 64 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 65 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 66 |         
 67 |         
 68 | 
 69 | if __name__ == '__main__':
 70 |     version = "v.01"
 71 | 
 72 |     #prepare for the parsing of the command line
 73 |     parser = OptionParser(usage=usage, version=version)
 74 |     parser.description = description
 75 |     
 76 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 77 |     __Trnskrbs_basic_options(parser, DoDeleteJob.sDefaultServerUrl)
 78 |         
 79 |     # ---   
 80 |     #parse the command line
 81 |     (options, args) = parser.parse_args()
 82 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 83 | 
 84 |     # --- 
 85 |     #source collection(s)
 86 |     try:
 87 |         jobid = int(args[0])
 88 |     except Exception as e:
 89 |         _exit(usage, 1, e)
 90 | 
 91 |     # --- 
 92 |     doer = DoDeleteJob(options.server, proxies, loggingLevel=logging.INFO)
 93 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 94 | 
 95 |     # --- 
 96 |     # do the job...
 97 |     try:
 98 |         resp = doer.deleteJob(jobid)
 99 |     except Exception as e:  _exit("", 1, e)
100 |     
101 |     if resp != "CANCELED":
102 |         raise Exception("Job status should be CANCELED not '%s'"%resp)
103 |         
104 |     traceln("- Done")
105 |     
106 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_duplicateDoc.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Utility to duplicate Transkribus documents from a collection to another collection
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #    TranskribusCommands/do_duplicateDoc.py 3571 3820 8251 8252
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
# Debug switch; not referenced anywhere else in this script.
DEBUG = 0

# Text assigned to parser.description in the __main__ section:
# the command summary followed by the shared Transkribus description.
description = """Copy (duplicate) one or several documents stored in a Transkribus collection to another Transkribus collection.
Document(s) and collections are specified by their unique identifier (a number).
""" + _Trnskrbs_description

# Usage text, prefixed with the name the script was invoked with (sys.argv[0]).
# It is given to OptionParser and to _exit() when the positional arguments cannot be parsed.
usage = """%s <from_colId>  <to_colId> ( <docId> | <docIdFrom>-<docIdTo> )+
Documents are specified by a space-separated list of numbers, or number ranges, e.g. 3-36.
"""%sys.argv[0]
 60 | 
 61 | class DoCopyDocToCollec(TranskribusClient):
 62 |     """
 63 |     Copy a document from a collection to another
 64 |     """
 65 |     sDefaultServerUrl = _Trnskrbs_default_url
 66 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 67 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 68 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 69 |         
 70 | 
 71 | if __name__ == '__main__':
 72 |     version = "v.01"
 73 | 
 74 |     #prepare for the parsing of the command line
 75 |     parser = OptionParser(usage=usage, version=version)
 76 |     parser.description = description
 77 |     
 78 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 79 |     __Trnskrbs_basic_options(parser, DoCopyDocToCollec.sDefaultServerUrl)
 80 |         
 81 |     # ---   
 82 |     #parse the command line
 83 |     (options, args) = parser.parse_args()
 84 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 85 | 
 86 |     # --- 
 87 |     #source collection
 88 |     try:                        colIdFrom = int(args.pop(0))
 89 |     except Exception as e:      _exit(usage, 1, e)
 90 |     #target collection
 91 |     try:                        colIdTo   = int(args.pop(0))
 92 |     except Exception as e:      _exit(usage, 1, e)
 93 | 
 94 |     # --- 
 95 |     # document list
 96 |     try:
 97 |         lDocId = []
 98 |         while args:
 99 |             chunk = args.pop(0).strip()
100 |             li = chunk.split('-')
101 |             if li and len(li) == 2:
102 |                 docId1, docId2 = [int(i) for i in li]
103 |                 lDocId.extend( range(docId1,docId2+1) )
104 |             else:
105 |                 docId = int(chunk)
106 |                 lDocId.append(docId)
107 |     except Exception as e:
108 |         _exit(usage, 2, e)
109 | 
110 |     # ------------------------------------------------------------------------------------------------
111 |     doer = DoCopyDocToCollec(options.server, proxies, loggingLevel=logging.INFO)
112 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
113 | 
114 | 
115 |     #the only issue is that we need to have the name of each document...
116 |     traceln("- checking existence of each document in source collection '%d'"%(colIdFrom))
117 |     dName_by_docId = {}
118 |     lDocDic = doer.listDocsByCollectionId(colIdFrom)
119 |     for docDic in lDocDic:
120 |         dName_by_docId[ docDic['docId'] ] = docDic['title']
121 |     #check now, so as to avoid partial copies...
122 |     for docId in lDocId:
123 |         try:
124 |             name = dName_by_docId[docId]
125 |         except KeyError as e:
126 |             traceln()
127 |             traceln("ERROR: document '%d' is not in source collection '%d'"%(docId, colIdFrom))
128 |             _exit("", 3, e)
129 |     
130 |     trace("- duplicating from collection %d to collection '%d' the %d documents: "%(colIdFrom, colIdTo, len(lDocId)))
131 |     for docId in lDocId:
132 |         name = dName_by_docId[docId]
133 |         trace(" %d  ('%s')"%(docId, name))
134 |         try:
135 |             doer.duplicateDoc(colIdFrom, docId, colIdTo, name)
136 |         except Exception as e:
137 |             traceln()
138 |             traceln("ERROR: could not copy document '%d' from collection '%d' to collection '%d'"%(docId, colIdFrom, colIdTo))
139 |             _exit("", 4, e)
140 |     traceln()      
141 |     traceln("- Done for %d documents"%len(lDocId))
142 |     
143 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_export.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     Hervé Déjean - april 2021
  7 | 
  8 | 
  9 |     Copyright Naver LabsEurope (C) 2021 
 10 | 
 11 |     see https://transkribus.eu/wiki/index.php/HTR
 12 | """
 13 | from __future__ import absolute_import
 14 | from __future__ import  print_function
 15 | from __future__ import unicode_literals
 16 | 
 17 | #    TranskribusCommands/do_export.py <colId> <docid>
 18 | 
 19 | 
 20 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 21 | import sys, os, logging
 22 | from optparse import OptionParser
 23 | import json
 24 | from lxml import etree
 25 | 
 26 | try: #to ease the use without proper Python installation
 27 |     import TranskribusPyClient_version
 28 | except ImportError:
 29 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 30 |     import TranskribusPyClient_version
 31 | 
 32 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 33 | # from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 34 | 
 35 | from TranskribusPyClient.common.IntegerRange import IntegerRange
 36 | from TranskribusPyClient.common.trace import traceln, trace
 37 | from TranskribusPyClient.client import TranskribusClient
 38 | 
 39 | 
 40 | DEBUG = 0
 41 | 
 42 | description = """Export a document into alto format """
 43 | 
 44 | 
 45 | usage = """%s<colId> <docid> 
 46 | """%sys.argv[0]
 47 | 
 48 | class Export(TranskribusClient):
 49 | 
 50 |     sDefaultServerUrl = _Trnskrbs_default_url
 51 |     params="""
 52 | {   "commonPars" : {
 53 |       "pages" : "1",
 54 |       "doExportDocMetadata" : true,
 55 |       "doWriteMets" : true,
 56 |       "doWriteImages" : true,
 57 |       "doExportPageXml" : true,
 58 |       "doExportAltoXml" : true,
 59 |       "doExportSingleTxtFiles" : false,
 60 |       "doWritePdf" : false,
 61 |       "doWriteTei" : false,
 62 |       "doWriteDocx" : false,
 63 |       "doWriteOneTxt" : false,
 64 |       "doWriteTagsXlsx" : false,
 65 |       "doWriteTagsIob" : false,
 66 |       "doWriteTablesXlsx" : false,
 67 |       "doWriteStructureInMets" : false,
 68 |       "doCreateTitle" : false,
 69 |       "useVersionStatus" : "Latest version",
 70 |       "writeTextOnWordLevel" : false,
 71 |       "doBlackening" : false,
 72 |       "selectedTags" : [ "add", "date", "Address", "human_production", "supplied", "work", "unclear", "sic", "structure", "div", "highlight", "place1", "regionType", "speech", "person", "gap", "organization", "comment", "abbrev", "place", "add1", "Initial", "lat" ],
 73 |       "font" : "FreeSerif",
 74 |       "splitIntoWordsInAltoXml" : true,
 75 |       "pageDirName" : "page",
 76 |       "fileNamePattern" : "${filename}",
 77 |       "useHttps" : true,
 78 |       "remoteImgQuality" : "orig",
 79 |       "doOverwrite" : true,
 80 |       "useOcrMasterDir" : true,
 81 |       "exportTranscriptMetadata" : true,
 82 |       "updatePageXmlImageDimensions" : false
 83 |    },
 84 |    "altoPars" : {
 85 |       "splitIntoWordsInAltoXml" : true
 86 |    },
 87 |    "pdfPars" : {
 88 |       "doPdfImagesOnly" : false,
 89 |       "doPdfImagesPlusText" : true,
 90 |       "doPdfWithTextPages" : false,
 91 |       "doPdfWithTags" : false,
 92 |       "doPdfWithArticles" : false,
 93 |       "doPdfA" : false,
 94 |       "pdfImgQuality" : "view"
 95 |    },
 96 |    "docxPars" : {
 97 |       "doDocxWithTags" : false,
 98 |       "doDocxPreserveLineBreaks" : false,
 99 |       "doDocxForcePageBreaks" : false,
100 |       "doDocxMarkUnclear" : false,
101 |       "doDocxKeepAbbrevs" : false,
102 |       "doDocxExpandAbbrevs" : false,
103 |       "doDocxSubstituteAbbrevs" : false,
104 |       "doDocxWriteFilenames" : false,
105 |       "doDocxIgnoreSuppliedTag" : false,
106 |       "doDocxShowSuppliedTagWithBrackets" : false
107 |    }
108 | }
109 |         """
110 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
111 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
112 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
113 |     
114 |     def run(self, colId, docid,sParams):
115 |         ret = self.exportCollection(colId, docid,sParams)
116 |         return ret
117 |     
118 | 
119 |             
120 |    
121 |     
if __name__ == '__main__':
    # Command-line front end: log in, then request the export of one document.
    version = "v.01"

    # command-line parser with the shared Transkribus options
    # ("-s"/"--server", "-l"/"--login", "-p"/"--pwd", "--https_proxy")
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, Export.sDefaultServerUrl)

    options, args = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy': options.https_proxy}
    else:
        proxies = {}

    # the login takes place before the positional arguments are checked
    client = Export(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(client, options, trace=trace, traceln=traceln)

    # positional arguments: <colId> (converted to int) and <docid> (kept as given)
    try:
        colId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)
    try:
        docid = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    if args:
        _exit(usage, 2, Exception("Extra arguments to the command"))

    # do the job, with the export parameters stored on the class
    jobid = client.run(colId, docid, client.params)
    traceln("job ID:", jobid)
    traceln("- Done")
157 |     
158 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getDocTrp.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     JL Meunier - August 2017
  7 | 
  8 | 
  9 |     Copyright Naver(C) 2017 JL. Meunier
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 | 
 29 | """
 30 | from __future__ import absolute_import
 31 | from __future__ import  print_function
 32 | from __future__ import unicode_literals
 33 | #    TranskribusCommands/do_getDocTrp.py <colId> <docId> [<page-ranges>] -n <nb_transcripts>
 34 | 
 35 | 
 36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 37 | import sys, os, logging
 38 | from optparse import OptionParser
 39 | import json
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.IntegerRange  import IntegerRange as PageRangeSpec
 50 | from TranskribusPyClient.common.trace import traceln, trace
 51 | 
# Debug switch; not referenced anywhere else in this script.
DEBUG = 0

# Text assigned to parser.description in the __main__ section:
# the command summary followed by the shared Transkribus description.
description = """Get the TRP of a document
""" + _Trnskrbs_description

# Usage text, prefixed with the name the script was invoked with (sys.argv[0]).
# It is given to OptionParser and to _exit() when the positional arguments are invalid.
usage = """%s <colId> <docId> [<page-ranges>] -n <nb_transcripts>
Return the so-called TRP of all or certain pages, optionally with the given number of transcript(s) per page (-1 means all).

Page range is a comma-separated series of integer or pair of integers separated by a '-' 
For instance 1  or 1,3  or 1-4 or 1,3-6,8
"""%sys.argv[0]
 63 | 
 64 | class DoGetDocTrp(TranskribusClient):
 65 |     sDefaultServerUrl = _Trnskrbs_default_url
 66 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 67 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 68 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 69 |     
 70 |     def run(self, colId, docId, nrOfTranscripts=1):
 71 |         ret = self.getDocById(colId, docId, nrOfTranscripts)
 72 |         return ret
 73 | 
 74 | if __name__ == '__main__':
 75 |     version = "v.01"
 76 | 
 77 |     #prepare for the parsing of the command line
 78 |     parser = OptionParser(usage=usage, version=version)
 79 |     parser.description = description
 80 |     
 81 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 82 |     __Trnskrbs_basic_options(parser, DoGetDocTrp.sDefaultServerUrl)
 83 |     parser.add_option("-n", "--n"  , dest='nbTranscript', action="store", type="int", default=1, help="Number of transcripts")
 84 |         
 85 |     # ---   
 86 |     #parse the command line
 87 |     (options, args) = parser.parse_args()
 88 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 89 | 
 90 |     # --- 
 91 |     doer = DoGetDocTrp(options.server, proxies, loggingLevel=logging.WARN)
 92 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 93 |     # --- 
 94 |     try:                        colId = int(args.pop(0))
 95 |     except Exception as e:      _exit(usage, 1, e)
 96 |     try:                        docId   = int(args.pop(0))
 97 |     except Exception as e:      _exit(usage, 1, e)
 98 |     try:                        sPageRangeSpec = args.pop(0)
 99 |     except Exception as e:      sPageRangeSpec = None
100 |     if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))
101 | 
102 |     oPageRange = PageRangeSpec(sPageRangeSpec) if sPageRangeSpec else None
103 |         
104 |     # --- 
105 |     # do the job...
106 |     resp = doer.run(colId, docId, nrOfTranscripts=options.nbTranscript)
107 |     if oPageRange:
108 |         traceln("Filtering response as per page specification: %s"%oPageRange)
109 |         #let's filter the response (not super efficient but easy to code...
110 |         ldPages = resp["pageList"]["pages"]
111 |         ldPagesInRange = [ dPage for dPage in ldPages if dPage["pageNr"] in oPageRange]
112 |         resp["pageList"]["pages"] = ldPagesInRange
113 | 
114 |     print (json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))
115 |         
116 |     traceln()      
117 |     traceln("- Done")
118 |     
119 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getJobStatus.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Get the status of a job
  6 | 
  7 |     JL Meunier - Dec 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
#    TranskribusCommands/do_getJobStatus.py <JOBID>
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
 51 | import json
 52 | DEBUG = 0
 53 | 
 54 | description = """Get the status of a Transkribus job.
 55 | """ + _Trnskrbs_description
 56 | 
 57 | usage = """%s <jobId> 
 58 | """%sys.argv[0]
 59 | 
 60 | class DoDeleteJob(TranskribusClient):
 61 |     
 62 |     sDefaultServerUrl = _Trnskrbs_default_url
 63 |     
 64 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 65 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 66 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 67 |         
 68 |         
 69 | 
 70 | if __name__ == '__main__':
 71 |     version = "v.01"
 72 | 
 73 |     #prepare for the parsing of the command line
 74 |     parser = OptionParser(usage=usage, version=version)
 75 |     parser.description = description
 76 |     
 77 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 78 |     __Trnskrbs_basic_options(parser, DoDeleteJob.sDefaultServerUrl)
 79 |         
 80 |     # ---   
 81 |     #parse the command line
 82 |     (options, args) = parser.parse_args()
 83 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 84 | 
 85 |     # --- 
 86 |     #source collection(s)
 87 |     try:
 88 |         jobid = int(args[0])
 89 |     except Exception as e:
 90 |         _exit(usage, 1, e)
 91 | 
 92 |     # --- 
 93 |     doer = DoDeleteJob(options.server, proxies, loggingLevel=logging.INFO)
 94 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 95 | 
 96 |     # --- 
 97 |     # do the job...
 98 |     try:
 99 |         resp = doer.getJobStatus(jobid)
100 |     except Exception as e:  _exit("", 1, e)
101 |     traceln( json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))
102 |     
103 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getJobs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Get the list of jobs
  6 | 
  7 |     Hervé Déjean - April 2017
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
#    TranskribusCommands/do_getJobs.py
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
 51 | import json
 52 | 
 53 | DEBUG = 0
 54 | 
 55 | 
 56 | description = """Get the status of a Transkribus job.
 57 | """ + _Trnskrbs_description
 58 | 
 59 | usage = """%s <jobId> 
 60 | """%sys.argv[0]
 61 | 
 62 | class DoGetJobs(TranskribusClient):
 63 |     
 64 |     sDefaultServerUrl = _Trnskrbs_default_url
 65 |     
 66 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 67 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 68 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 69 |         
 70 |         
 71 | 
 72 | if __name__ == '__main__':
 73 |     version = "v.01"
 74 | 
 75 |     #prepare for the parsing of the command line
 76 |     parser = OptionParser(usage=usage, version=version)
 77 |     parser.description = description
 78 |     
 79 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 80 |     __Trnskrbs_basic_options(parser, DoGetJobs.sDefaultServerUrl)
 81 |         
 82 |     # ---   
 83 |     #parse the command line
 84 |     (options, args) = parser.parse_args()
 85 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 86 | 
 87 |     # --- 
 88 |     #source collection(s)
 89 | #     try:
 90 | #         jobid = int(args[0])
 91 | #     except Exception as e:
 92 | #         _exit(usage, 1, e)
 93 | 
 94 |     # --- 
 95 |     doer = DoGetJobs(options.server, proxies, loggingLevel=logging.INFO)
 96 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 97 | 
 98 |     # --- 
 99 |     # do the job...
100 |     try:
101 |         resp = doer.getJobs()
102 |     except Exception as e:  _exit("", 1, e)
103 |     traceln( json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))
104 |     
105 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getRnnTrainingJobStatus.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Get the status of a job
  6 | 
  7 |     JL Meunier - Dev 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | 
#    TranskribusCommands/do_getRnnTrainingJobStatus.py <JOBID>
 36 | 
 37 | 
 38 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 39 | import sys, os, logging
 40 | from optparse import OptionParser
 41 | 
 42 | try: #to ease the use without proper Python installation
 43 |     import TranskribusPyClient_version
 44 | except ImportError:
 45 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 46 |     import TranskribusPyClient_version
 47 | 
 48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 49 | from TranskribusPyClient.client import TranskribusClient
 50 | from TranskribusPyClient.common.trace import traceln, trace
 51 | 
 52 | import json
 53 | DEBUG = 0
 54 | 
 55 | description = """Get the status of a Transkribus job.
 56 | """ + _Trnskrbs_description
 57 | 
 58 | usage = """%s <jobId> 
 59 | """%sys.argv[0]
 60 | 
 61 | class DoDeleteJob(TranskribusClient):
 62 |     
 63 |     sDefaultServerUrl = _Trnskrbs_default_url
 64 |     
 65 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 66 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 67 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 68 |         
 69 |         
 70 | 
 71 | if __name__ == '__main__':
 72 |     version = "v.01"
 73 | 
 74 |     #prepare for the parsing of the command line
 75 |     parser = OptionParser(usage=usage, version=version)
 76 |     parser.description = description
 77 |     
 78 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 79 |     __Trnskrbs_basic_options(parser, DoDeleteJob.sDefaultServerUrl)
 80 |         
 81 |     # ---   
 82 |     #parse the command line
 83 |     (options, args) = parser.parse_args()
 84 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 85 | 
 86 |     # --- 
 87 |     #source collection(s)
 88 |     try:
 89 |         jobid = int(args[0])
 90 |     except Exception as e:
 91 |         _exit(usage, 1, e)
 92 | 
 93 |     # --- 
 94 |     doer = DoDeleteJob(options.server, proxies, loggingLevel=logging.INFO)
 95 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 96 | 
 97 |     # --- 
 98 |     # do the job...
 99 |     try:
100 |         resp = doer.getJobStatus(jobid)
101 |     except Exception as e:  _exit("", 1, e)
102 | #     traceln( json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))
103 |     traceln( json.dumps(resp['description'], sort_keys=True, indent=4, separators=(',', ': ')))
104 | #     traceln( json.dumps(resp['jobData'].split('\n')[1:], sort_keys=True, indent=4, separators=(',', ': ')))
105 | 
106 |     
107 |     
108 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrHmm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     JL Meunier - Dec 2016
  7 | 
  8 | 
  9 |     Copyright Xerox(C) 2016 JL. Meunier
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 | 
 29 | """
 30 | from __future__ import absolute_import
 31 | from __future__ import  print_function
 32 | from __future__ import unicode_literals
 33 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
 34 | 
 35 | 
 36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 37 | import sys, os, logging
 38 | from optparse import OptionParser
 39 | # import json
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
 51 | DEBUG = 0
 52 | 
 53 | description = """Apply an HTR model.
 54 | 
 55 | The syntax for specifying the page range is:
 56 | - one or several specifiers separated by a comma
 57 | - one separator is a page number, or a range of page number, e.g. 3-8
 58 | - Examples: 1   1,3,5   1-3    1,3,5-99,100
 59 | 
 60 | """ + _Trnskrbs_description
 61 | 
 62 | usage = """%s <model-name> <colId> <docId> [<pages>]
 63 | """%sys.argv[0]
 64 | 
 65 | class DoHtr(TranskribusClient):
 66 |     sDefaultServerUrl = _Trnskrbs_default_url
 67 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 68 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 69 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 70 |     
 71 |     def run(self, sModelName, colId, docId, sPages):
 72 |         ret = self.rehtrDecode(colId, sModelName, docId, sPages)
 73 |         return ret
 74 | 
 75 | if __name__ == '__main__':
 76 |     version = "v.01"
 77 | 
 78 |     #prepare for the parsing of the command line
 79 |     parser = OptionParser(usage=usage, version=version)
 80 |     parser.description = description
 81 |     
 82 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 83 |     __Trnskrbs_basic_options(parser, DoHtr.sDefaultServerUrl)
 84 |         
 85 |     # ---   
 86 |     #parse the command line
 87 |     (options, args) = parser.parse_args()
 88 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 89 | 
 90 |     # --- 
 91 |     doer = DoHtr(options.server, proxies, loggingLevel=logging.WARN)
 92 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
 93 |     # --- 
 94 |     try:                        sModelName = args.pop(0)
 95 |     except Exception as e:      _exit(usage, 1, e)
 96 |     try:                        colId = int(args.pop(0))
 97 |     except Exception as e:      _exit(usage, 1, e)
 98 |     try:                        docId   = int(args.pop(0))
 99 |     except Exception as e:      _exit(usage, 1, e)
100 |     try:                        sPages = args.pop(0)
101 |     except Exception as e:      sPages = None
102 |     if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))
103 | 
104 |     # --- 
105 |     # do the job...
106 |     jobid = doer.run(sModelName, colId, docId, sPages)
107 |     traceln(jobid)
108 |         
109 |     traceln()      
110 |     traceln("- Done")
111 |     
112 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrRnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     JL Meunier - Dec 2016
  7 | 
  8 | 
  9 |     Copyright Xerox(C) 2016 JL. Meunier
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 | 
 29 | """
 30 | from __future__ import absolute_import
 31 | from __future__ import  print_function
 32 | from __future__ import unicode_literals
 33 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
 34 | 
 35 | 
 36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 37 | import sys, os, logging
 38 | from optparse import OptionParser
 39 | from io import open
 40 | 
 41 | import json
 42 | 
 43 | try: #to ease the use without proper Python installation
 44 |     import TranskribusPyClient_version
 45 | except ImportError:
 46 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 47 |     import TranskribusPyClient_version
 48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 49 | from TranskribusPyClient.client import TranskribusClient
 50 | 
 51 | from TranskribusCommands.do_transcript import DoTranscript
 52 | 
 53 | from TranskribusPyClient.common.IntegerRange import IntegerRange
 54 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
 55 | 
 56 | from TranskribusPyClient.common.trace import traceln, trace
 57 | 
 58 | DEBUG = 0
 59 | 
 60 | description = """Apply an HTR RNN model.
 61 | 
 62 | The syntax for specifying the page range is:
 63 | - one or several specifiers separated by a comma
 64 | - one separator is a page number, or a range of page number, e.g. 3-8
 65 | - Examples: 1   1,3,5   1-3    1,3,5-99,100
 66 | """ + _Trnskrbs_description
 67 | 
 68 | usage = """%s <modelID>   <colId>  (--trp TRP_FILE | --docid DOCID)
 69 | """%sys.argv[0]
 70 | 
 71 | class DoHtrRnn(TranskribusClient):
 72 |     """
 73 |         10/16/2017:  at region level
 74 |         {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
 75 |         
 76 |         Our client sends it like this:
 77 |         
 78 |         3 > POST
 79 |         https://transkribus.eu/TrpServerTesting/rest/recognition/2/241/htrCITlab?id=2278
 80 |         3 > Accept: text/plain
 81 |         ...
 82 |         3 > Content-Type: application/json
 83 |         3 > Cookie: $Version=1;JSESSIONID=....
 84 |         {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
 85 | 
 86 | 
 87 |     """
 88 |     sDefaultServerUrl = _Trnskrbs_default_url
 89 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 90 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 91 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 92 |         
 93 |         self._trpMng = DoTranscript(self.sDefaultServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)
 94 | 
 95 |     def run(self, sModelID,  colId, docId, sDescPages,bPyLaia):
 96 |         ret = self.htrRnnDecode(colId, sModelID,  docId, sDescPages,bPyLaia)
 97 |         return ret
 98 | 
 99 |     def buildDescription(self,colId,docpage,trp=None):
100 |         """
101 |             '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'
102 |         """
103 |         jsonDesc = {}
104 |         
105 |         if trp is None:
106 |             try: docId,pageRange= docpage.split('/')
107 |             except ValueError: docId=docpage; pageRange = ""
108 |             jsonDesc["docId"]=docId
109 |             oPageRange = IntegerRange(pageRange)                 
110 |             trpObj = self._trpMng.filter(colId,docId,page_filter=oPageRange,bLast=True)
111 |         else:
112 |             trpObj = TRP_FullDoc(trp)
113 |         jsonDesc["pageList"]={}
114 | #         pList= trpObj.getTranscriptList()
115 |         jsonDesc["pageList"]['pages']= []
116 |         for page in trpObj.getPageList():
117 |             docId = page['docId']
118 |             jsonDesc["docId"]=page['docId']
119 |             jsonDesc["pageList"]['pages'].append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":[]})        
120 |         
121 |         
122 | #         return jsonDesc["docId"], json.dumps(jsonDesc,encoding='utf-8')
123 |         return jsonDesc["docId"], json.dumps(jsonDesc)
124 |     
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoHtrRnn.sDefaultServerUrl)

    # --region used to default to the server URL (copy/paste of the --server option);
    # the option value is not read by this script.
    parser.add_option("-r", "--region"  , dest='region', action="store", type="string", default=None, help="apply HTR at region level")
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid"  , dest='docid'   , action="store", type="string", default=None, help="document/pages to be htr'd")
    parser.add_option("--tempdict"  , dest='dictTemp' , action="store_true", default=False, help="use tempDict folder")
    # NOTE(review): with default=True, this store_true flag can never turn PyLaia off.
    parser.add_option("--pylaia"  , dest='bPylaia' , action="store_true", default=True, help="use PyLaia model")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # the embedded transcript helper reuses the session of the main client
    doer._trpMng.setSessionId(doer._sessionID)

    # ---
    # positional arguments: <modelID> <colId>
    try:                        sModelID = args.pop(0)
    except Exception as e:      _exit(usage, 1, e)
    try:                        colId = int(args.pop(0))
    except Exception as e:      _exit(usage, 1, e)

    if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))

    if options.trp_doc:
        # close the TRP file once loaded (the handle used to be left open)
        with open(options.trp_doc, "r",encoding='utf-8') as fdTrp:
            trpdoc = json.load(fdTrp)
        docId,sPageDesc = doer.buildDescription(colId,options.docid,trpdoc)
    elif options.docid:
        docId,sPageDesc = doer.buildDescription(colId,options.docid)
    else:
        # used to crash with a traceback when neither option was given
        _exit(usage, 1, Exception("Either --trp or --docid must be specified"))

    # do the job...
    jobid = doer.run(sModelID, colId, docId, sPageDesc,options.bPylaia)
    traceln(jobid)

    traceln()
    traceln("- Done")
179 |     
180 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrRnnPerRegion.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     Hervé Déjean 
  7 | 
  8 |     Copyright NLE(C) 2017
  9 | 
 10 |     This program is free software: you can redistribute it and/or modify
 11 |     it under the terms of the GNU General Public License as published by
 12 |     the Free Software Foundation, either version 3 of the License, or
 13 |     (at your option) any later version.
 14 | 
 15 |     This program is distributed in the hope that it will be useful,
 16 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |     GNU General Public License for more details.
 19 | 
 20 |     You should have received a copy of the GNU General Public License
 21 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 22 |     
 23 |     
 24 |     Developed  for the EU project READ. The READ project has received funding 
 25 |     from the European Union’s Horizon 2020 research and innovation programme 
 26 |     under grant agreement No 674943.
 27 | 
 28 | """
 29 | 
 30 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
 31 | 
 32 | 
 33 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 34 | import sys, os, logging
 35 | from optparse import OptionParser
 36 | from io import open
 37 | import json
 38 | 
 39 | try: #to ease the use without proper Python installation
 40 |     import TranskribusPyClient_version
 41 | except ImportError:
 42 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 43 |     import TranskribusPyClient_version
 44 | 
 45 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 46 | from TranskribusPyClient.client import TranskribusClient
 47 | 
 48 | from do_transcript import DoTranscript
 49 | 
 50 | from TranskribusPyClient.common.IntegerRange import IntegerRange
 51 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
 52 | 
 53 | from TranskribusPyClient.common.trace import traceln, trace
 54 | 
 55 | DEBUG = 0
 56 | 
 57 | description = """Apply an HTR RNN model for a given table column with a specific dictionary.
 58 | 
 59 | """ + _Trnskrbs_description
 60 | 
 61 | usage = """%s <modelID>  <dictname> <colId> [--trp] [--docid] [--colnum] [--dict] [--tempdict]
 62 | """%sys.argv[0]
 63 | 
 64 | class DoHtrRnnPerColumn(TranskribusClient):
 65 |     """
 66 |         10/16/2017:  at region level
 67 |         {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
 68 |         
 69 |         Our client sends it like this:
 70 |         
 71 |         3 > POST
 72 |         https://transkribus.eu/TrpServerTesting/rest/recognition/2/241/htrCITlab?id=2278
 73 |         3 > Accept: text/plain
 74 |         ...
 75 |         3 > Content-Type: application/json
 76 |         3 > Cookie: $Version=1;JSESSIONID=....
 77 |         {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
 78 | 
 79 | 
 80 |     """
 81 |     sDefaultServerUrl = _Trnskrbs_default_url
 82 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 83 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 84 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 85 |         
 86 |         self._trpMng = DoTranscript(self.sDefaultServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)
 87 | 
 88 |     def run(self, sModelID, sDictName, colId, docId,sDescPages,bDictTemp):
 89 |         """
 90 |             
 91 |         """
 92 |         ret = self.htrRnnDecode(colId, sModelID, sDictName, docId, sDescPages,bDictTemp)
 93 |         return ret
 94 | 
 95 |     def buildDescription(self,colId,docpage,colnum,trp=None):
 96 |         """
 97 |             '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'
 98 |         """
 99 | #         return  17442,'{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'.encode('utf-8')
100 |         jsonDesc = {}
101 |         
102 |         if trp is None:
103 |             try: docId,pageRange= docpage.split('/')
104 |             except ValueError: docId=docpage; pageRange = ""
105 |             jsonDesc["docId"]=docId
106 |             oPageRange = IntegerRange(pageRange)                 
107 |             trpObj = self._trpMng.filter(colId,docId,page_filter=oPageRange,bLast=True)
108 |         else:
109 |             trpObj = TRP_FullDoc(trp)
110 |         jsonDesc["pageList"]={}
111 | #         pList= trpObj.getTranscriptList()
112 |         jsonDesc["pageList"]['pages']= []
113 |         for page in trpObj.getPageList():
114 |             ## need to upload the page!!!!
115 |             regionsIDs=[]
116 |             docId = page['docId']
117 |             jsonDesc["docId"]=page['docId']
118 |             jsonDesc["pageList"]['pages'].append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":regionsIDs})        
119 |         
120 |         
121 | #         return jsonDesc["docId"], json.dumps(jsonDesc,encoding='utf-8')
122 |         return jsonDesc["docId"], json.dumps(jsonDesc)
123 |     
if __name__ == '__main__':
    # Command-line entry point: <modelId> <dictName> <colId> plus --docid or --trp
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoHtrRnnPerColumn.sDefaultServerUrl)

    # NOTE(review): the default of --region is the server URL, which looks like a
    # copy-paste slip; options.region is not read anywhere in this block.
    parser.add_option("-r", "--region"  , dest='region', action="store", type="string", default=DoHtrRnnPerColumn.sDefaultServerUrl, help="apply HTR at region level")
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid"  , dest='docid'   , action="store", type="string", default=None, help="document/pages to be htr'd")
    parser.add_option("--colnum"  , dest='colnum'   , action="store", type="string", default=None, help="column to be htr'd")
    parser.add_option("--tempdict"  , dest='dictTemp' , action="store_true", default=False, help="use tempDict folder")
    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoHtrRnnPerColumn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    doer._trpMng.setSessionId(doer._sessionID)

    # ---
    # the three mandatory positional arguments
    try:                        sModelID = args.pop(0)
    except Exception as e:      _exit(usage, 1, e)
    try:                        sDictName = args.pop(0)
    except Exception as e:      _exit(usage, 1, e)
    try:                        colId = int(args.pop(0))
    except Exception as e:      _exit(usage, 1, e)

    if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))

    # without a TRP file the pages can only come from --docid
    if not options.trp_doc and not options.docid:
        _exit(usage, 1, Exception("Either --docid or --trp is required"))

    if options.trp_doc:
        # text mode: binary mode does not accept an encoding argument;
        # the 'with' closes the file once the JSON is loaded
        with open(options.trp_doc, "r", encoding='utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,options.docid,options.colnum,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,options.docid,options.colnum)

    # do the job...
    jobid = doer.run(sModelID, sDictName, colId, docId,sPageDesc,options.dictTemp)
    traceln(jobid)

    traceln()
    traceln("- Done")
176 |     
177 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listCollec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     List the content of a collection
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | 
 35 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
 36 | 
 37 | import json 
 38 | 
 39 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 40 | import sys, os, logging
 41 | from optparse import OptionParser
 42 | 
 43 | try: #to ease the use without proper Python installation
 44 |     import TranskribusPyClient_version
 45 | except ImportError:
 46 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 47 |     import TranskribusPyClient_version
 48 | 
 49 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 50 | from TranskribusPyClient.client import TranskribusClient
 51 | from TranskribusPyClient.common.trace import traceln, trace
 52 | 
 53 | DEBUG = 0
 54 | 
 55 | description = """List the content of one or several Transkribus collection.
 56 | """ + _Trnskrbs_description
 57 | 
 58 | usage = """%s <colId>+ 
 59 | """%sys.argv[0]
 60 | 
 61 | class DoListCollec(TranskribusClient):
 62 |     """
 63 |     List the content of a collection
 64 |     """
 65 |     sDefaultServerUrl = _Trnskrbs_default_url
 66 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 67 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 68 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 69 |         
 70 |     def run(self, colId, options):
 71 |         """
 72 | 
 73 | [{u'collectionList': {u'colList': [{u'colId': 3571,
 74 |                                     u'colName': u'READDU',
 75 |                                     u'description': u'created by herve.dejean@xrce.xerox.com'}]},
 76 |   u'createdFromTimestamp': 33175290,
 77 |   u'createdToTimestamp': 33175290,
 78 |   u'docId': 7749,
 79 |   u'fimgStoreColl': u'TrpDoc_DEA_7749',
 80 |   u'nrOfPages': 10,
 81 |   u'scriptType': u'HANDWRITTEN',
 82 |   u'status': 0,
 83 |   u'title': u'MM_1_001',
 84 |   u'uploadTimestamp': 1478161395893L,
 85 |   u'uploader': u'herve.dejean@xrce.xerox.com',
 86 |   u'uploaderId': 275},
 87 |  {u'collectionList': {u'colList': [{u'colId': 3571,
 88 |                                     u'colName': u'READDU',
 89 |                                     u'description': u'created by herve.dejean@xrce.xerox.com'}]},
 90 |   u'createdFromTimestamp': 0,
 91 |   u'createdToTimestamp': 0,
 92 |   u'docId': 7750,
 93 |   u'fimgStoreColl': u'TrpDoc_DEA_7750',
 94 |   u'nrOfPages': 10,
 95 |   u'scriptType': u'HANDWRITTEN',
 96 |   u'status': 0,
 97 |   u'title': u'MM_1_005',
 98 |   u'uploadTimestamp': 1478161451242L,
 99 |   u'uploader': u'herve.dejean@xrce.xerox.com',
100 |   u'uploaderId': 275}]
101 |   
102 |   """        
103 |         bRaw=options.bRaw
104 |         data = self.listDocsByCollectionId(colId)
105 |         if options.trp:
106 |             with open(options.trp, "wt",) as fd: json.dump(data, fd, indent=2)
107 |         if bRaw:
108 |             while data:
109 |                 dic = data.pop(0)
110 |                 print (dic[u'docId'])
111 |         else:
112 |             if data:
113 |                 _d = data[0][u'collectionList'][u'colList'][-1]
114 |                 print( "Collection: %s  (%s)"%(_d[u'colName'], _d[u'colId']))
115 |                 
116 |                 while data:
117 |                     dic = data.pop(0)
118 |                     print (">> (%s) #p=%d  '%s' by %s  (status=%s)" % (dic[u'docId'], dic[u'nrOfPages'], dic[u'title'], dic[u'uploader'], dic[u'status']))
119 |             else:
120 |                 print (">> Collection is empty!")
121 |         
122 |         
123 | 
if __name__ == '__main__':
    # Command-line entry point: list each collection given as positional argument
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoListCollec.sDefaultServerUrl)

    parser.add_option("--raw", dest='bRaw', action="store_true", default=False, help="Raw output, one docid per line")
    parser.add_option("--trp"   , dest='trp'     , action="store", type="string", default=None, help="Store the TRP data reflecting the documents in the given file.")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    #source collection(s)
    try:
        lColId = [ int(arg) for arg in args ]
    except Exception as e:
        _exit(usage, 1, e)

    # ---
    doer = DoListCollec(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # ---
    # do the job...
    for colId in lColId:
        # run once per collection, and only inside the try so that a failure is reported properly
        try:
            doer.run(colId, options)
        except Exception as e:
            traceln()
            traceln("ERROR: could not list collection '%d' "%colId)
            _exit("", 1, e)
    if not options.bRaw:
        traceln()
        traceln("- Done for %d collection(s)"%len(lColId))
165 |     
166 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listHtrHmm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | """
 5 |     List the HTR Models
 6 | 
 7 |     JL Meunier - Dec 2016
 8 | 
 9 | 
10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 | 
12 |     This program is free software: you can redistribute it and/or modify
13 |     it under the terms of the GNU General Public License as published by
14 |     the Free Software Foundation, either version 3 of the License, or
15 |     (at your option) any later version.
16 | 
17 |     This program is distributed in the hope that it will be useful,
18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 |     GNU General Public License for more details.
21 | 
22 |     You should have received a copy of the GNU General Public License
23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
24 |     
25 |     
26 |     Developed  for the EU project READ. The READ project has received funding 
27 |     from the European Union’s Horizon 2020 research and innovation programme 
28 |     under grant agreement No 674943.
29 | 
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import  print_function
33 | from __future__ import unicode_literals
34 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
35 | 
36 | 
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 | # import json
41 | 
42 | try: #to ease the use without proper Python installation
43 |     import TranskribusPyClient_version
44 | except ImportError:
45 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
46 |     import TranskribusPyClient_version
47 | 
48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, strTabularFormat
49 | from TranskribusPyClient.client import TranskribusClient
50 | from TranskribusPyClient.common.trace import traceln, trace
51 | 
52 | DEBUG = 0
53 | 
54 | description = """List HTR models available in Transkribus.
55 | """ + _Trnskrbs_description
56 | 
57 | usage = """%s
58 | """%sys.argv[0]
59 | 
class DoListHtrModels(TranskribusClient):
    """
    List the HMM HTR models
    """
    sDefaultServerUrl = _Trnskrbs_default_url
    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl -- URL of the Transkribus server; sDefaultServerUrl is used when it is empty
        sHttpProxy      -- passed as 'proxies' to TranskribusClient
        loggingLevel    -- passed as is to TranskribusClient
        """
        # honour the URL given by the caller instead of always using the default one
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl or self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self):
        """
        Trace the models returned by listHmmHtrModels in tabular form, and return them.
        """
        lDic = self.listHmmHtrModels()
        lsColumn = ["modelName", "modelId", "isUsableInTranskribus", "nrOfTokens", "nrOfDictTokens", "nrOfLines"]
        # NOTE(review): the last argument is presumably the sort key - confirm in strTabularFormat
        traceln( strTabularFormat(lDic, lsColumn, "modelName") )
        return lDic
71 | 
if __name__ == '__main__':
    # Command-line entry point: login, then trace the list of HTR models
    version = "v.01"

    # --- command line: only the basic options
    #     ("-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy")
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoListHtrModels.sDefaultServerUrl)

    options, args = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy': options.https_proxy}
    else:
        proxies = {}

    # --- client and login
    doer = DoListHtrModels(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # --- do the job...
    doer.run()

    traceln()
    traceln("- Done")
97 |     
98 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listHtrRnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     List the HTR RNN Models and Dictionaries
  6 | 
  7 |     JL Meunier - Dec 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | 
 35 | #    TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
 36 | 
 37 | 
 38 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 39 | import sys, os, logging
 40 | from optparse import OptionParser
 41 | # import json
 42 | 
 43 | try: #to ease the use without proper Python installation
 44 |     import TranskribusPyClient_version
 45 | except ImportError:
 46 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 47 |     import TranskribusPyClient_version
 48 | 
 49 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit, strTabularFormat
 50 | from TranskribusPyClient.client import TranskribusClient
 51 | from TranskribusPyClient.common.trace import traceln, trace
 52 | 
 53 | DEBUG = 0
 54 | 
 55 | description = """List HTR RNN models and dictionaries available in Transkribus.
 56 | """ + _Trnskrbs_description
 57 | 
 58 | usage = """%s
 59 | """%sys.argv[0]
 60 | 
 61 | class DoListHtrRnn(TranskribusClient):
 62 |     sDefaultServerUrl = _Trnskrbs_default_url
 63 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 64 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 65 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 66 |     
 67 |     def run(self,colid=None,bListDict=False):
 68 |         """
 69 |         2 textual lists
 70 |         """
 71 |         sModels=None
 72 |         sColModels=None
 73 |         sDicts = None
 74 |         if colid is not None:
 75 |             sColModels = self.listRnns(colid)
 76 |             for models in sColModels:
 77 |                 #print(models.keys())
 78 |                 #some old? models do not have params field
 79 |                 #try: traceln("%s\t%s\t%s\ndescription:%s" % (models['htrId'],models['name'].strip(),models['params'].strip(),models['description'].strip()))
 80 |                 try: traceln("%s\t%s\t%s\ndescription:%s" % (models['htrId'],models['name'].strip(),models['provider'].strip(),models['description'].strip()))
 81 |                 except KeyError: traceln("%s\t%s\tno params" % (models['htrId'],models['name']))             
 82 |                 traceln()
 83 |         else:
 84 |             sModels = self.listRnnsText()        
 85 |             traceln("\n--- Models ---------------------------")
 86 |             traceln(sModels)
 87 |         
 88 |         if bListDict:
 89 |             sDicts = self.listDictsText()        
 90 |             traceln("\n--- Dictionaries ---------------------")
 91 |             traceln(sDicts)
 92 |         
 93 |         return sModels, sColModels, sDicts
 94 | 
 95 | if __name__ == '__main__':
 96 |     version = "v.01"
 97 | 
 98 |     #prepare for the parsing of the command line
 99 |     parser = OptionParser(usage=usage, version=version)
100 |     parser.description = description
101 |     parser.add_option("--colid", dest='colid', type='string', default=None, help = 'get models linked to the colid')
102 |     parser.add_option("--dict", dest='dict', action='store_true', default=False, help = 'get dictionaries')
103 | 
104 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
105 |     __Trnskrbs_basic_options(parser, DoListHtrRnn.sDefaultServerUrl)
106 |         
107 |     # ---   
108 |     #parse the command line
109 |     (options, args) = parser.parse_args()
110 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
111 |     # --- 
112 |     doer = DoListHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
113 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
114 | 
115 |     # --- 
116 |     # do the job...
117 |     doer.run(options.colid,options.dict)
118 |         
119 |     traceln()      
120 |     traceln("- Done")
121 |     
122 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listPageLocks.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     list the locks for a colid/docid/page
  6 | 
  7 |     H. Déjean - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #    TranskribusCommands/do_ListPageLocks.py <COLID> <DOCID> <PAGENUM>
 35 | 
 36 | 
 37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 38 | import sys, os, logging
 39 | from optparse import OptionParser
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | from TranskribusPyClient.common.trace import traceln, trace
 50 | 
 51 | DEBUG = 0
 52 | 
 53 | description = """list the locked pages.
 54 | """ + _Trnskrbs_description
 55 | 
 56 | usage = """%s <colId>  <docId>  <page> 
 57 | """%sys.argv[0]
 58 | 
 59 | class listPageLocks(TranskribusClient):
 60 |     
 61 |     sDefaultServerUrl = _Trnskrbs_default_url
 62 |     
 63 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 64 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 65 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 66 |         
 67 |         
 68 | 
 69 | if __name__ == '__main__':
 70 |     version = "v.01"
 71 | 
 72 |     #prepare for the parsing of the command line
 73 |     parser = OptionParser(usage=usage, version=version)
 74 |     parser.description = description
 75 |     
 76 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 77 |     __Trnskrbs_basic_options(parser, listPageLocks.sDefaultServerUrl)
 78 |         
 79 |     # ---   
 80 |     #parse the command line
 81 |     (options, args) = parser.parse_args()
 82 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 83 | 
 84 |     # --- 
 85 |     #source collection(s)
 86 |     try:
 87 |         colid = int(args[0])
 88 |     except Exception as e:
 89 |         _exit(usage, 1, e)
 90 |     try:
 91 |         docid = int(args[0])
 92 |     except Exception as e:
 93 |         _exit(usage, 1, e)
 94 |     try:
 95 |         page = int(args[0])
 96 |     except Exception as e:
 97 |         _exit(usage, 1, e)
 98 | 
 99 |     # --- 
100 |     doer = listPageLocks(options.server, proxies, loggingLevel=logging.INFO)
101 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
102 | 
103 |     # --- 
104 |     # do the job...
105 |     try:
106 |         resp = doer.getListofLockedPages(colid, docid, page)
107 |     except Exception as e:  _exit("", 1, e)
108 |     traceln(resp)
109 |     traceln("- Done")
110 |     
111 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_login.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 |     Utility to login into Transkribus and store the sessionId in a secure way for next commands
  6 | 
  7 |     JL Meunier - Nov 2016
  8 | 
  9 | 
 10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
 11 | 
 12 |     This program is free software: you can redistribute it and/or modify
 13 |     it under the terms of the GNU General Public License as published by
 14 |     the Free Software Foundation, either version 3 of the License, or
 15 |     (at your option) any later version.
 16 | 
 17 |     This program is distributed in the hope that it will be useful,
 18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |     GNU General Public License for more details.
 21 | 
 22 |     You should have received a copy of the GNU General Public License
 23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 24 |     
 25 |     
 26 |     Developed  for the EU project READ. The READ project has received funding 
 27 |     from the European Union’s Horizon 2020 research and innovation programme 
 28 |     under grant agreement No 674943.
 29 | 
 30 | """
 31 | from __future__ import absolute_import
 32 | from __future__ import  print_function
 33 | from __future__ import unicode_literals
 34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 35 | import sys, os, logging
 36 | from optparse import OptionParser
 37 | 
 38 | try: #to ease the use without proper Python installation
 39 |     import TranskribusPyClient_version
 40 | except ImportError:
 41 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 42 |     import TranskribusPyClient_version
 43 | 
 44 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, _exit
 45 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
 46 | from TranskribusPyClient.common.trace import traceln, trace
 47 | 
 48 | DEBUG = 0
 49 | 
 50 | description = """Login into Transkribus to avoid the need for login in next commands (until the session expires).
 51 | """ + _Trnskrbs_description
 52 | 
 53 | usage = """%s"""%sys.argv[0]
 54 | 
 55 | class DoLogin(TranskribusClient):
 56 |     """
 57 |     Download a Transkribus collection as a DS structured dataset
 58 |     """
 59 |     sDefaultServerUrl = _Trnskrbs_default_url
 60 |     
 61 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 62 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 63 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 64 |         
 65 | 
 66 | if __name__ == '__main__':
 67 |     version = "v.01"
 68 | 
 69 |     #prepare for the parsing of the command line
 70 |     parser = OptionParser(usage=usage, version=version)
 71 |     parser.description = description
 72 |     
 73 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
 74 |     __Trnskrbs_basic_options(parser, DoLogin.sDefaultServerUrl)
 75 |         
 76 |     #parse the command line
 77 |     (options, args) = parser.parse_args()
 78 | 
 79 |     # ---   
 80 |     #credentials and proxy
 81 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
 82 | 
 83 |     if options.login:
 84 |         login, pwd = options.login, options.pwd
 85 |     else:
 86 |         trace("- no login provided, looking for stored credentials... ")
 87 |         login, pwd = getStoredCredentials(bAsk=False)
 88 |         traceln("OK")
 89 | 
 90 |     # ------------------------------------------------------------------------------------------------
 91 |     
 92 |     doer = DoLogin(options.server, proxies, loggingLevel=logging.INFO)
 93 |     
 94 |     try:
 95 |         if options.persist:
 96 |             traceln("- Logging onto Transkribus as %s and making a persistent session"%login)
 97 |             doer.cleanPersistentSession()
 98 |             resp = doer.auth_login(login, pwd, bPersist=options.persist)
 99 |             traceln("\t --> %s"%os.path.join(DoLogin._sSESSION_FOLDER, DoLogin._sSESSION_FILENAME))
100 |         else:
101 |             trace("- Checking Transkribus login as %s "%login)
102 |             resp = doer.auth_login(login, pwd, bPersist=options.persist)
103 |             traceln(" OK!")
104 |     except Exception as e:  _exit("", 1, e)
105 |     
106 |     traceln("- Done")
107 | 
108 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_logout.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | """
 5 |     Utility to remove any persistent session from the disk
 6 | 
 7 |     JL Meunier - Nov 2016
 8 | 
 9 | 
10 |     Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 | 
12 |     This program is free software: you can redistribute it and/or modify
13 |     it under the terms of the GNU General Public License as published by
14 |     the Free Software Foundation, either version 3 of the License, or
15 |     (at your option) any later version.
16 | 
17 |     This program is distributed in the hope that it will be useful,
18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 |     GNU General Public License for more details.
21 | 
22 |     You should have received a copy of the GNU General Public License
23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
24 |     
25 |     
26 |     Developed  for the EU project READ. The READ project has received funding 
27 |     from the European Union’s Horizon 2020 research and innovation programme 
28 |     under grant agreement No 674943.
29 | 
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import  print_function
33 | from __future__ import unicode_literals
34 | 
35 | #optional: useful if you want to choose the logging level to something else than logging.WARN
36 | import sys, os, logging
37 | from optparse import OptionParser
38 | 
39 | try: #to ease the use without proper Python installation
40 |     import TranskribusPyClient_version
41 | except ImportError:
42 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
43 |     import TranskribusPyClient_version
44 | 
45 | from TranskribusPyClient.common.trace import traceln, trace
46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
47 | from TranskribusPyClient.client import TranskribusClient
48 | 
49 | DEBUG = 0
50 | 
51 | description = """Remove any persistent session from disk.
52 | """ + _Trnskrbs_description
53 | 
54 | usage = """%s"""%sys.argv[0]
55 | 
class DoLogout(TranskribusClient):
    """
    Logout from Transkribus and remove any persistent session
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl -- URL of the Transkribus server; sDefaultServerUrl is used when it is empty
        sHttpProxy      -- passed as 'proxies' to TranskribusClient
        loggingLevel    -- passed as is to TranskribusClient
        """
        # honour the URL given by the caller instead of always using the default one
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl or self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
65 |         
66 | 
if __name__ == '__main__':
    # Command-line entry point: try to authenticate, then ask the client to log out.
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    
    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoLogout.sDefaultServerUrl)
        
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
    # ------------------------------------------------------------------------------------------------
    doer = DoLogout(options.server, proxies, loggingLevel=logging.INFO)
    # best effort: whatever the login step raises is ignored
    # (NOTE(review): the bare except also catches SystemExit - presumably on purpose, confirm)
    try:
        __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    except:
        pass
    
    # best effort as well: an exception raised by the logout call is silently ignored
    try:
        traceln('- cleaning any persistent session.')
        doer.auth_logout()
    except Exception as e:
        pass  
        #_exit("", 1, e)
    
    traceln("- Done"   )
95 |     
96 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_tableTemplate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     Hervé Déjean - Jan  2017
  7 | 
  8 | 
  9 |     Copyright Xerox(C) 2016 
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 |     
 29 |     
 30 |     see https://transkribus.eu/wiki/index.php/HTR
 31 | """
 32 | from __future__ import absolute_import
 33 | from __future__ import  print_function
 34 | from __future__ import unicode_literals
 35 | 
 36 | #    TranskribusCommands/do_htrTrainRnn model-name colId docid pages 
 37 | 
 38 | 
 39 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 40 | import sys, os, logging
 41 | from optparse import OptionParser
 42 | import json
 43 | from lxml import etree
 44 | 
 45 | try: #to ease the use without proper Python installation
 46 |     import TranskribusPyClient_version
 47 | except ImportError:
 48 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 49 |     import TranskribusPyClient_version
 50 | 
 51 | from TranskribusCommands import  __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 52 | 
 53 | from do_analyzeLayout import DoLAbatch
 54 | from TranskribusPyClient.common.IntegerRange import IntegerRange
 55 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
 56 | from TranskribusPyClient.common.trace import traceln, trace
 57 | 
 58 | 
# debug flag; not referenced in the visible part of this script
DEBUG = 0

# help text of the command: command-specific part (including the page range syntax)
# followed by the description shared by the Transkribus commands
description = """Apply a table template to a list of pages

The syntax for specifying the page range is:
- one or several specifiers separated by a comma
- one separator is a page number, or a range of page number, e.g. 3-8
- Examples: 1   1,3,5   1-3    1,3,5-99,100
""" + _Trnskrbs_description


# usage line: the script name is substituted for %s
usage = """%s --templateID <> <colId> <docid/pagerange> 
"""%sys.argv[0]
 72 | 
 73 | class DoTableTemplate(DoLAbatch):
 74 | 
 75 |     
 76 |     def run(self, templateID, colId, sDescription, sJobImpl):
 77 |         ret = self.tableMatching(templateID, colId, sDescription,  sJobImpl)
 78 |         jobid= self.getJobIDsFromXMLStatuses(ret)
 79 |         return ret,jobid
 80 |     
 81 | 
 82 |     def jsonToXMLDescription(self,jsonDesc):
 83 |         """
 84 |             convert json description to XML
 85 | 
 86 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 87 | <jobParameters>
 88 |      <docList>
 89 |          <docs>
 90 |              <docId>1</docId>
 91 |              <pageList>
 92 |                  <pages>
 93 |                      <pageId>2</pageId>
 94 |                      <tsId>3</tsId>
 95 |                  </pages>
 96 |              </pageList>
 97 |          </docs>
 98 |      </docList>
 99 |      <params>
100 |          <entry>
101 |              <key>templateId</key>
102 |              <value>1543</value>
103 |          </entry>
104 |      </params>
105 | </jobParameters>
106 |             
107 |         """
108 | #         s = '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'
109 | #         s ='{"pageList": {"pages": [{"tsId": "1305027", "regionIds": [], "pageId": "478362"}]}, "docId": "18975"}'
110 | # 
111 |         jsonDesc=json.loads(jsonDesc)
112 |     
113 |         root = etree.Element("jobParameters")
114 |         xmldesc= etree.ElementTree(root)
115 |         root2 = etree.Element("jobParameters")
116 |         root.append(root2)
117 |         
118 |         docList =etree.Element("docList")
119 | #         root2.append(docList)
120 |         root.append(docList)
121 |         
122 |         docs= etree.Element("docs")
123 |         docList.append(docs)
124 | 
125 |         # docId
126 |         node =  etree.Element("docId")
127 |         docs.append(node)
128 |         node.text = str(jsonDesc["docId"])
129 |         
130 |         #pageList
131 |         nodelp = etree.Element("pageList")
132 |         docs.append(nodelp)
133 |                 
134 |         for page in jsonDesc["pageList"]['pages']:
135 |             nodep = etree.Element("pages")
136 |             nodelp.append(nodep)
137 |             pageId = etree.Element("pageId")
138 |             pageId.text = str(page['pageId'])
139 |             tsId=etree.Element("tsId")
140 |             tsId.text= str(page['tsId'])
141 | #             regId=etree.Element("regionIds")
142 | #             regId.text = ''
143 |             nodep.append(pageId)
144 |             nodep.append(tsId)
145 | #             nodep.append(regId)
146 | 
147 |         params= etree.Element('params')
148 |         root.append(params)
149 |         
150 |         entry=etree.Element('entry')
151 |         params.append(entry)
152 |         
153 |         key=etree.Element('key')
154 |         key.text = 'templateId'
155 |         entry.append(key)
156 |   
157 |         value=etree.Element('value')
158 |         value.text= str(jsonDesc['template'])
159 |         entry.append(value)
160 | 
161 |         return etree.tostring(xmldesc, encoding='utf-8',pretty_print=True)       
162 |             
163 |     def buildDescription(self,colId,docpage,templateId,trp=None):
164 |         """
165 |         <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
166 |     <jobParameters>
167 |      <docList>
168 |          <docs>
169 |              <docId>1</docId>
170 |              <pageList>
171 |                  <pages>
172 |                      <pageId>2</pageId>
173 |                      <tsId>3</tsId>
174 |                  </pages>
175 |              </pageList>
176 |          </docs>
177 |      </docList>
178 |      <params>
179 |          <entry>
180 |              <key>templateId</key>
181 |              <value>1543</value>
182 |          </entry>
183 |      </params>    
184 |             
185 |         """
186 |         jsonDesc = {}
187 |         
188 |         if trp is None:
189 |             try: docId,pageRange= docpage.split('/')
190 |             except ValueError: docId=docpage; pageRange = ""
191 |             jsonDesc["docId"]=docId
192 |             oPageRange = IntegerRange(pageRange)                 
193 |             trpObj = self._trpMng.filter(colId,docId,page_filter=oPageRange,bLast=True)
194 |         else:
195 |             trpObj = TRP_FullDoc(trp)
196 |         jsonDesc["pageList"]={}
197 | #         pList= trpObj.getTranscriptList()
198 |         jsonDesc["pageList"]['pages']= []
199 |         jsonDesc['template'] = str(templateId)
200 |         for page in trpObj.getPageList():
201 |             docId = page['docId']
202 |             jsonDesc["docId"]=page['docId']
203 |             jsonDesc["pageList"]['pages'].append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":[]})        
204 |         
205 | #         return jsonDesc["docId"], json.dumps(jsonDesc,encoding='utf-8')
206 |         return jsonDesc["docId"], json.dumps(jsonDesc)    
207 |     
if __name__ == '__main__':
    # Command-line entry point: build the job description for the requested
    # pages, convert it to XML and submit a CvlTableJob.
    # example:    do_tableTemplate.py --temp 6078228 23017 87023/14
    import io   # local import: io.open accepts 'encoding' with both Python 2 and Python 3

    version = "v.01"
    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    
    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoTableTemplate.sDefaultServerUrl)
        
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--templateID"  , dest='templateID'   , action="store", type="string" , help="template id")        
#     parser.add_option("--batchjob"  , dest='doBatchJob'   , action="store_true",  default=False, help="do one job per page")        

    # ---   
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # --- 
    doer = DoTableTemplate(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    doer._trpMng.setSessionId(doer._sessionID)
    
    # --- 
    try:                        colId = int(args.pop(0))
    except Exception as e:      _exit(usage, 1, e)
    try:                        docidpages = args.pop(0)
    except Exception as e:      _exit(usage, 1, e)    
    if args:                    _exit(usage, 2, Exception("Extra arguments to the command"))
    # without this check, the string "None" would be sent as template identifier
    if not options.templateID:  _exit(usage, 1, Exception("Missing --templateID option"))

    # --- 
    # do the job...
    if options.trp_doc:
        # the file is closed as soon as the JSON is loaded
        with io.open(options.trp_doc, "r", encoding='utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,docidpages,options.templateID,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,docidpages,options.templateID)
#     NcsrLaJob
#     CITlabAdvancedLaJob
    sPageDesc = doer.jsonToXMLDescription(sPageDesc)
    
    # jobImpl = CvlTableJob
    status, jobid = doer.run(options.templateID,colId, sPageDesc,"CvlTableJob")
    traceln("job ID:",jobid)
    traceln("- Done")
256 |     
257 | 


--------------------------------------------------------------------------------
/src/TranskribusCommands/do_uploadDictionary.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | """
  5 | 
  6 |     H Déjean
  7 | 
  8 | 
  9 |     Copyright NLE 2017 
 10 | 
 11 |     This program is free software: you can redistribute it and/or modify
 12 |     it under the terms of the GNU General Public License as published by
 13 |     the Free Software Foundation, either version 3 of the License, or
 14 |     (at your option) any later version.
 15 | 
 16 |     This program is distributed in the hope that it will be useful,
 17 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |     GNU General Public License for more details.
 20 | 
 21 |     You should have received a copy of the GNU General Public License
 22 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |     
 24 |     
 25 |     Developed  for the EU project READ. The READ project has received funding 
 26 |     from the European Union’s Horizon 2020 research and innovation programme 
 27 |     under grant agreement No 674943.
 28 | 
 29 | """
 30 | from __future__ import absolute_import
 31 | from __future__ import  print_function
 32 | from __future__ import unicode_literals
 33 | 
 34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 35 | import sys, os, logging
 36 | from io import open
 37 | 
 38 | from optparse import OptionParser
 39 | # import json
 40 | 
 41 | try: #to ease the use without proper Python installation
 42 |     import TranskribusPyClient_version
 43 | except ImportError:
 44 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 45 |     import TranskribusPyClient_version
 46 | 
 47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
 48 | from TranskribusPyClient.client import TranskribusClient
 49 | 
 50 | from TranskribusPyClient.common.trace import traceln, trace
 51 | 
# debug flag; not referenced in the visible part of this script
DEBUG = 0

# help text of the command: a command-specific sentence followed by the description shared by the Transkribus commands
description = """upload a private dictionary

""" + _Trnskrbs_description

# usage text: the script name is substituted for %s
usage = """%s  <dictionary-name> -d <dictionary-file>

a single file called  <dictionary-name> will be created by concatenating   <dictionary-file> and will be uploaded in the tempDict user ftp folder
"""%sys.argv[0]
 62 | 
 63 | class DoHtrRnn(TranskribusClient):
 64 |     """
 65 |         Good morning,
 66 | 
 67 |         temp. dictionaries also can be sent now, see example below.
 68 |         The response will contain the dict. filename to be used in the HTR
 69 |         request's tempDict parameter. If extension of the given name does not
 70 |         match ".dict", this will be appended.
 71 |         The POST request's body should contain the dictionary data as UTF-8 String.
 72 |         The temp. dictionaries are now bound to the user account and you can
 73 |         check the transmission outcome by logging in via FTP to transkribus.eu
 74 |         with your credentials. There you will find a dir. called "dictTmp"
 75 |         containing the sent files, that will be used for HTR. You can also put
 76 |         dictionaries there via FTP and use them for HTR with the tempDict parameter.
 77 |         
 78 |         Best regards,
 79 |         Philip
 80 |         
 81 |         POST /TrpServerTesting/rest/recognition/tempDict?fileName=test.dict HTTP/1.1
 82 |         Host: transkribus.eu
 83 |         Content-Type: text/plain
 84 |         Cache-Control: no-cache
 85 |         
 86 |         er,124
 87 | ...
 88 |     """
 89 |     sDefaultServerUrl = _Trnskrbs_default_url
 90 |     #--- INIT -------------------------------------------------------------------------------------------------------------    
 91 |     def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
 92 |         TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
 93 |         
 94 |     def run(self, dictName,dictString):
 95 |         ret = self.uploadDict(dictName,dictString)
 96 |         return ret
 97 | 
 98 | if __name__ == '__main__':
 99 |     version = "v.01"
100 | 
101 |     #prepare for the parsing of the command line
102 |     parser = OptionParser(usage=usage, version=version)
103 |     parser.description = description
104 |     
105 |     #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
106 |     __Trnskrbs_basic_options(parser, DoHtrRnn.sDefaultServerUrl)
107 |         
108 |     parser.add_option("-d", "--dict"  , dest='ldict', action="append", type="string", help="list of dictionaries")
109 |         
110 |     # ---   
111 |     #parse the command line
112 |     (options, args) = parser.parse_args()
113 |     proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}
114 | 
115 |     # --- 
116 |     doer = DoHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
117 |     __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
118 |     # --- 
119 |     try:                        dictName = args.pop(0)
120 |     except Exception as e:      _exit(usage, 1, e)
121 | #     try:                        filename = args.pop(0)
122 | #     except Exception as e:      _exit(usage, 1, e)
123 | 
124 |     try:
125 |         sfullDict="" 
126 |         for filename in options.ldict:
127 |             dictFile = open(filename,'r',encoding='utf-8').read()
128 |             dictFile = dictFile.replace('\t',',')
129 |             sfullDict += dictFile #+ '\n'
130 |             traceln( "loaded %s"%(filename))
131 |     except  IOError:print ('not possible to open file :%s'%(filename))
132 |     
133 | #     print sfullDict.encode("utf-8")
134 |     # need to normalize the weights when build this different dictionaries???
135 |     response  = doer.run(dictName, sfullDict)
136 |     traceln(response)
137 |         
138 |     traceln()      
139 |     traceln("- Done")
140 |     
141 | 


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/PageXmlExtractor.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | '''
  4 | Created on August 1st, 2017
  5 | 
  6 | 
  7 | Utility to extract several pages from several document to a folder or a MultiPageXml file
  8 | 
  9 | @author: meunier
 10 | '''
 11 | 
 12 | from __future__ import absolute_import
 13 | from __future__ import  print_function
 14 | from __future__ import unicode_literals
 15 | 
 16 | import os
 17 | from io import open
 18 | import json
 19 | import shutil
 20 | import math
 21 | 
 22 | import xml_formats.PageXml as PageXml
 23 | 
 24 | class DocPageSet:
 25 |     '''
 26 |     the list of pages of interest of a document
 27 |     take the textual form: docID=<page-range-set>]
 28 |         a page-range-set takes the form: <pageRange>[,<pageRange>]+
 29 |          with pageRange taking the form: N|N-N
 30 |         For instance: 111=1 or 222=1-10 or 333=1,10-20,3,40-50
 31 |         
 32 |     NOTE: ranges should not overlap!!! 
 33 |     '''
 34 |     def __init__(self, sSpec):
 35 |         try:
 36 |             sDocID, sPageRangeSet = sSpec.strip().split('=')
 37 |         except ValueError:
 38 |             raise ValueError("Malformed range: '%s'"%sSpec)
 39 |         
 40 |         self.sDocID = sDocID
 41 |         self._ltiRange = []
 42 |         prev_b = None
 43 |         for sPageRange in sPageRangeSet.split(","):
 44 |             lsN = sPageRange.split('-')
 45 |             if len(lsN) == 1:
 46 |                 a = int(lsN[0])
 47 |                 b = a
 48 |             elif len(lsN) == 2:
 49 |                 a,b = int(lsN[0]), int(lsN[1])
 50 |             else:
 51 |                 raise ValueError("invalid range: '%s'"%sPageRange)
 52 |             if not(a<=b): raise ValueError("Invalid range: '%s'"%sPageRange)
 53 |             self._ltiRange.append( (a,b) )     #222=1-10
 54 |             if prev_b < a:
 55 |                 prev_b = b
 56 |             else:
 57 |                 raise ValueError("unordered or overlapping ranges: '%d' >= '%d' '%s'"%(prev_b, a, sSpec))
 58 |         if not self.sDocID:   raise ValueError("missing docID: '%s'"%sSpec)
 59 |         if not self._ltiRange: raise ValueError("empty range: '%s'"%sSpec)
 60 |     
 61 |     # -----    
 62 |     def getDocID(self, bSkipPath=False):
 63 |         if bSkipPath:
 64 |             return os.path.basename(self.sDocID)
 65 |         else:
 66 |             return self.sDocID
 67 |     
 68 |     def getRangeString(self): return ",".join( "%d-%d"%(a,b) if a != b else "%d"%a for (a,b) in self._ltiRange )
 69 |     
 70 |     def iterPageNumber(self):
 71 |         """
 72 |         Iterator returning each page number in turn
 73 |         """    
 74 |         for a,b in self._ltiRange:
 75 |             for n in range(a,b+1):
 76 |                 yield n
 77 |         raise StopIteration
 78 |     
 79 |     # -----    
 80 |     def __str__(self):
 81 |         return "%s=%s"%(self.sDocID, self.getRangeString())
 82 |         
 83 | def testDocPageSet():
 84 |     import pytest
 85 |     
 86 |     for s in ["111=1", "222=1-10", "333=1,10-20,23,40-50"]:
 87 |         assert str(DocPageSet(s)) == s, s
 88 |         
 89 |     o = DocPageSet("111=1")
 90 |     assert o.getDocID() == "111"
 91 |     assert [i for i in o.iterPageNumber()] == [1]
 92 | 
 93 |     o = DocPageSet("a/b/c/111=1")
 94 |     assert o.getDocID() == "a/b/c/111"
 95 |     assert o.getDocID(True) == "111"
 96 |     assert [i for i in o.iterPageNumber()] == [1]
 97 | 
 98 | 
 99 |     o = DocPageSet("333=1,10-20,23,40-50")
100 |     assert o.getDocID() == "333"
101 |     assert [i for i in o.iterPageNumber()] == [1]+range(10,21)+[23]+range(40,51)
102 |     
103 |     with pytest.raises(ValueError): DocPageSet("aaa")
104 |     with pytest.raises(ValueError): DocPageSet("aaa=")
105 |     with pytest.raises(ValueError): DocPageSet("=1")
106 |     with pytest.raises(ValueError): DocPageSet("=1-2")
107 |     with pytest.raises(ValueError): DocPageSet("aaa=12=12")
108 |     with pytest.raises(ValueError): DocPageSet("aaa=22-11")
109 |     with pytest.raises(ValueError): DocPageSet("aaa=-11")
110 |     with pytest.raises(ValueError): DocPageSet("aaa=-11-")
111 |     with pytest.raises(ValueError): DocPageSet("aaa=-11-12")
112 |     with pytest.raises(ValueError): DocPageSet("aaa=333=1,10-20,3,40-50")
113 | 
class PageXmlExtractor:
    '''
    Utility to extract several pages from several documents to a folder or to a MultiPageXml file
    '''
    sColDir = 'col'
    
    @classmethod
    def getFilename(cls, sDocID, name):
        """
        return the path of the file 'name' in the folder of the document sDocID
        """
        return os.path.join(sDocID, name)
        
    @classmethod
    def extractPagesToDir(cls, lDocPageSet, sToDir):
        """
        extract the pages from the given list of PageSet and store them in the given folder.
        (typically to be packaged as a MultiPageXml using PageXml.py)
        The folder is created if needed, otherwise it must be empty (ValueError if not).
        return the number of copied files, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """
        if not os.path.isdir(sToDir):
            print(" - creating directory ", sToDir) 
            os.mkdir(sToDir) 
        else:
            if len(os.listdir(sToDir)) > 0: raise ValueError("Target folder (%s) must be empty."%sToDir)
        if not os.path.isdir(sToDir): raise ValueError("%s is not a directory"%sToDir) 

        jsonOriginFilename = os.path.join(sToDir, "origin.json")
        cnt, ltOrigin = cls.getOriginTuple(lDocPageSet, jsonOriginFilename)
               
        print( " - total number of pages = %d"%cnt)
        
        # number of digits of the page count (safe for 0 pages, unlike math.log10)
        nbDigit = len(str(cnt))
        sFmt = "%%0%dd.pxml" % nbDigit    #e.g. %03d.pxml
        
        # distinct loop variable names, so that the total count is not shadowed
        for (pnum, _docID, _n, sFilename) in ltOrigin:
            sToFilename = os.path.join(sToDir, sFmt%pnum)
            print("   copying %s --> %s"%(sFilename, sToFilename))
            shutil.copy(sFilename, sToFilename)
        
        return cnt, ltOrigin
                    
    @classmethod
    def extractPagesToFile(cls, lDocPageSet, sToFile, bIndent=True):
        """
        extract the pages from the given list of PageSet and store them in a MultiPageXml file
        The origin of the pages is stored in a JSON file named after sToFile (suffix _origin.json) 
        return the number of extracted pages, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """
        
        sBaseName, _ = os.path.splitext(sToFile)
        jsonOriginFilename = sBaseName + "_origin.json"
        cnt, ltOrigin = cls.getOriginTuple(lDocPageSet, jsonOriginFilename)
               
        print( " - total number of pages = %d"%cnt)
        
        print( "   Generating %s"%(sToFile))
        doc = PageXml.MultiPageXml.makeMultiPageXml([sFilename for (_pnum, _docID, _n, sFilename) in ltOrigin] )
        doc.write(sToFile, xml_declaration='UTF-8',encoding="utf-8", pretty_print=bIndent)
        
        return cnt, ltOrigin
        
    @classmethod
    def getOriginTuple(cls, lDocPageSet, jsonOriginFilename=None):
        """
        prepare for extracting the pages from the given list of PageSet 
        If jsonOriginFilename is given, the list of tuples is also stored in that file, in JSON.
        return the number of files, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """
        
        ltOrigin = list() 
        cnt = 0
        for o in lDocPageSet:
            print( " - Processing doc %s, pages %s"%(o.getDocID(), o.getRangeString()))
            lsFilename = cls.getPageFilenameList(o.getDocID(), ".pxml")
            for n in o.iterPageNumber():
                cnt += 1
                sFilename = lsFilename[n-1]     # page numbers start at 1
                ltOrigin.append( (cnt, o.getDocID(True), n, sFilename) ) # new-PNum, docID, orig-PNum, orig-filename

        if jsonOriginFilename:
            import sys  # local import: at module level, sys is only imported in the __main__ section
            if sys.version_info > (3,0):
                # text mode: in Python 3, json.dump writes str and a binary mode does not accept an encoding
                with open(jsonOriginFilename, "w",encoding='utf-8') as fd: json.dump(ltOrigin, fd, indent=True)
            else:
                with open(jsonOriginFilename, "wb") as fd: json.dump(ltOrigin, fd, indent=True)
                        
            print( "   (see %s)"%(jsonOriginFilename))
        
        return cnt, ltOrigin
        

    @classmethod
    def getPageFilenameList(cls, sDocID, sExt):
        """
        return the list of the page filenames of the document, in the order of 
        the pages listed in its trp.json file, with the extension sExt instead
        of the extension of the image file.
        A warning is printed when a page number does not match the page position.
        Raise ValueError if the document folder contains no trp.json file.
        """
        assert sExt.startswith('.')
        
        #Look in trp.json file
        lsFilename = [] 

        trpFile = os.path.join(sDocID, 'trp.json')
        if not( os.path.exists(trpFile)): raise ValueError("Non-existing trp.json file %s" % trpFile)
        # the encoding must be a keyword argument: the 3rd positional argument of open is the buffering
        with open(trpFile, "r", encoding='utf-8') as fd: 
            jTrp = json.load(fd)
        
        for i, page in enumerate(jTrp['pageList']['pages']):
            sImgFileName = page['imgFileName']
            sBaseName, _ = os.path.splitext(sImgFileName)
            sXmlFilename = cls.getFilename(sDocID,  sBaseName + sExt)
            lsFilename .append( sXmlFilename )
            if page['pageNr'] != i+1: print( "\tWarning: expected page number %d , got %s"%(i+1, page['pageNr']))
            
        return lsFilename
221 |     
if __name__ == "__main__":
    # Command-line entry point: parse the docID=<page-range-set> arguments and
    # extract the pages to a folder (--dir) and/or to a MultiPageXml file (--file).
    
    import sys, optparse
    # the usage names the --file option, which is the one declared below
    usage = """
%s [--file filename] [--dir dirname] [docID=<page-range-set>]+

Utility to extract a set of PageXml files from a set of documents and either:
- store them into a target folder with simple numbering, with unambiguous order.
- generate a MultiPageXMl document. In case of empty filename or "-", the filename is automatically composed from the arguments.  

a page-range-set takes the form: <pageRange>[,<pageRange>]+
 with pageRange taking the form: N|N-N
 Page ranges must be ordered, per document.
For instance: 111=1 222=1-10 333=1,10-20,23,40-50

JL Meunier - Aug. 2017
""" % sys.argv[0]

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--dir" ,  dest='dir' , action="store", type="string", help="Store the extracted PageXml pages into the specified directory.")    
    parser.add_option("--file",  dest='file', action="store", type="string", help="Store the extracted PageXml pages into the specified MultiPageXml document.")    
    
    (options, args) = parser.parse_args()

    # at least one docID=<page-range-set> is required
    if not args:
        parser.print_help()
        parser.exit(1, "")
    lsDocPageSet = args
    
    print("Parsing range(s)")
    lDocPageSet = [DocPageSet(s) for s in lsDocPageSet]
    
    if options.dir:
        print( "Extracting into folder: ", options.dir)
        n = PageXmlExtractor.extractPagesToDir(lDocPageSet, options.dir)    
    
    # an empty filename is meaningful (automatic naming), hence the test against None
    if options.file is not None:
        if options.file in ["", "-"]: options.file = "extraction_" + "_".join(map(str, lDocPageSet))     #automatic filename
        sToFile = options.file if options.file.lower().endswith(".mpxml") else options.file+".mpxml"   #automatic .mpxml extension
        print( "Extracting into file: ", sToFile)
        n = PageXmlExtractor.extractPagesToFile(lDocPageSet, sToFile)    

    print( "DONE")
269 |         


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusDU/xml_formats/__init__.py


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/mpxml2pxml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     mpxml to pxml convertor
 4 |     
 5 |     @author: H Déjean
 6 |     
 7 |     READ project
 8 |     31/05/2017
 9 | """ 
10 | from __future__ import absolute_import
11 | from __future__ import  print_function
12 | from __future__ import unicode_literals
13 | import sys, os.path, optparse
14 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
15 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0])))))
16 | 
17 | from lxml import etree
18 | import xml_formats.PageXml as PageXml
19 |     
if __name__ == "__main__":
    # Command-line entry point: split the file <dir>/col/<docid>.mpxml into one
    # PageXml file per page, stored as <dir>/<destdir>/<docid>_<pnum>.pxml
    
    usage = """
%s dir docid
Utility to create a set of pageXml XML files from a mpxml file.
""" % sys.argv[0]

    parser = optparse.OptionParser(usage=usage)
    
    parser.add_option("--format", dest='bIndent',  action="store_true" , help="reformat/reindent the input")    
    parser.add_option("--dir", dest='destdir',  action="store", default='pxml' , help="directory ouptut")  
    (options, args) = parser.parse_args()

    # both positional arguments are required
    try:
        sDir, docid = args[0], args[1]      # 'sDir' rather than 'dir', which shadows the builtin
    except IndexError:
        parser.print_help()
        parser.exit(1, "")
    
    sDocFilename = "%s%scol%s%s.mpxml" % (sDir,os.sep,os.sep,docid)        
        
    doc = etree.parse(sDocFilename)

    # make sure the output folder exists, otherwise the first write fails
    sOutDir = "%s%s%s" % (sDir,os.sep,options.destdir)
    if not os.path.isdir(sOutDir):
        os.makedirs(sOutDir)

    for pnum, pageDoc in PageXml.MultiPageXml._iter_splitMultiPageXml(doc, bInPlace=False):
        outfilename = "%s%s%s_%03d.pxml" % (sOutDir,os.sep,docid,pnum)
        print(outfilename)        
        pageDoc.write(outfilename, xml_declaration ='UTF-8',encoding="utf-8", pretty_print = bool(options.bIndent))
    print ("DONE")    


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/testDS2PageXml/.gitignore:
--------------------------------------------------------------------------------
1 | /RRB_MM_01_033_Jahr_1810.mpxml
2 | 


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/test_DS2PageXml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     test DS2PageXml convertor
 4 |     @author:déjean
 5 | """
 6 | import os.path
 7 | from xml_formats.DS2PageXml import DS2PageXMLConvertor
 8 | from xml_formats.PageXml import MultiPageXml
 9 | 
10 | sTESTS_DIR = os.path.dirname(os.path.abspath(__file__))
11 | 
def test_DS2PageXmlConversion():
    """
    Convert the sample DS XML file, merge the resulting pages into a single
    multi-page document and write it next to the sample as a .mpxml file.
    """
    sInput = os.path.join(sTESTS_DIR,
                          'testDS2PageXml/RRB_MM_01_033_Jahr_1810.ds.xml')

    convertor = DS2PageXMLConvertor()
    convertor.inputFileName = sInput
    lConverted = convertor.run(convertor.loadDom(sInput))

    merger = MultiPageXml()
    # each converted item unpacks as a pair; only its first member is merged
    lFirstMembers = [first for first, _second in lConverted]
    multiPageDoc = merger.makeMultiPageXmlMemory(lFirstMembers)

    sOutput = os.path.join(sTESTS_DIR,
                           "testDS2PageXml/RRB_MM_01_033_Jahr_1810.mpxml")
    multiPageDoc.write(sOutput,
                       xml_declaration=True,
                       encoding="UTF-8",
                       pretty_print=True)
27 | 
28 | 
29 | #     res= conv.storePageXmlSetofFiles(lPageXmlDocs)
30 | #     print 'test:', True if res == 0  else False
31 |     
# Allow running the conversion test directly, without pytest.
if __name__ == "__main__":
#     test_setMetadata()
    test_DS2PageXmlConversion()


--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/test_PageXml.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | '''
  4 | Created on 23 Nov 2016
  5 | 
  6 | @author: meunier
  7 | '''
  8 | import pytest
  9 | from lxml import etree
 10 | from io import BytesIO
 11 | 
 12 | from xml_formats.PageXml import PageXml, PageXmlException
 13 | 
 14 | 
 15 | def test_custom():
 16 |     assert PageXml.parseCustomAttr("")    == {}
 17 |     assert PageXml.parseCustomAttr(" ")   == {}
 18 |     assert PageXml.parseCustomAttr("   ") == {}
 19 | 
 20 |     assert PageXml.parseCustomAttr("a {x:1;}")    == { 'a': { 'x':'1' } }
 21 |     assert PageXml.parseCustomAttr(" a {x:1;}")   == { 'a': { 'x':'1' } }
 22 |     assert PageXml.parseCustomAttr("a {x:1;} ")   == { 'a': { 'x':'1' } }
 23 |     assert PageXml.parseCustomAttr(" a {x:1;} ")  == { 'a': { 'x':'1' } }
 24 |     assert PageXml.parseCustomAttr("a {x:1 ;}")   == { 'a': { 'x':'1' } }
 25 |     assert PageXml.parseCustomAttr("a {x:1 ; }")  == { 'a': { 'x':'1' } }
 26 |     assert PageXml.parseCustomAttr("a { x:1 ; }") == { 'a': { 'x':'1' } }
 27 |     
 28 |     assert PageXml.parseCustomAttr("a{x:1;}")     == { 'a': { 'x':'1' } }
 29 |     assert PageXml.parseCustomAttr("a{x:1 ;}")    == { 'a': { 'x':'1' } }
 30 |     assert PageXml.parseCustomAttr("a{x:1 ; }")   == { 'a': { 'x':'1' } }
 31 |     assert PageXml.parseCustomAttr("a{ x:1 ; }")  == { 'a': { 'x':'1' } }
 32 |     
 33 |     assert PageXml.parseCustomAttr("a,b{x:1;}")       == { 'a': { 'x':'1' }, 'b': { 'x':'1' } }
 34 |     assert PageXml.parseCustomAttr("a, b{x:1 ;}")     == { 'a': { 'x':'1' }, 'b': { 'x':'1' } }
 35 |     assert PageXml.parseCustomAttr("a , b{x:1 ; }")   == { 'a': { 'x':'1' }, 'b': { 'x':'1' } }
 36 |     assert PageXml.parseCustomAttr("a ,b{ x:1 ; }")   == { 'a': { 'x':'1' }, 'b': { 'x':'1' } }
 37 |     assert PageXml.parseCustomAttr("a ,b { x:1 ; }")   == { 'a': { 'x':'1' }, 'b': { 'x':'1' } }
 38 |     
 39 |     assert PageXml.parseCustomAttr("a { x:1 ; y:2 }")   == { 'a': { 'x':'1', 'y':'2'} }
 40 |     assert PageXml.parseCustomAttr("a,b { x:1 ; y:2 }")   == { 'a': { 'x':'1', 'y':'2'}, 'b': { 'x':'1', 'y':'2'} }
 41 | 
 42 |     assert PageXml.parseCustomAttr("a {}")    == { 'a': { } }
 43 | 
 44 |     assert PageXml.parseCustomAttr("readingOrder {index:4;} structure {type:catch-word;}") == { 'readingOrder': { 'index':'4' }, 'structure':{'type':'catch-word'} }
 45 | 
 46 | def test_malformed_custom():
 47 |     with pytest.raises(ValueError): PageXml.parseCustomAttr("a {x1;}")
 48 |     with pytest.raises(ValueError): PageXml.parseCustomAttr("a x1;}")
 49 |     with pytest.raises(ValueError): PageXml.parseCustomAttr("a { x1;")
 50 |     with pytest.raises(ValueError): PageXml.parseCustomAttr("a { x1 }")
 51 |     
 52 |     #with pytest.raises(ValueError): PageXml.parseCustomAttr("a { x:1 }")  #should it fail?
 53 |     assert PageXml.parseCustomAttr("a { x:1  2}") == {'a': {'x': '1  2'}}
 54 | 
 55 |     #with pytest.raises(ValueError): PageXml.parseCustomAttr("a { x:1  2}")#should it fail? (or do we allow spaces in names or values?)
 56 |     assert PageXml.parseCustomAttr("  a b   {   x y : 1  2  }") == {'a b': {'x y': '1  2'}}
 57 |     
 58 | def test_getsetCustomAttr():
 59 |     sXml = b"""
 60 |             <TextRegion type="page-number" id="p1_region_1471502505726_2" custom="readingOrder {index:9;} structure {type:page-number;}">
 61 |                 <Coords points="972,43 1039,43 1039,104 972,104"/>
 62 |             </TextRegion>
 63 |             """
 64 |     doc = etree.parse(BytesIO(sXml))
 65 |     nd = doc.getroot()
 66 |     assert PageXml.getCustomAttr(nd, "readingOrder", "index") == '9'
 67 |     assert PageXml.setCustomAttr(nd, "readingOrder", "index", 99) == 99
 68 |     assert PageXml.getCustomAttr(nd, "readingOrder", "index") == '99'
 69 | 
 70 |     assert PageXml.getCustomAttr(nd, "readingOrder") == {'index':'99'}
 71 |     
 72 |     assert PageXml.setCustomAttr(nd, "readingOrder", "toto", "zou") == "zou"
 73 |     assert PageXml.getCustomAttr(nd, "readingOrder", "toto") == 'zou'
 74 | 
 75 |     with pytest.raises(PageXmlException): PageXml.getCustomAttr(nd, "readingOrder", "axiste_pas")
 76 |     with pytest.raises(PageXmlException): PageXml.getCustomAttr(nd, "axiste_pas_non_plus", "axiste_pas")
 77 |     
def getMetadataTestDOM():
    """
    Fixture: parse and return a small in-memory PageXml document.

    The document has a Metadata element (Creator, Created, LastChange, no
    Comments) as first child of the root, followed by a Page element that
    only contains a reading order.
    Return the parsed lxml document.
    """
    sXml = b"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        <PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
            <Metadata>
                <Creator>Tilla</Creator>
                <Created>2016-08-18T13:35:08.252+07:00</Created>
                <LastChange>2016-12-01T09:53:39.610+01:00</LastChange>
            </Metadata>
            <Page imageFilename="MM_1_001_001.jpg" imageWidth="1277" imageHeight="3518" type="other">
                <ReadingOrder>
                    <OrderedGroup id="p1_ro_1480582418139" caption="Regions reading order">
                        <RegionRefIndexed index="0" regionRef="region_1471502505726_2"/>
                        <RegionRefIndexed index="1" regionRef="region_1471502509664_3"/>
                        <RegionRefIndexed index="2" regionRef="region_1471502512664_4"/>
                        <RegionRefIndexed index="3" regionRef="region_1471502516586_5"/>
                        <RegionRefIndexed index="4" regionRef="region_1471502522320_6"/>
                        <RegionRefIndexed index="5" regionRef="region_1471502528414_7"/>
                        <RegionRefIndexed index="6" regionRef="region_1471502534742_8"/>
                        <RegionRefIndexed index="7" regionRef="region_1471502539352_9"/>
                        <RegionRefIndexed index="8" regionRef="region_1471502542539_10"/>
                        <RegionRefIndexed index="9" regionRef="region_1471502547211_11"/>
                        <RegionRefIndexed index="10" regionRef="region_1471502550274_12"/>
                        <RegionRefIndexed index="11" regionRef="region_1480582401040_1"/>
                    </OrderedGroup>
                </ReadingOrder>
            </Page>
        </PcGts>"""
    doc = etree.parse(BytesIO(sXml))
    return doc
107 | 
def test_getMetadata():
    """
    getMetadata gives the same values whether it is given the document or
    directly the first child of the root (the Metadata node of the fixture).
    """
    ltExpected = [ ("Creator"   , "Tilla")
                 , ("Created"   , "2016-08-18T13:35:08.252+07:00")
                 , ("LastChange", "2016-12-01T09:53:39.610+01:00") ]

    doc = getMetadataTestDOM()
    ndRoot = doc.getroot()

    # from the document
    mdFromDoc = PageXml.getMetadata(doc)
    for sName, sValue in ltExpected:
        assert getattr(mdFromDoc, sName) == sValue
    assert mdFromDoc.Comments == None

    # from the node
    mdFromNode = PageXml.getMetadata(None, ndRoot[0])
    for sName, sValue in ltExpected:
        assert getattr(mdFromNode, sName) == sValue
122 |     
def test_setMetadata():
    """
    setMetadata must change Creator (and Comments when given), leave Created
    untouched and refresh LastChange.

    NOTE(review): LastChange is only compared with the first 15 characters of
    a UTC timestamp taken around the call (i.e. down to the tens of minutes).
    This presumably assumes setMetadata stamps LastChange in UTC ISO format -
    to be confirmed - and could fail if the clock crosses a 10-minute boundary
    between the call and the timestamp.
    """
    import datetime
    doc = getMetadataTestDOM()

    nd = doc.getroot()
    # NOTE: _sutc is never read afterwards
    _sutc = datetime.datetime.utcnow().isoformat()
    PageXml.setMetadata(doc, None, "Tigrette")
    
    # reference timestamp taken right after the update
    sutc = datetime.datetime.utcnow().isoformat()
    md = PageXml.getMetadata(doc)
    assert md.Creator == "Tigrette"
    assert md.Created == "2016-08-18T13:35:08.252+07:00"
    assert md.LastChange.startswith(sutc[:15])
    assert md.Comments == None
    print(doc)
   
    # second update, this time with a comment; read back through the node (first child of the root)
    sutc = datetime.datetime.utcnow().isoformat()
    PageXml.setMetadata(doc, None, "Bijoux", "Le chat de Martine")
    md = PageXml.getMetadata(None, nd[0])
    assert md.Creator == "Bijoux"
    assert md.Created == "2016-08-18T13:35:08.252+07:00"
    assert md.LastChange.startswith(sutc[:15])
    assert md.Comments == "Le chat de Martine"
    print(doc)
147 |     
def test_CreationPageXmlDocument():
    """
    Smoke test: create a PageXml document from scratch and print the result.
    """
    dCreationArgs = dict(creatorName='HerveforTest',
                         filename='hervefortest.jpg',
                         imgW=100,
                         imgH=100)
    newDoc = PageXml.createPageXmlDocument(**dCreationArgs)
    print(newDoc)
151 |     
# Allow running the two tests that print their result directly, without pytest.
if __name__ == "__main__":
    test_setMetadata()
    test_CreationPageXmlDocument()


--------------------------------------------------------------------------------
/src/TranskribusPyClient/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusPyClient/__init__.py


--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/IntegerRange.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 |     Integer range specification for Python clients
  5 |     
  6 |     A class to deal with integer range specifications like 1-5,8
  7 |     
  8 |     Copyright Naver(C) 2017, JL. Meunier, August 2017
  9 | 
 10 |     This program is free software: you can redistribute it and/or modify
 11 |     it under the terms of the GNU General Public License as published by
 12 |     the Free Software Foundation, either version 3 of the License, or
 13 |     (at your option) any later version.
 14 | 
 15 |     This program is distributed in the hope that it will be useful,
 16 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |     GNU General Public License for more details.
 19 | 
 20 |     You should have received a copy of the GNU General Public License
 21 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 22 |     
 23 |     
 24 |     Developed  for the EU project READ. The READ project has received funding 
 25 |     from the European Union’s Horizon 2020 research and innovation programme 
 26 |     under grant agreement No 674943.
 27 |     
 28 | """
 29 | from __future__ import absolute_import
 30 | from __future__ import  print_function
 31 | from __future__ import unicode_literals
 32 | 
 33 | from builtins import int
 34 | 
 35 | class IntegerRange:
 36 |     """
 37 |     A integer range object
 38 |     
 39 |     - at creation, pass a range specification of the form: 1  or 1-3  or 1,3  or 1,5-7,8
 40 |             IntegerRange = RANGE [, RANGE]+
 41 |         where RANGE if either an integer or 2 integer separated by a '-'
 42 |             RANGE = N
 43 |             RANGE = N-N
 44 |         Spaces are ignored, apart between digits.
 45 |     - the object is a container that supports:
 46 |         - iteration
 47 |         - len()
 48 |         - reversed()
 49 |         - contains test (if n in o: ...)
 50 |     """
 51 |     def __init__(self, sRange=""):
 52 |         self._ltAB = self.parseSpec(sRange)
 53 |         assert str(self) == "".join(sRange.split())
 54 | 
 55 |     def initFromEnumeration(self, lN):
 56 |         """
 57 |         create the list of ranges that exactly cover the enumeration. 
 58 |         """        
 59 |         if not lN: 
 60 |             pass
 61 |         elif len(lN) == 1:
 62 |             self.addRange(lN[0])
 63 |         else:
 64 |             lN = sorted(lN)
 65 |             A = lN[0]
 66 |             Nprev = A
 67 |             for N in lN[1:]:
 68 |                 if Nprev+1 < N:
 69 |                     #hole in sequence, create an interval!
 70 |                     self.addRange(A, Nprev)
 71 |                     A = N
 72 |                 Nprev = N
 73 |             self.addRange(A, Nprev)
 74 |         return self
 75 |     
 76 |     @classmethod
 77 |     def parseSpec(cls, sSpec):
 78 |         """
 79 |         parse a range specification of positive integers and return a list of pair of indices
 80 |         """
 81 |         ltAB = list()
 82 |         prev_b = -1
 83 |         for sRange in sSpec.split(","):
 84 |             if not sRange.split(): continue #empty spec!
 85 |             a,b = cls._getAB(sRange)
 86 |             ltAB.append( (a,b) )
 87 |             if prev_b < a:
 88 |                 prev_b = b
 89 |             else:
 90 |                 raise ValueError("unordered or overlapping ranges: '%s' >= '%s' '%s'"%(prev_b, a, sSpec))
 91 |         return ltAB
 92 | 
 93 |     def addRange(self, a, b=None):
 94 |         if b==None: b = a
 95 |         assert a <= b
 96 |         self._ltAB.append( (a,b) )
 97 |         self._ltAB.sort()
 98 |         if not self._check():
 99 |             self._ltAB.remove( (a,b) )
100 |             raise ValueError("Overlapping range")
101 |         
102 |     def len(self):
103 |         """
104 |         For som subclass, this method can be useful as it is not forced by Python to return an int (like for return float('inf'))
105 |         """
106 |         return sum(b-a+1 for a,b in self._ltAB)
107 |     
108 |     @classmethod
109 |     def _getAB(cls, sRange):
110 |         lsN = sRange.split('-')
111 |         if len(lsN) == 1:
112 |             a = int(lsN[0])
113 |             b = a
114 |         elif len(lsN) == 2:
115 |             sA, sB = lsN
116 |             a,b = int(sA), int(sB)
117 |             if not(a<=b): raise ValueError("Invalid range: '%s'"%sRange)
118 |         else:
119 |             raise ValueError("invalid range: '%s'"%sRange)        
120 |         return a, b
121 |     
122 |     def _check(self):
123 |         """
124 |         checking things are in order
125 |         """
126 |         prevB = -float('inf')
127 |         for a,b in self._ltAB:
128 |             if prevB > a: return False
129 |             prevB = b
130 |         return True
131 |     
132 |     def __str__(self): 
133 |         return ",".join( "%s-%s"%(a,b) if a != b else "%s"%a for (a,b) in self._ltAB )
134 | 
135 |     def __bool__(self):
136 |         return bool(self._ltAB)
137 |     
138 |     def __nonzero__(self):
139 |         return bool(self._ltAB)
140 | 
141 |     #--- Emulating Container type...
142 |     def __iter__(self):
143 |         """
144 |         Iterator returning each number in turn
145 |         """    
146 |         for a,b in self._ltAB:
147 |             for n in range(a,b+1): yield n
148 |         raise StopIteration
149 |     
150 |     def __reversed__(self):
151 |         """
152 |         Reversed iterator
153 |         If we do not provide it, we must provide a __getitem__ (boring to code and how useful??)
154 |         """
155 |         for a,b in reversed(self._ltAB):
156 |             for n in range(b,a-1,-1): yield n
157 |         raise StopIteration        
158 |         
159 |     def __len__(self):
160 |         return sum(b-a+1 for a,b in self._ltAB)
161 | 
162 |     def __contains__(self, item):
163 |         #All integers are long in python3 and call to covert is just int
164 |         try:
165 |             item = int(item)
166 |         except TypeError:
167 |             raise ValueError("A range contains numeric values not %s"%type(item))
168 |         #if type(item) != types.IntType and type(item) != types.LongType: raise ValueError("A range contains integer values not %s"%type(item))
169 |         a, b = None, None
170 |         for a,b in self._ltAB:
171 |             if b >= item: break
172 |             #print a, item, b
173 |         return a<= item and item <= b
174 | 
175 | 
176 | 
177 | # ------ TESTS ----------------------------------------------------------------------------------
def test_good_spec(capsys):
    """
    Well-formed specifications, without any space: check iteration, reversed
    iteration, membership and length.
    """
    def container_test(o, lref):
        lref = list(lref)   #in Python 3, a range object never equals a list
        assert list(o) == lref
        assert list(reversed(o)) == list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o
        
    o = IntegerRange("1")
    container_test(o, [1])
    
    o = IntegerRange("99")
    container_test(o, [99])    
    
    o = IntegerRange("1,99")
    container_test(o, [1, 99])      
    
    o = IntegerRange("1-5")
    container_test(o, range(1, 6))

    #range objects cannot be concatenated in Python 3: go through lists
    o = IntegerRange("1-5,6-88")
    container_test(o, list(range(1, 6)) + list(range(6, 89)))          
    
    o = IntegerRange("1-3,4-8")
    container_test(o, range(1, 9))   
    assert len(o) == len(range(1, 9)) 
205 | 
def test_spaced_good_spec():
    """
    Well-formed specifications containing spaces and tabs: they must behave
    like their space-free form.
    """
    def container_test(o, lref):
        lref = list(lref)   #in Python 3, a range object never equals a list
        assert list(o) == lref
        assert list(reversed(o))== list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o
        
    o = IntegerRange(" 1\t\t")
    container_test(o, [1])
    
    o = IntegerRange("99  ")
    container_test(o, [99])    
    
    o = IntegerRange("1  , 99")
    container_test(o, [1, 99])      
    
    o = IntegerRange(" 1\t- 5\t")
    container_test(o, range(1, 6))

    #range objects cannot be concatenated in Python 3: go through lists
    o = IntegerRange("1-5, 6-88")
    container_test(o, list(range(1, 6)) + list(range(6, 89)))          
    
    o = IntegerRange("1 -3\t,4- 8")
    container_test(o, range(1, 9))
    assert len(o) == len(range(1, 9)) 
231 | 
def test_errors():
    """
    Malformed, unordered or overlapping specifications must raise ValueError.
    """
    import pytest

    lsBadSpec = [ "1 3"         #space between digits
                , "3-1"         #reversed bounds
                , "3,1"         #unordered
                , "1-3,2"       #overlapping
                , "3,1-2"
                , "1-3,3-8"
                , "1-3 3,3-8"
                , "1-3,3-8 8"
                ]
    for sBadSpec in lsBadSpec:
        with pytest.raises(ValueError):
            IntegerRange(sBadSpec)
242 |     
243 | 
def test_limit():
    """
    An empty or blank specification gives an empty container.
    """
    for sBlankSpec in ("", "\t  \t "):
        o = IntegerRange(sBlankSpec)
        assert list(o) == []
        assert len(o) == 0
251 |     
def test_add():
    """
    addRange keeps the ranges sorted, and rejects with a ValueError any range
    that overlaps an existing one, leaving the object unchanged.
    """
    import pytest

    def container_test(o, lref):
        lref = list(lref)   #in Python 3, a range object never equals a list
        assert list(o) == lref
        assert list(reversed(o)) == list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o
        
    o = IntegerRange()
    container_test(o, [])
    
    o.addRange(1)
    container_test(o, [1])

    o.addRange(0)
    container_test(o, [0, 1])    
    
    with pytest.raises(ValueError): o.addRange(1)
    with pytest.raises(ValueError): o.addRange(0,1)
    with pytest.raises(ValueError): o.addRange(-3,0)
    with pytest.raises(ValueError): o.addRange(-3,3)
    with pytest.raises(ValueError): o.addRange(1,3)
    with pytest.raises(ValueError): o.addRange(0,3)
    
    #range objects cannot be concatenated in Python 3: go through lists
    o.addRange(90, 99)
    container_test(o, [0, 1] + list(range(90, 100)))    
    
    o.addRange(60, 66)
    container_test(o, [0, 1] + list(range(60, 67)) + list(range(90, 100)))    
    
    with pytest.raises(ValueError): o.addRange(0,1000)
    with pytest.raises(ValueError): o.addRange(10,60)
    with pytest.raises(ValueError): o.addRange(70,95)
    with pytest.raises(ValueError): o.addRange(95)
    o.addRange(80, 88)
    container_test(o, [0, 1] + list(range(60, 67)) + list(range(80, 89)) + list(range(90, 100)))    
    
    #bounds of each range
    assert 1 in o
    assert 0 in o
    assert 90 in o
    assert 80 in o
    assert 60 in o
    assert 66 in o
    assert 99 in o
    assert 88 in o
    
    assert 50 not in o
300 |     
def test_enum():
    """
    initFromEnumeration must cover exactly the enumerated integers, whatever
    their order and even with duplicates.
    """
    def check_enumeration(lN):
        setExpected = set(lN)
        oRange = IntegerRange()
        oRange.initFromEnumeration(lN)
        assert set(oRange) == setExpected

    llN = [ []
          , [2]
          , [-2]
          , [2,1]
          , [1,2]
          , [1,2,2]         #bad case that we cover anyway
          , [1,2,4,2,5]
          , [7,4,6,1]
          , [0]
          ]
    for lN in llN:
        check_enumeration(lN)
317 |     
318 |     
319 |     


--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusPyClient/common/__init__.py


--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/trace.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # A simple trace module
 3 | # 
 4 | # JL Meunier - May 2004
 5 | # Copyright XRCE, 2004
 6 | #
 7 | 
 8 | import sys
 9 | 
# Destination of all the trace output of this module: stderr by default.
# (the 'global' statement has no effect at module level)
global traceFD
traceFD = sys.stderr

def setTraceFD(fd):
    """
    Redirect the trace output to fd.
    The functions of this module call the write() and flush() methods of fd.
    """
    global traceFD
    traceFD = fd
16 |      
def trace(*msg):
    """
    Write each argument, converted to a string, on the trace destination.
    No separator and no end-of-line is added.

    If an item cannot be encoded, it is written UTF-8 encoded on stderr instead.
    """
    for i in msg:
        try: 
            traceFD.write(str(i))
        except UnicodeEncodeError:
            # In Python 3, sys.stderr is a text stream that refuses bytes: the
            # encoded data must go to its underlying binary buffer.
            # In Python 2, sys.stderr has no 'buffer' and accepts the encoded string.
            rawStderr = getattr(sys.stderr, "buffer", sys.stderr)
            rawStderr.write(i.encode("utf-8"))
22 | 
def traceln(*msg):
    """
    Same as trace, then write an end-of-line and flush the trace destination.
    """
    # the end-of-line is simply one more item to trace
    lineItems = msg + ("\n",)
    trace(*lineItems)
    traceFD.flush()
30 | 
def flush():
    """
    Flush the current trace destination.
    """
    traceFD.flush()
33 |     
34 | 
35 | 
36 | #SELF-TEST
# Writes a few items on the default trace destination, including calls without any argument.
if __name__=="__main__":

    trace(1)
    trace(" aut")
    trace("o")
    traceln("-test")
    trace("2 auto", "-", "test")
    trace()
    traceln()
    traceln("Done")
47 | 


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/__init__.py:
--------------------------------------------------------------------------------
 1 | #-*- coding:utf-8 -*-
 2 | 
 3 | """
 4 | If you run all the test using pytest, you should first set some appropriate values here!!
 5 | 
 6 | Created on 25 Nov 2016
 7 | 
 8 | @author: meunier
 9 | """
10 | 
11 | 
# Identifier of an existing collection A
_colId_A    =   3571

# Identifiers of some existing documents of collection A
_docId_a    =   7749
_docId_b    =   7750
_docId_c    =   8251
_docId_d    =   8252


# Identifier of a different collection, where you can do whatever you want
# (the test scripts import it under this exact name)
_coldId_Sandbox   =   3820
24 | 
25 | 


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_addDocToCollection.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 4 | import sys, os
 5 | import logging
 6 | 
 7 | try: #to ease the use without proper Python installation
 8 |     import TranskribusPyClient_version
 9 | except ImportError:
10 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 |     import TranskribusPyClient_version
12 | 
13 | from TranskribusPyClient.test import _coldId_Sandbox, _docId_a
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 | 
# Log in, add document a to the sandbox collection, then log out.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}, loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# expected result: True or Exception
data = conn.addDocToCollection(_coldId_Sandbox, _docId_a)

# print as a function call: valid in both Python 2 and Python 3
print(conn.auth_logout())
27 | 
28 | 


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_copyDocToCollection.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 4 | import sys, os
 5 | import logging
 6 | 
 7 | try: #to ease the use without proper Python installation
 8 |     import TranskribusPyClient_version
 9 | except ImportError:
10 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 |     import TranskribusPyClient_version
12 | 
13 | from TranskribusPyClient.test import _colId_A, _coldId_Sandbox, _docId_c, _docId_d
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 | 
# Log in, duplicate documents c and d of collection A into the sandbox
# collection (the first one with an explicit name), then log out.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}, loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# expected result of each call: True or Exception
data = conn.duplicateDoc(_colId_A, _docId_c, _coldId_Sandbox, "named_by_JL")
data = conn.duplicateDoc(_colId_A, _docId_d, _coldId_Sandbox)

# print as a function call: valid in both Python 2 and Python 3
print(conn.auth_logout())
28 | 
29 | 


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_fulldoc_xml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
 4 | import sys, os
 5 | import logging
 6 | 
 7 | try: #to ease the use without proper Python installation
 8 |     import TranskribusPyClient_version
 9 | except ImportError:
10 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 |     import TranskribusPyClient_version
12 | 
13 | from TranskribusPyClient.test import _colId_A, _docId_a
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 | 
16 | 
# Log in, fetch document a of collection A as XML, print it, then log out.
# Credentials only come from getStoredCredentials(): never hard-code them
# here, not even in a comment.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
                         , loggingLevel=logging.INFO)
# print as a function call: valid in both Python 2 and Python 3
print(conn)

sessionID = conn.auth_login(login, pwd)
print(sessionID)

data = conn.getDocByIdAsXml(_colId_A, str(_docId_a))  #str just to stress-test
print(data)

conn.setProxies({'https':'http://cornillon:8000'})

print(conn.auth_logout())
44 | 
45 | 


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_list.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import sys, os
 4 | import logging
 5 | 
 6 | try: #to ease the use without proper Python installation
 7 |     import TranskribusPyClient_version
 8 | except ImportError:
 9 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
10 |     import TranskribusPyClient_version
11 | 
12 | from TranskribusPyClient.test import _colId_A, _docId_a
13 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
14 | 
# Log in, list the documents of collection A, pretty-print them, then log out.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
                         , loggingLevel=logging.INFO)

sessionID = conn.auth_login(login, pwd)
data = conn.listDocsByCollectionId(_colId_A)
import pprint
pprint.pprint(data)

# print as a function call: valid in both Python 2 and Python 3
print(conn.auth_logout())
26 | 
27 | """
28 | 
29 | [{u'collectionList': {u'colList': [{u'colId': 3571,
30 |                                     u'colName': u'READDU',
31 |                                     u'description': u'created by herve.dejean@xrce.xerox.com'}]},
32 |   u'createdFromTimestamp': 33175290,
33 |   u'createdToTimestamp': 33175290,
34 |   u'docId': 7749,
35 |   u'fimgStoreColl': u'TrpDoc_DEA_7749',
36 |   u'nrOfPages': 10,
37 |   u'scriptType': u'HANDWRITTEN',
38 |   u'status': 0,
39 |   u'title': u'MM_1_001',
40 |   u'uploadTimestamp': 1478161395893L,
41 |   u'uploader': u'herve.dejean@xrce.xerox.com',
42 |   u'uploaderId': 275},
43 |  {u'collectionList': {u'colList': [{u'colId': 3571,
44 |                                     u'colName': u'READDU',
45 |                                     u'description': u'created by herve.dejean@xrce.xerox.com'}]},
46 |   u'createdFromTimestamp': 0,
47 |   u'createdToTimestamp': 0,
48 |   u'docId': 7750,
49 |   u'fimgStoreColl': u'TrpDoc_DEA_7750',
50 |   u'nrOfPages': 10,
51 |   u'scriptType': u'HANDWRITTEN',
52 |   u'status': 0,
53 |   u'title': u'MM_1_005',
54 |   u'uploadTimestamp': 1478161451242L,
55 |   u'uploader': u'herve.dejean@xrce.xerox.com',
56 |   u'uploaderId': 275}]
57 |   
58 |   """


--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_listEditDeclFeatures.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import sys, os
  4 | import logging
  5 | 
  6 | try: #to ease the use without proper Python installation
  7 |     import TranskribusPyClient_version
  8 | except ImportError:
  9 |     sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
 10 |     import TranskribusPyClient_version
 11 | 
 12 | from TranskribusPyClient.test import _colId_A
 13 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
 14 | 
 15 | 
 16 | login, pwd = getStoredCredentials()
 17 | 
 18 | conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
 19 |                          , loggingLevel=logging.INFO)
 20 | 
 21 | sessionID = conn.auth_login(login, pwd)
 22 | doc = conn.listEditDeclFeatures(_colId_A)
 23 | doc.saveFormatFileEnc("-", "UTF-8", True)
 24 | conn.xmlFreeDoc(doc)
 25 | 
 26 | print conn.auth_logout()
 27 | 
 28 | """
 29 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 30 | <edFeatures>
 31 |   <edFeature>
 32 |     <featureId>1</featureId>
 33 |     <title>Long S</title>
 34 |     <description>Source uses long "s"</description>
 35 |     <optionList>
 36 |       <options>
 37 |         <optionId>1</optionId>
 38 |         <featureId>1</featureId>
 39 |         <text>Long s is normalized to "s"</text>
 40 |         <selected>false</selected>
 41 |       </options>
 42 |       <options>
 43 |         <optionId>2</optionId>
 44 |         <featureId>1</featureId>
 45 |         <text>Long s is transcribed as "ſ" U+017F "Latin small letter long s"</text>
 46 |         <selected>false</selected>
 47 |       </options>
 48 |     </optionList>
 49 |   </edFeature>
 50 |   <edFeature>
 51 |     <featureId>2</featureId>
 52 |     <title>u and v</title>
 53 |     <description>Source uses v for u</description>
 54 |     <optionList>
 55 |       <options>
 56 |         <optionId>3</optionId>
 57 |         <featureId>2</featureId>
 58 |         <text>Transcribed as in source</text>
 59 |         <selected>false</selected>
 60 |       </options>
 61 |       <options>
 62 |         <optionId>4</optionId>
 63 |         <featureId>2</featureId>
 64 |         <text>Transcribed according to modern spelling</text>
 65 |         <selected>false</selected>
 66 |       </options>
 67 |     </optionList>
 68 |   </edFeature>
 69 |   <edFeature>
 70 |     <featureId>3</featureId>
 71 |     <title>i and j</title>
 72 |     <description>Source uses "i" and "j" differently to modern spelling</description>
 73 |     <optionList>
 74 |       <options>
 75 |         <optionId>7</optionId>
 76 |         <featureId>3</featureId>
 77 |         <text>Normalized according to modern lexicon</text>
 78 |         <selected>false</selected>
 79 |       </options>
 80 |       <options>
 81 |         <optionId>5</optionId>
 82 |         <featureId>3</featureId>
 83 |         <text>Transcribed as in source</text>
 84 |         <selected>false</selected>
 85 |       </options>
 86 |       <options>
 87 |         <optionId>279</optionId>
 88 |         <featureId>3</featureId>
 89 |         <text>Capital letter "J" is normalized to "I" at the beginning of a word</text>
 90 |         <selected>false</selected>
 91 |       </options>
 92 |     </optionList>
 93 |   </edFeature>
 94 |   <edFeature>
 95 |     <featureId>5</featureId>
 96 |     <title>Printspace</title>
 97 |     <description>The printspace indicates the overall text region.</description>
 98 |     <optionList>
 99 |       <options>
100 |         <optionId>9</optionId>
101 |         <featureId>5</featureId>
102 |         <text>Created by FineReader</text>
103 |         <selected>false</selected>
104 |       </options>
105 |       <options>
106 |         <optionId>8</optionId>
107 |         <featureId>5</featureId>
108 |         <text>Manually corrected</text>
109 |         <selected>false</selected>
110 |       </options>
111 |     </optionList>
112 |   </edFeature>
113 |   <edFeature>
114 |     <featureId>6</featureId>
115 |     <title>Ligature "sz"</title>
116 |     <description>"sz" is set as ligature</description>
117 |     <optionList>
118 |       <options>
119 |         <optionId>10</optionId>
120 |         <featureId>6</featureId>
121 |         <text>Transcribed as "sz"</text>
122 |         <selected>false</selected>
123 |       </options>
124 |       <options>
125 |         <optionId>11</optionId>
126 |         <featureId>6</featureId>
127 |         <text>Normalized to "ß"</text>
128 |         <selected>false</selected>
129 |       </options>
130 |     </optionList>
131 |   </edFeature>
132 |   <edFeature>
133 |     <featureId>28</featureId>
134 |     <title>Text regions</title>
135 |     <description>Regions which contain handwritten text</description>
136 |     <optionList>
137 |       <options>
138 |         <optionId>34</optionId>
139 |         <featureId>28</featureId>
140 |         <text>Manually corrected</text>
141 |         <selected>false</selected>
142 |       </options>
143 |       <options>
144 |         <optionId>33</optionId>
145 |         <featureId>28</featureId>
146 |         <text>Automatically created</text>
147 |         <selected>false</selected>
148 |       </options>
149 |     </optionList>
150 |   </edFeature>
151 |   <edFeature>
152 |     <featureId>29</featureId>
153 |     <title>Line Regions</title>
154 |     <description>Contain the text of line</description>
155 |     <optionList>
156 |       <options>
157 |         <optionId>35</optionId>
158 |         <featureId>29</featureId>
159 |         <text>Automatically created</text>
160 |         <selected>false</selected>
161 |       </options>
162 |       <options>
163 |         <optionId>36</optionId>
164 |         <featureId>29</featureId>
165 |         <text>Manually corrected</text>
166 |         <selected>false</selected>
167 |       </options>
168 |     </optionList>
169 |   </edFeature>
170 |   <edFeature>
171 |     <featureId>30</featureId>
172 |     <title>Baselines</title>
173 |     <description>The baseline is defined as in Wikipedia - characters are "sitting" on the baseline</description>
174 |     <optionList>
175 |       <options>
176 |         <optionId>38</optionId>
177 |         <featureId>30</featureId>
178 |         <text>Manually corrected</text>
179 |         <selected>false</selected>
180 |       </options>
181 |       <options>
182 |         <optionId>37</optionId>
183 |         <featureId>30</featureId>
184 |         <text>Automatically created</text>
185 |         <selected>false</selected>
186 |       </options>
187 |     </optionList>
188 |   </edFeature>
189 |   <edFeature>
190 |     <featureId>47</featureId>
191 |     <title>Omitted text</title>
192 |     <description>Even in diplomatic transcriptions the editor may decide to not transcribe specific notes or marginalia which do not contribute to the overall objective of the transcription</description>
193 |     <optionList>
194 |       <options>
195 |         <optionId>59</optionId>
196 |         <featureId>47</featureId>
197 |         <text>Some text was omitted, e.g. marginalia, notes of librarians</text>
198 |         <selected>false</selected>
199 |       </options>
200 |       <options>
201 |         <optionId>60</optionId>
202 |         <featureId>47</featureId>
203 |         <text>No text was omitted</text>
204 |         <selected>false</selected>
205 |       </options>
206 |     </optionList>
207 |   </edFeature>
208 |   <edFeature>
209 |     <featureId>48</featureId>
210 |     <title>Person names</title>
211 |     <description>Tagging of person names</description>
212 |     <optionList>
213 |       <options>
214 |         <optionId>61</optionId>
215 |         <featureId>48</featureId>
216 |         <text>Person names were tagged</text>
217 |         <selected>false</selected>
218 |       </options>
219 |       <options>
220 |         <optionId>62</optionId>
221 |         <featureId>48</featureId>
222 |         <text>Person names were not tagged</text>
223 |         <selected>false</selected>
224 |       </options>
225 |     </optionList>
226 |   </edFeature>
227 |   <edFeature>
228 |     <featureId>49</featureId>
229 |     <title>Geo-Names</title>
230 |     <description>Tagging of geo-names</description>
231 |     <optionList>
232 |       <options>
233 |         <optionId>63</optionId>
234 |         <featureId>49</featureId>
235 |         <text>Geo-names were tagged</text>
236 |         <selected>false</selected>
237 |       </options>
238 |       <options>
239 |         <optionId>64</optionId>
240 |         <featureId>49</featureId>
241 |         <text>Geo-names wer not tagged</text>
242 |         <selected>false</selected>
243 |       </options>
244 |     </optionList>
245 |   </edFeature>
246 |   <edFeature>
247 |     <featureId>50</featureId>
248 |     <title>Abbreviations - common</title>
249 |     <description>Common abbreviations are usually known to most readers of a text, for example: e.g., i.e., &amp;, etc.</description>
250 |     <optionList>
251 |       <options>
252 |         <optionId>65</optionId>
253 |         <featureId>50</featureId>
254 |         <text>Common abbreviations were not expanded</text>
255 |         <selected>false</selected>
256 |       </options>
257 |       <options>
258 |         <optionId>66</optionId>
259 |         <featureId>50</featureId>
260 |         <text>Common abbreviations were expanded</text>
261 |         <selected>false</selected>
262 |       </options>
263 |     </optionList>
264 |   </edFeature>
265 |   <edFeature>
266 |     <featureId>51</featureId>
267 |     <title>Abbreviations</title>
268 |     <description>Especially in medieval texts and early modern handwritting many words are abbreviated, or even characters are left out in the middle of a word. These abbreviations often need deep grammatical understanding to be correctly expanded.</description>
269 |     <optionList>
270 |       <options>
271 |         <optionId>68</optionId>
272 |         <featureId>51</featureId>
273 |         <text>Abbreviations were not marked</text>
274 |         <selected>false</selected>
275 |       </options>
276 |       <options>
277 |         <optionId>67</optionId>
278 |         <featureId>51</featureId>
279 |         <text>Abbreviations were marked, but not explanded</text>
280 |         <selected>false</selected>
281 |       </options>
282 |       <options>
283 |         <optionId>399</optionId>
284 |         <featureId>51</featureId>
285 |         <text>Abbreviations were marked and expanded</text>
286 |         <selected>false</selected>
287 |       </options>
288 |     </optionList>
289 |   </edFeature>
290 |   <edFeature>
291 |     <featureId>52</featureId>
292 |     <title>Blackening</title>
293 |     <description>Sensible text can be marked as "blackened" and can be suppressed when exporting the text and the images </description>
294 |     <optionList>
295 |       <options>
296 |         <optionId>70</optionId>
297 |         <featureId>52</featureId>
298 |         <text>Blackeing was not applied</text>
299 |         <selected>false</selected>
300 |       </options>
301 |       <options>
302 |         <optionId>69</optionId>
303 |         <featureId>52</featureId>
304 |         <text>Blackening was applied to names of persons and companies</text>
305 |         <selected>false</selected>
306 |       </options>
307 |     </optionList>
308 |   </edFeature>
309 | </edFeatures>
310 |   """


--------------------------------------------------------------------------------
/src/TranskribusPyClient_version.py:
--------------------------------------------------------------------------------
1 | '''
2 | Module holding the `version` string.
3 | Created on 29 Nov 2016
4 | @author: meunier
5 | '''
6 | version="0.3"  # presumably the TranskribusPyClient release number (going by the module name) - TODO confirm
7 | 


--------------------------------------------------------------------------------
/src/Transkribus_credential.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | Store your Transkribus credentials in this file.
 5 | Restrict the access rights of this file to protect this information.
 6 | 
 7 | Alternatively, use do_login --persist to make a persistent session usable by next commands.
 8 | 
 9 | Created on 15 Nov 2016
10 | 
11 | @author: meunier
12 | """
13 | 
14 | # Either you store your credentials here, or you use the --persist option
15 | 
16 | login    = "herve.dejean@naverlabs.com"
17 | password = ""
18 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | /trnskrbs_3571/
2 | 


--------------------------------------------------------------------------------
/tests/test_commands.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Tests of the TranskribusPyClient command-line utilities
  4 | #
  5 | # JL Meunier - Nov 29th 2016
  6 | #
  7 | # Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
  8 | #
  9 | #
 10 | # This program is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | #
 18 | # Developed  for the EU project READ. The READ project has received funding 
 19 | # from the European Union's Horizon 2020 research and innovation programme 
 20 | # under grant agreement No 674943.
 21 | 
 22 | # ------------------------------------------------------------------------------------------------------------------------
 23 | # ---  CONFIGURATION SECTION
 24 | # ------------------------------------------------------------------------------------------------------------------------
 25 | 
 26 | # a valid Transkribus login, passed to do_login.py below
 27 | login="herve.dejean@naverlabs.com"
 28 | passwd=""
 29 | 
 30 | # some existing collection with read access for you
 31 | colId=3571
 32 | # 2 existing documents, forming a small range (used below as $docId_A-$docId_B)
 33 | docId_A=7749
 34 | docId_B=7750
 35 | TRP=tst.trp
 36 | 
 37 | #PYTHON=python
 38 | PYTHON=/drives/c/Local/anaconda3/envs/py36/python.exe
 39 | 
 40 | # ------------------------------------------------------------------------------------------------------------------------
 41 | # ---  GENERIC STUFF BELOW
 42 | # ------------------------------------------------------------------------------------------------------------------------
 43 | 
 44 | SRC=`dirname "$0"`/../src
 45 | # $$ expands to the PID of this shell, so the temporary collection name differs between runs
 46 | tmp_col_name="toto_$$"
 47 | 
 48 | # ------------------------------------------------------------------------------------------------------------------------
 49 | 
 50 | error() {  # print "ERROR: <message>" and abort the whole test script with status 1
 51 | 	printf 'ERROR: %s\n' "$1"
 52 | 	exit 1
 53 | }
 54 | 
 55 | # ------------------------------------------------------------------------------------------------------------------------
 56 | 
 57 | #---------------------------------------------------
 58 | # start from a clean state: drop any persisted login info (the exit status is not checked)
 59 | echo "==================================================================="
 60 | echo "--- logout"
 61 | cmd_out=$($PYTHON $SRC/TranskribusCommands/do_logout.py --persist)
 62 | echo "OK"
 63 | 
 64 | # negative test: a login with bogus credentials must fail
 65 | echo
 66 | echo "--- login"
 67 | cmd_out=$($PYTHON $SRC/TranskribusCommands/do_login.py --persist -l "tilla" -p "miaouuuu") && error "login should have failed"
 68 | echo
 69 | echo "OK"
 70 | 
 71 | # positive test: log in with the configured account and persist the session token
 72 | echo
 73 | echo "--- login"
 74 | cmd_out=$($PYTHON $SRC/TranskribusCommands/do_login.py --persist -l "$login" -p "$passwd") || error "login error"
 75 | echo "OK"
 76 | 
 77 | #---------------------------------------------------
 78 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
 79 | # create a throw-away collection; the command's stdout is captured and used below as the collection id
 80 | echo
 81 | echo "--- creating a collection $tmp_col_name"
 82 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_createCollec.py --persist $tmp_col_name` || error "collection creation error"
 83 | echo "--> $tmp_col_id"
 84 | echo "OK"
 85 | 
 86 | #---------------------------------------------------
 87 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
 88 | echo
 89 | echo "--- adding doc $docId_A to the new collection"
 90 | $PYTHON $SRC/TranskribusCommands/do_addDocToCollec.py --persist $tmp_col_id $docId_A  || error "collection add error 1"
 91 | echo "OK"
 92 | # same command again, this time with a document range A-B
 93 | echo
 94 | echo "--- adding doc $docId_A - $docId_B to the new collection"
 95 | $PYTHON $SRC/TranskribusCommands/do_addDocToCollec.py --persist $tmp_col_id $docId_A-$docId_B  || error "collection add error 2"
 96 | echo "OK"
 97 | 
 98 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
 99 | echo
100 | echo "--- copying doc $docId_A from collection $colId to the new collection"
101 | $PYTHON $SRC/TranskribusCommands/do_duplicateDoc.py --persist $colId $tmp_col_id $docId_A  || error "collection copy error 1"
102 | echo "OK"
103 | 
104 | #---------------------------------------------------
105 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
106 | echo
107 | echo "--- deleting it ( $tmp_col_id ) "
108 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_deleteCollec.py --persist $tmp_col_id` || error "collection deletion error"
109 | echo "OK"
110 | 
111 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
112 | echo
113 | echo "--- display  trpdoc of the first page of $docId_A from collection $colId "
114 | $PYTHON $SRC/TranskribusCommands/do_getDocTrp.py --persist $colId $docId_A 1 || error "getDocTrp error 1"
115 | echo "OK"
116 | 
117 | 
118 | 
119 | #---------------------------------------------------
120 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
121 | echo
122 | echo "--- listing collection $colId "
123 | $PYTHON $SRC/TranskribusCommands/do_listCollec.py --persist $colId  || error "collection list error"
124 | echo "OK"
125 | # NOTE(review): from here on the commands are run without --persist - confirm how they authenticate
126 | #---------------------------------------------------
127 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
128 | echo
129 | echo "--- Layout Analysis in collection $colId "
130 | $PYTHON $SRC/TranskribusCommands/do_analyzeLayout.py $colId $docId_A/1  || error "layout analysis error"
131 | echo "OK"
132 | 
133 | #---------------------------------------------------
134 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
135 | echo
136 | echo "--- delete last transcript $colId / $docId_A / 1 "
137 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId  $docId_A 1 --last --rm || error " delete last transcript error"
138 | echo "OK"
139 | 
140 | #---------------------------------------------------
141 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
142 | echo
143 | echo "--- list of locked pages for  $docId_A in  $colId "
144 | $PYTHON $SRC/TranskribusCommands/do_listPageLocks.py $colId $docId_A   || error "locked pages error"
145 | echo "OK"
146 | 
147 | #---------------------------------------------------
148 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
149 | echo
150 | echo "--- list HTR models in collection $colId "
151 | $PYTHON $SRC/TranskribusCommands/do_listHtrRnn.py --colid=$colId   || error "list HTR models error"
152 | echo "OK"
153 | 
154 | #---------------------------------------------------
155 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
156 | echo
157 | echo "--- list trpdoc for document  $docId_A in  $colId "
158 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId  $docId_A || error " transcript list error"
159 | echo "OK"
160 | 
161 | 
162 | #---------------------------------------------------
163 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
164 | echo
165 | echo "--- save trpdoc for document  $docId_A in $TRP "
166 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId  $docId_A 2 --trp=$TRP || error " transcript save error"
167 | 
168 | echo "OK"
169 | 
170 | # the trp file written above drives the next download and the final upload
171 | #---------------------------------------------------
172 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
173 | echo
174 | echo "--- download as per trp ---"
175 | rm -rf trnskrbs_$colId 
176 | echo "--- download using $TRP "
177 | $PYTHON $SRC/TranskribusCommands/Transkribus_downloader.py $colId  --trp=$TRP || error " download error"
178 | echo "OK"
179 | #---------------------------------------------------
180 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
181 | echo
182 | echo "--- download trnskrbs_$colId document $docId_A ---"
183 | rm -rf trnskrbs_$colId 
184 | echo "--- download document  $docId_A ($colId) "
185 | $PYTHON $SRC/TranskribusCommands/Transkribus_downloader.py $colId  --docid=$docId_A --noimage || error " download error"
186 | echo "OK"
187 | 
188 | #---------------------------------------------------
189 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
190 | echo
191 | echo "--- upload  document  $docId_A ($colId ) "
192 | $PYTHON $SRC/TranskribusCommands/TranskribusDU_transcriptUploader.py trnskrbs_$colId  $colId  $docId_A --nodu || error " TranskribusDU_transcriptUploader upload error"
193 | echo "OK"
194 | 
195 | #---------------------------------------------------
196 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
197 | echo
198 | echo "--- upload as per trp $TRP "
199 | $PYTHON $SRC/TranskribusCommands/Transkribus_uploader.py trnskrbs_$colId  $colId  $docId_A --trp=$TRP || error " Transkribus_uploader upload error"
200 | echo "OK"
201 | echo "--- rm $TRP"
202 | rm "$TRP"
203 | 
204 | #---------------------------------------------------
205 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
206 | echo
207 | echo "--- test only --help"
208 | $PYTHON $SRC/TranskribusCommands/do_htrTrainRnn.py --help
209 | 
210 | echo "==================================================================="
211 | echo "TESTs done"
212 | 
213 | 
214 | 
215 | 
216 | 


--------------------------------------------------------------------------------