├── .gitignore
├── LICENSE
├── README.md
├── src
├── .gitignore
├── TranskribusCommands
│ ├── TranskribusDU_transcriptUploader.py
│ ├── Transkribus_downloader.py
│ ├── Transkribus_uploader.py
│ ├── __init__.py
│ ├── do_addDocToCollec.py
│ ├── do_analyzeLayout.py
│ ├── do_analyzeLayoutBatch.py
│ ├── do_createCollec.py
│ ├── do_deleteCollec.py
│ ├── do_deleteJob.py
│ ├── do_duplicateDoc.py
│ ├── do_export.py
│ ├── do_getDocTrp.py
│ ├── do_getJobStatus.py
│ ├── do_getJobs.py
│ ├── do_getRnnTrainingJobStatus.py
│ ├── do_htrHmm.py
│ ├── do_htrRnn.py
│ ├── do_htrRnnPerRegion.py
│ ├── do_htrTrainRnn.py
│ ├── do_listCollec.py
│ ├── do_listHtrHmm.py
│ ├── do_listHtrRnn.py
│ ├── do_listPageLocks.py
│ ├── do_login.py
│ ├── do_logout.py
│ ├── do_tableTemplate.py
│ ├── do_transcript.py
│ └── do_uploadDictionary.py
├── TranskribusDU
│ └── xml_formats
│ │ ├── DS2PageXml.py
│ │ ├── Page2DS.py
│ │ ├── PageXml.py
│ │ ├── PageXmlExtractor.py
│ │ ├── __init__.py
│ │ ├── mpxml2pxml.py
│ │ ├── multipagecontent.xsd
│ │ ├── pagecontent.xsd
│ │ └── tests
│ │ ├── testDS2PageXml
│ │ ├── .gitignore
│ │ └── RRB_MM_01_033_Jahr_1810.ds.xml
│ │ ├── test_DS2PageXml.py
│ │ └── test_PageXml.py
├── TranskribusPyClient
│ ├── TRP_FullDoc.py
│ ├── __init__.py
│ ├── application.wadl
│ ├── client.html
│ ├── client.py
│ ├── common
│ │ ├── DateTimeRange.py
│ │ ├── IntegerRange.py
│ │ ├── IntegerRangeHalfBounded.py
│ │ ├── __init__.py
│ │ └── trace.py
│ └── test
│ │ ├── __init__.py
│ │ ├── test_collections_addDocToCollection.py
│ │ ├── test_collections_copyDocToCollection.py
│ │ ├── test_collections_fulldoc.py
│ │ ├── test_collections_fulldoc_xml.py
│ │ ├── test_collections_list.py
│ │ ├── test_collections_listEditDeclFeatures.py
│ │ └── test_collections_postPageTranscript.py
├── TranskribusPyClient_version.py
└── Transkribus_credential.py
└── tests
├── .gitignore
└── test_commands.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | #################
2 | ## READ
3 | #################
4 | .trnskrbs
5 |
6 | #################
7 | ## Eclipse
8 | #################
9 | .cache
10 | *.pydevproject
11 | .project
12 | .metadata
13 | bin/
14 | tmp/
15 | *.tmp
16 | *.bak
17 | *.swp
18 | *~.nib
19 | local.properties
20 | .classpath
21 | .settings/
22 | .loadpath
23 |
24 | # External tool builders
25 | .externalToolBuilders/
26 |
27 | # Locally stored "Eclipse launch configurations"
28 | *.launch
29 |
30 | # CDT-specific
31 | .cproject
32 |
33 | # PDT-specific
34 | .buildpath
35 |
36 |
37 | #################
38 | ## Visual Studio
39 | #################
40 |
41 | ## Ignore Visual Studio temporary files, build results, and
42 | ## files generated by popular Visual Studio add-ons.
43 |
44 | # User-specific files
45 | *.suo
46 | *.user
47 | *.sln.docstates
48 |
49 | # Build results
50 |
51 | [Dd]ebug/
52 | [Rr]elease/
53 | x64/
54 | build/
55 | [Bb]in/
56 | [Oo]bj/
57 |
58 | # MSTest test Results
59 | [Tt]est[Rr]esult*/
60 | [Bb]uild[Ll]og.*
61 |
62 | *_i.c
63 | *_p.c
64 | *.ilk
65 | *.meta
66 | *.obj
67 | *.pch
68 | *.pdb
69 | *.pgc
70 | *.pgd
71 | *.rsp
72 | *.sbr
73 | *.tlb
74 | *.tli
75 | *.tlh
76 | *.tmp
77 | *.tmp_proj
78 | *.log
79 | *.vspscc
80 | *.vssscc
81 | .builds
82 | *.pidb
83 | *.log
84 | *.scc
85 |
86 | # Visual C++ cache files
87 | ipch/
88 | *.aps
89 | *.ncb
90 | *.opensdf
91 | *.sdf
92 | *.cachefile
93 |
94 | # Visual Studio profiler
95 | *.psess
96 | *.vsp
97 | *.vspx
98 |
99 | # Guidance Automation Toolkit
100 | *.gpState
101 |
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 |
106 | # TeamCity is a build add-in
107 | _TeamCity*
108 |
109 | # DotCover is a Code Coverage Tool
110 | *.dotCover
111 |
112 | # NCrunch
113 | *.ncrunch*
114 | .*crunch*.local.xml
115 |
116 | # Installshield output folder
117 | [Ee]xpress/
118 |
119 | # DocProject is a documentation generator add-in
120 | DocProject/buildhelp/
121 | DocProject/Help/*.HxT
122 | DocProject/Help/*.HxC
123 | DocProject/Help/*.hhc
124 | DocProject/Help/*.hhk
125 | DocProject/Help/*.hhp
126 | DocProject/Help/Html2
127 | DocProject/Help/html
128 |
129 | # Click-Once directory
130 | publish/
131 |
132 | # Publish Web Output
133 | *.Publish.xml
134 | *.pubxml
135 | *.publishproj
136 |
137 | # NuGet Packages Directory
138 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
139 | #packages/
140 |
141 | # Windows Azure Build Output
142 | csx
143 | *.build.csdef
144 |
145 | # Windows Store app package directory
146 | AppPackages/
147 |
148 | # Others
149 | sql/
150 | *.Cache
151 | ClientBin/
152 | [Ss]tyle[Cc]op.*
153 | ~$*
154 | *~
155 | *.dbmdl
156 | *.[Pp]ublish.xml
157 | *.pfx
158 | *.publishsettings
159 |
160 | # RIA/Silverlight projects
161 | Generated_Code/
162 |
163 | # Backup & report files from converting an old project file to a newer
164 | # Visual Studio version. Backup files are not needed, because we have git ;-)
165 | _UpgradeReport_Files/
166 | Backup*/
167 | UpgradeLog*.XML
168 | UpgradeLog*.htm
169 |
170 | # SQL Server files
171 | App_Data/*.mdf
172 | App_Data/*.ldf
173 |
174 | #############
175 | ## Windows detritus
176 | #############
177 |
178 | # Windows image file caches
179 | Thumbs.db
180 | ehthumbs.db
181 |
182 | # Folder config file
183 | Desktop.ini
184 |
185 | # Recycle Bin used on file shares
186 | $RECYCLE.BIN/
187 |
188 | # Mac crap
189 | .DS_Store
190 |
191 |
192 | #############
193 | ## Python
194 | #############
195 |
196 | *.py[cod]
197 |
198 | # Packages
199 | *.egg
200 | *.egg-info
201 | dist/
202 | build/
203 | eggs/
204 | parts/
205 | var/
206 | sdist/
207 | develop-eggs/
208 | .installed.cfg
209 |
210 | # Installer logs
211 | pip-log.txt
212 |
213 | # Unit test / coverage reports
214 | .coverage
215 | .tox
216 |
217 | #Translations
218 | *.mo
219 |
220 | #Mr Developer
221 | .mr.developer.cfg
222 | src/Transkribus_credential.py
223 | *.keep
224 | src/Transkribus_credential.py
225 | /trnskrbs_3820/
226 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TranskribusPyClient
2 |
3 | A Pythonic API and some command line tools to access the Transkribus server via its REST API
4 |
5 | ### Requirements, installation & testing
6 |
7 | #### Python
8 |
9 | * Install the latest release of [Python] 2.7.x, 3.5.x or 3.6.x
10 |
11 | ### Additional Libraries
12 |
13 | * python-dateutil
14 |
15 | ### Wiki documentation [https://github.com/Transkribus/TranskribusPyClient/wiki]
16 |
17 | ### Commands ###
18 |
19 | * do_addDocToCollec.py
20 | * do_createCollec.py
21 | * do_deleteCollec.py
22 | * do_deleteJob.py
23 | * do_duplicateDoc.py
24 | * do_getJobStatus.py
25 | * do_listCollec.py
26 | * do_listPageLocks.py
27 | * do_transcript.py
28 |
29 | * do_analyzeLayout.py
30 | * do_tableTemplate.py
31 | * do_htrHmm.py
32 | * do_htrRnn.py
33 | * do_listHtrHmm.py
34 | * do_listHtrRnn.py
35 |
36 | * do_login.py
37 | * do_logout.py
38 |
39 | * Transkribus_downloader.py
40 | * TranskribusDU_transcriptUploader.py
41 |
42 | **Help on module client:**
43 |
44 | See in [TranskribusPyClient/client.html](http://htmlpreview.github.com/?https://github.com/Transkribus/TranskribusPyClient/blob/master/src/TranskribusPyClient/client.html)
45 |
46 |
47 |
48 | [Python]: https://www.python.org/
49 | [Pip]:
50 | [LIBXML2]:
51 | [TranskribusDU]:
52 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | /Transkribus_credential.py
2 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/Transkribus_downloader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to extract collection or documents from Transkribus and create DS test structures
6 |
7 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
8 |
9 | This program is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | This program is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with this program. If not, see <http://www.gnu.org/licenses/>.
21 |
22 |
23 | Developed for the EU project READ. The READ project has received funding
24 | from the European Union's Horizon 2020 research and innovation programme
25 | under grant agreement No 674943.
26 |
27 | Created on 15 Nov 2016
28 |
29 | @author: meunier
30 | """
31 |
32 | from __future__ import absolute_import
33 | from __future__ import print_function
34 | from __future__ import unicode_literals
35 |
36 | DEBUG = 0
37 |
38 |
39 | import sys, os, logging
40 |
41 | from optparse import OptionParser
42 | import json
43 | from io import open
44 |
45 |
46 | try: #to ease the use without proper Python installation
47 | import TranskribusPyClient_version
48 | except ImportError:
49 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
50 | import TranskribusPyClient_version
51 |
52 | from TranskribusPyClient.common.trace import traceln, trace
53 |
54 | from TranskribusCommands import sCOL, sMPXMLExtension, _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
55 | from TranskribusPyClient.client import TranskribusClient
56 | from TranskribusDU.xml_formats import PageXml
57 |
58 |
59 |
60 |
class TranskribusDownloader(TranskribusClient):
    """
    Download a Transkribus collection as a DS structured dataset
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: passed as sServerUrl to TranskribusClient
        sHttpProxy:      passed as proxies to TranskribusClient
        loggingLevel:    passed as loggingLevel to TranskribusClient
        """
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def createStandardFolders(self, colId, destDir):
        """
        Create the standard DU folder structure and return the collection folder

        The collection folder is <destDir>/trnskrbs_<colId>; it gets the
        sub-folders sCOL, "xml", "ref", "run" and "out".
        Folders that already exist are left untouched.

        raise ValueError if destDir is not an existing folder, or if one of
        the expected folders exists but is not a folder.
        """
        if not( os.path.exists(destDir) and os.path.isdir(destDir) ):
            raise ValueError("Non-existing destination folder %s" % destDir)

        colDir = os.path.join(destDir, "trnskrbs_%s"%colId)

        #Creating folder structure
        if os.path.exists(colDir):
            if not os.path.isdir(colDir):
                raise ValueError("%s exists and is not a folder."%colDir)
        else:
            traceln('- creating folder: %s'%colDir)
            os.mkdir(colDir)

        for sSubDir in [sCOL, "xml", "ref", "run", "out"]:
            sDir = os.path.join(colDir, sSubDir)
            if os.path.exists(sDir):
                if not os.path.isdir(sDir):
                    raise ValueError("%s exists and is not a folder."%sDir)
            else:
                os.mkdir(sDir)

        return colDir

    def downloadCollection(self, colId, destDir, bForce=False, bNoImage=False,sDocId=None):
        """
        Here, we create the appropriate structure and fetch either the whole collection or one document and convert this to DS XML

        if bForce==True, data on disk is overwritten, otherwise raise an exception is some data is there already
        if bNoImage==True, do not download the images
        sDocId is forwarded to download_collection (per the description above, it restricts the download to one document)

        return the tuple (col_max_ts, colDir, ldocids, dFileListPerDoc)
        """
        colDir = self.createStandardFolders(colId, destDir)

        col_max_ts, ldocids, dFileListPerDoc = self.download_collection(colId, os.path.join(colDir,sCOL), bForce, bNoImage,sDocId)

        # "col_max.ts" file
        # NOTE(review): this file is written directly under destDir, not under
        # colDir like the rest of the data - confirm whether that is intended.
        with open(destDir+os.sep+sCOL+TranskribusClient._POSTFIX_MAX_TX, "w") as fd:
            fd.write("%s"%col_max_ts)

        return col_max_ts, colDir, ldocids, dFileListPerDoc

    def download_document_by_trp(self, colId, docId, destDir, trp_spec, bOverwrite=False, bNoImage=False):
        """
        we have a trp, and download what is specified in it

        The document is stored in <collection folder>/<sCOL>/<docId>.
        return the tuple (doc_max_ts, docFolder, lFileList)
        """
        colDir = self.createStandardFolders(colId, destDir)

        docFolder = os.path.join(colDir, sCOL, str(docId))

        doc_max_ts, lFileList = self.download_document(colId, docId, docFolder
                                                       , bForce=False, bOverwrite=bOverwrite, bNoImage=bNoImage
                                                       , trp_spec=trp_spec)
        return doc_max_ts, docFolder, lFileList

    def generateCollectionMultiPageXml(self, colDir, dFileListPerDoc, bStrict):
        """
        We concatenate all pages into a "multi-page PageXml" for each document of the collection

        colDir:          folder containing one sub-folder per document
        dFileListPerDoc: maps a document id to the list of page base names
                         (without the ".pxml" extension), or to None, in which
                         case the document is skipped
        bStrict:         if True, a failed schema validation raises ValueError,
                         otherwise only a warning is traced

        return the list of XML filenames
        """
        lsXmlFilename = list()
        traceln("- Generating multi_page PageXml")
        for docId, lsPageBaseName in dFileListPerDoc.items():
            if lsPageBaseName is None:
                continue

            docDir = os.path.join(colDir, docId)
            lFiles = [os.path.join(docDir, sBaseName+".pxml") for sBaseName in lsPageBaseName]
            traceln("\t- %s"%docDir)

            doc = self.makeMultiPageXml(lFiles)

            # the multi-page file sits next to the document folder
            sXmlFilename = docDir+sMPXMLExtension
            self.writeDom(doc, sXmlFilename, True)
            lsXmlFilename.append(sXmlFilename)

            trace("\t\t- validating the MultiPageXml ...")
            if PageXml.MultiPageXml.validate(doc):
                # only report success when the validation actually succeeded
                traceln(" Ok!")
            elif bStrict:
                raise ValueError("Invalid XML generated in '%s'"%sXmlFilename)
            else:
                traceln(" *** WARNING: XML file is invalid against the schema: '%s'"%sXmlFilename)

            if DEBUG>1:
                PageXml.MultiPageXml.splitMultiPageXml(doc, docDir, "debug_%d.xml", bIndent=True)

            traceln('\t- %s'%sXmlFilename)

        return lsXmlFilename

    def makeMultiPageXml(self, slFilenames):
        """
        We concatenate all pages into a "multi-page PageXml"
        return a DOM
        """
        return PageXml.MultiPageXml.makeMultiPageXml(slFilenames)

    def writeDom(self, doc, filename, bIndent=False):
        """
        Write the DOM into the given file, in UTF-8 with an XML declaration.

        NOTE(review): bIndent is currently ignored, the output is always
        pretty-printed. Kept as is, since callers may rely on it.
        """
        doc.write(filename,xml_declaration=True,encoding='utf-8',pretty_print=True)
172 |
173 | # if self.bZLib:
174 | # #traceln("ZLIB WRITE")
175 | # try:
176 | # FIX_docSetCompressMode(doc, self.iZLibRatio)
177 | # except Exception, e:
178 | # traceln("WARNING: ZLib error in Component.py: cannot set the libxml2 in compression mode. Was libxml2 compiled with zlib? :", e)
179 | # if bIndent:
180 | # doc.saveFormatFileEnc(self.getOutputFileName(), "UTF-8",bIndent)
181 | # else:
182 | # #JLM - April 2009 - dump does not support the compressiondoc.dump(self.getOutputFile())
183 | # doc.saveFileEnc(self.getOutputFileName(),"UTF-8")
184 |
if __name__ == '__main__':
    # Command line entry point: download a collection (or one document, or
    # what a TRP file specifies) into a DS test structure.
    usage = "%s <colId> [-f|--force] [--strict] [--docid <docId>] [--trp <trpFile>] [--noImage] [<directory>]"%sys.argv[0]
    version = "v.03"
    description = "Extract a collection from transkribus and create a DS test structure containing that collection. \n" + _Trnskrbs_description

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, TranskribusDownloader.sDefaultServerUrl)

    parser.add_option("-f", "--force" , dest='bForce' , action="store_true", default=False, help="Force rewrite if disk data is obsolete, or force overwrite in --trp mode")
    parser.add_option("--strict" , dest='bStrict', action="store_true", default=False, help="Failed schema validation stops the processus.")
    parser.add_option("--noimage", "--noImage", dest='bNoImage', action="store_true", default=False, help="Do not download images.")
    parser.add_option("--docid", dest='docid', action="store", type="int", help="download specific document")
    parser.add_option("--trp" , dest='trp' , action="store", type="string", help="download the content specified by the trp file.")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    # first positional argument: the collection id (mandatory)
    try:
        colid = args.pop(0)
    except IndexError:
        _exit(usage, 1)

    # second positional argument: the destination folder (optional)
    destDir = args[0] if args else "."

    # ---
    trnkbs2ds = TranskribusDownloader(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(trnkbs2ds, options, trace=trace, traceln=traceln)

    if options.trp:
        traceln("- Loading trp data from %s" % options.trp)
        # text mode + explicit encoding; the file is closed as soon as it is parsed
        with open(options.trp, "rt", encoding='utf-8') as fdTrp:
            trp = json.load(fdTrp)

        traceln("- Downloading collection %s to folder %s, as specified by trp data"%(colid, os.path.abspath(destDir)))
        if not options.docid:
            options.docid = trp["md"]["docId"]
            traceln(" read docId from TRP: docId = %s"%options.docid)
        logging.basicConfig(level=logging.INFO)
        col_ts, docFolder, lFileList = trnkbs2ds.download_document_by_trp(colid, options.docid, destDir, trp, bOverwrite=options.bForce, bNoImage=options.bNoImage)
        traceln(list(map(lambda x: x.encode('utf-8'), lFileList)))
        colFolder = docFolder #inaccurate, but fine for rest of code
    else:
        traceln("- Downloading collection %s to folder %s"%(colid, os.path.abspath(destDir)))
        col_ts, colFolder, ldocids, dFileListPerDoc = trnkbs2ds.downloadCollection(colid, destDir, bForce=options.bForce, bNoImage=options.bNoImage,sDocId=options.docid)
        trnkbs2ds.generateCollectionMultiPageXml(os.path.join(colFolder, sCOL), dFileListPerDoc,options.bStrict)
        traceln("- Done")

    # keep a record of the options used for this download
    with open(os.path.join(colFolder, "config.txt"), "w") as fd:
        fd.write("server=%s\nforce=%s\nstrict=%s\ntrp=%s\n"%(options.server, options.bForce, options.bStrict, options.trp))

    traceln('- Done, see in %s'%colFolder)
248 |
249 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/Transkribus_uploader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to upload to Transkribus from a DS test structure
6 |
7 | Copyright Naver Labs Europe(C) 2017 JL. Meunier
8 |
9 | This program is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | This program is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with this program. If not, see <http://www.gnu.org/licenses/>.
21 |
22 |
23 | Developed for the EU project READ. The READ project has received funding
24 | from the European Union's Horizon 2020 research and innovation programme
25 | under grant agreement No 674943.
26 |
27 | Created on 11 October 2017
28 |
29 | @author: meunier
30 | """
31 |
32 | from __future__ import absolute_import
33 | from __future__ import print_function
34 | from __future__ import unicode_literals
35 | DEBUG = 0
36 |
37 | import sys, os, logging
38 | from optparse import OptionParser
39 | import json
40 | from io import open
41 |
42 |
43 | try: #to ease the use without proper Python installation
44 | import TranskribusPyClient_version
45 | except ImportError:
46 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
47 | import TranskribusPyClient_version
48 |
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
51 | from TranskribusCommands import sCOL, _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
52 | from TranskribusPyClient.client import TranskribusClient
53 |
54 | from TranskribusDU.xml_formats import PageXml
55 |
56 |
57 |
class TranskribusTranscriptUploader(TranskribusClient):
    """
    Upload transcripts from the disk or memory to Transkribus
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: passed as sServerUrl to TranskribusClient
        sHttpProxy:      passed as proxies to TranskribusClient
        loggingLevel:    passed as loggingLevel to TranskribusClient
        """
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def uploadCollectionTranscript(self, colid, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
        """
        Upload the transcripts of all document in that collection into Transkribus

        The list of documents is read from <sColDSDir>/trp.json, each entry of
        which must have a "docId".
        sNote, sToolName, iVerbose and status are forwarded to uploadDocumentTranscript.

        raise Exception if the trp.json file of the collection is missing
        return nothing
        """
        if iVerbose:
            traceln("- Uploading all transcripts from folder %s to collection %s"%(sColDSDir, colid))

        trpFilename = os.path.join(sColDSDir, "trp.json")
        traceln(" - reading %s"%trpFilename)
        if not os.path.exists(trpFilename):
            raise Exception("File not found %s. \nData probably created in --trp mode, so upload must be done in --trp mode."%trpFilename)
        with open(trpFilename, "r", encoding='utf-8') as fd:
            trp = json.load(fd)

        # collect all ids first, so that malformed TRP data fails before any upload
        lDocId = [d["docId"] for d in trp]
        for docid in lDocId:
            self.uploadDocumentTranscript(colid, docid, sColDSDir, sNote=sNote, sToolName=sToolName, iVerbose=iVerbose, status=status)

        if iVerbose:
            traceln(" Done (collection %s)"%colid)
        return

    def uploadDocumentTranscript(self, colid, docid, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
        """
        Upload the transcripts of one document of that collection into Transkribus

        The TRP data of the document is read from <sColDSDir>/<docid>/trp.json
        and the actual work is delegated to uploadDocumentTranscript_by_trp.

        raise Exception if the trp.json file of the document is missing
        return nothing
        """
        trpFilename = os.path.join(sColDSDir, str(docid), "trp.json")
        traceln(" - reading %s"%trpFilename)
        if not os.path.exists(trpFilename):
            raise Exception("File not found %s. \nData probably created in --trp mode, so upload must be done in --trp mode."%trpFilename)
        with open(trpFilename, "r", encoding='utf-8') as fd:
            trp = json.load(fd)
        self.uploadDocumentTranscript_by_trp(colid, docid, trp, sColDSDir, sNote=sNote, sToolName=sToolName, iVerbose=iVerbose, status=status)
        return

    def uploadDocumentTranscript_by_trp(self, colid, docid, trp, sColDSDir, sNote="",sToolName="", iVerbose=0, status=None):
        """
        Upload the transcripts of one document in that collection into Transkribus, as specified by the TRP data
        status = None ==> we get the status from the TRP
        otherwise ==> we set the given status

        If docid is empty, it is taken from the TRP data; otherwise it must
        match the docId of the TRP data.
        For each page listed in the TRP, the file <sColDSDir>/<docid>/<image base name>.pxml
        is posted, with the first transcript listed for the page as parent.

        raise ValueError if docid does not match the TRP data, or if the
        document directory does not exist
        return the list of the base names (no extension) of the uploaded pages
        """
        if iVerbose:
            traceln("- Uploading as listed in TRP, the transcript(s) of document %s from folder %s to collection %s "%(docid, sColDSDir, colid))

        if docid:
            if str(trp["md"]["docId"]) != str(docid):
                raise ValueError("Document ID does not match docId of TRP data.")
        else:
            docid = trp["md"]["docId"]

        pageList = trp["pageList"]

        docDir = os.path.join(sColDSDir, str(docid))

        if not os.path.exists(docDir):
            raise ValueError("Document directory not found: %s" % docDir)

        lFileList= []
        for dPage in pageList['pages']:
            pagenum= dPage['pageNr']
            logging.info("\t\t- page %s"%pagenum)

            # the transcript file is named after the image file of the page
            imgFileName = dPage['imgFileName']
            sBaseName, _ = os.path.splitext(imgFileName)
            lFileList.append(sBaseName)

            # the first transcript listed for the page becomes the parent of the uploaded one
            _trpTranscript0 = dPage['tsList']["transcripts"][0]
            tsId = _trpTranscript0['tsId']
            xmlFilename = docDir + os.sep + sBaseName + ".pxml"
            logging.info("\t\t\t%s"%xmlFilename)
            # kept as an assert so that the exception type seen by callers does not change
            assert os.path.exists(xmlFilename), "Transcript file not found: %s" % xmlFilename
            with open(xmlFilename, "r",encoding='utf-8') as fd:
                sXMlTranscript = fd.read()
            cur_status = _trpTranscript0["status"] if status is None else status
            traceln("page %5d : %s : %s : %s : %s : %s"%(pagenum, cur_status, sToolName, tsId, sNote, xmlFilename))
            self.postPageTranscript(colid, docid, pagenum, sXMlTranscript, parentId=tsId, bEncoded=False, sNote=sNote, sToolName=sToolName, status=cur_status)

        if iVerbose:
            traceln(" Done (collection %s, document %s as per TRP)"%(colid, docid))

        return lFileList
149 |
def main():
    """
    Command line entry point: upload to Transkribus the transcripts stored in a DS folder.

    Positional arguments: <directory> <colid> [<docid>]
    - with --trp, the pages to upload are those listed in the given TRP file
    - otherwise, without <docid>, the whole collection is uploaded
    - otherwise, only the given document is uploaded
    """
    usage = "%s <directory> <colid> [<docid>]"%sys.argv[0]
    version = "v.01"
    description = """Upload the transcript(s) from the DS structure to Transkribus, either of the collection or one of its document(s).
The <directory> must have been created by transkribus_downloader.py and should contain the 'col' directory and a trp.json file for the collection, and one per document (the 'out', 'ref', 'run', 'xml' folders are not used).
The page transcript from the single page PageXml files are uploaded. (The multi-page xml file(s) are ignored)
""" + _Trnskrbs_description

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, TranskribusTranscriptUploader.sDefaultServerUrl)

    parser.add_option("-q", "--quiet" , dest='bQuiet', action="store_true", default=False, help="Quiet mode")
    parser.add_option("--trp" , dest='trp' , action="store", type="string", help="upload the content specified by the trp file.")
    parser.add_option("--toolname", dest='tool' , action="store", type="string", default="", help="Set the Toolname metadata in Transkribus.")
    parser.add_option("--message", dest='message', action="store", type="string", default="", help="Set the message metadata in Transkribus.")
    parser.add_option("--set_status", dest='set_status', action="store", type="string", default=None, help="Set the status of the uploaded transcript.")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    iVerbose = 0 if options.bQuiet else 2
    # ---
    # only a missing argument leads to the usage message; other errors must surface
    try: sDSDir = args.pop(0)
    except IndexError: _exit(usage, 1)
    # the transcripts are in the 'col' sub-folder, unless the given folder is already that one
    if not(sDSDir.endswith(sCOL) or sDSDir.endswith(sCOL+os.path.sep)):
        sColDSDir = os.path.abspath(os.path.join(sDSDir, sCOL))
    else:
        sColDSDir = os.path.abspath(sDSDir)
    if not( os.path.exists(sColDSDir) and os.path.isdir(sColDSDir) ):
        raise ValueError("Non-existing folder: %s "%sColDSDir)

    try: colid = args.pop(0)
    except IndexError: _exit(usage, 1)

    # the document id is optional
    docid = args.pop(0) if args else None

    # ---
    doer = TranskribusTranscriptUploader(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    if options.trp:
        with open(options.trp, "r",encoding='utf-8') as fd:
            trp = json.load(fd)
        traceln("- Uploading to collection %s, as specified by trp data"%(colid))
        if not docid:
            docid = trp["md"]["docId"]
            traceln(" read docId from TRP: docId = %s"%docid)
        sToolname = options.tool if options.tool else "Transkribus_uploader (--trp)"
        lFileList = doer.uploadDocumentTranscript_by_trp(colid, docid, trp, sColDSDir
                                                         , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                                                         , status=options.set_status)
    elif docid is None:
        sToolname = options.tool if options.tool else "Transkribus_uploader"
        doer.uploadCollectionTranscript(colid, sColDSDir
                                        , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                                        , status=options.set_status)
    else:
        sToolname = options.tool if options.tool else "Transkribus_uploader (docid)"
        doer.uploadDocumentTranscript(colid, docid, sColDSDir
                                      , sNote=options.message, sToolName=sToolname, iVerbose=iVerbose
                                      , status=options.set_status)

    traceln('- DONE, all transcripts were uploaded. See in collection %s'%colid)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/src/TranskribusCommands/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | #REMOVE THIS annoying warning saying:
4 | # /usr/lib/python2.7/site-packages/requests-2.12.1-py2.7.egg/requests/packages/urllib3/connectionpool.py:843: InsecureRequestWarning: Unverified HTTPS request is being made.
5 | # Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings InsecureRequestWarning)
6 | from __future__ import absolute_import
7 | from __future__ import print_function
8 | from __future__ import unicode_literals
9 |
10 | import sys
11 |
12 | import requests.packages.urllib3
13 |
14 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence the InsecureRequestWarning of urllib3 for the whole process, as soon as this package is imported
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
DEBUG=0  # when truthy, the login helper below traces each of its steps

# default URL of the Transkribus server
_Trnskrbs_default_url = "https://transkribus.eu/TrpServer"

# text about credentials and proxy, which the command scripts append to their own description
_Trnskrbs_description = u"""Pass your login/password as options otherwise consider having a Transkribus_credential.py file, which defines a 'login' and a 'pwd' variables.
If you need to use a proxy, use the --https_proxy option or set the environment variables HTTPS_PROXY.
To use HTTP Basic Auth with your proxy, use the http://user:password@host/ syntax.
"""

sCOL = "col"                    # name of the collection sub-folder in a DS folder
sMPXMLExtension = ".mpxml"      # presumably the extension of multi-page PageXml files -- TODO confirm

# XML namespace of the PAGE format (2013-07-15 schema)
NS_PAGE_XML = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15"
29 |
def __Trnskrbs_basic_options(parser, sDefaultServerUrl):
    """
    UTILITY
    add the usual options for Transkribus to a command line option parser:
        -s/--server    Transkribus server URL, defaulting to sDefaultServerUrl
        -l/--login     Transkribus login
        -p/--pwd       Transkribus password
        --persist      flag: try reusing a persistent session
        --https_proxy  proxy URL
    The given parser is modified in place.
    return nothing
    """
    #prepare for the parsing of the command line
    #parser = OptionParser(usage=usage, version=version)

    parser.add_option("-s", "--server" , dest='server', action="store", type="string", default=sDefaultServerUrl, help="Transkribus server URL")

    # the help names the same credential file as _Trnskrbs_description does
    parser.add_option("-l", "--login" , dest='login' , action="store", type="string", help="Transkribus login (consider storing your credentials in 'Transkribus_credential.py')")
    parser.add_option("-p", "--pwd" , dest='pwd' , action="store", type="string", help="Transkribus password")

    parser.add_option("--persist" , dest='persist', action="store_true", help="Try using an existing persistent session, or log-in and persists the session.")

    parser.add_option("--https_proxy" , dest='https_proxy' , action="store", type="string", help="proxy, e.g. http://cornillon:8000")
46 |
47 |
def __Trnskrbs_do_login_stuff(trnskrbs_client, options, trace=None, traceln=None):
    """
    deal with the complicated login variants...
    1) if options.persist is set, try reusing a persistent session
    2) if there is no usable session, log in with options.login/options.pwd if a login
       is given, otherwise with the credentials stored for the client
    -trace and traceln are optional print methods, only used when DEBUG is set
    return True or raises an exception
    """
    bOk = False

    if options.persist:
        #try getting some persistent session token
        if DEBUG and trace: trace(" ---login--- Try reusing persistent session ... ")
        try:
            bOk = trnskrbs_client.reusePersistentSession()
            if DEBUG and traceln: traceln("OK!")
        except Exception:
            # best effort: on failure we fall back on a normal login
            # (KeyboardInterrupt and SystemExit are no longer swallowed)
            if DEBUG and traceln: traceln("Failed")

    if not bOk:
        if options.login:
            login, pwd = options.login, options.pwd
        else:
            if DEBUG and trace: trace(" ---login--- no login provided, looking for stored credentials... ")
            login, pwd = trnskrbs_client.getStoredCredentials(bAsk=False)
            if DEBUG and traceln: traceln("OK")

        # guard on the function actually called: trace may be None while traceln is given
        if DEBUG and trace: trace(" ---login--- logging onto Transkribus as %s "%login)
        trnskrbs_client.auth_login(login, pwd)
        if DEBUG and traceln: traceln("OK")
        bOk = True

    return bOk
79 |
def _exit(usage, status, exc=None):
    """
    Terminate the program with the given exit status.
    - if usage is not empty, it is written on stderr as an error message
    - if an exception is given, its text is written on stderr
    Never returns: raises SystemExit by calling sys.exit.
    """
    if usage: sys.stderr.write("ERROR: usage : %s\n"%usage)
    if exc is not None: sys.stderr.write(str(exc)) #any exception?
    sys.exit(status)
84 |
85 |
def strTabularFormat(lDic, lsKey, sSortKey=None):
    """
    Format as a table a list of dictionary like:
    [
        {
            "modelName": "Marine_Lives",
            "nrOfTokens": 0,
            "isUsableInTranskribus": 1,
            "nrOfDictTokens": 0,
            "nrOfLines": 0,
            "modelId": 45
        },
        ...
    Show only keys listed in lsKey
    if given, sSortKey is used to sort the lines of the table.
    NOTE: the sort is done in place, i.e. the given list is re-ordered.
    An empty list gives a table reduced to its header and separator lines.
    return a string
    """
    if sSortKey: lDic.sort(key=lambda x: x[sSortKey])
    #computing column width: the widest of the column name and of its values
    # (max over a single list, so that an empty lDic is supported)
    lWidth = [max([len(k)] + [len(str(v[k])) for v in lDic]) for k in lsKey]
    #something like "%(modelName)25s|%(modelId)13s|..."
    sFmt = "|".join(["%%(%s)%ds"%(name, n) for name, n in zip(lsKey, lWidth)]) + "\n"
    lsLine = [ sFmt % {k:k for k in lsKey}                          #table header
             , sFmt % {k:("-"*n) for k, n in zip(lsKey, lWidth)}    #separator line
             ]
    lsLine.extend(sFmt % record for record in lDic)
    return "".join(lsLine)
113 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_addDocToCollec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to add Transkribus documents to another collection
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 |
35 | #optional: useful if you want to choose the logging level to something else than logging.WARN
36 | import sys, os, logging
37 | from optparse import OptionParser
38 |
39 | try: #to ease the use without proper Python installation
40 | import TranskribusPyClient_version
41 | except ImportError:
42 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
43 | import TranskribusPyClient_version
44 |
45 | from TranskribusPyClient.common.trace import traceln, trace
46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
47 | from TranskribusPyClient.client import TranskribusClient
48 |
DEBUG = 0

# text shown by the --help option
description = """Add one or several documents stored in Transkribus to another Transkribus collection.
Document(s) and collection are specified by their unique identifier (a number).
""" + _Trnskrbs_description

# the first argument is the target collection, the following ones are document ids or ranges of ids
usage = """%s <colId> [ <docId> | <docId>-<docId> ]+
Documents are specified by a space-separated list of numbers, or number ranges, e.g. 3-36.
"""%sys.argv[0]
58 |
class DoAddDocToCollec(TranskribusClient):
    """
    Add a document to another collection.
    """

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server; when empty, the default URL is used
        sHttpProxy is passed as the 'proxies' of the client
        """
        # use the URL given by the caller, so that the --server option has an effect
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl or _Trnskrbs_default_url, proxies=sHttpProxy, loggingLevel=loggingLevel)
67 |
68 |
if __name__ == '__main__':
    # Script: add the given documents to the given collection.
    # The command line is fully validated before connecting to Transkribus,
    # and the client is created and logged in only once.
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, _Trnskrbs_default_url)

    #parse the command line
    (options, args) = parser.parse_args()

    # ---
    #target collection
    try: colId = int(args.pop(0))
    except Exception as e: _exit(usage, 1, e)

    # ---
    # document list: each argument is either a number or a range <first>-<last>, bounds included
    lDocId = []
    try:
        while args:
            chunk = args.pop(0).strip()
            li = chunk.split('-')
            if len(li) == 2:
                docId1, docId2 = [int(i) for i in li]
                lDocId.extend( range(docId1,docId2+1) )
            else:
                lDocId.append( int(chunk) )
    except Exception as e:
        _exit(usage, 2, e)

    # ---
    #credentials and proxy
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ------------------------------------------------------------------------------------------------
    doer = DoAddDocToCollec(options.server, proxies, loggingLevel=logging.INFO)

    __Trnskrbs_do_login_stuff(doer, options, trace, traceln)

    trace("- adding to collection '%d' the %d documents: "%(colId, len(lDocId)))
    for docId in lDocId:
        trace(" %d"%docId)
        try:
            doer.addDocToCollection(colId, docId)
        except Exception:
            traceln()
            traceln("ERROR: could not add document '%d' to collection '%d'"%(docId, colId))
            raise   # bare raise keeps the original traceback
    traceln()
    traceln("- Done for %d documents"%len(lDocId))
129 |
130 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_analyzeLayoutBatch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | H. Déjean - Dec 2016
7 |
8 |
9 | Copyright Xerox(C) 2016 H. Déjean
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 | """
30 |
31 | # TranskribusCommands/do_LAbatch.py 3571 3820 8251 8252
32 |
33 |
34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
35 | import sys, os, logging
36 | from optparse import OptionParser
37 | import json
38 | import codecs
39 |
40 | try: #to ease the use without proper Python installation
41 | import TranskribusPyClient_version
42 | except ImportError:
43 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
44 | import TranskribusPyClient_version
45 |
46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
47 | from TranskribusPyClient.client import TranskribusClient
48 | from do_transcript import DoTranscript
49 | from TranskribusPyClient.common.IntegerRange import IntegerRange
50 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
51 |
52 |
53 | from TranskribusPyClient.common.trace import traceln, trace
54 |
DEBUG = 0

# text shown by the --help option
description = """Apply Layout Analysis (LA) with batch model.

The syntax for specifying the page range is:
- one or several specifiers separated by a comma
- one specifier is a page number, or a range of page number, e.g. 3-8
- Examples: 1 1,3,5 1-3 1,3,5-99,100

""" + _Trnskrbs_description

# the two optional trailing arguments are numbers: 0 disables the block (resp. line) segmentation
usage = """%s <colId> <docId> <pages> [<doBlockSeg> [<doLineSeg>]]
"""%sys.argv[0]
68 |
class DoLAbatch(TranskribusClient):
    """
    Client for the batch Layout Analysis (LA) service of Transkribus.

    Notes kept from the description of the service by the Transkribus team:
    - the REST interface is described at
          https://transkribus.eu/TrpServer/Swadl/wadl.html
          https://transkribus.eu/TrpServer/rest/application.wadl
    - the methods are at /LA/analyze
    - valid values for the jobImpl parameter are:
          NcsrLaJob
          CvlLaJob
          CITlabAdvancedLaJob
    - a list of descriptor objects, either as XML or JSON, has to be posted to the service,
      to specify the pages that have to be analyzed (the region ids are optional).
      See buildDescription for the JSON form.
    """
    sDefaultServerUrl = _Trnskrbs_default_url
    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server; when empty, the default URL is used
        sHttpProxy is passed as the 'proxies' of the client
        """
        # use the URL given by the caller, so that the --server option has an effect
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

        # used by buildDescription to select the transcripts of the pages of a document
        self._trpMng = DoTranscript(sServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)


    def buildDescription(self,colId,docpage,trp=None):
        """
        Build the JSON description of the pages to be analyzed, something like:
        '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'

        - without TRP data, docpage must be of the form <docId>/<page range> and the pages
          are obtained from Transkribus
        - with TRP data, the pages are those of the TRP data and docpage is ignored

        For each page, the first transcript of its transcript list is referenced.
        return the document id and the JSON description (a string)
        """
        jsonDesc = {}

        if trp is None:
            docId,pageRange= docpage.split('/')
            jsonDesc["docId"]=docId
            oPageRange = IntegerRange(pageRange)
            trpObj = self._trpMng.filter(colId,docId,page_filter=oPageRange,bLast=True)
        else:
            trpObj = TRP_FullDoc(trp)
        jsonDesc["pageList"]={'pages': []}
        for page in trpObj.getPageList():
            # the document id stored in the page supersedes the one given by the caller
            jsonDesc["docId"]=page['docId']
            jsonDesc["pageList"]['pages'].append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":[]})

        # no 'encoding' argument: it does not exist in Python 3, and utf-8 is the Python 2 default
        return jsonDesc["docId"], json.dumps(jsonDesc)


    def run(self, colId, sDescription, bBlockSeg=True, bLineSeq=True, sJobImpl='CITlabAdvancedLaJob'):
        """
        Launch the layout analysis of the pages described by sDescription, see buildDescription.
        bBlockSeg and bLineSeq tell whether the block and the line segmentation must be done.
        The parameters with a default value are last, in the order used by the
        command line code of this file: run(colId, sDescription, bBlockSeg, bLineSeq)
        return what analyzeLayoutNew returns (the command line code traces it as the job id)
        """
        return self.analyzeLayoutNew(colId, sDescription,sJobImpl,bBlockSeg,bLineSeq)
156 |
157 |
158 |
159 |
if __name__ == '__main__':
    # Script: launch a batch layout analysis job on some pages of a document.
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, DoLAbatch.sDefaultServerUrl)

    # NOTE(review): the --region option is never read below, and its default is the server URL -- to be confirmed
    parser.add_option("-r", "--region" , dest='region', action="store", type="string", default=DoLAbatch.sDefaultServerUrl, help="apply Layout Analysis (textLine)")
    parser.add_option("--trp" , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid" , dest='docid' , action="store", type="string", default=None, help="document/pages to be htr'd")
    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoLAbatch(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # the transcript manager reuses the session of the client
    doer._trpMng.setSessionId(doer._sessionID)

    # ---
    try: colId = int(args.pop(0))
    except Exception as e: _exit(usage, 1, e)
    try: docId = int(args.pop(0))
    except Exception as e: _exit(usage, 1, e)
    try: sPages = args.pop(0)
    except Exception as e: _exit(usage, 1, e)
    # the two optional flags are numbers: 0 disables the corresponding segmentation
    try: doNotBlockSeg = int(args.pop(0)) == 0
    except Exception as e: doNotBlockSeg = False
    try: doNotLineSeg = int(args.pop(0)) == 0
    except Exception as e: doNotLineSeg= False
    if args: _exit(usage, 2, Exception("Extra arguments to the command"))

    # ---
    # do the job...
    # without --docid, the <docId>/<pages> specification is built from the positional arguments
    sDocPages = options.docid if options.docid else "%d/%s"%(docId, sPages)
    if options.trp_doc:
        with codecs.open(options.trp_doc, "rb",'utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,sDocPages,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,sDocPages)

    jobid = doer.run(colId, sPageDesc,not(doNotBlockSeg),not(doNotLineSeg))
    traceln(jobid)

    traceln()
    traceln("- Done")
209 |
210 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_createCollec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Create a collection
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
DEBUG = 0

# text shown by the --help option
description = """create a Transkribus collection.
""" + _Trnskrbs_description

# the single argument is the name of the collection to create
usage = """%s <collection_name>
"""%sys.argv[0]
58 |
class DoCreateCollec(TranskribusClient):
    """
    Transkribus client used by the command that creates a collection.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server; when empty, the default URL is used
        sHttpProxy is passed as the 'proxies' of the client
        """
        # use the URL given by the caller, so that the --server option has an effect
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl or self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
66 |
67 |
68 |
if __name__ == '__main__':
    # Script: create a Transkribus collection with the name given as first argument.
    version = "v.01"

    # command line parser, with the usual Transkribus options:
    # "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoCreateCollec.sDefaultServerUrl)

    options, args = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy':options.https_proxy}
    else:
        proxies = {}

    # name of the collection to create
    try:
        sColName = args[0]
    except Exception as e:
        _exit(usage, 1, e)

    # connection to Transkribus
    doer = DoCreateCollec(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # creation of the collection
    try:
        resp = doer.createCollection(sColName)
    except Exception as e:
        _exit("", 1, e)

    # the response is both traced and printed
    traceln("- Done: --> %s"%resp)

    print (resp)
105 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_deleteCollec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Delete a collection
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
DEBUG = 0

# text shown by the --help option
description = """delete a Transkribus collection.
""" + _Trnskrbs_description

# the single argument is the identifier (a number) of the collection to delete
usage = """%s <colId>
"""%sys.argv[0]
58 |
class DoDeleteCollec(TranskribusClient):
    """
    Transkribus client used by the command that deletes a collection.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server; when empty, the default URL is used
        sHttpProxy is passed as the 'proxies' of the client
        """
        # use the URL given by the caller, so that the --server option has an effect
        TranskribusClient.__init__(self, sServerUrl=trnkbsServerUrl or self.sDefaultServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
66 |
67 |
68 |
if __name__ == '__main__':
    # Script: delete the Transkribus collection whose identifier is given as first argument.
    version = "v.01"

    # command line parser, with the usual Transkribus options:
    # "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoDeleteCollec.sDefaultServerUrl)

    options, args = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy':options.https_proxy}
    else:
        proxies = {}

    # identifier of the collection to delete
    try:
        colId = int(args[0])
    except Exception as e:
        _exit(usage, 1, e)

    # connection to Transkribus
    doer = DoDeleteCollec(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # deletion of the collection
    try:
        resp = doer.deleteCollection(colId)
    except Exception as e:
        _exit("", 1, e)

    traceln("- Done")
102 |
103 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_deleteJob.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Delete a job
6 |
7 | H. Déjean - Dec 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_deleteJob.py
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """delete a Transkribus job.
""" + _Trnskrbs_description

# The job to delete is designated by its numeric identifier, which is the
# first positional argument of the command.
usage = """%s <jobid>
"""%sys.argv[0]
58 |
class DoDeleteJob(TranskribusClient):
    """
    Transkribus client used by the do_deleteJob command.

    It adds nothing to TranskribusClient but a default server URL.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
66 |
67 |
68 |
if __name__ == '__main__':
    version = "v.01"

    # --- definition of the command line
    # the basic options are "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    cmdParser = OptionParser(usage=usage, version=version)
    cmdParser.description = description
    __Trnskrbs_basic_options(cmdParser, DoDeleteJob.sDefaultServerUrl)

    # --- parsing of the command line
    (opts, lArg) = cmdParser.parse_args()
    if opts.https_proxy:
        dProxy = {'https_proxy':opts.https_proxy}
    else:
        dProxy = {}

    # --- the identifier of the job to delete is the first positional argument
    try:
        iJobId = int(lArg[0])
    except Exception as e:
        _exit(usage, 1, e)

    # --- create the client and log in
    client = DoDeleteJob(opts.server, dProxy, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(client, opts, trace=trace, traceln=traceln)

    # --- delete the job
    try:
        sStatus = client.deleteJob(iJobId)
    except Exception as e:
        _exit("", 1, e)

    # any status other than "CANCELED" is reported as a failure
    if sStatus != "CANCELED":
        raise Exception("Job status should be CANCELED not '%s'"%sStatus)

    traceln("- Done")
105 |
106 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_duplicateDoc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to duplicate Transkribus documents from a collection to another collection
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """Copy (duplicate) one or several documents stored in a Transkribus collection to another Transkribus collection.
Document(s) and collections are specified by their unique identifier (a number).
"""  + _Trnskrbs_description

# Positional arguments, in the order the main script consumes them: the source
# collection, the target collection, then at least one document or range.
usage = """%s <colIdFrom> <colIdTo> ( <docId> | <docIdFrom>-<docIdTo> )+
Documents are specified by a space-separated list of numbers, or number ranges, e.g. 3-36.
"""%sys.argv[0]
60 |
class DoCopyDocToCollec(TranskribusClient):
    """
    Copy a document from a collection to another
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
69 |
70 |
def parseDocIds(lsChunk):
    """
    Expand a list of document specifiers into a flat list of document identifiers.

    Each specifier is either a number, e.g. "12", or an inclusive range of
    numbers, e.g. "3-36". Surrounding blanks are ignored.

    Return the list of identifiers (int), in the order of the specifiers.
    Raise ValueError if a specifier is malformed, if a range is reversed
    (e.g. "36-3"), or if no document at all is specified.
    """
    lDocId = []
    for sChunk in lsChunk:
        sChunk = sChunk.strip()
        lsBound = sChunk.split('-')
        if len(lsBound) == 2:
            docId1, docId2 = [int(s) for s in lsBound]
            if docId1 > docId2:
                # a reversed range would otherwise silently select nothing
                raise ValueError("Invalid document range '%s'"%sChunk)
            lDocId.extend( range(docId1, docId2+1) )
        else:
            lDocId.append( int(sChunk) )
    if not lDocId:
        # the usage requires at least one document
        raise ValueError("No document specified")
    return lDocId


if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, DoCopyDocToCollec.sDefaultServerUrl)

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    #source collection
    try:                    colIdFrom = int(args.pop(0))
    except Exception as e:  _exit(usage, 1, e)
    #target collection
    try:                    colIdTo   = int(args.pop(0))
    except Exception as e:  _exit(usage, 1, e)

    # ---
    # document list: all remaining arguments
    try:
        lDocId = parseDocIds(args)
    except Exception as e:
        _exit(usage, 2, e)

    # ------------------------------------------------------------------------------------------------
    doer = DoCopyDocToCollec(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    #the only issue is that we need to have the name of each document...
    traceln("- checking existence of each document in source collection '%d'"%(colIdFrom))
    dName_by_docId = {}
    for docDic in doer.listDocsByCollectionId(colIdFrom):
        dName_by_docId[ docDic['docId'] ] = docDic['title']
    #check now, so as to avoid partial copies...
    for docId in lDocId:
        if docId not in dName_by_docId:
            traceln()
            traceln("ERROR: document '%d' is not in source collection '%d'"%(docId, colIdFrom))
            _exit("", 3, KeyError(docId))

    trace("- duplicating from collection %d to collection '%d' the %d documents: "%(colIdFrom, colIdTo, len(lDocId)))
    for docId in lDocId:
        name = dName_by_docId[docId]
        trace(" %d ('%s')"%(docId, name))
        try:
            doer.duplicateDoc(colIdFrom, docId, colIdTo, name)
        except Exception as e:
            traceln()
            traceln("ERROR: could not copy document '%d' from collection '%d' to collection '%d'"%(docId, colIdFrom, colIdTo))
            _exit("", 4, e)
    traceln()
    traceln("- Done for %d documents"%len(lDocId))
142 |
143 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_export.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | Hervé Déjean - april 2021
7 |
8 |
9 | Copyright Naver LabsEurope (C) 2021
10 |
11 | see https://transkribus.eu/wiki/index.php/HTR
12 | """
13 | from __future__ import absolute_import
14 | from __future__ import print_function
15 | from __future__ import unicode_literals
16 |
17 | # TranskribusCommands/do_htrTrainRnn model-name colId docid pages
18 |
19 |
20 | #optional: useful if you want to choose the logging level to something else than logging.WARN
21 | import sys, os, logging
22 | from optparse import OptionParser
23 | import json
24 | from lxml import etree
25 |
26 | try: #to ease the use without proper Python installation
27 | import TranskribusPyClient_version
28 | except ImportError:
29 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
30 | import TranskribusPyClient_version
31 |
32 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
33 | # from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
34 |
35 | from TranskribusPyClient.common.IntegerRange import IntegerRange
36 | from TranskribusPyClient.common.trace import traceln, trace
37 | from TranskribusPyClient.client import TranskribusClient
38 |
39 |
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """Export a document into alto format """


# Positional arguments, in the order the main script consumes them: the
# collection identifier, then the document identifier.
usage = """%s <colId> <docId>
"""%sys.argv[0]
47 |
class Export(TranskribusClient):
    """
    Transkribus client used by the do_export command.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    # Default export parameters (JSON text); the main script passes them as-is to run().
    # NOTE(review): "pages" is hard-coded to "1" - presumably only the first page
    # gets exported; confirm against the server API.
    params="""
{ "commonPars" : {
"pages" : "1",
"doExportDocMetadata" : true,
"doWriteMets" : true,
"doWriteImages" : true,
"doExportPageXml" : true,
"doExportAltoXml" : true,
"doExportSingleTxtFiles" : false,
"doWritePdf" : false,
"doWriteTei" : false,
"doWriteDocx" : false,
"doWriteOneTxt" : false,
"doWriteTagsXlsx" : false,
"doWriteTagsIob" : false,
"doWriteTablesXlsx" : false,
"doWriteStructureInMets" : false,
"doCreateTitle" : false,
"useVersionStatus" : "Latest version",
"writeTextOnWordLevel" : false,
"doBlackening" : false,
"selectedTags" : [ "add", "date", "Address", "human_production", "supplied", "work", "unclear", "sic", "structure", "div", "highlight", "place1", "regionType", "speech", "person", "gap", "organization", "comment", "abbrev", "place", "add1", "Initial", "lat" ],
"font" : "FreeSerif",
"splitIntoWordsInAltoXml" : true,
"pageDirName" : "page",
"fileNamePattern" : "${filename}",
"useHttps" : true,
"remoteImgQuality" : "orig",
"doOverwrite" : true,
"useOcrMasterDir" : true,
"exportTranscriptMetadata" : true,
"updatePageXmlImageDimensions" : false
},
"altoPars" : {
"splitIntoWordsInAltoXml" : true
},
"pdfPars" : {
"doPdfImagesOnly" : false,
"doPdfImagesPlusText" : true,
"doPdfWithTextPages" : false,
"doPdfWithTags" : false,
"doPdfWithArticles" : false,
"doPdfA" : false,
"pdfImgQuality" : "view"
},
"docxPars" : {
"doDocxWithTags" : false,
"doDocxPreserveLineBreaks" : false,
"doDocxForcePageBreaks" : false,
"doDocxMarkUnclear" : false,
"doDocxKeepAbbrevs" : false,
"doDocxExpandAbbrevs" : false,
"doDocxSubstituteAbbrevs" : false,
"doDocxWriteFilenames" : false,
"doDocxIgnoreSuppliedTag" : false,
"doDocxShowSuppliedTagWithBrackets" : false
}
}
"""
    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, colId, docid,sParams):
        """
        Call exportCollection for the given collection and document, with the
        given export parameters, and return whatever it returns (the main
        script reports it as the job ID).
        """
        return self.exportCollection(colId, docid, sParams)
117 |
118 |
119 |
120 |
121 |
if __name__ == '__main__':
    version = "v.01"
    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, Export.sDefaultServerUrl)

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    # check the positional arguments BEFORE connecting, so that an invalid
    # command line is reported without any login on the server
    try:                    colId = int(args.pop(0))
    except Exception as e:  _exit(usage, 1, e)
    try:                    docid = args.pop(0)
    except Exception as e:  _exit(usage, 1, e)
    if args:                _exit(usage, 2, Exception("Extra arguments to the command"))

    # ---
    doer = Export(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # ---
    # do the job, with the default export parameters of the Export class
    jobid = doer.run(colId, docid, doer.params)
    traceln("job ID:",jobid)
    traceln("- Done")
157 |
158 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getDocTrp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | JL Meunier - August 2017
7 |
8 |
9 | Copyright Naver(C) 2017 JL. Meunier
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 | """
30 | from __future__ import absolute_import
31 | from __future__ import print_function
32 | from __future__ import unicode_literals
33 | # TranskribusCommands/do_LAbatch.py 3571 3820 8251 8252
34 |
35 |
36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
37 | import sys, os, logging
38 | from optparse import OptionParser
39 | import json
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.IntegerRange import IntegerRange as PageRangeSpec
50 | from TranskribusPyClient.common.trace import traceln, trace
51 |
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """Get the TRP of a document
""" + _Trnskrbs_description

# Positional arguments, in the order the main script consumes them: the
# collection, the document, then an optional page range specification.
usage = """%s <colId> <docId> [<page-ranges>] -n <nbTranscript>
Return the so-called TRP of all or certain pages, optionally with the given number of transcript(s) per page (-1 means all).

Page range is a comma-separated series of integer or pair of integers separated by a '-'
For instance 1 or 1,3 or 1-4 or 1,3-6,8
"""%sys.argv[0]
63 |
class DoGetDocTrp(TranskribusClient):
    """
    Transkribus client used by the do_getDocTrp command.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, colId, docId, nrOfTranscripts=1):
        """
        Call getDocById for the given collection and document, asking for
        nrOfTranscripts transcript(s), and return whatever it returns.
        """
        return self.getDocById(colId, docId, nrOfTranscripts)
73 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, DoGetDocTrp.sDefaultServerUrl)
    parser.add_option("-n", "--n"  , dest='nbTranscript', action="store", type="int", default=1, help="Number of transcripts")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    # check the positional arguments BEFORE connecting, so that an invalid
    # command line is reported without any login on the server
    try:                    colId = int(args.pop(0))
    except Exception as e:  _exit(usage, 1, e)
    try:                    docId = int(args.pop(0))
    except Exception as e:  _exit(usage, 1, e)
    # the page range is optional: only its absence is tolerated here
    try:                    sPageRangeSpec = args.pop(0)
    except IndexError:      sPageRangeSpec = None
    if args:                _exit(usage, 2, Exception("Extra arguments to the command"))

    oPageRange = PageRangeSpec(sPageRangeSpec) if sPageRangeSpec else None

    # ---
    doer = DoGetDocTrp(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # ---
    # do the job...
    resp = doer.run(colId, docId, nrOfTranscripts=options.nbTranscript)
    if oPageRange:
        traceln("Filtering response as per page specification: %s"%oPageRange)
        #let's filter the response (not super efficient but easy to code...
        ldPages = resp["pageList"]["pages"]
        ldPagesInRange = [ dPage for dPage in ldPages if dPage["pageNr"] in oPageRange]
        resp["pageList"]["pages"] = ldPagesInRange

    print (json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))

    traceln()
    traceln("- Done")
118 |
119 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getJobStatus.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Get the status of a job
6 |
7 | JL Meunier - Dev 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_deleteJob.py
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
51 | import json
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """Get the status of a Transkribus job.
""" + _Trnskrbs_description

# The job is designated by its numeric identifier, which is the first
# positional argument of the command.
usage = """%s <jobid>
"""%sys.argv[0]
59 |
class DoDeleteJob(TranskribusClient):
    """
    Transkribus client used by the do_getJobStatus command.

    NOTE: despite its name, the main script of this file uses this class only
    to call getJobStatus. The name is kept so that existing references keep working.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
67 |
68 |
69 |
if __name__ == '__main__':
    version = "v.01"

    # --- definition of the command line
    # the basic options are "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    cmdParser = OptionParser(usage=usage, version=version)
    cmdParser.description = description
    __Trnskrbs_basic_options(cmdParser, DoDeleteJob.sDefaultServerUrl)

    # --- parsing of the command line
    (opts, lArg) = cmdParser.parse_args()
    if opts.https_proxy:
        dProxy = {'https_proxy':opts.https_proxy}
    else:
        dProxy = {}

    # --- the identifier of the job is the first positional argument
    try:
        iJobId = int(lArg[0])
    except Exception as e:
        _exit(usage, 1, e)

    # --- create the client and log in
    client = DoDeleteJob(opts.server, dProxy, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(client, opts, trace=trace, traceln=traceln)

    # --- fetch the job status and show it as indented JSON, keys sorted
    try:
        dStatus = client.getJobStatus(iJobId)
    except Exception as e:
        _exit("", 1, e)
    sJson = json.dumps(dStatus, sort_keys=True, indent=4, separators=(',', ': '))
    traceln( sJson )
102 |
103 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getJobs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Get the list of jobs
6 |
7 | Hervé Déjean - April 2017
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_deleteJob.py
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
51 | import json
52 |
DEBUG = 0


# Text shown by the option parser (--help) for this command, which lists
# the jobs (it calls getJobs) rather than reporting on a single job.
description = """Get the list of Transkribus jobs.
""" + _Trnskrbs_description

# This command takes no positional argument.
usage = """%s
"""%sys.argv[0]
61 |
class DoGetJobs(TranskribusClient):
    """
    Transkribus client used by the do_getJobs command.

    It adds nothing to TranskribusClient but a default server URL.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
69 |
70 |
71 |
if __name__ == '__main__':
    version = "v.01"

    # --- definition of the command line
    # the basic options are "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    cmdParser = OptionParser(usage=usage, version=version)
    cmdParser.description = description
    __Trnskrbs_basic_options(cmdParser, DoGetJobs.sDefaultServerUrl)

    # --- parsing of the command line (positional arguments are not used)
    (opts, lArg) = cmdParser.parse_args()
    if opts.https_proxy:
        dProxy = {'https_proxy':opts.https_proxy}
    else:
        dProxy = {}

    # --- create the client and log in
    client = DoGetJobs(opts.server, dProxy, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(client, opts, trace=trace, traceln=traceln)

    # --- fetch the jobs and show them as indented JSON, keys sorted
    try:
        jobs = client.getJobs()
    except Exception as e:
        _exit("", 1, e)
    sJson = json.dumps(jobs, sort_keys=True, indent=4, separators=(',', ': '))
    traceln( sJson )
104 |
105 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_getRnnTrainingJobStatus.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Get the status of a job
6 |
7 | JL Meunier - Dev 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 |
35 | # TranskribusCommands/do_deleteJob.py
36 |
37 |
38 | #optional: useful if you want to choose the logging level to something else than logging.WARN
39 | import sys, os, logging
40 | from optparse import OptionParser
41 |
42 | try: #to ease the use without proper Python installation
43 | import TranskribusPyClient_version
44 | except ImportError:
45 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
46 | import TranskribusPyClient_version
47 |
48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
49 | from TranskribusPyClient.client import TranskribusClient
50 | from TranskribusPyClient.common.trace import traceln, trace
51 |
52 | import json
DEBUG = 0

# Text shown by the option parser (--help) for this command.
description = """Get the status of a Transkribus job.
""" + _Trnskrbs_description

# The job is designated by its numeric identifier, which is the first
# positional argument of the command.
usage = """%s <jobid>
"""%sys.argv[0]
60 |
class DoDeleteJob(TranskribusClient):
    """
    Transkribus client used by the do_getRnnTrainingJobStatus command.

    NOTE: despite its name, the main script of this file uses this class only
    to call getJobStatus. The name is kept so that existing references keep working.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl is the URL of the Transkribus server. A false value
                        (None or "") falls back on sDefaultServerUrl.
        sHttpProxy      is passed as the 'proxies' argument of TranskribusClient
        loggingLevel    is passed as-is to TranskribusClient
        """
        # honour the server requested by the caller (e.g. the --server option)
        # instead of unconditionally connecting to the default one
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
68 |
69 |
70 |
if __name__ == '__main__':
    version = "v.01"

    # --- definition of the command line
    # the basic options are "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    cmdParser = OptionParser(usage=usage, version=version)
    cmdParser.description = description
    __Trnskrbs_basic_options(cmdParser, DoDeleteJob.sDefaultServerUrl)

    # --- parsing of the command line
    (opts, lArg) = cmdParser.parse_args()
    if opts.https_proxy:
        dProxy = {'https_proxy':opts.https_proxy}
    else:
        dProxy = {}

    # --- the identifier of the job is the first positional argument
    try:
        iJobId = int(lArg[0])
    except Exception as e:
        _exit(usage, 1, e)

    # --- create the client and log in
    client = DoDeleteJob(opts.server, dProxy, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(client, opts, trace=trace, traceln=traceln)

    # --- fetch the job status
    try:
        dStatus = client.getJobStatus(iJobId)
    except Exception as e:
        _exit("", 1, e)

    # only the 'description' entry of the status is shown, as indented JSON
    sJson = json.dumps(dStatus['description'], sort_keys=True, indent=4, separators=(',', ': '))
    traceln( sJson )
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrHmm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | JL Meunier - Dec 2016
7 |
8 |
9 | Copyright Xerox(C) 2016 JL. Meunier
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 | """
30 | from __future__ import absolute_import
31 | from __future__ import print_function
32 | from __future__ import unicode_literals
33 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
34 |
35 |
36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
37 | import sys, os, logging
38 | from optparse import OptionParser
39 | # import json
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """Apply an HTR model.

The syntax for specifying the page range is:
- one or several specifiers separated by a comma
- one specifier is a page number, or a range of page number, e.g. 3-8
- Examples: 1   1,3,5   1-3   1,3,5-99,100

""" + _Trnskrbs_description

# Positional arguments, in the order in which the __main__ section pops them:
# the model name, the collection id, the document id and, optionally, the pages.
usage = """%s <model-name> <colId> <docId> [<page-ranges>]
"""%sys.argv[0]
64 |
class DoHtr(TranskribusClient):
    """
    Transkribus client used by this command to launch an HTR decoding job.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, sModelName, colId, docId, sPages):
        """
        Call self.rehtrDecode (not defined in this class, expected from the
        parent client) and return its result. The __main__ section prints that
        result as the job id.
        """
        return self.rehtrDecode(colId, sModelName, docId, sPages)
74 |
if __name__ == '__main__':
    version = "v.01"

    # Command line parser, with the options common to all Transkribus commands:
    # "-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoHtr.sDefaultServerUrl)

    (options, args) = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy':options.https_proxy}
    else:
        proxies = {}

    # Create the client and log in
    doer = DoHtr(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # Three mandatory positional arguments...
    try:
        sModelName = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    try:
        colId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)
    try:
        docId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)
    # ... followed by an optional page specification
    sPages = args.pop(0) if args else None
    if args:
        _exit(usage, 2, Exception("Extra arguments to the command"))

    # Launch the job and show what the server returned
    jobid = doer.run(sModelName, colId, docId, sPages)
    traceln(jobid)

    traceln()
    traceln("- Done")
111 |
112 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrRnn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | JL Meunier - Dec 2016
7 |
8 |
9 | Copyright Xerox(C) 2016 JL. Meunier
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 | """
30 | from __future__ import absolute_import
31 | from __future__ import print_function
32 | from __future__ import unicode_literals
33 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
34 |
35 |
36 | #optional: useful if you want to choose the logging level to something else than logging.WARN
37 | import sys, os, logging
38 | from optparse import OptionParser
39 | from io import open
40 |
41 | import json
42 |
43 | try: #to ease the use without proper Python installation
44 | import TranskribusPyClient_version
45 | except ImportError:
46 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
47 | import TranskribusPyClient_version
48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
49 | from TranskribusPyClient.client import TranskribusClient
50 |
51 | from TranskribusCommands.do_transcript import DoTranscript
52 |
53 | from TranskribusPyClient.common.IntegerRange import IntegerRange
54 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
55 |
56 | from TranskribusPyClient.common.trace import traceln, trace
57 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """Apply an HTR RNN model.

The syntax for specifying the page range is:
- one or several specifiers separated by a comma
- one specifier is a page number, or a range of page number, e.g. 3-8
- Examples: 1   1,3,5   1-3   1,3,5-99,100
""" + _Trnskrbs_description

# The __main__ section pops two positional arguments (model id, then
# collection id); the document comes from --trp or from --docid.
usage = """%s <model-id> <colId> (--trp TRP_FILE | --docid DOCID[/PAGE_RANGE])
"""%sys.argv[0]
70 |
class DoHtrRnn(TranskribusClient):
    """
    Transkribus client used by this command to launch an HTR RNN decoding job.

    10/16/2017: at region level
    {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}

    Our client sends it like this:

    3 > POST
    https://transkribus.eu/TrpServerTesting/rest/recognition/2/241/htrCITlab?id=2278
    3 > Accept: text/plain
    ...
    3 > Content-Type: application/json
    3 > Cookie: $Version=1;JSESSIONID=....
    {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

        # Helper used by buildDescription to get the pages of a document;
        # it is given the same server and proxy as this client.
        self._trpMng = DoTranscript(sServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, sModelID, colId, docId, sDescPages,bPyLaia):
        """
        Call self.htrRnnDecode (not defined in this class, expected from the
        parent client) and return its result. The __main__ section prints that
        result as the job id.
        """
        return self.htrRnnDecode(colId, sModelID, docId, sDescPages,bPyLaia)

    def buildDescription(self,colId,docpage,trp=None):
        """
        Build the JSON description of the pages to be processed, e.g.
        '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'

        colId  : collection id, used only when trp is None
        docpage: "DOCID" or "DOCID/PAGE_RANGE", used only when trp is None
        trp    : already loaded TRP data (as read from a TRP JSON file) or None

        Return the tuple (docId, JSON string).
        When pages are found, docId is the 'docId' value of the last page.
        """
        jsonDesc = {}

        if trp is None:
            # Anything that does not split into exactly two parts is taken as
            # a document id without page range.
            try:
                docId, pageRange = docpage.split('/')
            except ValueError:
                docId = docpage
                pageRange = ""
            jsonDesc["docId"] = docId
            oPageRange = IntegerRange(pageRange)
            trpObj = self._trpMng.filter(colId, docId, page_filter=oPageRange, bLast=True)
        else:
            trpObj = TRP_FullDoc(trp)

        lPages = []
        jsonDesc["pageList"] = {'pages': lPages}
        for page in trpObj.getPageList():
            jsonDesc["docId"] = page['docId']
            # the first transcript listed for the page is the one referenced
            lPages.append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":[]})

        return jsonDesc["docId"], json.dumps(jsonDesc)
124 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoHtrRnn.sDefaultServerUrl)

    # NOTE(review): --region defaults to the server URL and --tempdict is parsed
    # too, but neither value is read anywhere below.
    parser.add_option("-r", "--region"  , dest='region', action="store", type="string", default=DoHtrRnn.sDefaultServerUrl, help="apply HTR at region level")
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid"  , dest='docid'   , action="store", type="string", default=None, help="document/pages to be htr'd")
    parser.add_option("--tempdict"  , dest='dictTemp'   , action="store_true", default=False, help="use tempDict folder")
    # NOTE(review): store_true combined with default=True: this flag is always on.
    parser.add_option("--pylaia"  , dest='bPylaia'   , action="store_true", default=True, help="use PyLaia model")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # the helper client reuses the session opened by the login above
    doer._trpMng.setSessionId(doer._sessionID)

    # ---
    # two mandatory positional arguments: the model id and the collection id
    try:
        sModelID = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    try:
        colId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)

    if args: _exit(usage, 2, Exception("Extra arguments to the command"))

    # Without any of the two options, buildDescription used to fail on
    # None.split(); report a usage error instead.
    if not options.trp_doc and not options.docid:
        _exit(usage, 1, Exception("Specify either --trp TRP_FILE or --docid DOCID"))

    if options.trp_doc:
        # the 'with' statement closes the TRP file once it is loaded
        with open(options.trp_doc, "r",encoding='utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,options.docid,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,options.docid)

    # do the job...
    jobid = doer.run(sModelID, colId, docId, sPageDesc,options.bPylaia)
    traceln(jobid)

    traceln()
    traceln("- Done")
179 |
180 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_htrRnnPerRegion.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | Hervé Déjean
7 |
8 | Copyright NLE(C) 2017
9 |
10 | This program is free software: you can redistribute it and/or modify
11 | it under the terms of the GNU General Public License as published by
12 | the Free Software Foundation, either version 3 of the License, or
13 | (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program. If not, see <http://www.gnu.org/licenses/>.
22 |
23 |
24 | Developed for the EU project READ. The READ project has received funding
25 | from the European Union’s Horizon 2020 research and innovation programme
26 | under grant agreement No 674943.
27 |
28 | """
29 |
30 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
31 |
32 |
33 | #optional: useful if you want to choose the logging level to something else than logging.WARN
34 | import sys, os, logging
35 | from optparse import OptionParser
36 | from io import open
37 | import json
38 |
39 | try: #to ease the use without proper Python installation
40 | import TranskribusPyClient_version
41 | except ImportError:
42 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
43 | import TranskribusPyClient_version
44 |
45 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
46 | from TranskribusPyClient.client import TranskribusClient
47 |
48 | from do_transcript import DoTranscript
49 |
50 | from TranskribusPyClient.common.IntegerRange import IntegerRange
51 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
52 |
53 | from TranskribusPyClient.common.trace import traceln, trace
54 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """Apply an HTR RNN model for a given table column with a specific dictionary.

""" + _Trnskrbs_description

# The __main__ section pops three positional arguments (model id, dictionary
# name, collection id). Only options that the parser really defines are listed.
usage = """%s <model-id> <dict-name> <colId> [--trp TRP_FILE] [--docid DOCID[/PAGE_RANGE]] [--colnum COLNUM] [--tempdict]
"""%sys.argv[0]
63 |
class DoHtrRnnPerColumn(TranskribusClient):
    """
    Transkribus client used by this command to launch an HTR RNN decoding job
    with a dictionary.

    10/16/2017: at region level
    {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}

    Our client sends it like this:

    3 > POST
    https://transkribus.eu/TrpServerTesting/rest/recognition/2/241/htrCITlab?id=2278
    3 > Accept: text/plain
    ...
    3 > Content-Type: application/json
    3 > Cookie: $Version=1;JSESSIONID=....
    {"docId":2278,"pageList":{"pages":[{"pageId":10070,"tsId":25143,"regionIds":["r2","r1"]}]}}
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

        # Helper used by buildDescription to get the pages of a document;
        # it is given the same server and proxy as this client.
        self._trpMng = DoTranscript(sServerUrl, sHttpProxy=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, sModelID, sDictName, colId, docId,sDescPages,bDictTemp):
        """
        Call self.htrRnnDecode (not defined in this class, expected from the
        parent client) and return its result. The __main__ section prints that
        result as the job id.
        """
        # NOTE(review): six arguments are passed here, including a dictionary
        # name - confirm that this matches the htrRnnDecode signature of the client.
        return self.htrRnnDecode(colId, sModelID, sDictName, docId, sDescPages,bDictTemp)

    def buildDescription(self,colId,docpage,colnum,trp=None):
        """
        Build the JSON description of the pages to be processed, e.g.
        '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'

        colId  : collection id, used only when trp is None
        docpage: "DOCID" or "DOCID/PAGE_RANGE", used only when trp is None
        colnum : accepted but not used: "regionIds" is always an empty list
        trp    : already loaded TRP data (as read from a TRP JSON file) or None

        Return the tuple (docId, JSON string).
        When pages are found, docId is the 'docId' value of the last page.
        """
        jsonDesc = {}

        if trp is None:
            # Anything that does not split into exactly two parts is taken as
            # a document id without page range.
            try:
                docId, pageRange = docpage.split('/')
            except ValueError:
                docId = docpage
                pageRange = ""
            jsonDesc["docId"] = docId
            oPageRange = IntegerRange(pageRange)
            trpObj = self._trpMng.filter(colId, docId, page_filter=oPageRange, bLast=True)
        else:
            trpObj = TRP_FullDoc(trp)

        lPages = []
        jsonDesc["pageList"] = {'pages': lPages}
        for page in trpObj.getPageList():
            ## need to upload the page!!!!
            regionsIDs = []
            jsonDesc["docId"] = page['docId']
            # the first transcript listed for the page is the one referenced
            lPages.append({"pageId":page['pageId'],"tsId":page['tsList']['transcripts'][0]['tsId'],"regionIds":regionsIDs})

        return jsonDesc["docId"], json.dumps(jsonDesc)
123 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoHtrRnnPerColumn.sDefaultServerUrl)

    # NOTE(review): --region defaults to the server URL and its value is never read below.
    parser.add_option("-r", "--region"  , dest='region', action="store", type="string", default=DoHtrRnnPerColumn.sDefaultServerUrl, help="apply HTR at region level")
    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--docid"  , dest='docid'   , action="store", type="string", default=None, help="document/pages to be htr'd")
    parser.add_option("--colnum"  , dest='colnum'   , action="store", type="string", default=None, help="column to be htr'd")
    parser.add_option("--tempdict"  , dest='dictTemp'   , action="store_true", default=False, help="use tempDict folder")
    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoHtrRnnPerColumn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # the helper client reuses the session opened by the login above
    doer._trpMng.setSessionId(doer._sessionID)

    # ---
    # three mandatory positional arguments: model id, dictionary name, collection id
    try:
        sModelID = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    try:
        sDictName = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    try:
        colId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)

    if args: _exit(usage, 2, Exception("Extra arguments to the command"))

    # Without any of the two options, buildDescription used to fail on
    # None.split(); report a usage error instead.
    if not options.trp_doc and not options.docid:
        _exit(usage, 1, Exception("Specify either --trp TRP_FILE or --docid DOCID"))

    if options.trp_doc:
        # Text mode is required: open() rejects an encoding in binary mode
        # ("rb" + encoding raises ValueError). The 'with' closes the file.
        with open(options.trp_doc, "r",encoding='utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,options.docid,options.colnum,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,options.docid,options.colnum)

    # do the job...
    jobid = doer.run(sModelID, sDictName, colId, docId,sPageDesc,options.dictTemp)
    traceln(jobid)

    traceln()
    traceln("- Done")
176 |
177 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listCollec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | List the content of a collection
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 |
35 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
36 |
37 | import json
38 |
39 | #optional: useful if you want to choose the logging level to something else than logging.WARN
40 | import sys, os, logging
41 | from optparse import OptionParser
42 |
43 | try: #to ease the use without proper Python installation
44 | import TranskribusPyClient_version
45 | except ImportError:
46 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
47 | import TranskribusPyClient_version
48 |
49 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
50 | from TranskribusPyClient.client import TranskribusClient
51 | from TranskribusPyClient.common.trace import traceln, trace
52 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """List the content of one or several Transkribus collection.
""" + _Trnskrbs_description

# Every positional argument is converted to an integer collection id
# by the __main__ section.
usage = """%s <colId>+
"""%sys.argv[0]
60 |
class DoListCollec(TranskribusClient):
    """
    List the content of a collection
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, colId, options):
        """
        Print the documents of the collection colId on the standard output.

        options.trp : if set, name of a file where the data is also stored as JSON
        options.bRaw: if true, print one document id per line, otherwise print
                      the collection name and id, then one line per document

        Return nothing.

        Sample of the data as returned by listDocsByCollectionId:

        [{u'collectionList': {u'colList': [{u'colId': 3571,
                                            u'colName': u'READDU',
                                            u'description': u'created by herve.dejean@xrce.xerox.com'}]},
          u'createdFromTimestamp': 33175290,
          u'createdToTimestamp': 33175290,
          u'docId': 7749,
          u'fimgStoreColl': u'TrpDoc_DEA_7749',
          u'nrOfPages': 10,
          u'scriptType': u'HANDWRITTEN',
          u'status': 0,
          u'title': u'MM_1_001',
          u'uploadTimestamp': 1478161395893L,
          u'uploader': u'herve.dejean@xrce.xerox.com',
          u'uploaderId': 275},
         {u'collectionList': {u'colList': [{u'colId': 3571,
                                            u'colName': u'READDU',
                                            u'description': u'created by herve.dejean@xrce.xerox.com'}]},
          u'createdFromTimestamp': 0,
          u'createdToTimestamp': 0,
          u'docId': 7750,
          u'fimgStoreColl': u'TrpDoc_DEA_7750',
          u'nrOfPages': 10,
          u'scriptType': u'HANDWRITTEN',
          u'status': 0,
          u'title': u'MM_1_005',
          u'uploadTimestamp': 1478161451242L,
          u'uploader': u'herve.dejean@xrce.xerox.com',
          u'uploaderId': 275}]
        """
        data = self.listDocsByCollectionId(colId)

        if options.trp:
            with open(options.trp, "wt",) as fd:
                json.dump(data, fd, indent=2)

        if options.bRaw:
            # plain iteration: no need to consume the list with pop(0)
            for dic in data or ():
                print (dic[u'docId'])
        elif data:
            # the collection is described by the last entry of the 'colList'
            # of the first document
            _d = data[0][u'collectionList'][u'colList'][-1]
            print( "Collection: %s  (%s)"%(_d[u'colName'], _d[u'colId']))

            for dic in data:
                print (">> (%s) #p=%d '%s' by %s (status=%s)" % (dic[u'docId'], dic[u'nrOfPages'], dic[u'title'], dic[u'uploader'], dic[u'status']))
        else:
            print (">> Collection is empty!")
121 |
122 |
123 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"    OPTIONS
    __Trnskrbs_basic_options(parser, DoListCollec.sDefaultServerUrl)

    parser.add_option("--raw", dest='bRaw', action="store_true", default=False, help="Raw output, one docid per line")
    parser.add_option("--trp"  , dest='trp'  , action="store", type="string", default=None, help="Store the TRP data reflecting the documents in the given file.")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    #source collection(s): every positional argument must be an integer
    try:
        lColId = [ int(arg) for arg in args ]
    except Exception as e:
        _exit(usage, 1, e)

    # ---
    doer = DoListCollec(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    # ---
    # do the job...
    for colId in lColId:
        # One single call per collection, inside the try. An additional
        # unprotected call used to precede it, so that each collection was
        # listed twice and its errors were never reported by the handler below.
        try:
            doer.run(colId, options)
        except Exception as e:
            traceln()
            traceln("ERROR: could not list collection '%d' "%colId)
            _exit("", 1, e)
    if not options.bRaw:
        traceln()
        traceln("- Done for %d collection(s)"%len(lColId))
165 |
166 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listHtrHmm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | List the HTR Models
6 |
7 | JL Meunier - Dec 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 | # import json
41 |
42 | try: #to ease the use without proper Python installation
43 | import TranskribusPyClient_version
44 | except ImportError:
45 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
46 | import TranskribusPyClient_version
47 |
48 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, strTabularFormat
49 | from TranskribusPyClient.client import TranskribusClient
50 | from TranskribusPyClient.common.trace import traceln, trace
51 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """List HTR models available in Transkribus.
""" + _Trnskrbs_description

# Usage line: the script name only, the __main__ section reads no positional argument.
usage = """%s
"""%sys.argv[0]
59 |
class DoListHtrModels(TranskribusClient):
    """
    Transkribus client used by this command to list the HTR models.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self):
        """
        Get the models from self.listHmmHtrModels, trace them as a table
        (strTabularFormat is given "modelName" as last argument, after the list
        of columns) and return the data as received.
        """
        lDic = self.listHmmHtrModels()
        lsColumn = ["modelName", "modelId", "isUsableInTranskribus", "nrOfTokens", "nrOfDictTokens", "nrOfLines"]
        traceln( strTabularFormat(lDic, lsColumn, "modelName") )
        return lDic
71 |
if __name__ == '__main__':
    version = "v.01"

    # Command line parser, with the options common to all Transkribus commands:
    # "-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoListHtrModels.sDefaultServerUrl)

    # Positional arguments, if any, are parsed but not used by this command
    (options, args) = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy':options.https_proxy}
    else:
        proxies = {}

    # Create the client and log in
    doer = DoListHtrModels(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # List the models
    doer.run()

    traceln()
    traceln("- Done")
97 |
98 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listHtrRnn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | List the HTR RNN Models and Dictionaries
6 |
7 | JL Meunier - Dec 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 |
35 | # TranskribusCommands/do_copyDocToCollec.py 3571 3820 8251 8252
36 |
37 |
38 | #optional: useful if you want to choose the logging level to something else than logging.WARN
39 | import sys, os, logging
40 | from optparse import OptionParser
41 | # import json
42 |
43 | try: #to ease the use without proper Python installation
44 | import TranskribusPyClient_version
45 | except ImportError:
46 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
47 | import TranskribusPyClient_version
48 |
49 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit, strTabularFormat
50 | from TranskribusPyClient.client import TranskribusClient
51 | from TranskribusPyClient.common.trace import traceln, trace
52 |
# Debug switch; it is not referenced anywhere else in this script.
DEBUG = 0

# Help text displayed by optparse; the description shared by all the
# Transkribus commands is appended to it.
description = """List HTR RNN models and dictionaries available in Transkribus.
""" + _Trnskrbs_description

# Usage line: the script name only, the __main__ section reads no positional
# argument (--colid and --dict are options).
usage = """%s
"""%sys.argv[0]
60 |
class DoListHtrRnn(TranskribusClient):
    """
    Transkribus client used by this command to list the HTR RNN models and
    the dictionaries.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        trnkbsServerUrl: URL of the Transkribus server. When it is empty or None,
                         the class default (sDefaultServerUrl) is used.
        sHttpProxy     : passed to TranskribusClient as its 'proxies' argument
        loggingLevel   : passed to TranskribusClient
        """
        # The URL received from the caller was previously ignored, so the value
        # of the --server option given to this script never reached the client.
        sServerUrl = trnkbsServerUrl or self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self,colid=None,bListDict=False):
        """
        Trace the available models and, if bListDict is true, the dictionaries.

        colid    : if not None, trace the models returned by self.listRnns(colid),
                   otherwise trace the text returned by self.listRnnsText()
        bListDict: if true, also trace the text returned by self.listDictsText()

        Return the tuple (sModels, sColModels, sDicts); each value that was not
        requested is None.
        """
        sModels = None
        sColModels = None
        sDicts = None

        if colid is not None:
            sColModels = self.listRnns(colid)
            for models in sColModels:
                #some old? models do not have all the fields: fall back on a short line
                try:
                    traceln("%s\t%s\t%s\ndescription:%s" % (models['htrId'],models['name'].strip(),models['provider'].strip(),models['description'].strip()))
                except KeyError:
                    traceln("%s\t%s\tno params" % (models['htrId'],models['name']))
                traceln()
        else:
            sModels = self.listRnnsText()
            traceln("\n--- Models ---------------------------")
            traceln(sModels)

        if bListDict:
            sDicts = self.listDictsText()
            traceln("\n--- Dictionaries ---------------------")
            traceln(sDicts)

        return sModels, sColModels, sDicts
94 |
if __name__ == '__main__':
    version = "v.01"

    # Command line parser: the two options of this command come first...
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    parser.add_option("--colid", dest='colid', type='string', default=None, help = 'get models linked to the colid')
    parser.add_option("--dict", dest='dict', action='store_true', default=False, help = 'get dictionaries')
    # ... then the options common to all Transkribus commands:
    # "-s", "--server",  "-l", "--login" ,   "-p", "--pwd",   "--https_proxy"
    __Trnskrbs_basic_options(parser, DoListHtrRnn.sDefaultServerUrl)

    # Positional arguments, if any, are parsed but not used by this command
    (options, args) = parser.parse_args()
    if options.https_proxy:
        proxies = {'https_proxy':options.https_proxy}
    else:
        proxies = {}

    # Create the client and log in
    doer = DoListHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # List the models, and the dictionaries if requested
    doer.run(options.colid,options.dict)

    traceln()
    traceln("- Done")
121 |
122 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_listPageLocks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | list the locks for a colid/docid/page
6 |
7 | H. Déjean - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | # TranskribusCommands/do_ListPageLocks.py
35 |
36 |
37 | #optional: useful if you want to choose the logging level to something else than logging.WARN
38 | import sys, os, logging
39 | from optparse import OptionParser
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 | from TranskribusPyClient.common.trace import traceln, trace
50 |
# Debug flag; not read anywhere in the visible part of this script.
DEBUG = 0

# Description attached to the option parser: the command-specific sentence
# followed by the description text shared by the Transkribus commands.
description = """list the locked pages.
""" + _Trnskrbs_description

# Usage text given to the option parser and to _exit() when the positional
# arguments cannot be parsed; it only shows the script name.
usage = """%s
"""%sys.argv[0]
58 |
class listPageLocks(TranskribusClient):
    """
    Transkribus client used by this command to list the locked pages.
    """

    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: URL of the Transkribus server to talk to; when empty
                         or None, the class default sDefaultServerUrl is used
        sHttpProxy:      proxy settings, forwarded as the 'proxies' argument
        loggingLevel:    logging level forwarded to TranskribusClient
        """
        # The URL given by the caller used to be ignored (the default URL was
        # always used), which made the --server command line option a no-op.
        sServerUrl = trnkbsServerUrl if trnkbsServerUrl else self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
66 |
67 |
68 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, listPageLocks.sDefaultServerUrl)

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    # positional arguments: <colid> <docid> <page>
    # (the three values used to be read from args[0], so the document id and
    #  the page number were always equal to the collection id)
    try:
        colid = int(args[0])
        docid = int(args[1])
        page  = int(args[2])
    except Exception as e:
        # missing or non-integer argument
        _exit(usage, 1, e)

    # ---
    doer = listPageLocks(options.server, proxies, loggingLevel=logging.INFO)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # ---
    # do the job...
    try:
        resp = doer.getListofLockedPages(colid, docid, page)
    except Exception as e:
        _exit("", 1, e)
    traceln(resp)
    traceln("- Done")
110 |
111 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_login.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to login into Transkribus and store the sessionId in a secure way for next commands
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
35 | import sys, os, logging
36 | from optparse import OptionParser
37 |
38 | try: #to ease the use without proper Python installation
39 | import TranskribusPyClient_version
40 | except ImportError:
41 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
42 | import TranskribusPyClient_version
43 |
44 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, _exit
45 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
46 | from TranskribusPyClient.common.trace import traceln, trace
47 |
# Debug flag; not read anywhere in the visible part of this script.
DEBUG = 0

# Description attached to the option parser: the command-specific sentence
# followed by the description text shared by the Transkribus commands.
description = """Login into Transkribus to avoid the need for login in next commands (until the session expires).
""" + _Trnskrbs_description

# Usage text given to the option parser; it only shows the script name.
usage = """%s"""%sys.argv[0]
54 |
class DoLogin(TranskribusClient):
    """
    Transkribus client used by this command to log in, optionally making
    the session persistent for the next commands.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: URL of the Transkribus server to talk to; when empty
                         or None, the class default sDefaultServerUrl is used
        sHttpProxy:      proxy settings, forwarded as the 'proxies' argument
        loggingLevel:    logging level forwarded to TranskribusClient
        """
        # The URL given by the caller used to be ignored (the default URL was
        # always used), which made the --server command line option a no-op.
        sServerUrl = trnkbsServerUrl if trnkbsServerUrl else self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
64 |
65 |
if __name__ == '__main__':
    version = "v.01"

    # --- command line definition: only the standard options
    #     "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoLogin.sDefaultServerUrl)

    # --- command line parsing
    options, args = parser.parse_args()

    # --- proxy
    dProxy = {}
    if options.https_proxy:
        dProxy['https_proxy'] = options.https_proxy

    # --- credentials: from the command line, else from the stored ones
    if not options.login:
        trace("- no login provided, looking for stored credentials... ")
        login, pwd = getStoredCredentials(bAsk=False)
        traceln("OK")
    else:
        login, pwd = options.login, options.pwd

    # ------------------------------------------------------------------------------------------------

    oCmd = DoLogin(options.server, dProxy, loggingLevel=logging.INFO)

    try:
        if not options.persist:
            # simple check of the credentials
            trace("- Checking Transkribus login as %s "%login)
            resp = oCmd.auth_login(login, pwd, bPersist=options.persist)
            traceln(" OK!")
        else:
            # replace any previous persistent session by a fresh one
            traceln("- Logging onto Transkribus as %s and making a persistent session"%login)
            oCmd.cleanPersistentSession()
            resp = oCmd.auth_login(login, pwd, bPersist=options.persist)
            traceln("\t --> %s"%os.path.join(DoLogin._sSESSION_FOLDER, DoLogin._sSESSION_FILENAME))
    except Exception as e:
        _exit("", 1, e)

    traceln("- Done")
107 |
108 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_logout.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 | Utility to remove any persistent session from the disk
6 |
7 | JL Meunier - Nov 2016
8 |
9 |
10 | Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
11 |
12 | This program is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 | This program is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 |
22 | You should have received a copy of the GNU General Public License
23 | along with this program. If not, see <http://www.gnu.org/licenses/>.
24 |
25 |
26 | Developed for the EU project READ. The READ project has received funding
27 | from the European Union’s Horizon 2020 research and innovation programme
28 | under grant agreement No 674943.
29 |
30 | """
31 | from __future__ import absolute_import
32 | from __future__ import print_function
33 | from __future__ import unicode_literals
34 |
35 | #optional: useful if you want to choose the logging level to something else than logging.WARN
36 | import sys, os, logging
37 | from optparse import OptionParser
38 |
39 | try: #to ease the use without proper Python installation
40 | import TranskribusPyClient_version
41 | except ImportError:
42 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
43 | import TranskribusPyClient_version
44 |
45 | from TranskribusPyClient.common.trace import traceln, trace
46 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
47 | from TranskribusPyClient.client import TranskribusClient
48 |
# Debug flag; not read anywhere in the visible part of this script.
DEBUG = 0

# Description attached to the option parser: the command-specific sentence
# followed by the description text shared by the Transkribus commands.
description = """Remove any persistent session from disk.
""" + _Trnskrbs_description

# Usage text given to the option parser; it only shows the script name.
usage = """%s"""%sys.argv[0]
55 |
class DoLogout(TranskribusClient):
    """
    Transkribus client used by this command to log out and clean any
    persistent session.
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: URL of the Transkribus server to talk to; when empty
                         or None, the class default sDefaultServerUrl is used
        sHttpProxy:      proxy settings, forwarded as the 'proxies' argument
        loggingLevel:    logging level forwarded to TranskribusClient
        """
        # The URL given by the caller used to be ignored (the module default
        # was always used), which made the --server command line option a no-op.
        sServerUrl = trnkbsServerUrl if trnkbsServerUrl else self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)
65 |
66 |
if __name__ == '__main__':
    version = "v.01"

    # --- command line definition: only the standard options
    #     "-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy"
    parser = OptionParser(usage=usage, version=version)
    parser.description = description
    __Trnskrbs_basic_options(parser, DoLogout.sDefaultServerUrl)

    # --- command line parsing
    options, args = parser.parse_args()
    dProxy = {}
    if options.https_proxy:
        dProxy['https_proxy'] = options.https_proxy

    # ------------------------------------------------------------------------------------------------
    oCmd = DoLogout(options.server, dProxy, loggingLevel=logging.INFO)

    # NOTE(review): the bare except also swallows SystemExit and
    # KeyboardInterrupt - presumably so that a failing login never prevents
    # the cleanup below; confirm before narrowing it.
    try:
        __Trnskrbs_do_login_stuff(oCmd, options, trace=trace, traceln=traceln)
    except:
        pass

    # best effort: a failure of the logout itself is ignored
    try:
        traceln('- cleaning any persistent session.')
        oCmd.auth_logout()
    except Exception:
        pass

    traceln("- Done" )
95 |
96 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_tableTemplate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | Hervé Déjean - Jan 2017
7 |
8 |
9 | Copyright Xerox(C) 2016
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 |
30 | see https://transkribus.eu/wiki/index.php/HTR
31 | """
32 | from __future__ import absolute_import
33 | from __future__ import print_function
34 | from __future__ import unicode_literals
35 |
36 | # TranskribusCommands/do_htrTrainRnn model-name colId docid pages
37 |
38 |
39 | #optional: useful if you want to choose the logging level to something else than logging.WARN
40 | import sys, os, logging
41 | from optparse import OptionParser
42 | import json
43 | from lxml import etree
44 |
45 | try: #to ease the use without proper Python installation
46 | import TranskribusPyClient_version
47 | except ImportError:
48 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
49 | import TranskribusPyClient_version
50 |
51 | from TranskribusCommands import __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
52 |
53 | from do_analyzeLayout import DoLAbatch
54 | from TranskribusPyClient.common.IntegerRange import IntegerRange
55 | from TranskribusPyClient.TRP_FullDoc import TRP_FullDoc
56 | from TranskribusPyClient.common.trace import traceln, trace
57 |
58 |
# Debug flag; not read anywhere in the visible part of this script.
DEBUG = 0

# Description attached to the option parser: what the command does and the
# syntax of the page range, followed by the description text shared by the
# Transkribus commands.
description = """Apply a table template to a list of pages

The syntax for specifying the page range is:
- one or several specifiers separated by a comma
- one separator is a page number, or a range of page number, e.g. 3-8
- Examples: 1 1,3,5 1-3 1,3,5-99,100
""" + _Trnskrbs_description


# Usage text given to the option parser and to _exit() on argument errors.
# NOTE(review): the main section also pops two positional arguments (a
# collection id, then a "docid/pages" string) that are not shown here.
usage = """%s --templateID <>
"""%sys.argv[0]
72 |
class DoTableTemplate(DoLAbatch):
    """
    Command applying a table template to pages of a document.
    """

    def run(self, templateID, colId, sDescription, sJobImpl):
        """
        Call tableMatching with the given arguments.

        Return the value returned by tableMatching together with the job
        id(s) that getJobIDsFromXMLStatuses extracts from it.
        """
        sStatus = self.tableMatching(templateID, colId, sDescription, sJobImpl)
        return sStatus, self.getJobIDsFromXMLStatuses(sStatus)


    def jsonToXMLDescription(self,jsonDesc):
        """
        Convert the JSON job description (a string, as produced by
        buildDescription) into its serialized XML form, e.g.

            <jobParameters>
              <jobParameters/>
              <docList>
                <docs>
                  <docId>1</docId>
                  <pageList>
                    <pages>
                      <pageId>2</pageId>
                      <tsId>3</tsId>
                    </pages>
                  </pageList>
                </docs>
              </docList>
              <params>
                <entry>
                  <key>templateId</key>
                  <value>1543</value>
                </entry>
              </params>
            </jobParameters>

        Return the pretty-printed XML, utf-8 encoded.
        """
        # examples of input:
        #   '{"docId":17442,"pageList":{"pages":[{"pageId":400008,"tsId":1243509,"regionIds":[]}]}}'
        #   '{"pageList": {"pages": [{"tsId": "1305027", "regionIds": [], "pageId": "478362"}]}, "docId": "18975"}'
        dDesc = json.loads(jsonDesc)

        ndRoot = etree.Element("jobParameters")
        xmlDoc = etree.ElementTree(ndRoot)
        # an empty <jobParameters> child comes first, the document list is its sibling
        etree.SubElement(ndRoot, "jobParameters")

        ndDocs = etree.SubElement(etree.SubElement(ndRoot, "docList"), "docs")
        etree.SubElement(ndDocs, "docId").text = str(dDesc["docId"])

        # one <pages> element per page, holding the page id and the transcript id
        ndPageList = etree.SubElement(ndDocs, "pageList")
        for dPage in dDesc["pageList"]['pages']:
            ndPage = etree.SubElement(ndPageList, "pages")
            etree.SubElement(ndPage, "pageId").text = str(dPage['pageId'])
            etree.SubElement(ndPage, "tsId").text = str(dPage['tsId'])

        # the template id goes in a key/value entry
        ndEntry = etree.SubElement(etree.SubElement(ndRoot, 'params'), 'entry')
        etree.SubElement(ndEntry, 'key').text = 'templateId'
        etree.SubElement(ndEntry, 'value').text = str(dDesc['template'])

        return etree.tostring(xmlDoc, encoding='utf-8',pretty_print=True)

    def buildDescription(self,colId,docpage,templateId,trp=None):
        """
        Build the JSON description of the pages to process.

        colId:      collection id, used to filter the document when trp is None
        docpage:    either "docId" or "docId/pageRange"; read only when trp is None
        templateId: id of the template, stored as a string under 'template'
        trp:        optional loaded TRP document; when given it is wrapped in
                    a TRP_FullDoc instead of filtering the document

        For each page of the resulting page list, the page id and the tsId of
        its first listed transcript are recorded.

        Return the tuple (docId, JSON string); jsonToXMLDescription turns the
        latter into the XML form.
        """
        dDesc = {}

        if trp is not None:
            trpObj = TRP_FullDoc(trp)
        else:
            try:
                docId, pageRange = docpage.split('/')
            except ValueError:
                # no (or more than one) '/': the whole string is the document id
                docId, pageRange = docpage, ""
            dDesc["docId"] = docId
            trpObj = self._trpMng.filter(colId, docId, page_filter=IntegerRange(pageRange), bLast=True)

        dDesc["pageList"] = {'pages': []}
        dDesc['template'] = str(templateId)
        lPage = dDesc["pageList"]['pages']
        for dPage in trpObj.getPageList():
            # the document id recorded in the page takes precedence
            dDesc["docId"] = dPage['docId']
            lPage.append({"pageId"   : dPage['pageId']
                        , "tsId"     : dPage['tsList']['transcripts'][0]['tsId']
                        , "regionIds": []})

        return dDesc["docId"], json.dumps(dDesc)
207 |
if __name__ == '__main__':
    # io.open accepts the 'encoding' argument on both Python 2 and Python 3
    import io

    version = "v.01"
    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, DoTableTemplate.sDefaultServerUrl)

    parser.add_option("--trp"  , dest='trp_doc', action="store", type="string",default=None, help="use trp doc file")
    parser.add_option("--templateID"  , dest='templateID' , action="store", type="string" , help="template id")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoTableTemplate(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)
    doer._trpMng.setSessionId(doer._sessionID)

    # --- positional arguments: <colId> <docid[/pages]>
    try:
        colId = int(args.pop(0))
    except Exception as e:
        _exit(usage, 1, e)
    try:
        docidpages = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)
    if args:
        _exit(usage, 2, Exception("Extra arguments to the command"))

    # ---
    # do the job...
    if options.trp_doc:
        # the TRP file is closed as soon as it is loaded (it used to be left open)
        with io.open(options.trp_doc, "r", encoding='utf-8') as fd:
            trpdoc = json.load(fd)
        docId,sPageDesc = doer.buildDescription(colId,docidpages,options.templateID,trpdoc)
    else:
        docId,sPageDesc = doer.buildDescription(colId,docidpages,options.templateID)

    sPageDesc = doer.jsonToXMLDescription(sPageDesc)
    # example of invocation:
    #     do_tableTemplate.py --temp 6078228 23017 87023/14

    # the job implementation is CvlTableJob
    status, jobid = doer.run(options.templateID,colId, sPageDesc,"CvlTableJob")
    traceln("job ID:",jobid)
    traceln("- Done")
256 |
257 |
--------------------------------------------------------------------------------
/src/TranskribusCommands/do_uploadDictionary.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 |
4 | """
5 |
6 | H Déjean
7 |
8 |
9 | Copyright NLE 2017
10 |
11 | This program is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | This program is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program. If not, see <http://www.gnu.org/licenses/>.
23 |
24 |
25 | Developed for the EU project READ. The READ project has received funding
26 | from the European Union’s Horizon 2020 research and innovation programme
27 | under grant agreement No 674943.
28 |
29 | """
30 | from __future__ import absolute_import
31 | from __future__ import print_function
32 | from __future__ import unicode_literals
33 |
34 | #optional: useful if you want to choose the logging level to something else than logging.WARN
35 | import sys, os, logging
36 | from io import open
37 |
38 | from optparse import OptionParser
39 | # import json
40 |
41 | try: #to ease the use without proper Python installation
42 | import TranskribusPyClient_version
43 | except ImportError:
44 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
45 | import TranskribusPyClient_version
46 |
47 | from TranskribusCommands import _Trnskrbs_default_url, __Trnskrbs_basic_options, _Trnskrbs_description, __Trnskrbs_do_login_stuff, _exit
48 | from TranskribusPyClient.client import TranskribusClient
49 |
50 | from TranskribusPyClient.common.trace import traceln, trace
51 |
# Debug flag; not read anywhere in the visible part of this script.
DEBUG = 0

# Description attached to the option parser: the command-specific sentence
# followed by the description text shared by the Transkribus commands.
description = """upload a private dictionary

""" + _Trnskrbs_description

# Usage text given to the option parser and to _exit() on argument errors.
# NOTE(review): the main section pops one positional argument (the name of the
# dictionary) and reads the files given with -d; the placeholders of this text
# look incomplete - confirm the intended wording.
usage = """%s -d

a single file called will be created by concatenating and will be uploaded in the tempDict user ftp folder
"""%sys.argv[0]
62 |
class DoHtrRnn(TranskribusClient):
    """
    Transkribus client used by this command to upload a temporary dictionary.

    Information received from the Transkribus team about this service:

        temp. dictionaries also can be sent now, see example below.
        The response will contain the dict. filename to be used in the HTR
        request's tempDict parameter. If extension of the given name does not
        match ".dict", this will be appended.
        The POST request's body should contain the dictionary data as UTF-8 String.
        The temp. dictionaries are now bound to the user account and you can
        check the transmission outcome by logging in via FTP to transkribus.eu
        with your credentials. There you will find a dir. called "dictTmp"
        containing the sent files, that will be used for HTR. You can also put
        dictionaries there via FTP and use them for HTR with the tempDict parameter.

    Example of request:

        POST /TrpServerTesting/rest/recognition/tempDict?fileName=test.dict HTTP/1.1
        Host: transkribus.eu
        Content-Type: text/plain
        Cache-Control: no-cache

        er,124
        ...
    """
    sDefaultServerUrl = _Trnskrbs_default_url

    #--- INIT -------------------------------------------------------------------------------------------------------------
    def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
        """
        Initialise the underlying TranskribusClient.

        trnkbsServerUrl: URL of the Transkribus server to talk to; when empty
                         or None, the class default sDefaultServerUrl is used
        sHttpProxy:      proxy settings, forwarded as the 'proxies' argument
        loggingLevel:    logging level forwarded to TranskribusClient
        """
        # The URL given by the caller used to be ignored (the default URL was
        # always used), which made the --server command line option a no-op.
        sServerUrl = trnkbsServerUrl if trnkbsServerUrl else self.sDefaultServerUrl
        TranskribusClient.__init__(self, sServerUrl=sServerUrl, proxies=sHttpProxy, loggingLevel=loggingLevel)

    def run(self, dictName,dictString):
        """
        Upload the dictionary content dictString under the name dictName.

        Return the value returned by uploadDict.
        """
        return self.uploadDict(dictName,dictString)
97 |
if __name__ == '__main__':
    version = "v.01"

    #prepare for the parsing of the command line
    parser = OptionParser(usage=usage, version=version)
    parser.description = description

    #"-s", "--server", "-l", "--login" , "-p", "--pwd", "--https_proxy" OPTIONS
    __Trnskrbs_basic_options(parser, DoHtrRnn.sDefaultServerUrl)

    parser.add_option("-d", "--dict"  , dest='ldict', action="append", type="string", help="list of dictionaries")

    # ---
    #parse the command line
    (options, args) = parser.parse_args()
    proxies = {} if not options.https_proxy else {'https_proxy':options.https_proxy}

    # ---
    doer = DoHtrRnn(options.server, proxies, loggingLevel=logging.WARN)
    __Trnskrbs_do_login_stuff(doer, options, trace=trace, traceln=traceln)

    # --- positional argument: name of the uploaded dictionary
    try:
        dictName = args.pop(0)
    except Exception as e:
        _exit(usage, 1, e)

    # without any -d option, options.ldict is None: report it properly
    # instead of failing with a TypeError when iterating over it
    if not options.ldict:
        _exit(usage, 1, Exception("No dictionary file given (option -d)"))

    # --- concatenate the dictionary files, tabulations being replaced by commas
    lsDict = []
    for filename in options.ldict:
        try:
            # each file is closed right after being read
            with open(filename,'r',encoding='utf-8') as fd:
                sDict = fd.read()
        except IOError:
            print ('not possible to open file :%s'%(filename))
            # as before, the remaining files are skipped and what was loaded
            # so far is uploaded
            break
        lsDict.append(sDict.replace('\t',','))
        traceln( "loaded %s"%(filename))
    sfullDict = "".join(lsDict)

    # need to normalize the weights when build this different dictionaries???
    response = doer.run(dictName, sfullDict)
    traceln(response)

    traceln()
    traceln("- Done")
140 |
141 |
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/PageXmlExtractor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | '''
4 | Created on August 1st, 2017
5 |
6 |
7 | Utility to extract several pages from several document to a folder or a MultiPageXml file
8 |
9 | @author: meunier
10 | '''
11 |
12 | from __future__ import absolute_import
13 | from __future__ import print_function
14 | from __future__ import unicode_literals
15 |
16 | import os
17 | from io import open
18 | import json
19 | import shutil
20 | import math
21 |
22 | import xml_formats.PageXml as PageXml
23 |
class DocPageSet:
    '''
    the list of pages of interest of a document
    take the textual form: <docID>=<page-range-set>
    a page-range-set takes the form: <pageRange>[,<pageRange>]*
    with pageRange taking the form: N|N-N
    For instance: 111=1  or 222=1-10 or 333=1,10-20,23,40-50

    NOTE: ranges must be given in increasing order and must not overlap!!!
    '''
    def __init__(self, sSpec):
        """
        Parse the textual specification sSpec.

        Raise ValueError if the specification is malformed, if the docID is
        missing, or if a range is invalid, unordered or overlapping.
        """
        try:
            sDocID, sPageRangeSet = sSpec.strip().split('=')
        except ValueError:
            raise ValueError("Malformed range: '%s'"%sSpec)

        self.sDocID = sDocID
        self._ltiRange = []     # list of (first, last) page numbers, both included
        prev_b = None           # upper bound of the previous range, if any
        for sPageRange in sPageRangeSet.split(","):
            lsN = sPageRange.split('-')
            if len(lsN) == 1:
                a = int(lsN[0])
                b = a
            elif len(lsN) == 2:
                a,b = int(lsN[0]), int(lsN[1])
            else:
                raise ValueError("invalid range: '%s'"%sPageRange)
            if not(a<=b): raise ValueError("Invalid range: '%s'"%sPageRange)
            self._ltiRange.append( (a,b) )  #222=1-10
            # explicit test for the first range: comparing None with an
            # integer is only possible in Python 2 (TypeError in Python 3)
            if prev_b is None or prev_b < a:
                prev_b = b
            else:
                raise ValueError("unordered or overlapping ranges: '%d' >= '%d'  '%s'"%(prev_b, a, sSpec))
        if not self.sDocID: raise ValueError("missing docID: '%s'"%sSpec)
        if not self._ltiRange: raise ValueError("empty range: '%s'"%sSpec)

    # -----
    def getDocID(self, bSkipPath=False):
        """
        Return the docID, reduced to its basename when bSkipPath is true.
        """
        if bSkipPath:
            return os.path.basename(self.sDocID)
        else:
            return self.sDocID

    def getRangeString(self):
        """
        Return the textual form of the page-range-set, e.g. 1,10-20
        """
        return ",".join( "%d-%d"%(a,b) if a != b else "%d"%a for (a,b) in self._ltiRange )

    def iterPageNumber(self):
        """
        Iterator returning each page number in turn
        """
        # the generator simply ends after the last range: raising
        # StopIteration in a generator is a RuntimeError since Python 3.7
        for a,b in self._ltiRange:
            for n in range(a,b+1):
                yield n

    # -----
    def __str__(self):
        return "%s=%s"%(self.sDocID, self.getRangeString())
82 |
def testDocPageSet():
    """
    pytest test of DocPageSet: parsing, string form, page iteration, and
    rejection of malformed specifications.
    """
    import pytest

    for s in ["111=1", "222=1-10", "333=1,10-20,23,40-50"]:
        assert str(DocPageSet(s)) == s, s

    o = DocPageSet("111=1")
    assert o.getDocID() == "111"
    assert [i for i in o.iterPageNumber()] == [1]

    o = DocPageSet("a/b/c/111=1")
    assert o.getDocID() == "a/b/c/111"
    assert o.getDocID(True) == "111"
    assert [i for i in o.iterPageNumber()] == [1]


    o = DocPageSet("333=1,10-20,23,40-50")
    assert o.getDocID() == "333"
    # list(...) is required in Python 3, where range() does not return a list
    assert [i for i in o.iterPageNumber()] == [1]+list(range(10,21))+[23]+list(range(40,51))

    with pytest.raises(ValueError): DocPageSet("aaa")
    with pytest.raises(ValueError): DocPageSet("aaa=")
    with pytest.raises(ValueError): DocPageSet("=1")
    with pytest.raises(ValueError): DocPageSet("=1-2")
    with pytest.raises(ValueError): DocPageSet("aaa=12=12")
    with pytest.raises(ValueError): DocPageSet("aaa=22-11")
    with pytest.raises(ValueError): DocPageSet("aaa=-11")
    with pytest.raises(ValueError): DocPageSet("aaa=-11-")
    with pytest.raises(ValueError): DocPageSet("aaa=-11-12")
    with pytest.raises(ValueError): DocPageSet("aaa=333=1,10-20,3,40-50")
113 |
class PageXmlExtractor:
    '''
    Utility to extract several pages from several document to a folder
    or to a MultiPageXml file
    '''
    sColDir = 'col'

    @classmethod
    def getFilename(cls, sDocID, name):
        """
        Return the path of the file called name in the folder sDocID
        """
        return os.path.join(sDocID, name)

    @classmethod
    def extractPagesToDir(cls, lDocPageSet, sToDir):
        """
        extract the pages from the given list of PageSet and store them in the given folder.
        (typically to be packaged as a MultiPageXml using PageXml.py)
        The folder is created if needed, and must be empty otherwise (ValueError).
        An origin.json file describing the origin of each page is written in it.
        return the number of copied files, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """
        if not os.path.isdir(sToDir):
            print(" - creating directory ", sToDir)
            os.mkdir(sToDir)
        else:
            if len(os.listdir(sToDir)) > 0: raise ValueError("Target folder (%s) must be empty."%sToDir)
        if not os.path.isdir(sToDir): raise ValueError("%s is not a directory"%sToDir)

        jsonOriginFilename = os.path.join(sToDir, "origin.json")
        cnt, ltOrigin = cls.getOriginTuple(lDocPageSet, jsonOriginFilename)

        print( " - total number of pages = %d"%cnt)

        # number of digits of the largest page number (log10 would fail on 0 page)
        nbDigit = len(str(cnt))
        sFmt = "%%0%dd.pxml" % nbDigit  #e.g. %03d.pxml

        for (pnum, _docID, _n, sFilename) in ltOrigin:
            sToFilename = os.path.join(sToDir, sFmt%pnum)
            print("   copying %s  -->  %s"%(sFilename, sToFilename))
            shutil.copy(sFilename, sToFilename)

        return cnt, ltOrigin

    @classmethod
    def extractPagesToFile(cls, lDocPageSet, sToFile, bIndent=True):
        """
        extract the pages from the given list of PageSet and store them in a MultiPageXml file
        The origin of each page is written in a JSON file named after sToFile,
        its extension being replaced by "_origin.json".
        return the number of copied files, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """

        sBaseName, _ = os.path.splitext(sToFile)
        jsonOriginFilename = sBaseName + "_origin.json"
        cnt, ltOrigin = cls.getOriginTuple(lDocPageSet, jsonOriginFilename)

        print( " - total number of pages = %d"%cnt)

        print( "   Generating %s"%(sToFile))
        doc = PageXml.MultiPageXml.makeMultiPageXml([sFilename for (_pnum, _docID, _n, sFilename) in ltOrigin] )
        doc.write(sToFile, xml_declaration='UTF-8',encoding="utf-8", pretty_print=bIndent)

        return cnt, ltOrigin

    @classmethod
    def getOriginTuple(cls, lDocPageSet, jsonOriginFilename=None):
        """
        prepare for extracting the pages from the given list of PageSet
        If jsonOriginFilename is given, the list of tuples is also stored in that file, in JSON.
        return the number of files, and list of tuple (pnum, orig-docID, orig-pnum, orig-filename)
        """
        # imported here because the module only imports sys in its __main__ section
        import sys

        ltOrigin = list()
        cnt = 0
        for o in lDocPageSet:
            print( " - Processing doc %s, pages %s"%(o.getDocID(), o.getRangeString()))
            lsFilename = cls.getPageFilenameList(o.getDocID(), ".pxml")
            for n in o.iterPageNumber():
                cnt += 1
                sFilename = lsFilename[n-1]
                ltOrigin.append( (cnt, o.getDocID(True), n, sFilename) )  # new-PNum, docID, orig-PNum, orig-filename

        if jsonOriginFilename:
            if sys.version_info > (3,0):
                # json.dump produces text in Python 3: text mode is required
                # (and a binary mode does not accept any encoding)
                with open(jsonOriginFilename, "w",encoding='utf-8') as fd: json.dump(ltOrigin, fd, indent=True)
            else:
                with open(jsonOriginFilename, "wb") as fd: json.dump(ltOrigin, fd, indent=True)

            print( "   (see %s)"%(jsonOriginFilename))

        return cnt, ltOrigin


    @classmethod
    def getPageFilenameList(cls, sDocID, sExt):
        """
        Return the list of the filenames of the pages of the document stored in
        the folder sDocID, in the order of the page list of its trp.json file.
        Each filename is made of the basename of the image of the page and of
        the extension sExt, which must start with a dot.
        Raise ValueError if the trp.json file does not exist.
        """
        assert sExt.startswith('.')

        #Look in trp.json file
        lsFilename = []

        trpFile = os.path.join(sDocID, 'trp.json')
        if not( os.path.exists(trpFile)): raise ValueError("Non-existing trp.json file %s" % trpFile)
        # the encoding must be passed by keyword: the third positional
        # argument of open is the buffering
        with open(trpFile, "r",encoding='utf-8') as fd:
            jTrp = json.load(fd)

        for i, page in enumerate(jTrp['pageList']['pages']):
            sImgFileName = page['imgFileName']
            sBaseName, _ = os.path.splitext(sImgFileName)
            sXmlFilename = cls.getFilename(sDocID, sBaseName + sExt)
            lsFilename  .append( sXmlFilename )
            if page['pageNr'] != i+1: print( "\tWarning: expected page number %d , got %s"%(i+1, page['pageNr']))

        return lsFilename
221 |
if __name__ == "__main__":

    import sys, optparse
    # the usage names the options actually declared below (--file, --dir)
    usage = """
%s [--file filename] [--dir dirname] [docID=<page-range-set>]+

Utility to extract a set of PageXml files from a set of documents and either:
 - store them into a target folder with simple numbering, with unambiguous order.
 - generate a MultiPageXMl document. In case of empty filename or "-", the filename is automatically composed from the arguments.

a page-range-set takes the form: <pageRange>[,<pageRange>]+
with pageRange taking the form: N|N-N
Page ranges must be ordered, per document.
For instance: 111=1 222=1-10 333=1,10-20,23,40-50

JL Meunier - Aug. 2017
""" % sys.argv[0]

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--dir" , dest='dir' , action="store", type="string", help="Store the extracted PageXml pages into the specified directory.")
    parser.add_option("--file", dest='file', action="store", type="string", help="Store the extracted PageXml pages into the specified MultiPageXml document.")

    (options, args) = parser.parse_args()

    # at least one docID=page-range-set argument is required
    if not args:
        parser.print_help()
        parser.exit(1, "")

    print("Parsing range(s)")
    lDocPageSet = [DocPageSet(s) for s in args]

    if options.dir:
        print( "Extracting into folder: ", options.dir)
        PageXmlExtractor.extractPagesToDir(lDocPageSet, options.dir)

    # an empty filename is meaningful (automatic naming), so test against None
    if options.file is not None:
        if options.file in ["", "-"]: options.file = "extraction_" + "_".join(map(str, lDocPageSet)) #automatic filename
        sToFile = options.file if options.file.lower().endswith(".mpxml") else options.file+".mpxml" #automatic .mpxml extension
        print( "Extracting into file: ", sToFile)
        PageXmlExtractor.extractPagesToFile(lDocPageSet, sToFile)

    print( "DONE")
269 |
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusDU/xml_formats/__init__.py
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/mpxml2pxml.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | mpxml to pxml convertor
4 |
5 | @author: H Déjean
6 |
7 | READ project
8 | 31/05/2017
9 | """
10 | from __future__ import absolute_import
11 | from __future__ import print_function
12 | from __future__ import unicode_literals
13 | import sys, os.path, optparse
14 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
15 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0])))))
16 |
17 | from lxml import etree
18 | import xml_formats.PageXml as PageXml
19 |
if __name__ == "__main__":

    usage = """
%s dir docid
Utility to create a set of pageXml XML files from a mpxml file.
""" % sys.argv[0]

    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--format", dest='bIndent', action="store_true" , help="reformat/reindent the input")
    parser.add_option("--dir", dest='destdir', action="store", default='pxml' , help="directory ouptut")
    (options, args) = parser.parse_args()

    # both positional arguments are mandatory; only a missing argument is
    # treated as a usage error, any other exception is left to propagate
    try:
        sDir, docid = args[0], args[1]
    except IndexError:
        parser.print_help()
        parser.exit(1, "")

    # input: <dir>/col/<docid>.mpxml
    sDocFilename = os.path.join(sDir, "col", "%s.mpxml" % docid)

    doc = etree.parse(sDocFilename)

    # output: one file per page, <dir>/<destdir>/<docid>_<pnum on 3 digits>.pxml
    for pnum, pageDoc in PageXml.MultiPageXml._iter_splitMultiPageXml(doc, bInPlace=False):
        outfilename = os.path.join(sDir, options.destdir, "%s_%03d.pxml" % (docid, pnum))
        print(outfilename)
        pageDoc.write(outfilename, xml_declaration ='UTF-8',encoding="utf-8", pretty_print = bool(options.bIndent))
    print ("DONE")
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/testDS2PageXml/.gitignore:
--------------------------------------------------------------------------------
1 | /RRB_MM_01_033_Jahr_1810.mpxml
2 |
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/test_DS2PageXml.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | test DS2PageXml convertor
4 | @author:déjean
5 | """
6 | import os.path
7 | from xml_formats.DS2PageXml import DS2PageXMLConvertor
8 | from xml_formats.PageXml import MultiPageXml
9 |
10 | sTESTS_DIR = os.path.dirname(os.path.abspath(__file__))
11 |
def test_DS2PageXmlConversion():
    """
    Convert the DS sample document into PageXml documents, assemble them in
    memory into one MultiPageXml document and write it in the test folder.
    """
    sInputFile  = os.path.join(sTESTS_DIR, 'testDS2PageXml/RRB_MM_01_033_Jahr_1810.ds.xml')
    sOutputFile = os.path.join(sTESTS_DIR, "testDS2PageXml/RRB_MM_01_033_Jahr_1810.mpxml")

    convertor = DS2PageXMLConvertor()
    convertor.inputFileName = sInputFile
    lPageXmlDocs = convertor.run(convertor.loadDom(sInputFile))

    # run() yields pairs; only the first item of each pair is assembled
    lPageDoc = [pageDoc for pageDoc, _ in lPageXmlDocs]
    multiDoc = MultiPageXml().makeMultiPageXmlMemory(lPageDoc)
    multiDoc.write(sOutputFile,
                   xml_declaration=True,
                   encoding="UTF-8",
                   pretty_print=True)
27 |
28 |
29 | # res= conv.storePageXmlSetofFiles(lPageXmlDocs)
30 | # print 'test:', True if res == 0 else False
31 |
if __name__ == "__main__":
    # run the conversion test directly when the file is executed as a script
    # test_setMetadata()
    test_DS2PageXmlConversion()
--------------------------------------------------------------------------------
/src/TranskribusDU/xml_formats/tests/test_PageXml.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | '''
4 | Created on 23 Nov 2016
5 |
6 | @author: meunier
7 | '''
8 | import pytest
9 | from lxml import etree
10 | from io import BytesIO
11 |
12 | from xml_formats.PageXml import PageXml, PageXmlException
13 |
14 |
def test_custom():
    """
    Check PageXml.parseCustomAttr on well-formed values: empty values, one or
    several names sharing a property block, several properties, several blocks.
    """
    dA  = { 'a': { 'x':'1' } }
    dAB = { 'a': { 'x':'1' }, 'b': { 'x':'1' } }

    ltCase = [
          (""                   , {})
        , (" "                  , {})
        , (" "                  , {})

        , ("a {x:1;}"           , dA)
        , (" a {x:1;}"          , dA)
        , ("a {x:1;} "          , dA)
        , (" a {x:1;} "         , dA)
        , ("a {x:1 ;}"          , dA)
        , ("a {x:1 ; }"         , dA)
        , ("a { x:1 ; }"        , dA)

        , ("a{x:1;}"            , dA)
        , ("a{x:1 ;}"           , dA)
        , ("a{x:1 ; }"          , dA)
        , ("a{ x:1 ; }"         , dA)

        , ("a,b{x:1;}"          , dAB)
        , ("a, b{x:1 ;}"        , dAB)
        , ("a , b{x:1 ; }"      , dAB)
        , ("a ,b{ x:1 ; }"      , dAB)
        , ("a ,b { x:1 ; }"     , dAB)

        , ("a { x:1 ; y:2 }"    , { 'a': { 'x':'1', 'y':'2'} })
        , ("a,b { x:1 ; y:2 }"  , { 'a': { 'x':'1', 'y':'2'}, 'b': { 'x':'1', 'y':'2'} })

        , ("a {}"               , { 'a': { } })

        , ("readingOrder {index:4;} structure {type:catch-word;}"
                                , { 'readingOrder': { 'index':'4' }, 'structure':{'type':'catch-word'} })
        ]

    for sCustom, dExpected in ltCase:
        assert PageXml.parseCustomAttr(sCustom) == dExpected
45 |
def test_malformed_custom():
    """
    Check that PageXml.parseCustomAttr raises ValueError on malformed values,
    and record how it currently handles spaces inside names and values.
    """
    for sMalformed in ("a {x1;}", "a x1;}", "a { x1;", "a { x1 }"):
        with pytest.raises(ValueError): PageXml.parseCustomAttr(sMalformed)

    # open questions: should a space inside a value, or inside a name, be rejected?
    # For now both are accepted and kept as they are.
    dParsed = PageXml.parseCustomAttr("a { x:1 2}")
    assert dParsed == {'a': {'x': '1 2'}}

    dParsed = PageXml.parseCustomAttr(" a b { x y : 1 2 }")
    assert dParsed == {'a b': {'x y': '1 2'}}
57 |
def test_getsetCustomAttr():
    """
    Check PageXml.getCustomAttr and PageXml.setCustomAttr on the root node of
    a small XML fixture: reading one property, overwriting it, reading a whole
    property block, adding a new property, and the PageXmlException raised
    when the property or the block does not exist.
    """
    # NOTE(review): the literal below contains no markup in this copy of the
    # file; the assertions expect a root node with a custom attribute holding
    # "readingOrder {index:9;}" -- confirm against the original fixture.
    sXml = b"""



"""
    doc = etree.parse(BytesIO(sXml))
    nd = doc.getroot()
    assert PageXml.getCustomAttr(nd, "readingOrder", "index") == '9'
    # setCustomAttr returns the value it was given; values are read back as strings
    assert PageXml.setCustomAttr(nd, "readingOrder", "index", 99) == 99
    assert PageXml.getCustomAttr(nd, "readingOrder", "index") == '99'

    # without a property name, the whole block is returned as a dictionary
    assert PageXml.getCustomAttr(nd, "readingOrder") == {'index':'99'}

    assert PageXml.setCustomAttr(nd, "readingOrder", "toto", "zou") == "zou"
    assert PageXml.getCustomAttr(nd, "readingOrder", "toto") == 'zou'

    # unknown property, then unknown block
    with pytest.raises(PageXmlException): PageXml.getCustomAttr(nd, "readingOrder", "axiste_pas")
    with pytest.raises(PageXmlException): PageXml.getCustomAttr(nd, "axiste_pas_non_plus", "axiste_pas")
77 |
def getMetadataTestDOM():
    """
    Parse the XML fixture shared by the metadata tests and return the
    resulting lxml document.
    """
    # NOTE(review): the markup of the literal below is missing in this copy of
    # the file (only the text values remain); the tests expect a creator, a
    # creation date and a last-change date -- confirm against the original fixture.
    sXml = b"""


Tilla
2016-08-18T13:35:08.252+07:00
2016-12-01T09:53:39.610+01:00



















"""
    doc = etree.parse(BytesIO(sXml))
    return doc
107 |
def test_getMetadata():
    """
    Check that PageXml.getMetadata reads the same values from the fixture
    whether it is given the document, or the first child of the root node.
    """
    sCreated    = "2016-08-18T13:35:08.252+07:00"
    sLastChange = "2016-12-01T09:53:39.610+01:00"

    doc = getMetadataTestDOM()
    ndRoot = doc.getroot()

    # 1 - from the document
    md = PageXml.getMetadata(doc)
    assert md.Creator == "Tilla"
    assert md.Created == sCreated
    assert md.LastChange == sLastChange
    assert md.Comments == None

    # 2 - from the node (presumably the Metadata element -- first child of the root)
    md = PageXml.getMetadata(None, ndRoot[0])
    assert md.Creator == "Tilla"
    assert md.Created == sCreated
    assert md.LastChange == sLastChange
122 |
def test_setMetadata():
    """
    Check that PageXml.setMetadata changes the creator (and the comments when
    given), leaves the creation date untouched and refreshes the last-change
    date, reading the result back with PageXml.getMetadata.
    """
    import datetime
    doc = getMetadataTestDOM()

    nd = doc.getroot()
    _sutc = datetime.datetime.utcnow().isoformat()   # not used afterwards
    PageXml.setMetadata(doc, None, "Tigrette")

    sutc = datetime.datetime.utcnow().isoformat()
    md = PageXml.getMetadata(doc)
    assert md.Creator == "Tigrette"
    assert md.Created == "2016-08-18T13:35:08.252+07:00"
    # only the first 15 characters (date, hour, tens of minutes) are compared,
    # since the exact time of the change is not known
    assert md.LastChange.startswith(sutc[:15])
    assert md.Comments == None
    print(doc)

    # same check, this time with a comment, and reading back from the node
    sutc = datetime.datetime.utcnow().isoformat()
    PageXml.setMetadata(doc, None, "Bijoux", "Le chat de Martine")
    md = PageXml.getMetadata(None, nd[0])
    assert md.Creator == "Bijoux"
    assert md.Created == "2016-08-18T13:35:08.252+07:00"
    assert md.LastChange.startswith(sutc[:15])
    assert md.Comments == "Le chat de Martine"
    print(doc)
147 |
def test_CreationPageXmlDocument():
    """
    Smoke test: create a PageXml document for a 100x100 image and print it.
    Nothing is asserted on the result.
    """
    doc= PageXml.createPageXmlDocument(creatorName='HerveforTest', filename='hervefortest.jpg', imgW=100, imgH=100)
    print(doc)
151 |
if __name__ == "__main__":
    # run two of the tests directly when the file is executed as a script
    test_setMetadata()
    test_CreationPageXmlDocument()
--------------------------------------------------------------------------------
/src/TranskribusPyClient/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusPyClient/__init__.py
--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/IntegerRange.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | Integer range specification for Python clients
5 |
6 | A class to deal with integer range specifications like 1-5,8
7 |
8 | Copyright Naver(C) 2017, JL. Meunier, August 2017
9 |
10 | This program is free software: you can redistribute it and/or modify
11 | it under the terms of the GNU General Public License as published by
12 | the Free Software Foundation, either version 3 of the License, or
13 | (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
22 |
23 |
24 | Developed for the EU project READ. The READ project has received funding
25 | from the European Union’s Horizon 2020 research and innovation programme
26 | under grant agreement No 674943.
27 |
28 | """
29 | from __future__ import absolute_import
30 | from __future__ import print_function
31 | from __future__ import unicode_literals
32 |
33 | from builtins import int
34 |
class IntegerRange:
    """
    An integer range object

    - at creation, pass a range specification of the form: 1 or 1-3 or 1,3 or 1,5-7,8
        IntegerRange = RANGE [, RANGE]+
        where RANGE is either an integer or 2 integers separated by a '-'
            RANGE = N
            RANGE = N-N
        Spaces are ignored, apart between digits.
    - the ranges are kept ordered and must not overlap nor share a bound
    - the object is a container that supports:
        - iteration
        - len()
        - reversed()
        - contains test (if n in o: ...)
    """
    def __init__(self, sRange=""):
        """
        Build the range from its textual specification (empty by default).
        raise ValueError if the specification is invalid
        """
        self._ltAB = self.parseSpec(sRange)     # ordered list of (a, b) inclusive bounds
        # round-trip sanity check: the specification must already be in the
        # canonical form produced by __str__ (spaces apart), so forms like
        # "1-1" or "01" parse but fail here
        assert str(self) == "".join(sRange.split())

    def initFromEnumeration(self, lN):
        """
        create the list of ranges that exactly cover the enumeration.
        Duplicated values are tolerated.
        return self
        """
        if not lN:
            pass
        elif len(lN) == 1:
            self.addRange(lN[0])
        else:
            lN = sorted(lN)
            A = lN[0]
            Nprev = A
            for N in lN[1:]:
                if Nprev+1 < N:
                    #hole in sequence, create an interval!
                    self.addRange(A, Nprev)
                    A = N
                Nprev = N
            self.addRange(A, Nprev)
        return self

    @classmethod
    def parseSpec(cls, sSpec):
        """
        parse a range specification of positive integers and return a list of pair of indices
        raise ValueError if a range is invalid, or if the ranges are unordered or overlapping
        """
        ltAB = list()
        prev_b = -1
        for sRange in sSpec.split(","):
            if not sRange.split(): continue #empty spec!
            a,b = cls._getAB(sRange)
            ltAB.append( (a,b) )
            if prev_b < a:
                prev_b = b
            else:
                raise ValueError("unordered or overlapping ranges: '%s' >= '%s'  '%s'"%(prev_b, a, sSpec))
        return ltAB

    def addRange(self, a, b=None):
        """
        add the range a-b, or the single value a if b is omitted.
        raise ValueError, leaving the object unchanged, if the new range
        overlaps an existing one.
        """
        if b is None: b = a
        assert a <= b
        self._ltAB.append( (a,b) )
        self._ltAB.sort()
        if not self._check():
            self._ltAB.remove( (a,b) )
            raise ValueError("Overlapping range")

    def len(self):
        """
        For some subclass, this method can be useful as it is not forced by Python to return an int (like for return float('inf'))
        """
        return sum(b-a+1 for a,b in self._ltAB)

    @classmethod
    def _getAB(cls, sRange):
        """
        parse one range, N or N-N, and return its bounds (a, b), with a <= b
        raise ValueError if the range is invalid
        """
        lsN = sRange.split('-')
        if len(lsN) == 1:
            a = int(lsN[0])
            b = a
        elif len(lsN) == 2:
            sA, sB = lsN
            a,b = int(sA), int(sB)
            if not(a<=b): raise ValueError("Invalid range: '%s'"%sRange)
        else:
            raise ValueError("invalid range: '%s'"%sRange)
        return a, b

    def _check(self):
        """
        checking things are in order: each range must start strictly after the
        end of the previous one (same rule as in parseSpec)
        """
        prevB = -float('inf')
        for a,b in self._ltAB:
            # >= and not > : two ranges sharing a bound do overlap
            if prevB >= a: return False
            prevB = b
        return True

    def __str__(self):
        return ",".join( "%s-%s"%(a,b) if a != b else "%s"%a for (a,b) in self._ltAB )

    def __bool__(self):
        return bool(self._ltAB)

    def __nonzero__(self):
        #Python 2 name of __bool__
        return bool(self._ltAB)

    #--- Emulating Container type...
    def __iter__(self):
        """
        Iterator returning each number in turn
        """
        # a generator ends by returning: raising StopIteration inside it is
        # turned into a RuntimeError since Python 3.7 (PEP 479)
        for a,b in self._ltAB:
            for n in range(a,b+1): yield n

    def __reversed__(self):
        """
        Reversed iterator
        If we do not provide it, we must provide a __getitem__ (boring to code and how useful??)
        """
        for a,b in reversed(self._ltAB):
            for n in range(b,a-1,-1): yield n

    def __len__(self):
        return sum(b-a+1 for a,b in self._ltAB)

    def __contains__(self, item):
        """
        return True if the integer value of item is in one of the ranges
        raise ValueError if item cannot be converted to an integer
        """
        #All integers are long in python3 and call to covert is just int
        try:
            item = int(item)
        except TypeError:
            raise ValueError("A range contains numeric values not %s"%type(item))
        # the ranges are ordered: only the first one ending at or after item
        # can contain it. An empty object contains nothing.
        for a,b in self._ltAB:
            if b >= item: return a <= item
        return False
174 |
175 |
176 |
177 | # ------ TESTS ----------------------------------------------------------------------------------
def test_good_spec(capsys):
    """
    Check iteration, reversed iteration, membership and length on valid
    specifications without spaces.
    The capsys fixture is only useful to print while debugging.
    """
    def container_test(o, lref):
        assert list(o) == lref
        assert list(reversed(o)) == list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o

    o = IntegerRange("1")
    container_test(o, [1])

    o = IntegerRange("99")
    container_test(o, [99])

    o = IntegerRange("1,99")
    container_test(o, [1, 99])

    # the references must be lists: on Python 3, a range object is never equal
    # to a list and two range objects cannot be concatenated
    o = IntegerRange("1-5")
    container_test(o, list(range(1, 6)))

    o = IntegerRange("1-5,6-88")
    container_test(o, list(range(1, 6))+list(range(6, 89)))

    o = IntegerRange("1-3,4-8")
    container_test(o, list(range(1, 9)))
    assert len(o) == len(range(1, 9))
205 |
def test_spaced_good_spec():
    """
    Same checks as for the specifications without spaces, with spaces and
    tabulations around the numbers and the separators.
    """
    def container_test(o, lref):
        assert list(o) == lref
        assert list(reversed(o))== list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o

    o = IntegerRange(" 1\t\t")
    container_test(o, [1])

    o = IntegerRange("99 ")
    container_test(o, [99])

    o = IntegerRange("1 , 99")
    container_test(o, [1, 99])

    # the references must be lists: on Python 3, a range object is never equal
    # to a list and two range objects cannot be concatenated
    o = IntegerRange(" 1\t- 5\t")
    container_test(o, list(range(1, 6)))

    o = IntegerRange("1-5, 6-88")
    container_test(o, list(range(1, 6))+list(range(6, 89)))

    o = IntegerRange("1 -3\t,4- 8")
    container_test(o, list(range(1, 9)))
    assert len(o) == len(range(1, 9))
231 |
def test_errors():
    """
    Check that invalid specifications are rejected with a ValueError: space
    between digits, reversed bounds, unordered or overlapping ranges.
    """
    import pytest
    lsBadSpec = [ "1 3"
                , "3-1"
                , "3,1"
                , "1-3,2"
                , "3,1-2"
                , "1-3,3-8"
                , "1-3 3,3-8"
                , "1-3,3-8 8"
                ]
    for sBadSpec in lsBadSpec:
        with pytest.raises(ValueError): IntegerRange(sBadSpec)
242 |
243 |
def test_limit():
    """
    Check that an empty or blank specification gives an empty range.
    """
    for sSpec in ("", "\t \t "):
        o = IntegerRange(sSpec)
        assert list(o) == []
        assert len(o) == 0
251 |
def test_add():
    """
    Check addRange: ranges can be added in any order, a range overlapping an
    existing one is rejected with a ValueError, and the content of the object
    is as expected after each step.
    """
    import pytest

    def container_test(o, lref):
        assert list(o) == lref
        assert list(reversed(o)) == list(reversed(lref))
        for item in lref: assert item in o
        assert -99 not in o

    o = IntegerRange()
    container_test(o, [])

    o.addRange(1)
    container_test(o, [1])

    o.addRange(0)
    container_test(o, [0, 1])

    with pytest.raises(ValueError): o.addRange(1)
    with pytest.raises(ValueError): o.addRange(0,1)
    with pytest.raises(ValueError): o.addRange(-3,0)
    with pytest.raises(ValueError): o.addRange(-3,3)
    with pytest.raises(ValueError): o.addRange(1,3)
    with pytest.raises(ValueError): o.addRange(0,3)

    # range objects are converted to lists: on Python 3 a list and a range
    # object cannot be concatenated
    o.addRange(90, 99)
    container_test(o, [0, 1]+list(range(90, 100)))

    o.addRange(60, 66)
    container_test(o, [0, 1]+list(range(60, 67))+list(range(90, 100)))

    with pytest.raises(ValueError): o.addRange(0,1000)
    with pytest.raises(ValueError): o.addRange(10,60)
    with pytest.raises(ValueError): o.addRange(70,95)
    with pytest.raises(ValueError): o.addRange(95)
    o.addRange(80, 88)
    container_test(o, [0, 1]+list(range(60, 67))+list(range(80, 89))+list(range(90, 100)))

    for n in (1, 0, 90, 80, 60, 66, 99, 88):
        assert n in o

    assert 50 not in o
300 |
def test_enum():
    """
    Check that initFromEnumeration covers exactly the given values, whatever
    their order, including duplicated values.
    """
    def check_enum(lN):
        setRef = set(lN)
        o = IntegerRange()
        o.initFromEnumeration(lN)
        assert set(o) == setRef

    llN = [ []
          , [2]
          , [-2]
          , [2,1]
          , [1,2]
          , [1,2,2]         #bad case that we cover anyway
          , [1,2,4,2,5]
          , [7,4,6,1]
          , [0]
          ]
    for lN in llN:
        check_enum(lN)
317 |
318 |
319 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Transkribus/TranskribusPyClient/f3b0208751a553257ddf313b73278477aab1ffef/src/TranskribusPyClient/common/__init__.py
--------------------------------------------------------------------------------
/src/TranskribusPyClient/common/trace.py:
--------------------------------------------------------------------------------
1 | #
2 | # A simple trace module
3 | #
4 | # JL Meunier - May 2004
5 | # Copyright XRCE, 2004
6 | #
7 |
8 | import sys
9 |
# the stream receiving the traces: standard error, unless changed by setTraceFD
# (the global statement has no effect at module level)
global traceFD
traceFD = sys.stderr
12 |
def setTraceFD(fd):
    """
    Send the subsequent traces to fd, which replaces the module-level traceFD.
    The trace functions call its write and flush methods.
    """
    global traceFD
    traceFD = fd
16 |
def trace(*msg):
    """
    Write each argument, converted to a string, on the trace stream, without
    any separator nor end of line.

    Text that cannot be encoded is still sent to the trace stream (the
    fallback used to write bytes on sys.stderr, which ignores setTraceFD and
    fails on Python 3):
    - on Python 2, where str() fails on non-ASCII unicode text, the text is
      written encoded in UTF-8
    - on Python 3, where the stream itself rejects the text, the characters
      it cannot encode are written as backslash escapes
    """
    for item in msg:
        try:
            traceFD.write(str(item))
        except UnicodeEncodeError:
            if str is bytes:
                #Python 2
                traceFD.write(item.encode("utf-8"))
            else:
                sEncoding = getattr(traceFD, "encoding", None) or "ascii"
                traceFD.write(str(item).encode(sEncoding, "backslashreplace").decode(sEncoding))
22 |
def traceln(*msg):
    """
    Same as trace, then write an end of line and flush the trace stream.
    """
    global traceFD

    # the end of line is traced as one more argument
    trace(*(msg + ("\n",)))
    traceFD.flush()
30 |
def flush():
    """
    Flush the trace stream.
    """
    traceFD.flush()
33 |
34 |
35 |
36 | #SELF-TEST
if __name__=="__main__":

    # traces sent to the default stream: several calls building one line,
    # several arguments in one call, then calls without any argument
    trace(1)
    trace(" aut")
    trace("o")
    traceln("-test")
    trace("2 auto", "-", "test")
    trace()
    traceln()
    traceln("Done")
47 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/__init__.py:
--------------------------------------------------------------------------------
1 | #-*- coding:utf-8 -*-
2 |
3 | """
If you run all the tests using pytest, you should first set some appropriate values here!!
5 |
6 | Created on 25 Nov 2016
7 |
8 | @author: meunier
9 | """
10 |
11 |
# Identifiers shared by the test scripts of this package: they must refer to
# existing collections and documents.

# an existing collection A
_colId_A = 3571

#some existing documents in collection A
_docId_a = 7749
_docId_b = 7750
_docId_c = 8251
_docId_d = 8252


#A different collection where you can do whatever you want
# (the test scripts import it under this exact spelling, "_coldId_")
_coldId_Sandbox = 3820
24 |
25 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_addDocToCollection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
4 | import sys, os
5 | import logging
6 |
7 | try: #to ease the use without proper Python installation
8 | import TranskribusPyClient_version
9 | except ImportError:
10 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 | import TranskribusPyClient_version
12 |
13 | from TranskribusPyClient.test import _coldId_Sandbox, _docId_a
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 |
# Add an existing document to the sandbox collection.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}, loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# close the session even if the call fails
try:
    # True or Exception
    data = conn.addDocToCollection(_coldId_Sandbox, _docId_a)
finally:
    # print with parentheses works both with Python 2 and Python 3
    print(conn.auth_logout())
27 |
28 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_copyDocToCollection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
4 | import sys, os
5 | import logging
6 |
7 | try: #to ease the use without proper Python installation
8 | import TranskribusPyClient_version
9 | except ImportError:
10 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 | import TranskribusPyClient_version
12 |
13 | from TranskribusPyClient.test import _colId_A, _coldId_Sandbox, _docId_c, _docId_d
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 |
# Duplicate two documents of collection A into the sandbox collection, the
# first one under a given name.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}, loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# close the session even if a call fails
try:
    # True or Exception
    data = conn.duplicateDoc(_colId_A, _docId_c, _coldId_Sandbox, "named_by_JL")
    data = conn.duplicateDoc(_colId_A, _docId_d, _coldId_Sandbox)
finally:
    # print with parentheses works both with Python 2 and Python 3
    print(conn.auth_logout())
28 |
29 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_fulldoc_xml.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | #optional: useful if you want to choose the logging level to something else than logging.WARN
4 | import sys, os
5 | import logging
6 |
7 | try: #to ease the use without proper Python installation
8 | import TranskribusPyClient_version
9 | except ImportError:
10 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
11 | import TranskribusPyClient_version
12 |
13 | from TranskribusPyClient.test import _colId_A, _docId_a
14 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
15 |
16 |
# Get a document of collection A as XML and print it.
# The credentials always come from getStoredCredentials: never write a login
# or a password in this file, even in a comment.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
                         , loggingLevel=logging.INFO)
# print with parentheses works both with Python 2 and Python 3
print(conn)

sessionID = conn.auth_login(login, pwd)
print(sessionID)

# close the session even if a call fails
try:
    data = conn.getDocByIdAsXml(_colId_A, str(_docId_a))  #str just to stress-test
    print(data)

    conn.setProxies({'https':'http://cornillon:8000'})
finally:
    print(conn.auth_logout())
44 |
45 |
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_list.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import sys, os
4 | import logging
5 |
6 | try: #to ease the use without proper Python installation
7 | import TranskribusPyClient_version
8 | except ImportError:
9 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
10 | import TranskribusPyClient_version
11 |
12 | from TranskribusPyClient.test import _colId_A, _docId_a
13 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
14 |
# List the documents of collection A and pretty-print the result.
login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
                         , loggingLevel=logging.INFO)

sessionID = conn.auth_login(login, pwd)

# close the session even if the call fails
try:
    data = conn.listDocsByCollectionId(_colId_A)
    import pprint
    pprint.pprint(data)
finally:
    # print with parentheses works both with Python 2 and Python 3
    print(conn.auth_logout())
26 |
27 | """
28 |
29 | [{u'collectionList': {u'colList': [{u'colId': 3571,
30 | u'colName': u'READDU',
31 | u'description': u'created by herve.dejean@xrce.xerox.com'}]},
32 | u'createdFromTimestamp': 33175290,
33 | u'createdToTimestamp': 33175290,
34 | u'docId': 7749,
35 | u'fimgStoreColl': u'TrpDoc_DEA_7749',
36 | u'nrOfPages': 10,
37 | u'scriptType': u'HANDWRITTEN',
38 | u'status': 0,
39 | u'title': u'MM_1_001',
40 | u'uploadTimestamp': 1478161395893L,
41 | u'uploader': u'herve.dejean@xrce.xerox.com',
42 | u'uploaderId': 275},
43 | {u'collectionList': {u'colList': [{u'colId': 3571,
44 | u'colName': u'READDU',
45 | u'description': u'created by herve.dejean@xrce.xerox.com'}]},
46 | u'createdFromTimestamp': 0,
47 | u'createdToTimestamp': 0,
48 | u'docId': 7750,
49 | u'fimgStoreColl': u'TrpDoc_DEA_7750',
50 | u'nrOfPages': 10,
51 | u'scriptType': u'HANDWRITTEN',
52 | u'status': 0,
53 | u'title': u'MM_1_005',
54 | u'uploadTimestamp': 1478161451242L,
55 | u'uploader': u'herve.dejean@xrce.xerox.com',
56 | u'uploaderId': 275}]
57 |
58 | """
--------------------------------------------------------------------------------
/src/TranskribusPyClient/test/test_collections_listEditDeclFeatures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import sys, os
4 | import logging
5 |
6 | try: #to ease the use without proper Python installation
7 | import TranskribusPyClient_version
8 | except ImportError:
9 | sys.path.append( os.path.dirname(os.path.dirname( os.path.abspath(sys.argv[0]) )) )
10 | import TranskribusPyClient_version
11 |
12 | from TranskribusPyClient.test import _colId_A
13 | from TranskribusPyClient.client import TranskribusClient, getStoredCredentials
14 |
15 |
16 | login, pwd = getStoredCredentials()
17 |
18 | conn = TranskribusClient(proxies={'https':'http://cornillon:8000'}
19 | , loggingLevel=logging.INFO)
20 |
21 | sessionID = conn.auth_login(login, pwd)
22 | doc = conn.listEditDeclFeatures(_colId_A)
23 | doc.saveFormatFileEnc("-", "UTF-8", True)
24 | conn.xmlFreeDoc(doc)
25 |
26 | print conn.auth_logout()
27 |
28 | """
29 |
30 |
31 |
32 | 1
33 | Long S
34 | Source uses long "s"
35 |
36 |
37 | 1
38 | 1
39 | Long s is normalized to "s"
40 | false
41 |
42 |
43 | 2
44 | 1
45 | Long s is transcribed as "ſ" U+017F "Latin small letter long s"
46 | false
47 |
48 |
49 |
50 |
51 | 2
52 | u and v
53 | Source uses v for u
54 |
55 |
56 | 3
57 | 2
58 | Transcribed as in source
59 | false
60 |
61 |
62 | 4
63 | 2
64 | Transcribed according to modern spelling
65 | false
66 |
67 |
68 |
69 |
70 | 3
71 | i and j
72 | Source uses "i" and "j" differently to modern spelling
73 |
74 |
75 | 7
76 | 3
77 | Normalized according to modern lexicon
78 | false
79 |
80 |
81 | 5
82 | 3
83 | Transcribed as in source
84 | false
85 |
86 |
87 | 279
88 | 3
89 | Capital letter "J" is normalized to "I" at the beginning of a word
90 | false
91 |
92 |
93 |
94 |
95 | 5
96 | Printspace
97 | The printspace indicates the overall text region.
98 |
99 |
100 | 9
101 | 5
102 | Created by FineReader
103 | false
104 |
105 |
106 | 8
107 | 5
108 | Manually corrected
109 | false
110 |
111 |
112 |
113 |
114 | 6
115 | Ligature "sz"
116 | "sz" is set as ligature
117 |
118 |
119 | 10
120 | 6
121 | Transcribed as "sz"
122 | false
123 |
124 |
125 | 11
126 | 6
127 | Normalized to "ß"
128 | false
129 |
130 |
131 |
132 |
133 | 28
134 | Text regions
135 | Regions which contain handwritten text
136 |
137 |
138 | 34
139 | 28
140 | Manually corrected
141 | false
142 |
143 |
144 | 33
145 | 28
146 | Automatically created
147 | false
148 |
149 |
150 |
151 |
152 | 29
153 | Line Regions
154 | Contain the text of line
155 |
156 |
157 | 35
158 | 29
159 | Automatically created
160 | false
161 |
162 |
163 | 36
164 | 29
165 | Manually corrected
166 | false
167 |
168 |
169 |
170 |
171 | 30
172 | Baselines
173 | The baseline is defined as in Wikipedia - characters are "sitting" on the baseline
174 |
175 |
176 | 38
177 | 30
178 | Manually corrected
179 | false
180 |
181 |
182 | 37
183 | 30
184 | Automatically created
185 | false
186 |
187 |
188 |
189 |
190 | 47
191 | Omitted text
192 | Even in diplomatic transcriptions the editor may decide to not transcribe specific notes or marginalia which do not contribute to the overall objective of the transcription
193 |
194 |
195 | 59
196 | 47
197 | Some text was omitted, e.g. marginalia, notes of librarians
198 | false
199 |
200 |
201 | 60
202 | 47
203 | No text was omitted
204 | false
205 |
206 |
207 |
208 |
209 | 48
210 | Person names
211 | Tagging of person names
212 |
213 |
214 | 61
215 | 48
216 | Person names were tagged
217 | false
218 |
219 |
220 | 62
221 | 48
222 | Person names were not tagged
223 | false
224 |
225 |
226 |
227 |
228 | 49
229 | Geo-Names
230 | Tagging of geo-names
231 |
232 |
233 | 63
234 | 49
235 | Geo-names were tagged
236 | false
237 |
238 |
239 | 64
240 | 49
241 | Geo-names wer not tagged
242 | false
243 |
244 |
245 |
246 |
247 | 50
248 | Abbreviations - common
249 | Common abbreviations are usually known to most readers of a text, for example: e.g., i.e., &, etc.
250 |
251 |
252 | 65
253 | 50
254 | Common abbreviations were not expanded
255 | false
256 |
257 |
258 | 66
259 | 50
260 | Common abbreviations were expanded
261 | false
262 |
263 |
264 |
265 |
266 | 51
267 | Abbreviations
268 | Especially in medieval texts and early modern handwritting many words are abbreviated, or even characters are left out in the middle of a word. These abbreviations often need deep grammatical understanding to be correctly expanded.
269 |
270 |
271 | 68
272 | 51
273 | Abbreviations were not marked
274 | false
275 |
276 |
277 | 67
278 | 51
279 | Abbreviations were marked, but not explanded
280 | false
281 |
282 |
283 | 399
284 | 51
285 | Abbreviations were marked and expanded
286 | false
287 |
288 |
289 |
290 |
291 | 52
292 | Blackening
293 | Sensible text can be marked as "blackened" and can be suppressed when exporting the text and the images
294 |
295 |
296 | 70
297 | 52
298 | Blackeing was not applied
299 | false
300 |
301 |
302 | 69
303 | 52
304 | Blackening was applied to names of persons and companies
305 | false
306 |
307 |
308 |
309 |
310 | """
--------------------------------------------------------------------------------
/src/TranskribusPyClient_version.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 29 Nov 2016
3 |
4 | @author: meunier
5 | '''
6 | version="0.3"
7 |
--------------------------------------------------------------------------------
/src/Transkribus_credential.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | Store in this file your transkribus credentials.
5 | Change the access rights of this file to protect this information.
6 |
7 | Alternatively, use do_login --persist to make a persistent session usable by next commands.
8 |
9 | Created on 15 Nov 2016
10 |
11 | @author: meunier
12 | """
13 |
14 | # Either you store your credentials here, or you use the --persist option
15 |
16 | login = "herve.dejean@naverlabs.com"
17 | password = ""
18 |
--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | /trnskrbs_3571/
2 |
--------------------------------------------------------------------------------
/tests/test_commands.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Tests of the TranskribusPyClient command-line utilities
4 | #
5 | # JL Meunier - Nov 29th 2016
6 | #
7 | # Copyright Xerox(C) 2016 H. Déjean, JL. Meunier
8 | #
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | # Developed for the EU project READ. The READ project has received funding
19 | # from the European Union's Horizon 2020 research and innovation programme
20 | # under grant agreement No 674943.
21 |
22 | # ------------------------------------------------------------------------------------------------------------------------
23 | # --- CONFIGURATION SECTION
24 | # ------------------------------------------------------------------------------------------------------------------------
25 |
26 | #transkribus valid login
27 | login="herve.dejean@naverlabs.com"
28 | passwd=""
29 |
30 | #some existing collection with read access for you
31 | colId=3571
32 | #2 existing documents, forming a small range
33 | docId_A=7749
34 | docId_B=7750
35 | TRP=tst.trp
36 |
37 | #PYTHON=python
38 | PYTHON=/drives/c/Local/anaconda3/envs/py36/python.exe
39 |
40 | # ------------------------------------------------------------------------------------------------------------------------
41 | # --- GENERIC STUFF BELOW
42 | # ------------------------------------------------------------------------------------------------------------------------
43 |
44 | SRC=`dirname "$0"`/../src
45 |
46 | tmp_col_name="toto_$$"
47 |
48 | # ------------------------------------------------------------------------------------------------------------------------
49 |
50 | function error {  # usage: error MESSAGE -- report a failed test step and abort the whole run
51 |     echo "ERROR: $1" >&2  # diagnostics go to stderr so they never mix with captured/piped stdout
52 |     exit 1                # non-zero status marks the test run as failed
53 | }
54 |
55 | # ------------------------------------------------------------------------------------------------------------------------
56 |
57 | #---------------------------------------------------
58 | #cleaning any persistent login info
59 | echo "==================================================================="
60 | echo "--- logout"
61 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_logout.py --persist`
62 | echo "OK"
63 |
64 | #testing a bad login
65 | echo
66 | echo "--- login"
67 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_login.py --persist -l "tilla" -p "miaouuuu"` && error "login should have failed"
68 | echo
69 | echo "OK"
70 |
71 | #making a login and persisting the session token
72 | echo
73 | echo "--- login"
74 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_login.py --persist -l "$login" -p "$passwd"` || error "login error"
75 | echo "OK"
76 |
77 | #---------------------------------------------------
78 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
79 |
80 | echo
81 | echo "--- creating a collection $tmp_col_name"
82 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_createCollec.py --persist $tmp_col_name` || error "collection creation error"
83 | echo "--> $tmp_col_id"
84 | echo "OK"
85 |
86 | #---------------------------------------------------
87 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
88 | echo
89 | echo "--- adding doc $docId_A to the new collection"  # single-document form; the range form is the next step
90 | $PYTHON $SRC/TranskribusCommands/do_addDocToCollec.py --persist $tmp_col_id $docId_A || error "collection add error 1"
91 | echo "OK"
92 |
93 | echo
94 | echo "--- adding doc $docId_A - $docId_B to the new collection"
95 | $PYTHON $SRC/TranskribusCommands/do_addDocToCollec.py --persist $tmp_col_id $docId_A-$docId_B || error "collection add error 2"
96 | echo "OK"
97 |
98 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
99 | echo
100 | echo "--- copying doc $docId_A from collection $colId to the new collection"
101 | $PYTHON $SRC/TranskribusCommands/do_duplicateDoc.py --persist $colId $tmp_col_id $docId_A || error "collection copy error 1"
102 | echo "OK"
103 |
104 | #---------------------------------------------------
105 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
106 | echo
107 | echo "--- deleting it ( $tmp_col_id ) "
108 | tmp_col_id=`$PYTHON $SRC/TranskribusCommands/do_deleteCollec.py --persist $tmp_col_id` || error "collection deletion error"
109 | echo "OK"
110 |
111 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
112 | echo
113 | echo "--- display trpdoc of the first page of $docId_A from collection $colId "
114 | $PYTHON $SRC/TranskribusCommands/do_getDocTrp.py --persist $colId $docId_A 1 || error "getDocTrp error 1"
115 | echo "OK"
116 |
117 |
118 |
119 | #---------------------------------------------------
120 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
121 | echo
122 | echo "--- listing collection $colId "
123 | $PYTHON $SRC/TranskribusCommands/do_listCollec.py --persist $colId || error "collection list error"
124 | echo "OK"
125 |
126 | #---------------------------------------------------
127 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
128 | echo
129 | echo "--- Layout Analysis in collection $colId "
130 | $PYTHON $SRC/TranskribusCommands/do_analyzeLayout.py $colId $docId_A/1 || error "layout analysis error"
131 | echo "OK"
132 |
133 | #---------------------------------------------------
134 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
135 | echo
136 | echo "--- delete last transcript $colId / $docId_A / 1 "  # banner now names the variables actually passed below
137 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId $docId_A 1 --last --rm || error " delete last transcript error"
138 | echo "OK"
139 |
140 | #---------------------------------------------------
141 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
142 | echo
143 | echo "--- list of locked pages for $docId_A in $colId "
144 | $PYTHON $SRC/TranskribusCommands/do_listPageLocks.py $colId $docId_A || error "locked pages error"
145 | echo "OK"
146 |
147 | #---------------------------------------------------
148 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
149 | echo
150 | echo "--- list HTR models in collection $colId "
151 | $PYTHON $SRC/TranskribusCommands/do_listHtrRnn.py --colid=$colId || error "list HTR models error"
152 | echo "OK"
153 |
154 | #---------------------------------------------------
155 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
156 | echo
157 | echo "--- list trpdoc for document $docId_A in $colId "
158 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId $docId_A || error " transcript list models error"
159 | echo "OK"
160 |
161 |
162 | #---------------------------------------------------
163 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
164 | echo
165 | echo "--- save trpdoc for document $docId_A in $TRP "  # $TRP is reused by the download/upload steps further down
166 | $PYTHON $SRC/TranskribusCommands/do_transcript.py $colId $docId_A 2 --trp=$TRP || error " transcript save trp error"
167 |
168 | echo "OK"
169 |
170 |
171 | #---------------------------------------------------
172 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
173 | echo
174 | echo "--- download as per trp ---"
175 | rm -rf trnskrbs_$colId
176 | echo "--- download using $TRP "
177 | $PYTHON $SRC/TranskribusCommands/Transkribus_downloader.py $colId --trp=$TRP || error " download error"
178 | echo "OK"
179 | #---------------------------------------------------
180 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
181 | echo
182 | echo "--- download trnskrbs_$colId document $docId_A ---"
183 | rm -rf trnskrbs_$colId
184 | echo "--- download document $docId_A ($colId) "
185 | $PYTHON $SRC/TranskribusCommands/Transkribus_downloader.py $colId --docid=$docId_A --noimage || error " download error"
186 | echo "OK"
187 |
188 | #---------------------------------------------------
189 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
190 | echo
191 | echo "--- upload document $docId_A ($colId ) "  # uploads from the trnskrbs_$colId folder filled by the download step
192 | $PYTHON $SRC/TranskribusCommands/TranskribusDU_transcriptUploader.py trnskrbs_$colId $colId $docId_A --nodu || error " TranskribusDU_transcriptUploader upload error"
193 | echo "OK"
194 |
195 | #---------------------------------------------------
196 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
197 | echo
198 | echo "--- upload as per trp $TRP "
199 | $PYTHON $SRC/TranskribusCommands/Transkribus_uploader.py trnskrbs_$colId $colId $docId_A --trp=$TRP || error " Transkribus_uploader upload error"
200 | echo "OK"
201 | echo "--- rm $TRP"
202 | rm $TRP
203 |
204 | #---------------------------------------------------
205 | echo "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
206 | echo
207 | echo "--- test only --help"
208 | $PYTHON $SRC/TranskribusCommands/do_htrTrainRnn.py --help
209 |
210 | echo "==================================================================="
211 | echo "TESTs done"
212 |
213 |
214 |
215 |
216 |
--------------------------------------------------------------------------------