├── README ├── tlgu.1.pdf ├── .gitignore ├── makedocs ├── Makefile ├── tlgu.h ├── README.md ├── tlgu.1 ├── COPYING ├── LICENSE ├── tlgu.1.html ├── tlgcodes.h ├── tlgu.1.ps └── tlgu.c /README: -------------------------------------------------------------------------------- 1 | man ./tlgu.1 -------------------------------------------------------------------------------- /tlgu.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cltk/grc_software_tlgu/master/tlgu.1.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | -------------------------------------------------------------------------------- /makedocs: --------------------------------------------------------------------------------
#!/bin/sh
# makedocs
# Generates .html .ps .pdf files from a man page (requires (man), groff, ps2pdf)
# e.g. makedocs tlgu.1 will produce tlgu.1.html, tlgu.1.ps, tlgu.1.pdf
# 20-Aug-2005
# 02-Oct-2011 replaced man invocation by groff
#
# Argument:
#   $1 -- path of the groff man-page source. The three outputs are written
#         next to it as $1.html, $1.ps and $1.pdf.
# Without an argument a usage line is printed and nothing is generated.
if test -z "$1"; then
    echo Usage: makedocs MAN_PAGE_\(GROFF\)_FILE
else
    # "$1" is quoted everywhere so that a path containing spaces or glob
    # characters stays one argument for test, groff and the redirections.
    groff -T html -man "$1" > "$1.html"
    # Convert to PDF only when the PostScript file was produced successfully;
    # otherwise ps2pdf would be fed a truncated file.
    groff -T ps -man "$1" > "$1.ps" && ps2pdf "$1.ps" "$1.pdf"
fi
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for tlgu 2 | # 111205 dm 3 | 4 | #SHELL = /bin/sh 5 | 6 | INCPATH=-I . 
# Installation directory and toolchain settings.  Each of these can be
# overridden on the command line, e.g. `make CC=clang INSTDIR=/opt/bin`.
INSTDIR = /usr/local/bin
CC = gcc
#CPP =
#LN = ln
#RANLIB = ranlib
# INCPATH is set earlier in this Makefile.
CFLAGS = -O2 -g $(INCPATH)
#CPPFLAGS =
#LDFLAGS =
#LIBS =
SOURCE = tlgu.c
HEADERS = tlgu.h tlgcodes.h
#OBJECT=
TARGET = tlgu

#----------------------------------------------------------------------------------
# These targets are commands, not files: declaring them phony keeps a stray
# file named "all", "clean", "cleaner" or "install" from disabling them.
.PHONY: all clean cleaner install

all: $(TARGET)

# OBJECT is not set above (its definition is commented out), so it expands to
# nothing and only the executable is removed.
clean:
	rm -f $(OBJECT) $(TARGET)

cleaner: clean
	rm -f core *.out *~ *.bak *.old

# Single-suffix rule: build executable "foo" straight from "foo.c".
# No -c here: with -c the compiler would write an object file under the
# executable's name instead of linking a program.
.c:
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<

# The program is compiled and linked in one step.  CFLAGS must be passed here,
# otherwise the optimisation/debug options and the include path set above are
# never seen by the compiler.
$(TARGET): $(SOURCE) $(HEADERS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(SOURCE)

# Depends on "all" only: with "clean all" as prerequisites a parallel make
# (-j) may run clean while the program is being built.  The prerequisites of
# $(TARGET) already guarantee an up-to-date binary.
install: all
	cp -f $(TARGET) $(INSTDIR)

-------------------------------------------------------------------------------- /tlgu.h: -------------------------------------------------------------------------------- 1 | /* tlgu.h 2 | * 3 | * This is part of the tlgu utility 4 | * Copyright (C) 2004, 2005, 2011, 2020 Dimitri Marinakis 5 | * see the file tlgu.c for copying conditions 6 | * 7 | * Hellenic character codes 8 | * Relevant Unicode standard tables: 9 | * Greek and Coptic: 0370 - 03FF 10 | * Greek Extended: 1F00 - 1FFF 11 | * 12 | * 22-Apr-2006 dm -- added includes to satisfy picky gcc 13 | * 02-Oct-2011 dm -- added unistd.h to get STDOUT_FILENO 14 | * 16-Oct-2011 dm -- corrected lower case phi code to 03c6 (was phi symbol 3d5) 15 | * 15-May-2020 dm -- accented character table extension to accommodate U0370 block characters 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #define INRECSIZE 0x2000 28 | #define OUTRECSIZE 0xFFFFF 29 | 30 | /* Beta code escapes and state processing codes */ 31 | #define HELLENIC 1 32 | #define ROMAN 2 33 | #define PUNCTUATION 3 34 | #define QUOTATION 4 35 | #define PAGE 5 36 | #define BRACKET 6 37 | #define QUASIBRACKET 7 38 | #define NONTEXT 8 39 | #define SYMBOL 9 40 | #define HELLENIC_UPPER 0xa 41 | #define HELLENIC_SELECT 0xb 42 | #define 
HELLENIC_SIGMA 0xc 43 | /* Accent is an existing code above 0x1f */ 44 | #define ACCENT 0x2f 45 | #define HELLENIC_SIGMA_UPPER 0x10 46 | #define TABHALF 0x11 47 | #define ROMAN_SELECT 0x16 48 | #define PUNCTUATION_SELECT 0x1f 49 | #define QUOTATION_SELECT 0x29 50 | #define PAGE_SELECT 0x33 51 | #define BRACKET_SELECT 0x3d 52 | #define QUASIBRACKET_SELECT 0x47 53 | #define NONTEXT_SELECT 0x51 54 | #define SYMBOL_SELECT 0x5b 55 | #define TABHALF_SELECT 0x61 56 | 57 | /* code defines */ 58 | #define SIGMEDIAL 0x3c3 59 | #define SIGMEDIALUPPER 0x3a3 60 | #define SIGFINAL 0x3c2 61 | #define SIGFINALUPPER 0x3a3 62 | #define SIGLUNATE 0x3f2 63 | #define SIGLUNATEUPPER 0x3f9 64 | 65 | /* accents */ 66 | #define PSILI 0x313 67 | #define DASIA 0x314 68 | #define DIALYTIKA 0x308 69 | #define VARIA 0x300 70 | #define OXIA 0x301 71 | #define PERISPOMENI 0x342 72 | #define YPOGEGRAMMENI 0x345 73 | #define CARET 0x302 74 | 75 | /* TLG stream translation table -- Unicode 76 | A B G D E Z H Q I K L M N C O P R S T U F X Y W V; V is digamma 77 | A value under 0x20 is a state change control code. 78 | Zero means no character. 79 | */ 80 | unsigned int hellenic[] = { 81 | /* sp ! " # $ % & ' */ 82 | 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 83 | /* ( ) * + , - . / */ 84 | ACCENT, ACCENT, HELLENIC_UPPER, ACCENT, 0x2c, 0x2d, 0x2e, ACCENT, 85 | /* 0 1 2 3 4 5 6 7 */ 86 | 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 87 | /* 8 9 : ; < = > ? 
@ */ 88 | 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 89 | /* a b c d e f g h */ 90 | 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3, 0x3b7, 91 | /* i j k l m n o p */ 92 | 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, 0x3c0, 93 | /* q r s t u v w x */ 94 | 0x3b8, 0x3c1, 0x3c2, 0x3c4, 0x3c5, 0x3dd, 0x3c9, 0x3c7, 95 | /* y z [ \ ] ^ _ sep`*/ 96 | 0x3c8, 0x3b6, 0x54, 0x55, 0x56, 0x57, 0x00, 0x00, 97 | /* A B C D E F G H */ 98 | 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, 0x397, 99 | /* I J K L M N O P */ 100 | 0x399, 0x3A3, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, 0x3a0, 101 | /* Q R S T U V W X -- NOTE(review): the S entry 0x3a2 is not an assigned Unicode character; presumably capital sigma is special-cased by the caller (see SIGMEDIALUPPER) -- confirm */ 102 | 0x398, 0x3a1, 0x3a2, 0x3a4, 0x3a5, 0x3dc, 0x3a9, 0x3a7, 103 | /* Y Z { | } ~ DEL */ 104 | 0x3a8, 0x396, 0x7b, 0x7c, 0x7d, 0x00, 0x00}; 105 | 106 | 107 | /* Accents can be described in three groups, all optional 108 | * In the first group are - mutually exclusive - psili, dasia or dialytika 109 | * In the second group are - mutually exclusive - oxia, varia or perispomeni 110 | * In the third group are - mutually exclusive - ypogegrammeni, subscript dot or missing letter dot 111 | * as the last two are not part of fully-formed characters, will be used as combining diacritical marks 112 | * The simplified form is then: 113 | * [ ) or ( or + ] [ / or \ or = ] [ | ] 114 | * 115 | * This can be described by 5 accent flag bits (reverse order) 116 | * 117 | * 0 00 00 --- 0 00 00 no accent 118 | * | | | 119 | * | | ---- 01 psili, 10 dasia, 11 dialytika 120 | * | ------- 01 varia, 10 oxia, 11 perispomeni 121 | * ----------- 1 ypogegrammeni 122 | * 123 | * The resulting table of accentable characters will have 32-character rows 124 | * with the formed character codes in the appropriate positions, or zero: 125 | * plain, psili, dasia, dialytika, varia, psili-varia, dasia-varia, dialytika-varia 126 | * oxia, psili-oxia, dasia-oxia, dialytika-oxia, perispomeni, psili-perisp, dasia-perisp, dialytika-perisp 127 | * ditto with ypogegrammeni 128 | * entries for U370 
oxia, dialytica, dialytica-oxia 129 | * 130 | * If zero is returned, combining diacritical marks should be generated from the accent flags. 131 | * 132 | * 15-May-2020 dm -- extra codes (offsets 0x20, 0x21, 0x22) added to the tables 133 | * for U0370 acute (oxia) and diaeresis (dialytika) representations 134 | */ 135 | unsigned int alpha[] = { /* small alpha; index = accent flag bits, rows annotated below */ 136 | /* 0x00-0x07: no second-group accent, then varia */ 0x03b1, 0x1f00, 0x1f01, 0x0000, 0x1f70, 0x1f02, 0x1f03, 0x0000, 137 | /* 0x08-0x0f: oxia, then perispomeni */ 0x1f71, 0x1f04, 0x1f05, 0x0000, 0x1fb6, 0x1f06, 0x1f07, 0x0000, 138 | /* 0x10-0x17: as 0x00-0x07 with ypogegrammeni */ 0x1fb3, 0x1f80, 0x1f81, 0x0000, 0x1fb2, 0x1f82, 0x1f83, 0x0000, 139 | /* 0x18-0x1f: as 0x08-0x0f with ypogegrammeni */ 0x1fb4, 0x1f84, 0x1f85, 0x0000, 0x1fb7, 0x1f86, 0x1f87, 0x0000, 140 | /* 0x20-0x22: U0370 oxia, dialytika, dialytika-oxia */ 0x03ac, 0x0000, 0x0000 141 | }; 142 | unsigned int Alpha[] = { /* capital alpha, same index layout as alpha[] */ 143 | 0x0391, 0x1f08, 0x1f09, 0x0000, 0x1fba, 0x1f0a, 0x1f0b, 0x0000, 144 | 0x1fbb, 0x1f0c, 0x1f0d, 0x0000, 0x0000, 0x1f0e, 0x1f0f, 0x0000, 145 | 0x1fbc, 0x1f88, 0x1f89, 0x0000, 0x0000, 0x1f8a, 0x1f8b, 0x0000, 146 | 0x0000, 0x1f8c, 0x1f8d, 0x0000, 0x0000, 0x1f8e, 0x1f8f, 0x0000, 147 | 0x0386, 0x0000, 0x0000 148 | }; 149 | unsigned int epsilon[] = { /* small epsilon, same index layout; perispomeni and ypogegrammeni entries are all zero */ 150 | 0x03b5, 0x1f10, 0x1f11, 0x0000, 0x1f72, 0x1f12, 0x1f13, 0x0000, 151 | 0x1f73, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 152 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 153 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 154 | 0x03ad, 0x0000, 0x0000 155 | }; 156 | unsigned int Epsilon[] = { /* capital epsilon, same index layout */ 157 | 0x0395, 0x1f18, 0x1f19, 0x0000, 0x1fc8, 0x1f1a, 0x1f1b, 0x0000, 158 | 0x1fc9, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 159 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 160 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 161 | 0x0388, 0x0000, 0x0000 162 | }; 163 | unsigned int eta[] = { /* small eta, same index layout */ 164 | 0x03b7, 0x1f20, 0x1f21, 0x0000, 0x1f74, 0x1f22, 0x1f23, 0x0000, 165 | 0x1f75, 0x1f24, 0x1f25, 0x0000, 0x1fc6, 0x1f26, 0x1f27, 0x0000, 166 | 0x1fc3, 0x1f90, 0x1f91, 0x0000, 0x1fc2, 0x1f92, 0x1f93, 0x0000, 167 | 0x1fc4, 0x1f94, 0x1f95, 0x0000, 0x1fc7, 
0x1f96, 0x1f97, 0x0000, 168 | 0x03ae, 0x0000, 0x0000 169 | }; 170 | unsigned int Eta[] = { /* capital eta; indexed by the accent flag bits described in the comment above alpha[] */ 171 | 0x0397, 0x1f28, 0x1f29, 0x0000, 0x1fca, 0x1f2a, 0x1f2b, 0x0000, 172 | 0x1fcb, 0x1f2c, 0x1f2d, 0x0000, 0x0000, 0x1f2e, 0x1f2f, 0x0000, 173 | 0x1fcc, 0x1f98, 0x1f99, 0x0000, 0x0000, 0x1f9a, 0x1f9b, 0x0000, 174 | 0x0000, 0x1f9c, 0x1f9d, 0x0000, 0x0000, 0x1f9e, 0x1f9f, 0x0000, 175 | 0x0389, 0x0000, 0x0000 176 | }; 177 | unsigned int iota[] = { /* small iota; dialytika column populated, ypogegrammeni rows all zero */ 178 | 0x03b9, 0x1f30, 0x1f31, 0x03ca, 0x1f76, 0x1f32, 0x1f33, 0x1fd2, 179 | 0x1f77, 0x1f34, 0x1f35, 0x1fd3, 0x1fd6, 0x1f36, 0x1f37, 0x1fd7, 180 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 181 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 182 | 0x03af, 0x03ca, 0x0390 183 | }; 184 | unsigned int Iota[] = { /* capital iota */ 185 | 0x0399, 0x1f38, 0x1f39, 0x03aa, 0x1fda, 0x1f3a, 0x1f3b, 0x0000, 186 | 0x1fdb, 0x1f3c, 0x1f3d, 0x0000, 0x0000, 0x1f3e, 0x1f3f, 0x0000, 187 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 188 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 189 | 0x038a, 0x03aa, 0x0000 190 | }; 191 | unsigned int omicron[] = { /* small omicron; perispomeni and ypogegrammeni entries are all zero */ 192 | 0x03bf, 0x1f40, 0x1f41, 0x0000, 0x1f78, 0x1f42, 0x1f43, 0x0000, 193 | 0x1f79, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 194 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 195 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 196 | 0x03cc, 0x0000, 0x0000 197 | }; 198 | unsigned int Omicron[] = { /* capital omicron */ 199 | 0x039f, 0x1f48, 0x1f49, 0x0000, 0x1ff8, 0x1f4a, 0x1f4b, 0x0000, 200 | 0x1ff9, 0x1f4c, 0x1f4d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 201 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 202 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 203 | 0x038c, 0x0000, 0x0000 204 | }; 205 | unsigned int ypsilon[] = { /* small ypsilon; dialytika column populated, ypogegrammeni rows all zero */ 206 | 0x03c5, 0x1f50, 0x1f51, 0x03cb, 0x1f7a, 0x1f52, 0x1f53, 0x1fe2, 207 | 0x1f7b, 0x1f54, 0x1f55, 0x1fe3, 0x1fe6, 0x1f56, 0x1f57, 0x1fe7, 208 | 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 209 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 210 | 0x03cd, 0x03cb, 0x03b0 211 | }; 212 | unsigned int Ypsilon[] = { /* capital ypsilon; the psili entries are zero, only dasia forms are filled in */ 213 | 0x03a5, 0x0000, 0x1f59, 0x03ab, 0x1fea, 0x0000, 0x1f5b, 0x0000, 214 | 0x1feb, 0x0000, 0x1f5d, 0x0000, 0x0000, 0x0000, 0x1f5f, 0x0000, 215 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 216 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 217 | 0x038e, 0x03ab, 0x0000 218 | }; 219 | unsigned int omega[] = { /* small omega; indexed by the accent flag bits described in the comment above alpha[] */ 220 | 0x03c9, 0x1f60, 0x1f61, 0x0000, 0x1f7c, 0x1f62, 0x1f63, 0x0000, 221 | 0x1f7d, 0x1f64, 0x1f65, 0x0000, 0x1ff6, 0x1f66, 0x1f67, 0x0000, 222 | 0x1ff3, 0x1fa0, 0x1fa1, 0x0000, 0x1ff2, 0x1fa2, 0x1fa3, 0x0000, 223 | 0x1ff4, 0x1fa4, 0x1fa5, 0x0000, 0x1ff7, 0x1fa6, 0x1fa7, 0x0000, 224 | 0x03ce, 0x0000, 0x0000 225 | }; 226 | unsigned int Omega[] = { /* capital omega; entry 0x0c (perispomeni alone) has no precomposed form, so it is zero like Alpha[] and Eta[] and the accent is generated as a combining mark instead of being dropped */ 227 | 0x03a9, 0x1f68, 0x1f69, 0x0000, 0x1ffa, 0x1f6a, 0x1f6b, 0x0000, 228 | 0x1ffb, 0x1f6c, 0x1f6d, 0x0000, 0x0000, 0x1f6e, 0x1f6f, 0x0000, 229 | 0x1ffc, 0x1fa8, 0x1fa9, 0x0000, 0x0000, 0x1faa, 0x1fab, 0x0000, 230 | 0x0000, 0x1fac, 0x1fad, 0x0000, 0x0000, 0x1fae, 0x1faf, 0x0000, 231 | 0x038f, 0x0000, 0x0000 232 | }; 233 | unsigned int rho[] = { /* small rho; only the plain, psili and dasia entries are filled in */ 234 | 0x03c1, 0x1fe4, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 235 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 236 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 237 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 238 | 0x0000, 0x0000, 0x0000 239 | }; 240 | unsigned int Rho[] = { /* capital rho; only the plain and dasia entries are filled in */ 241 | 0x03a1, 0x0000, 0x1fec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 242 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 243 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 244 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 245 | 0x0000, 0x0000, 0x0000 246 | }; 247 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | This application was written by Dimitri Marinakis. It is hosted here to allow for better integration with the CLTK. 4 | 5 | [Project homepage](http://tlgu.carmen.gr/). 6 | 7 | # Manual 8 | ## Name 9 | tlgu − convert beta code TLG and PHI CD-ROM txt files to Unicode 10 | 11 | ## Synopsis 12 | tlgu [ options ] input_file [ output_file ] 13 | 14 | 15 | ## Description 16 | tlgu will convert an input_file from Thesaurus Linguae Graecae (TLG) and Packard Humanities Institute (PHI) representation to a Unicode (UTF-8) output_file. If output_file is not specified, the Unicode text is directed to standard output. The TLG/PHI representation consists of beta-code text and citation information. 17 | ## Options 18 | 19 | 20 | −b 21 | 22 | 23 | inserts a form feed and citation information (levels a, b, c, d) on every "book" citation change. By default the program will output line feeds only (see also −p). 24 | 25 | 26 | −p 27 | 28 | 29 | observes paging instructions. By default the program will output line feeds only. 30 | 31 | 32 | −r 33 | 34 | 35 | primarily Roman text (PHI). Some TLG texts, notably doccan1.txt and doccan2.txt are mostly roman texts lacking explicit language change codes. Setting this option will force a change to roman text after each citation block is encountered. 36 | 37 | 38 | −v 39 | 40 | 41 | highest-level reference citation is included before each text line (v-level) 42 | 43 | 44 | −w 45 | 46 | 47 | reference citation is included before each text line (w-level) 48 | 49 | 50 | −x 51 | 52 | 53 | reference citation is included before each text line (x-level) 54 | 55 | 56 | −y 57 | 58 | 59 | reference citation is included before each text line (y-level) 60 | 61 | 62 | −z 63 | 64 | 65 | lowest-level reference citation is included before each text line (z-level). 
66 | 67 | 68 | −Z 69 | 70 | 71 | an arbitrary combination of citation information is included before each text line; see also -e option e.g. "%A/%B/%x/%y/%z\t" will output the contents of the A, B citation description levels, followed by x, y, z citation reference levels, followed by a TAB character. 72 | 73 | 74 | −e 75 | 76 | 77 | if there is no citation information for a citation level defined with the -Z option above, a single right-hand slash is substituted by default; you may define any string with this option e.g. "-" or "[NONE]" are valid inputs 78 | 79 | 80 | −B 81 | 82 | 83 | inserts blank space (a tab) before each and every line. 84 | 85 | 86 | −X 87 | 88 | 89 | compact format; v, w, x citations are inserted as they change at the beginning of each section. 90 | 91 | 92 | −Y 93 | 94 | 95 | compact format; w, x, y citations are inserted as they change at the beginning of each section. 96 | 97 | 98 | −N 99 | 100 | 101 | no spaces; line ends and hyphens before an ID code are removed while hyphens and spaces before page and column ends are (still) retained. 102 | 103 | 104 | −C 105 | 106 | 107 | citation debug information is output. 108 | 109 | 110 | −S 111 | 112 | 113 | special code debug information is output. 114 | 115 | 116 | −V 117 | 118 | 119 | block processing information is output (verbose). 120 | 121 | 122 | −W 123 | 124 | 125 | each work (book) is output as a separate file in the form output_file-xxx.txt; if an output file is not specified, this option has no effect. 126 | 127 | 128 | ## History and intended use 129 | The purpose of tlgu is to translate binary TLG/PHI-format files into readable and editable text. It is based on an earlier program written in 80x86 assembly language (1996) outputting codes for a home-made font which used the prevalent hellenic font encodings of that time complemented by dead accent characters - not very attractive, but readable. 
130 | 131 | Then came Unicode and a plethora of accented character glyphs; Polytonic fonts are already available (Cardo, Gentium, Athena, Athenian, Porson); new fonts are being created and older fonts are being expanded as special-use code points are included in the Unicode definition (musical symbols, other special symbols). A notable effort since this note was originally drafted is that of the Greek Font Society, now featuring a great, and expanding, selection of open polytonic fonts. 132 | 133 | So, at this point in time, tlgu will crunch a file which has been formatted according to the published TLG/PHI format and produce codes for most glyphs generally available. No attempt has been made to introduce multi-character sequences or formatting codes (font changes). If a code has not been defined, the program will output the respective "code family" glyph. You may use the −S option to check such codes against the published beta code definition. 134 | 135 | July 2005 - Troy A. Griffitts (scribe, crosswire org) contributed the arbitrary citation output code and added per-line processing of the input file. 136 | 137 | April 2006 - Final sigma will now be output at end-of-line (!) from free-form input text (thank you Jan). 138 | 139 | October 2011 - stdout is used if output_file is not specified. 140 | 141 | November 2011 - citations (v, w, x) at the start of section changes (e-book option) 142 | 143 | ## Examples 144 | ./tlgu -r DOCCAN2.TXT doccanu.txt Translate the TLG canon to a unicode text file. Note the use of the -r option (this file expects Roman as the default font). 145 | 146 | 147 | ./tlgu -x -y -z TLG1799.TXT tlg1799u.txt 148 | 149 | 150 | Generate a continuous file with the texts of granpa Euclides. Available citations (-x -y -z) are Book//demonstratio/line as shown in the respective "cit" field of doccan2.txt. 
151 | 152 | 153 | ./tlgu -b -B TLG1799.TXT tlg1799u.txt 154 | 155 | 156 | Generate the same texts, this time with a page feed and book citation information on the first page of each book and a tab before each line (use with OOo versions earlier than 1.1.4). 157 | 158 | 159 | ./tlgu -C TLG1799.TXT tlg1799u.txt 160 | 161 | 162 | See how the citation information changes within each TLG block. 163 | 164 | 165 | ./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt 166 | 167 | 168 | Check out the symbols used in a work. Book and x, y, z references are printed on a separate line for each symbol. Sort / grep the output to locate specific symbols of interest; save in a file for later use. 169 | 170 | 171 | ./tlgu -W TLG0006.TXT tlg0006u 172 | 173 | 174 | Will produce separate files for each work, named tlg0006u-001.txt etc. 175 | 176 | 177 | ./tlgu -Z "%A/%B/%D/%c/%d/%Z/%x/%y/%z\t" -e "-" chr0010.txt chr0010u.txt 178 | 179 | 180 | Will generate a file with citation description (A, B, D, Z) and citation reference (c, d, x, y, z) levels, separated by "/" followed by a TAB character and the respective text. Blank citation elements will be filled with a single "-" e.g. Asia/Smyrna/1222-1223 ac/IGChAs/Asia Min [Chr]/88/-/2A/7p1 [TAB] inscription text etc. 181 | 182 | 183 | ./tlgu -r -N -X LAT0448.TXT LAT0448.xx.TXT 184 | 185 | 186 | will produce a compact version of the Gaius Iulius Caesar texts with v and x citations printed as they change; similarly, ./tlgu -r -N -Y LAT2150.TXT LAT2150.yy.TXT will produce a compact version of Zeno’s texts. 187 | 188 | ## Post-processing examples 189 | I use the OpenOffice/LibreOffice suite for most of my work. This example shows one of many possible ways of using the search and replace facility to create a readable version of the Suda lexicon. 
190 | 191 | 192 | ./tlgu -B TLG4085.TXT tlg4085u.txt 193 | 194 | 195 | A Unicode file with the text is created 196 | 197 | 198 | Open the generated file with OpenOffice/LibreOffice: 199 | 200 | 201 | File | Open | Filename: tlg4085u.txt, File Type: Text Encoded −− Press Open 202 | 203 | The ASCII Filter Options window appears. Select the Unicode (UTF-8) character set and a proper Unicode font installed in your machine (e.g. Cardo). Press OK. 204 | 205 | 206 | Replace angle brackets with expanded text 207 | 208 | 209 | Lexicon terms are enclosed in &lt;angle brackets&gt;. The actual beta codes indicate the use of expanded text for emphasis. Select Edit | Find & Replace. The Find & Replace window appears. 210 | 211 | In the Search For field, type the following expression: <[^<>]*> This means "find any characters between angle brackets, not including angle brackets". 212 | 213 | In the Replace With window insert a single ampersand: & This means that we need to add formatting information (this case) or additional text to the text found. Press More Options, Format... and select the Position tab; select Spacing Expanded by 2.0 points. Press OK. 214 | 215 | Check the Regular Expressions box and press Replace All. 216 | 217 | You may now replace the angle brackets with nothings. 218 | 219 | Repeat the above procedure for titles enclosed in {braces}. Write a macro... 220 | 221 | 222 | Other useful information 223 | 224 | 225 | If you are using your wordprocessor with a locale setting other than Hellenic (el_GR), the following invocation with the desired character classification may prove useful for the occasional polytonic editing: 226 | 227 | LC_CTYPE=el_GR.UTF-8 /usr/bin/soffice (or /opt/libreoffice3.4/program/soffice). 
228 | 229 | I put my default locale and keyboard definitions in my .bashrc or .profile: 230 | 231 | export LC_CTYPE=el_GR.UTF-8 232 | setxkbmap us,el ,polytonic -option grp:ctrl_shift_toggle -option grp_led:scroll 233 | 234 | This way multi-lingual text can be entered; keyboard layout switching is done by pressing Ctrl/Shift; alternate keyboard layout is indicated by the Scroll Lock light on the keyboard. 235 | 236 | ## Further development 237 | You may not like the character output for a specific code. Check out the tlgcodes.h file containing the special symbol and punctuation codes and select one to suit you better. It will probably be a while before the beta to Unicode correspondence settles down. 238 | 239 | Drop me a line, if you need a new feature; let me know if you do find any interesting applications that others can profit from. 240 | ## References 241 | 242 | 243 | There are several texts describing the internal representation of PHI and TLG text, ID data, citation data and index files. The originator of this format is the Packard Humanities Institute. The TLG is maintained by UCI − see www.tlg.uci.edu − where you may find the latest versions of the TLG Beta Code Manual and the TLG Beta Code Quick Reference Guide. 244 | 245 | Unicode consortium (www.unicode.org) publications pertaining to the codification of characters used in Hellenic literature, scientific and musical texts. 246 | 247 | The OpenOffice/LibreOffice suite in its various editions (www.openoffice.org - apache.org, www.libreoffice.org, www.neooffice.org) includes a word processor that you can use to load, process and create new polytonic texts. 248 | 249 | Greek Font Society: www.greekfontsociety.gr 250 | 251 | 252 | ## Copyright 253 | Copyright (C) 2004, 2005, 2011 Dimitri Marinakis (dm, ssa gr). 
254 | 255 | This file is part of tlgu which is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (version 2) as published by the Free Software Foundation. 256 | 257 | tlgu is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 258 | 259 | You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 260 | -------------------------------------------------------------------------------- /tlgu.1: -------------------------------------------------------------------------------- 1 | .\" Copyright (C) 2004, 2005, 2011, 2013, 2020 Dimitri Marinakis (dm, ssa gr). 2 | .\" 3 | .\" This file is part of tlgu which is free software; you can redistribute it and/or modify 4 | .\" it under the terms of the GNU General Public License (version 2) 5 | .\" as published by the Free Software Foundation. 6 | .\" 7 | .\" tlgu is distributed in the hope that it will be useful, 8 | .\" but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | .\" GNU General Public License for more details. 11 | .\" 12 | .\" You should have received a copy of the GNU General Public License 13 | .\" along with GNU Emacs; see the file COPYING. If not, write to the 14 | .\" Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 15 | .\" Boston, MA 02110-1301 USA. 
16 | .\" 17 | .TH tlgu 1 "27-July-2021" "Version 1.9" "TLG to Unicode Converter" 18 | .SH NAME 19 | 20 | .B tlgu 21 | \- convert beta code TLG and PHI CD-ROM txt files to Unicode 22 | 23 | .SH SYNOPSIS 24 | .B tlgu 25 | [ 26 | .I options 27 | ] [ 28 | .I input_file 29 | ] [ 30 | .I output_file 31 | ] 32 | 33 | .SH DESCRIPTION 34 | .B tlgu 35 | will convert an \fIinput_file\fP from Thesaurus Linguae Graecae (TLG) and Packard Humanities Institute (PHI) representation 36 | to a Unicode (UTF-8) \fIoutput_file\fP. 37 | If \fIinput_file\fP is not specified, standard input will be read. 38 | If \fIoutput_file\fP is not specified, the Unicode text is directed to standard output. 39 | The TLG/PHI representation consists of \fBbeta-code\fP text and \fBcitation\fP information. 40 | 41 | .SH OPTIONS 42 | .TP 43 | .B \-b 44 | inserts a form feed and citation information (levels a, b, c, d) on every "book" citation 45 | change. By default the program will output line feeds only (see also \fB\-p\fP). 46 | .TP 47 | .B \-p 48 | observes paging instructions. 49 | By default the program will output line feeds only. 50 | .TP 51 | .B \-r 52 | primarily Roman text (PHI). Some TLG texts, notably doccan1.txt and doccan2.txt are mostly 53 | roman texts lacking explicit language change codes. Setting this option will force 54 | a change to roman text after each citation block is encountered. 55 | .TP 56 | .B \-v 57 | highest-level reference citation is included before each text line (v-level) 58 | .TP 59 | .B \-w 60 | reference citation is included before each text line (w-level) 61 | .TP 62 | .B \-x 63 | reference citation is included before each text line (x-level) 64 | .TP 65 | .B \-y 66 | reference citation is included before each text line (y-level) 67 | .TP 68 | .B \-z 69 | lowest-level reference citation is included before each text line (z-level). 70 | .TP 71 | .B \-Z 72 | an arbitrary combination of citation information is included before each text line; 73 | see also -e option e.g. 
"%A/%B/%x/%y/%z\\t" will output the contents of the 74 | A, B \fBcitation description\fP levels, followed by x, y, z \fBcitation reference\fP levels, 75 | followed by a TAB character. 76 | .TP 77 | .B \-e 78 | if there is no citation information for a citation level defined with the -Z option above, 79 | a single right-hand slash is substituted by default; you may define any string with this option 80 | e.g. "-" or "[NONE]" are valid inputs 81 | .sp 1 82 | .TP 83 | .B \-B 84 | inserts blank space (a tab) before each and every line. 85 | .TP 86 | .B \-X 87 | compact format; v, w, x citations are inserted as they change at the beginning of each section. 88 | .TP 89 | .B \-Y 90 | compact format; w, x, y citations are inserted as they change at the beginning of each section. 91 | .TP 92 | .B \-N 93 | no spaces; line ends and hyphens before an ID code are removed while hyphens and spaces before page 94 | and column ends are (still) retained. 95 | .sp 1 96 | .TP 97 | .B \-C 98 | citation debug information is output. 99 | .TP 100 | .B \-S 101 | special code debug information is output. 102 | .TP 103 | .B \-V 104 | block processing information is output (verbose). 105 | .TP 106 | .B \-U 107 | vowels with acute accent are output using the Unicode 0x0370 codes rather than the 0x1F00 ones for compatibility with most current (as of 2020) keyboard encoders. 108 | .TP 109 | .B \-W 110 | each work (book) is output as a separate file in the form output_file-xxx.txt; 111 | if an output file is not specified, this option has no effect. 112 | 113 | .SH HISTORY AND INTENDED USE 114 | The purpose of \fBtlgu\fP is to translate binary TLG/PHI-format files into readable and editable text. 115 | It is based on an earlier program written in 80x86 assembly language (1996) outputting codes for 116 | a home-made font which used the prevalent hellenic font encodings of that time complemented 117 | by dead accent characters - not very attractive, but readable. 
118 | .sp 1 119 | Then came Unicode and a plethora of accented character glyphs; 120 | Polytonic fonts are already available (Cardo, Gentium, Athena, Athenian, Porson); new fonts 121 | are being created and older fonts are being expanded as special-use code points are included 122 | in the Unicode definition (musical symbols, other special symbols). 123 | A notable effort since this note was originally drafted is that of the Greek Font Society, 124 | now featuring a great, and expanding, selection of open polytonic fonts. 125 | .sp 1 126 | So, at this point in time, \fBtlgu\fP will crunch a file which has been formatted 127 | according to the published TLG/PHI format and produce codes for most glyphs 128 | generally available. No attempt has been made to introduce multi-character sequences 129 | or formatting codes (font changes). If a code has not been defined, the program will output 130 | the respective "code family" glyph. You may use the \fB\-S\fP option to check such codes 131 | against the published beta code definition. 132 | .sp 1 133 | July 2005 - Troy A. Griffitts (scribe, crosswire org) contributed the arbitrary citation output code and added per-line processing of the input file. 134 | .sp 1 135 | April 2006 - Final sigma will now be output at end-of-line (!) from free-form input text (thank you Jan). 136 | .sp 1 137 | October 2011 - stdout is used if output_file is not specified. 138 | .sp 1 139 | November 2011 - citations (v, w, x) at the start of section changes (e-book option) 140 | .sp 1 141 | May 2012 - Nick White (nick white, durham ac uk) revised the input arguments to use tlgu as a filter; stdin is used if input_file is not specified 142 | .sp 1 143 | May 2020 - Alternate output codes for vowels with acute accent (-U option) 144 | .sp 1 145 | July 2021 - Corrections to citation code 146 | .SH EXAMPLES 147 | .B ./tlgu -r DOCCAN2.TXT doccanu.txt 148 | Translate the TLG canon to a unicode text file. 
Note the use of the \fB-r\fP option (this file 149 | expects Roman as the default font). 150 | .TP 151 | .B ./tlgu -x -y -z TLG1799.TXT tlg1799u.txt 152 | Generate a continuous file with the texts of granpa Euclides. Available citations (-x -y -z) 153 | are Book//demonstratio/line as shown in the respective "cit" field of doccan2.txt. 154 | .TP 155 | .B ./tlgu -b -B TLG1799.TXT tlg1799u.txt 156 | Generate the same texts, this time with a page feed and book citation information on the first 157 | page of each book and a tab before each line (use with OOo versions earlier than 1.1.4). 158 | .TP 159 | .B ./tlgu -C TLG1799.TXT tlg1799u.txt 160 | See how the citation information changes within each TLG block. 161 | .TP 162 | .B ./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt 163 | Check out the symbols used in a work. Book and x, y, z references are printed on a separate 164 | line for each symbol. Sort / grep the output to locate specific symbols of interest; save in 165 | a file for later use. 166 | .TP 167 | .B ./tlgu -W TLG0006.TXT tlg0006u 168 | Will produce separate files for each work, named tlg0006u-001.txt etc. 169 | .TP 170 | .B ./tlgu -Z \N'34'%A/%B/%D/%c/%d/%Z/%x/%y/%z\et\N'34' -e \N'34'-\N'34' chr0010.txt chr0010u.txt 171 | Will generate a file with citation description (A, B, D, Z) and citation reference (c, d, x, y, z) 172 | levels, separated by "/" followed by a TAB character and the respective text. 173 | Blank citation elements will be filled with a single "-" 174 | e.g. Asia/Smyrna/1222-1223 ac/IGChAs/Asia Min [Chr]/88/-/2A/7p1 [TAB] inscription text etc. 175 | .TP 176 | .B ./tlgu -r -N -X LAT0448.TXT LAT0448.xx.TXT 177 | will produce a compact version of the Gaius Iulius Caesar texts with v and x citations printed 178 | as they change; similarly, 179 | .B ./tlgu -r -N -Y LAT2150.TXT LAT2150.yy.TXT 180 | will produce a compact version of Zeno's texts. 
181 | .SH POST-PROCESSING EXAMPLES 182 | I use the OpenOffice/LibreOffice suite for most of my work. This example shows one of many possible 183 | ways of using the search and replace facility to create a readable version of the Suda lexicon. 184 | .TP 185 | .B ./tlgu -B TLG4085.TXT tlg4085u.txt 186 | A Unicode file with the text is created 187 | .TP 188 | .B Open the generated file with Openoffice/LibreOffice: 189 | File | Open | Filename: tlg4085u.txt, 190 | File Type: Text Encoded \-\- Press Open 191 | .sp 1 192 | The ASCII Filter Options window appears. Select the Unicode (UTF-8) character set and 193 | a proper Unicode font installed in your machine (e.g. Cardo). Press OK. 194 | .TP 195 | .B Replace angle brackets with expanded text 196 | Lexicon terms are enclosed in <angle brackets>. The actual beta codes indicate the use of 197 | expanded text for emphasis. Select Edit | Find & Replace. The \fBFind & Replace\fP window appears. 198 | .sp 1 199 | In the \fBSearch For\fP field, type the following expression: \fB<[^<>]*>\fP 200 | This means "find any characters between angle brackets, not including angle brackets". 201 | .sp 1 202 | In the \fBReplace With\fP window insert a single ampersand: \fB&\fP 203 | This means that we need to \fBadd\fP formatting information (this case) or additional text to 204 | the text found. Press \fBMore Options\fP, \fBFormat...\fP and select the \fBPosition\fP tab; select Spacing 205 | Expanded by 2.0 points. Press OK. 206 | .sp 1 207 | Check the \fBRegular Expressions\fP box and press \fBReplace All\fP. 208 | .sp 1 209 | You may now replace the angle brackets with nothings. 210 | .sp 1 211 | Repeat the above procedure for titles enclosed in {braces}. Write a macro... 
212 | .TP 213 | .B Other useful information 214 | If you are using your wordprocessor with a locale setting other than Hellenic (el_GR), the following 215 | invocation with the desired character classification may prove useful for the occasional polytonic editing: 216 | .br 217 | .sp 1 218 | \fBLC_CTYPE=el_GR.UTF-8 /usr/bin/soffice\fP (or \fB/opt/libreoffice3.4/program/soffice\fP ). 219 | .br 220 | .sp 1 221 | I put my default locale and keyboard definitions in my \fB.bashrc\fP or \fB.profile\fP: 222 | .br 223 | .sp 1 224 | .na 225 | .B export LC_CTYPE=el_GR.UTF-8 226 | .br 227 | .na 228 | .B setxkbmap us,el ,polytonic -option grp:ctrl_shift_toggle -option grp_led:scroll 229 | .br 230 | .sp 1 231 | This way multi-lingual text can be entered; keyboard layout switching is done by pressing Ctrl/Shift; 232 | alternate keyboard layout is indicated by the Scroll Lock light on the keyboard. 233 | 234 | .SH FURTHER DEVELOPMENT 235 | You may not like the character output for a specific code. Check out the \fBtlgcodes.h\fP file 236 | containing the special symbol and punctuation codes and select one to suit you better. It will 237 | probably be a while before the beta to Unicode correspondence settles down. 238 | .sp 1 239 | Drop me a line, if you need a new feature; let me know if you do find 240 | any interesting applications that others can profit from. 241 | 242 | .SH REFERENCES 243 | There are several texts describing the internal representation of \fBPHI\fP and 244 | \fBTLG\fP text, ID data, citation data and index files. The originator of this 245 | format is the Packard Humanities Institute. The TLG is maintained by UCI \- see 246 | \fBwww.tlg.uci.edu\fP \- where you may find the latest versions of the \fBTLG Beta Code Manual\fP and the 247 | \fBTLG Beta Code Quick Reference Guide\fP. 248 | .sp 1 249 | Unicode consortium (\fBwww.unicode.org\fP) publications pertaining to the codification 250 | of characters used in Hellenic literature, scientific and musical texts. 
251 | .sp 1 252 | The OpenOffice/LibreOffice suite in its various editions 253 | (\fBwww.openoffice.org\fP - apache.org, \fBwww.libreoffice.org\fP, \fBwww.neooffice.org\fP) 254 | includes a word processor that you can use to load, process and create new polytonic texts. 255 | .sp 1 256 | Greek Font Society: \fBwww.greekfontsociety.gr\fP 257 | 258 | .SH COPYRIGHT 259 | Copyright (C) 2004, 2005, 2011, 2013, 2020, 2021 Dimitri Marinakis (dm, ssa gr). 260 | 261 | This file is part of tlgu which is free software; you can redistribute it and/or modify 262 | it under the terms of the GNU General Public License (version 2) as published by 263 | the Free Software Foundation. 264 | 265 | tlgu is distributed in the hope that it will be useful, 266 | but WITHOUT ANY WARRANTY; without even the implied warranty of 267 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 268 | GNU General Public License for more details. 269 | 270 | You should have received a copy of the GNU General Public License 271 | along with this program; if not, write to the Free Software 272 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 273 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users.
This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 
49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) year name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
332 | 333 | <signature of Ty Coon>, 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License. 341 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things.
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /tlgu.1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 16 | tlgu 17 | 18 | 19 | 20 | 21 |

tlgu

22 | 23 | NAME
24 | SYNOPSIS
25 | DESCRIPTION
26 | OPTIONS
27 | HISTORY AND INTENDED USE
28 | EXAMPLES
29 | POST-PROCESSING EXAMPLES
30 | FURTHER DEVELOPMENT
31 | REFERENCES
32 | COPYRIGHT
33 | 34 |
35 | 36 | 37 |

NAME 38 | 39 |

40 | 41 | 42 |

tlgu 43 | − convert beta code TLG and PHI CD-ROM txt files to 44 | Unicode

45 | 46 |

SYNOPSIS 47 | 48 |

49 | 50 | 51 |

tlgu [ 52 | options ] [ input_file ] [ output_file 53 | ]

54 | 55 |

DESCRIPTION 56 | 57 |

58 | 59 | 60 |

tlgu 61 | will convert an input_file from Thesaurus Linguae 62 | Graecae (TLG) and Packard Humanities Institute (PHI) 63 | representation to a Unicode (UTF-8) output_file. If 64 | input_file is not specified, standard input will be 65 | read. If output_file is not specified, the Unicode 66 | text is directed to standard output. The TLG/PHI 67 | representation consists of beta-code text and 68 | citation information.

69 | 70 |

OPTIONS 71 | 72 |

73 | 74 | 75 | 77 | 78 | 79 | 83 | 84 | 91 | 92 | 93 | 97 | 98 | 103 | 104 | 105 | 109 | 110 | 118 | 119 | 120 | 124 | 125 | 130 | 131 | 132 | 136 | 137 | 142 | 143 | 144 | 148 | 149 | 154 | 155 | 156 | 160 | 161 | 166 | 167 | 168 | 172 | 173 | 178 |
80 | 81 | 82 |

−b

85 | 86 | 87 |

inserts a form feed and citation 88 | information (levels a, b, c, d) on every "book" 89 | citation change. By default the program will output line 90 | feeds only (see also −p).

94 | 95 | 96 |

−p

99 | 100 | 101 |

observes paging instructions. By default the program 102 | will output line feeds only.

106 | 107 | 108 |

−r

111 | 112 | 113 |

primarily Roman text (PHI). Some TLG texts, notably 114 | doccan1.txt and doccan2.txt are mostly roman texts lacking 115 | explicit language change codes. Setting this option will 116 | force a change to roman text after each citation block is 117 | encountered.

121 | 122 | 123 |

−v

126 | 127 | 128 |

highest-level reference citation is included before each 129 | text line (v-level)

133 | 134 | 135 |

−w

138 | 139 | 140 |

reference citation is included before each text line 141 | (w-level)

145 | 146 | 147 |

−x

150 | 151 | 152 |

reference citation is included before each text line 153 | (x-level)

157 | 158 | 159 |

−y

162 | 163 | 164 |

reference citation is included before each text line 165 | (y-level)

169 | 170 | 171 |

−z

174 | 175 | 176 |

lowest-level reference citation is included before each 177 | text line (z-level).

179 | 180 |

−Z 181 | <custom_citation_format_string>

182 | 183 |

an arbitrary combination of 184 | citation information is included before each text line; see 185 | also -e option e.g. "%A/%B/%x/%y/%z\t" will output 186 | the contents of the A, B citation description levels, 187 | followed by x, y, z citation reference levels, 188 | followed by a TAB character.

189 | 190 |

−e 191 | <custom_blank_citation_string>

192 | 193 |

if there is no citation 194 | information for a citation level defined with the -Z option 195 | above, a single right-hand slash is substituted by default; 196 | you may define any string with this option e.g. 197 | "-" or "[NONE]" are valid inputs

198 | 199 | 201 | 202 | 203 | 207 | 208 | 213 | 214 | 215 | 219 | 220 | 225 | 226 | 227 | 231 | 232 | 237 | 238 | 239 | 243 | 244 | 250 | 251 | 252 | 256 | 257 | 261 | 262 | 263 | 267 | 268 | 272 | 273 | 274 | 278 | 279 | 283 | 284 | 285 | 289 | 290 | 296 | 297 | 298 | 302 | 303 | 309 |
204 | 205 | 206 |

−B

209 | 210 | 211 |

inserts blank space (a tab) before each and every 212 | line.

216 | 217 | 218 |

−X

221 | 222 | 223 |

compact format; v, w, x citations are inserted as they 224 | change at the beginning of each section.

228 | 229 | 230 |

−Y

233 | 234 | 235 |

compact format; w, x, y citations are inserted as they 236 | change at the beginning of each section.

240 | 241 | 242 |

−N

245 | 246 | 247 |

no spaces; line ends and hyphens before an ID code are 248 | removed while hyphens and spaces before page and column ends 249 | are (still) retained.

253 | 254 | 255 |

−C

258 | 259 | 260 |

citation debug information is output.

264 | 265 | 266 |

−S

269 | 270 | 271 |

special code debug information is output.

275 | 276 | 277 |

−V

280 | 281 | 282 |

block processing information is output (verbose).

286 | 287 | 288 |

−U

291 | 292 | 293 |

vowels with acute accent are output using the Unicode 294 | 0x0370 codes rather than the 0x1F00 ones for compatibility 295 | with most current (as of 2020) keyboard encoders.

299 | 300 | 301 |

−W

304 | 305 | 306 |

each work (book) is output as a separate file in the 307 | form output_file-xxx.txt; if an output file is not 308 | specified, this option has no effect.

310 | 311 |

HISTORY AND INTENDED USE 312 | 313 |

314 | 315 | 316 |

The purpose of 317 | tlgu is to translate binary TLG/PHI-format files into 318 | readable and editable text. It is based on an earlier 319 | program written in 80x86 assembly language (1996) outputting 320 | codes for a home-made font which used the prevalent hellenic 321 | font encodings of that time complemented by dead accent 322 | characters - not very attractive, but readable.

323 | 324 |

Then came 325 | Unicode and a plethora of accented character glyphs; 326 | Polytonic fonts are already available (Cardo, Gentium, 327 | Athena, Athenian, Porson); new fonts are being created and 328 | older fonts are being expanded as special-use code points 329 | are included in the Unicode definition (musical symbols, 330 | other special symbols). A notable effort since this note was 331 | originally drafted is that of the Greek Font Society, now 332 | featuring a great, and expanding, selection of open 333 | polytonic fonts.

334 | 335 |

So, at this 336 | point in time, tlgu will crunch a file which has been 337 | formatted according to the published TLG/PHI format and 338 | produce codes for most glyphs generally available. No 339 | attempt has been made to introduce multi-character sequences 340 | or formatting codes (font changes). If a code has not been 341 | defined, the program will output the respective "code 342 | family" glyph. You may use the −S option 343 | to check such codes against the published beta code 344 | definition.

345 | 346 |

July 2005 - 347 | Troy A. Griffitts (scribe, crosswire org) contributed the 348 | arbitrary citation output code and added per-line processing 349 | of the input file.

350 | 351 |

April 2006 - 352 | Final sigma will now be output at end-of-line (!) from 353 | free-form input text (thank you Jan).

354 | 355 |

October 2011 - 356 | stdout is used if output_file is not specified.

357 | 358 |

November 2011 - 359 | citations (v, w, x) at the start of section changes (e-book 360 | option)

361 | 362 |

May 2012 - Nick 363 | White (nick white, durham ac uk) revised the input arguments 364 | to use tlgu as a filter; stdin is used if input_file is not 365 | specified

366 | 367 |

May 2020 - 368 | Alternate output codes for vowels with acute accent (-U 369 | option)

370 | 371 |

July 2021 - 372 | Corrections to citation code

373 | 374 |

EXAMPLES 375 | 376 |

377 | 378 | 379 |

./tlgu -r 380 | DOCCAN2.TXT doccanu.txt Translate the TLG canon to a 381 | unicode text file. Note the use of the -r option 382 | (this file expects Roman as the default font).
383 | ./tlgu -x -y -z TLG1799.TXT tlg1799u.txt

384 | 385 |

Generate a continuous file with 386 | the texts of grandpa Euclides. Available citations (-x -y -z) 387 | are Book//demonstratio/line as shown in the respective 388 | "cit" field of doccan2.txt.

389 | 390 |

./tlgu -b -B TLG1799.TXT 391 | tlg1799u.txt

392 | 393 |

Generate the same texts, this 394 | time with a form feed and book citation information on the 395 | first page of each book and a tab before each line (use with 396 | OOo versions earlier than 1.1.4).

397 | 398 |

./tlgu -C TLG1799.TXT 399 | tlg1799u.txt

400 | 401 |

See how the citation 402 | information changes within each TLG block.

403 | 404 |

./tlgu -S TLG1799.TXT 405 | tlg1799u.txt | sort > symbols1799.txt

406 | 407 |

Check out the symbols used in a 408 | work. Book and x, y, z references are printed on a separate 409 | line for each symbol. Sort / grep the output to locate 410 | specific symbols of interest; save in a file for later 411 | use.

412 | 413 |

./tlgu -W TLG0006.TXT 414 | tlg0006u

415 | 416 |

Will produce separate files for 417 | each work, named tlg0006u-001.txt etc.

418 | 419 |

./tlgu -Z 420 | "%A/%B/%D/%c/%d/%Z/%x/%y/%z\t" -e "-" 421 | chr0010.txt
422 | chr0010u.txt

423 | 424 |

Will generate a file with 425 | citation description (A, B, D, Z) and citation reference (c, 426 | d, x, y, z) levels, separated by "/" followed by a 427 | TAB character and the respective text. Blank citation 428 | elements will be filled with a single "-" e.g. 429 | Asia/Smyrna/1222-1223 ac/IGChAs/Asia Min [Chr]/88/-/2A/7p1 430 | [TAB] inscription text etc.

431 | 432 |

./tlgu -r -N -X LAT0448.TXT 433 | LAT0448.xx.TXT

434 | 435 |

will produce a compact version 436 | of the Gaius Iulius Caesar texts with v and x citations 437 | printed as they change; similarly, ./tlgu -r -N -Y 438 | LAT2150.TXT LAT2150.yy.TXT will produce a compact 439 | version of Zeno’s texts.

440 | 441 |

POST-PROCESSING EXAMPLES 442 | 443 |

444 | 445 | 446 |

I use the 447 | OpenOffice/LibreOffice suite for most of my work. This 448 | example shows one of many possible ways of using the search 449 | and replace facility to create a readable version of the 450 | Suda lexicon.
451 | ./tlgu -B TLG4085.TXT tlg4085u.txt

452 | 453 |

A Unicode file with the text is 454 | created

455 | 456 |

Open the generated file with 457 | OpenOffice/LibreOffice:

458 | 459 |

File | Open | Filename: 460 | tlg4085u.txt, File Type: Text Encoded −− Press 461 | Open

462 | 463 |

The ASCII 464 | Filter Options window appears. Select the Unicode (UTF-8) 465 | character set and a proper Unicode font installed in your 466 | machine (e.g. Cardo). Press OK.

467 | 468 |

Replace angle brackets with 469 | expanded text

470 | 471 |

Lexicon terms are enclosed in 472 | <angle brackets>. The actual beta codes indicate the 473 | use of expanded text for emphasis. Select Edit | Find & 474 | Replace. The Find & Replace window appears.

475 | 476 |

In the 477 | Search For field, type the following expression: 478 | <[^<>]*> This means "find any 479 | characters between angle brackets, not including angle 480 | brackets".

481 | 482 |

In the 483 | Replace With window insert a single ampersand: 484 | & This means that we need to add 485 | formatting information (this case) or additional text to the 486 | text found. Press More Options, Format... and 487 | select the Position tab; select Spacing Expanded by 488 | 2.0 points. Press OK.

489 | 490 |

Check the 491 | Regular Expressions box and press Replace 492 | All.

493 | 494 |

You may now 495 | replace the angle brackets with nothing.

496 | 497 |

Repeat the 498 | above procedure for titles enclosed in {braces}. Write a 499 | macro...

500 | 501 |

Other useful 502 | information

503 | 504 |

If you are using your 505 | wordprocessor with a locale setting other than Hellenic 506 | (el_GR), the following invocation with the desired character 507 | classification may prove useful for the occasional polytonic 508 | editing:

509 | 510 | 511 |

LC_CTYPE=el_GR.UTF-8 512 | /usr/bin/soffice (or 513 | /opt/libreoffice3.4/program/soffice ).

514 | 515 |

I put my 516 | default locale and keyboard definitions in my .bashrc 517 | or .profile:

518 | 519 |

export 520 | LC_CTYPE=el_GR.UTF-8
521 | setxkbmap us,el ,polytonic -option grp:ctrl_shift_toggle 522 | -option grp_led:scroll

523 | 524 |

This way 525 | multi-lingual text can be entered; keyboard layout switching 526 | is done by pressing Ctrl/Shift; alternate keyboard layout is 527 | indicated by the Scroll Lock light on the keyboard.

528 | 529 |

FURTHER DEVELOPMENT 530 | 531 |

532 | 533 | 534 |

You may not 535 | like the character output for a specific code. Check out the 536 | tlgcodes.h file containing the special symbol and 537 | punctuation codes and select one to suit you better. It will 538 | probably be a while before the beta to Unicode 539 | correspondence settles down.

540 | 541 |

Drop me a line, 542 | if you need a new feature; let me know if you do find an 543 | interesting application that others can profit from.

544 | 545 |

REFERENCES 546 | 547 |

548 | 549 | 550 |

There are 551 | several texts describing the internal representation of 552 | PHI and TLG text, ID data, citation data and 553 | index files. The originator of this format is the Packard 554 | Humanities Institute. The TLG is maintained by UCI − 555 | see www.tlg.uci.edu − where you may find the 556 | latest versions of the TLG Beta Code Manual and the 557 | TLG Beta Code Quick Reference Guide.

558 | 559 |

Unicode 560 | consortium (www.unicode.org) publications pertaining 561 | to the codification of characters used in Hellenic 562 | literature, scientific and musical texts.

563 | 564 |

The 565 | OpenOffice/LibreOffice suite in its various editions 566 | (www.openoffice.org - apache.org, 567 | www.libreoffice.org, www.neooffice.org) 568 | includes a word processor that you can use to load, process 569 | and create new polytonic texts.

570 | 571 |

Greek Font 572 | Society: www.greekfontsociety.gr

573 | 574 |

COPYRIGHT 575 | 576 |

577 | 578 | 579 |

Copyright (C) 580 | 2004, 2005, 2011, 2013, 2020, 2021 Dimitri Marinakis (dm, 581 | ssa gr).

582 | 583 |

This file is 584 | part of tlgu which is free software; you can redistribute it 585 | and/or modify it under the terms of the GNU General Public 586 | License (version 2) as published by the Free Software 587 | Foundation.

588 | 589 |

tlgu is 590 | distributed in the hope that it will be useful, but WITHOUT 591 | ANY WARRANTY; without even the implied warranty of 592 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 593 | GNU General Public License for more details.

594 | 595 |

You should have 596 | received a copy of the GNU General Public License along with 597 | this program; if not, write to the Free Software Foundation, 598 | Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 599 | USA

600 |
601 | 602 | 603 | -------------------------------------------------------------------------------- /tlgcodes.h: -------------------------------------------------------------------------------- 1 | /* tlgcodes.h 2 | * 3 | * This is part of the tlgu utility 4 | * Copyright (C) 2004, 2005, 2011, 2020 Dimitri Marinakis 5 | * see the file tlgu.c for copying conditions 6 | * 7 | * Code arrays for escape sequences 8 | * See: handle_escape_codes 9 | * 10 | */ 11 | 12 | /* Punctuation codes (%) 0 - 170 13 | FIXME: %12, %29, %47, %48-%49, (%50 - %81), %105, 14 | %138, %139, %140, %144, %145, %151-153, %157, %171, %186 15 | 15-May-2020 dm -- updated i.a.w. quickbeta.pdf (14-Jan-2016) 16 | */ 17 | #define MAX_PUNCTUATION 190 18 | unsigned int punctuation[] = { 19 | 0x2020, 0x003f, 0x002a, 0x002f, 0x0021, 0x007c, 0x003d, 0x002b, 0x0025, 0x0026, 20 | 0x003a, 0x2022, 0x203b, 0x2021, 0x00a7, 0x02c8, 0x00a6, 0x2016, 0x0027, 0x2013, 21 | 0x0301, 0x0300, 0x0302, 0x0308, 0x0342, 0x0327, 0x0304, 0x0306, 0x0308, 0x0324, 22 | 0x02bc, 0x02bd, 0x00b4, 0x0060, 0x1fc0, 0x1fce, 0x1fde, 0x1fdd, 0x1fdf, 0x00a8, 23 | 0x23d1, 0x2013, 0x23D5, 0x00d7, 0x23d2, 0x23d3, 0x23d4, 0x003d, 0x0025, 0x0025, 24 | 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 25 | 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 26 | 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 27 | 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 28 | 0x0025, 0x0485, 0x0486, 0x1dc0, 0x0307, 0x1dc1, 0x035c, 0x0307, 0x0022, 0x2248, 29 | 0x003b, 0x0023, 0x2018, 0x005c, 0x005e, 0x2980, 0x224c, 0x007e, 0x00b1, 0x00b7, 30 | 0x25cb, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 31 | 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x032f, 0x030C, 0x2020, 32 | 0x0307, 0x0025, 0x0385, 0x1fcd, 0x1fcf, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 33 | 0x2261, 0x23D6, 0x2510, 0x0025, 0x0025, 
0x0025, 0x00b7, 0x030a, 0x030c, 0x0328, 34 | 0x007c, 0x002d, 0x2219, 0x002d, 0x2234, 0x2235, 0x0025, 0x0025, 0x2042, 0x00d7, 35 | 0x002d, 0x00f7, 0x0338, 0x00b6, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 36 | 0x0359, 0x002f, 0x02bc, 0x02bd, 0x00b4, 0x0060, 0x1fc0, 0x0313, 0x0314, 0x0323, 37 | 0x033d, 0x032d, 0x0361, 0x035d, 0x035e, 0x2319, 0x0025, 0x00ac, 0x031a, 0x0025 38 | }; 39 | 40 | /* Text Symbols (#) 0 - 1528 41 | * May be preceded by upper case (*) this table contains only upper case characters 42 | * 43 | * FIXME-TEST: #20, #21, #24, #25, #27, #30, #31, #53, #54, #56, 44 | * #61, #62, #64, #66, #68, #87, #102, -#134, #136-#150, #152-#199 45 | * #240-end 46 | * 15-May-2020 dm -- updated i.a.w. quickbeta.pdf (14-Jan-2016) 47 | */ 48 | #define MAX_TEXT_SYMBOLS 1530 49 | unsigned int text_symbols[] = { 50 | 0x00374, 0x003de, 0x003da, 0x003d8, 0x003de, 0x003e0, 0x02e0f, 0x00023, 0x02e10, 0x00301, 51 | 0x003fd, 0x003ff, 0x02014, 0x0203b, 0x02e16, 0x0003e, 0x003fe, 0x0002f, 0x0003c, 0x00300, 52 | 0x10175, 0x10176, 0x00375, 0x003d8, 0x10176, 0x10176, 0x02e0f, 0x0221a, 0x00023, 0x000b7, 53 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 54 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 55 | 0x00023, 0x000b7, 0x0205a, 0x0205d, 0x00023, 0x02059, 0x00023, 0x00023, 0x00023, 0x003fd, 56 | 0x00399, 0x10142, 0x10143, 0x00394, 0x10144, 0x00397, 0x10145, 0x003a7, 0x10146, 0x0039c, 57 | 0x0002e, 0x000b7, 0x002d9, 0x0205a, 0x0205d, 0x0002e, 0x00023, 0x00023, 0x00023, 0x00023, 58 | 0x00308, 0x00027, 0x002ca, 0x002cb, 0x01fc0, 0x002bd, 0x002bc, 0x00023, 0x00023, 0x00023, 59 | 0x02014, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 60 | /* 100 */ 61 | 0x10186, 0x1017b, 0x10182, 0x0039b, 0x10182, 0x00023, 0x10184, 0x00023, 0x00023, 0x10182, 62 | 0x00023, 0x10182, 0x10188, 0x1017c, 0x10140, 0x10189, 0x1017c, 0x10183, 0x003bb, 0x1017d, 63 | 0x10184, 
0x003be, 0x1017d, 0x1017c, 0x00023, 0x10182, 0x00023, 0x00023, 0x003fc, 0x00023, 64 | 0x1018a, 0x10177, 0x00023, 0x00023, 0x00023, 0x002d9, 0x003a3, 0x00023, 0x00023, 0x00023, 65 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 66 | 0x0221e, 0x02014, 0x00023, 0x00023, 0x00023, 0x00023, 0x02310, 0x00023, 0x00023, 0x00023, 67 | 0x00043, 0x10175, 0x025a1, 0x00375, 0x00023, 0x005d0, 0x02a5a, 0x00023, 0x00023, 0x10175, 68 | 0x02161, 0x10175, 0x10176, 0x10175, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 69 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 70 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 71 | /* 200 */ 72 | 0x02643, 0x025a1, 0x0264f, 0x0264d, 0x02640, 0x02650, 0x02644, 0x02609, 0x0263f, 0x0263e, 73 | 0x02642, 0x02651, 0x0264c, 0x02648, 0x0264e, 0x0264a, 0x0264b, 0x02653, 0x02652, 0x02649, 74 | 0x0260d, 0x0263d, 0x0260c, 0x02605, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 75 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 76 | 0x10177, 0x0260b, 0x02651, 0x00023, 0x0264c, 0x0264e, 0x02126, 0x02127, 0x00023, 0x00023, 77 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 78 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 79 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 80 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 81 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 82 | /* 300 */ 83 | 0x02321, 0x00023, 0x02e0e, 0x0003e, 0x02e0e, 0x02e0e, 0x0003d, 0x02e0e, 0x00023, 0x02e0e, 84 | 0x02e0e, 0x02e0e, 0x02e0e, 0x02e0e, 0x02e0e, 0x02251, 0x00023, 0x00023, 0x00023, 0x02022, 85 | 0x02629, 0x02629, 0x02627, 0x0003e, 0x00023, 0x00023, 0x00023, 0x00023, 
0x00023, 0x00023, 86 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 87 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 88 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 89 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 90 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 91 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 92 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 93 | /* 400 */ 94 | 0x00370, 0x00373, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 95 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 96 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 97 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 98 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 99 | 0x00023, 0x00283, 0x02e10, 0x02e11, 0x02e10, 0x02e11, 0x02e0e, 0x02e0f, 0x003a7, 0x000b7, 100 | 0x02014, 0x0007c, 0x02627, 0x00023, 0x00023, 0x02627, 0x02138, 0x02192, 0x00023, 0x00023, 101 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00283, 0x00023, 0x00023, 0x00023, 102 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 103 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 104 | /* 500 */ 105 | 0x00023, 0x00023, 0x02609, 0x00023, 0x02e0e, 0x0205c, 0x02e15, 0x02e14, 0x0203b, 0x00023, 106 | 0x00023, 0x00023, 0x003fd, 0x00023, 0x00023, 0x10185, 0x00023, 0x10185, 0x10179, 0x02191, 107 | 0x02629, 0x00023, 0x00023, 0x02e13, 0x02297, 0x0271b, 0x02190, 0x002c6, 0x00023, 0x00023, 108 | 0x00023, 0x0035c, 0x02e12, 0x003da, 
0x00311, 0x00023, 0x00023, 0x00023, 0x001b7, 0x00023, 109 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x02263, 110 | 0x02237, 0x025cc, 0x005e6, 0x005d1, 0x022bb, 0x02208, 0x02629, 0x00023, 0x00023, 0x00023, 111 | 0x00023, 0x02191, 0x00305, 0x1d242, 0x1d243, 0x1d244, 0x1d231, 0x1d213, 0x1d233, 0x1d236, 112 | 0x003f9, 0x10143, 0x1d229, 0x1d212, 0x00393, 0x1d215, 0x1d216, 0x003a6, 0x003a1, 0x0039c, 113 | 0x00399, 0x00398, 0x02228, 0x0039d, 0x02127, 0x00396, 0x1d239, 0x00395, 0x1d208, 0x1d21a, 114 | 0x1d23f, 0x1d21b, 0x1d240, 0x0039b, 0x022b8, 0x00036, 0x00039, 0x0230b, 0x00394, 0x1d214, 115 | /* 600 */ 116 | 0x1d228, 0x00023, 0x1d237, 0x003a0, 0x1d226, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 117 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x1d230, 0x1d21e, 0x003a9, 0x00023, 0x003bb, 118 | 0x00023, 0x1d205, 0x1d201, 0x00023, 0x00023, 0x00023, 0x00023, 0x1d217, 0x0039f, 0x0039e, 119 | 0x00394, 0x00399, 0x1d20e, 0x1d232, 0x1d239, 0x1d200, 0x1d203, 0x1d207, 0x1d209, 0x1d20c, 120 | 0x1d211, 0x003a9, 0x00397, 0x1d21d, 0x1d21f, 0x1d221, 0x1d225, 0x1d22c, 0x1d235, 0x1d20b, 121 | 0x1d20f, 0x003a7, 0x003a4, 0x1d219, 0x1d21c, 0x1d202, 0x1d224, 0x1d22e, 0x1d23e, 0x1d241, 122 | 0x00391, 0x00392, 0x003a5, 0x003a8, 0x1d23a, 0x1d234, 0x1d22f, 0x1d22d, 0x1d210, 0x1d20a, 123 | 0x1d207, 0x1d21b, 0x1d218, 0x1d223, 0x1d222, 0x1d240, 0x1d23d, 0x003bc, 0x1d220, 0x1d204, 124 | 0x00023, 0x00023, 0x00023, 0x02733, 0x1d22a, 0x00023, 0x00023, 0x00023, 0x00023, 0x10175, 125 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x1d227, 0x1d245, 0x00023, 0x00023, 126 | /* 700 */ 127 | 0x0205e, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x0223b, 128 | 0x00023, 0x02201, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 129 | 0x00023, 0x00023, 0x00023, 0x1d516, 0x0210c, 0x1d510, 0x00023, 0x00023, 0x00023, 0x00023, 130 | 0x02014, 0x023d7, 0x023d8, 0x023d9, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 
0x00023, 131 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 132 | 0x00023, 0x00661, 0x00662, 0x00663, 0x00664, 0x00665, 0x00666, 0x00667, 0x00668, 0x00669, 133 | 0x00660, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 134 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 135 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 136 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 137 | /* 800 */ 138 | 0x02733, 0x10141, 0x10140, 0x003a7, 0x0002f, 0x003a4, 0x0039a, 0x10166, 0x10148, 0x00023, 139 | 0x00023, 0x00023, 0x10148, 0x10149, 0x1014a, 0x1014b, 0x1014c, 0x1014d, 0x1014e, 0x00023, 140 | 0x00023, 0x00023, 0x1014f, 0x10150, 0x10151, 0x10152, 0x10153, 0x10154, 0x00023, 0x10155, 141 | 0x10147, 0x10147, 0x10156, 0x0039c, 0x10157, 0x003a7, 0x003a3, 0x003a4, 0x10143, 0x10141, 142 | 0x02551, 0x02980, 0x000b7, 0x1015b, 0x0205b, 0x10158, 0x10110, 0x1015e, 0x10112, 0x00023, 143 | 0x00023, 0x00023, 0x00023, 0x00399, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 144 | 0x00023, 0x00023, 0x00394, 0x10144, 0x00023, 0x10145, 0x003a7, 0x10146, 0x00023, 0x00023, 145 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 146 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 147 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 148 | /* 900 */ 149 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 150 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 151 | 0x00023, 0x00023, 0x1d228, 0x00023, 0x00023, 0x1d217, 0x1d232, 0x00057, 0x1d20b, 0x1d214, 152 | 0x00023, 0x00023, 0x02733, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x001a7, 0x0007e, 153 | 0x00023, 0x1d205, 0x00023, 
0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 154 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 155 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 156 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 157 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 158 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 159 | /* 1000 */ 160 | 0x1017c, 0x1017d, 0x1017e, 0x1017f, 0x10180, 0x003a7, 0x00023, 0x00023, 0x00023, 0x00023, 161 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 162 | 0x0003c, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 163 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 164 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 165 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 166 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 167 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 168 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 169 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 170 | /* 1100 */ 171 | 0x02183, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 172 | 0x00023, 0x00023, 0x00023, 0x00023, 0x1d201, 0x0007c, 0x001a7, 0x0005a, 0x00023, 0x00110, 173 | 0x00023, 0x0005a, 0x00023, 0x00023, 0x0211e, 0x00023, 0x0004f, 0x00023, 0x00023, 0x00023, 174 | 0x0005c, 0x00023, 0x00023, 0x00023, 0x00023, 0x00039, 0x02112, 0x00023, 0x00023, 0x00023, 175 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 
0x00023, 0x00023, 176 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 177 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 178 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 179 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 180 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 181 | /* 1200 */ 182 | 0x000a2, 0x02021, 0x020a4, 0x000df, 0x000b0, 0x00023, 0x00023, 0x00023, 0x00023, 0x00127, 183 | 0x00023, 0x00023, 0x00023, 0x00152, 0x00153, 0x000c6, 0x000e6, 0x00023, 0x00023, 0x00024, 184 | 0x00040, 0x00131, 0x00130, 0x00023, 0x02295, 0x000a9, 0x02731, 0x02021, 0x00023, 0x00023, 185 | 0x025ad, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 186 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 187 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 188 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 189 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 190 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 191 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 192 | /* 1300 */ 193 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 194 | 0x00023, 0x00023, 0x00023, 0x0223d, 0x00023, 0x00023, 0x00023, 0x00023, 0x0223b, 0x00023, 195 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 196 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x0003e, 0x1017e, 0x00023, 197 | 0x02116, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 198 | 0x00023, 0x00023, 
0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 199 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 200 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 201 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 202 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 203 | /* 1400 */ 204 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 205 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 206 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 207 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 208 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 209 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 210 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 211 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 212 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 213 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 214 | /* 1500 */ 215 | 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 216 | 0x00023, 0x00023, 0x0003c, 0x10175, 0x000f7, 0x1d20f, 0x00023, 0x00023, 0x1d229, 0x00023, 217 | 0x00023, 0x00222, 0x00023, 0x0205b, 0x00023, 0x00023, 0x00023, 0x00023, 0x00023, 0x02227 218 | }; 219 | 220 | /* Quotation Marks (") 0 - 60 221 | * FIXME: check pairs, find symbols for "50-"69 222 | * 15-May-2020 dm -- updated i.a.w. 
quickbeta.pdf (14-Jan-2016) 223 | */ 224 | #define MAX_QUOTATION 61 225 | unsigned int quotation_open[MAX_QUOTATION]; 226 | unsigned int quotation_open_symbol[] = { 227 | 0x201c, 0x201e, 0x201c, 0x2018, 0x201a, 0x201b, 0x00ab, 0x2039, 0x201c, 0x0022, 228 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 229 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 230 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 231 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 232 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 233 | 0x0022 234 | }; 235 | unsigned int quotation_close_symbol[] = { 236 | 0x201d, 0x201d, 0x201d, 0x2019, 0x2019, 0x2019, 0x00bb, 0x203a, 0x201e, 0x0022, 237 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 238 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 239 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 240 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 241 | 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 242 | 0x0022 243 | }; 244 | 245 | 246 | /* Brackets ([) 0 - 89 247 | * FIXME: [10, [13, [14, [15, [17, [33-[69 248 | * 15-May-2020 dm -- updated i.a.w. 
quickbeta.pdf (14-Jan-2016) 249 | */ 250 | #define MAX_BRACKET 90 251 | unsigned int bracket_open[MAX_BRACKET]; 252 | unsigned int bracket_open_symbol[] = { 253 | 0x005b, 0x0028, 0x2329, 0x007b, 0x27e6, 0x2e24, 0x2e22, 0x2e22, 0x2e24, 0x2027, 254 | 0x005b, 0x208d, 0x2192, 0x005b, 0x005b, 0x005b, 0x27e6, 0x300e, 0x27ea, 0x005b, 255 | 0x23a7, 0x23aa, 0x23a8, 0x23a9, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 256 | 0x239b, 0x239c, 0x239d, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 257 | 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 258 | 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 259 | 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 260 | 0x2e02, 0x2e04, 0x2e09, 0x2e0b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 261 | 0x002f, 0x2afd, 0x2e20, 0x2e21, 0x2e26, 0x2e28, 0x005b, 0x005b, 0x005b, 0x005b, 262 | }; 263 | unsigned int bracket_close_symbol[] = { 264 | 0x005d, 0x0029, 0x232a, 0x007d, 0x27e7, 0x2e25, 0x2e23, 0x2e25, 0x2e23, 0x2027, 265 | 0x005d, 0x208e, 0x2190, 0x005d, 0x005d, 0x005d, 0x27e7, 0x300f, 0x27eb, 0x005d, 266 | 0x23ab, 0x23aa, 0x23ac, 0x23ad, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 267 | 0x239e, 0x239f, 0x23a0, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 268 | 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 269 | 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 270 | 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 271 | 0x2e03, 0x2e05, 0x2e0a, 0x2e0c, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 272 | 0x002f, 0x2afd, 0x2e21, 0x2e20, 0x2e27, 0x2e29, 0x005d, 0x005d, 0x005d, 0x005d, 273 | }; 274 | 275 | /* Quasi-Brackets (<) 0 - 100 */ 276 | /* FIXME: markup handling 277 | * * 15-May-2020 dm -- updated i.a.w. 
quickbeta.pdf (14-Jan-2016) 278 | */ 279 | #define MAX_QUASI_BRACKET 101 280 | unsigned int quasi_bracket_code = 0; 281 | unsigned int quasi_bracket_open[MAX_QUASI_BRACKET]; 282 | unsigned int quasi_bracket_open_symbol[] = { 283 | 0x0305, 0x0332, 0x2035, 0x0361, 0x035c, 0x035d, 0x003c, 0x003c, 0x0333, 0x003c, 284 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x2035, 0x0333, 0x0336, 0x2035, 285 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 286 | 0x003c, 0x0336, 0x0332, 0x221a, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 287 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 288 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 289 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 290 | 0x25ba, 0x0028, 0x0028, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 291 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 292 | 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 293 | 0x003c, 294 | }; 295 | unsigned int quasi_bracket_close_symbol[] = { 296 | 0x003e, 0x003e, 0x2032, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 297 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x2032, 0x003e, 0x003e, 0x2032, 298 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 299 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 300 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 301 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 302 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 303 | 0x25c4, 0x0029, 0x0029, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 304 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 305 | 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 
0x003e, 0x003e, 0x003e, 306 | 0x003e, 307 | }; 308 | 309 | /* Non-Text ({) 0 - 71 */ 310 | /*FIXME: decide on representation */ 311 | #define MAX_NON_TEXT 72 312 | unsigned int non_text_open[MAX_NON_TEXT]; 313 | unsigned int non_text_open_symbol[] = { 314 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 315 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 316 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 317 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 318 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 319 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 320 | 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 321 | 0x007b, 0x007b, 322 | }; 323 | unsigned int non_text_close_symbol[] = { 324 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 325 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 326 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 327 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 328 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 329 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 330 | 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 331 | 0x007d, 0x007d, 332 | }; 333 | -------------------------------------------------------------------------------- /tlgu.1.ps: -------------------------------------------------------------------------------- 1 | %!PS-Adobe-3.0 2 | %%Creator: groff version 1.22.4 3 | %%CreationDate: Wed Jul 28 08:24:16 2021 4 | %%DocumentNeededResources: font Times-Roman 5 | %%+ font Times-Bold 6 | %%+ font Times-Italic 7 | %%DocumentSuppliedResources: procset grops 1.22 4 8 
| %%Pages: 4 9 | %%PageOrder: Ascend 10 | %%DocumentMedia: Default 612 792 0 () () 11 | %%Orientation: Portrait 12 | %%EndComments 13 | %%BeginDefaults 14 | %%PageMedia: Default 15 | %%EndDefaults 16 | %%BeginProlog 17 | %%BeginResource: procset grops 1.22 4 18 | %!PS-Adobe-3.0 Resource-ProcSet 19 | /setpacking where{ 20 | pop 21 | currentpacking 22 | true setpacking 23 | }if 24 | /grops 120 dict dup begin 25 | /SC 32 def 26 | /A/show load def 27 | /B{0 SC 3 -1 roll widthshow}bind def 28 | /C{0 exch ashow}bind def 29 | /D{0 exch 0 SC 5 2 roll awidthshow}bind def 30 | /E{0 rmoveto show}bind def 31 | /F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def 32 | /G{0 rmoveto 0 exch ashow}bind def 33 | /H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def 34 | /I{0 exch rmoveto show}bind def 35 | /J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def 36 | /K{0 exch rmoveto 0 exch ashow}bind def 37 | /L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def 38 | /M{rmoveto show}bind def 39 | /N{rmoveto 0 SC 3 -1 roll widthshow}bind def 40 | /O{rmoveto 0 exch ashow}bind def 41 | /P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def 42 | /Q{moveto show}bind def 43 | /R{moveto 0 SC 3 -1 roll widthshow}bind def 44 | /S{moveto 0 exch ashow}bind def 45 | /T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def 46 | /SF{ 47 | findfont exch 48 | [exch dup 0 exch 0 exch neg 0 0]makefont 49 | dup setfont 50 | [exch/setfont cvx]cvx bind def 51 | }bind def 52 | /MF{ 53 | findfont 54 | [5 2 roll 55 | 0 3 1 roll 56 | neg 0 0]makefont 57 | dup setfont 58 | [exch/setfont cvx]cvx bind def 59 | }bind def 60 | /level0 0 def 61 | /RES 0 def 62 | /PL 0 def 63 | /LS 0 def 64 | /MANUAL{ 65 | statusdict begin/manualfeed true store end 66 | }bind def 67 | /PLG{ 68 | gsave newpath clippath pathbbox grestore 69 | exch pop add exch pop 70 | }bind def 71 | /BP{ 72 | /level0 save def 73 | 1 setlinecap 74 | 1 setlinejoin 75 | DEFS/BPhook known{DEFS begin BPhook end}if 76 | 72 RES div dup scale 77 | LS{ 78 | 90 rotate 79 
| }{ 80 | 0 PL translate 81 | }ifelse 82 | 1 -1 scale 83 | }bind def 84 | /EP{ 85 | level0 restore 86 | showpage 87 | }def 88 | /DA{ 89 | newpath arcn stroke 90 | }bind def 91 | /SN{ 92 | transform 93 | .25 sub exch .25 sub exch 94 | round .25 add exch round .25 add exch 95 | itransform 96 | }bind def 97 | /DL{ 98 | SN 99 | moveto 100 | SN 101 | lineto stroke 102 | }bind def 103 | /DC{ 104 | newpath 0 360 arc closepath 105 | }bind def 106 | /TM matrix def 107 | /DE{ 108 | TM currentmatrix pop 109 | translate scale newpath 0 0 .5 0 360 arc closepath 110 | TM setmatrix 111 | }bind def 112 | /RC/rcurveto load def 113 | /RL/rlineto load def 114 | /ST/stroke load def 115 | /MT/moveto load def 116 | /CL/closepath load def 117 | /Fr{ 118 | setrgbcolor fill 119 | }bind def 120 | /setcmykcolor where{ 121 | pop 122 | /Fk{ 123 | setcmykcolor fill 124 | }bind def 125 | }if 126 | /Fg{ 127 | setgray fill 128 | }bind def 129 | /FL/fill load def 130 | /LW/setlinewidth load def 131 | /Cr/setrgbcolor load def 132 | /setcmykcolor where{ 133 | pop 134 | /Ck/setcmykcolor load def 135 | }if 136 | /Cg/setgray load def 137 | /RE{ 138 | findfont 139 | dup maxlength 1 index/FontName known not{1 add}if dict begin 140 | { 141 | 1 index/FID ne 142 | 2 index/UniqueID ne 143 | and 144 | {def}{pop pop}ifelse 145 | }forall 146 | /Encoding exch def 147 | dup/FontName exch def 148 | currentdict end definefont pop 149 | }bind def 150 | /DEFS 0 def 151 | /EBEGIN{ 152 | moveto 153 | DEFS begin 154 | }bind def 155 | /EEND/end load def 156 | /CNT 0 def 157 | /level1 0 def 158 | /PBEGIN{ 159 | /level1 save def 160 | translate 161 | div 3 1 roll div exch scale 162 | neg exch neg exch translate 163 | 0 setgray 164 | 0 setlinecap 165 | 1 setlinewidth 166 | 0 setlinejoin 167 | 10 setmiterlimit 168 | []0 setdash 169 | /setstrokeadjust where{ 170 | pop 171 | false setstrokeadjust 172 | }if 173 | /setoverprint where{ 174 | pop 175 | false setoverprint 176 | }if 177 | newpath 178 | /CNT countdictstack def 179 | 
userdict begin 180 | /showpage{}def 181 | /setpagedevice{}def 182 | mark 183 | }bind def 184 | /PEND{ 185 | cleartomark 186 | countdictstack CNT sub{end}repeat 187 | level1 restore 188 | }bind def 189 | end def 190 | /setpacking where{ 191 | pop 192 | setpacking 193 | }if 194 | %%EndResource 195 | %%EndProlog 196 | %%BeginSetup 197 | %%BeginFeature: *PageSize Default 198 | << /PageSize [ 612 792 ] /ImagingBBox null >> setpagedevice 199 | %%EndFeature 200 | %%IncludeResource: font Times-Roman 201 | %%IncludeResource: font Times-Bold 202 | %%IncludeResource: font Times-Italic 203 | grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 204 | def/PL 792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron 205 | /scaron/zcaron/Ydieresis/trademark/quotesingle/Euro/.notdef/.notdef 206 | /.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef 207 | /.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef 208 | /.notdef/.notdef/space/exclam/quotedbl/numbersign/dollar/percent 209 | /ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen 210 | /period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon 211 | /semicolon/less/equal/greater/question/at/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O 212 | /P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash/bracketright/circumflex 213 | /underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y 214 | /z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase/guillemotleft 215 | /guillemotright/bullet/florin/fraction/perthousand/dagger/daggerdbl 216 | /endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut 217 | /dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash 218 | /quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen 219 | /brokenbar/section/dieresis/copyright/ordfeminine/guilsinglleft 220 | /logicalnot/minus/registered/macron/degree/plusminus/twosuperior 221 | 
/threesuperior/acute/mu/paragraph/periodcentered/cedilla/onesuperior 222 | /ordmasculine/guilsinglright/onequarter/onehalf/threequarters 223 | /questiondown/Agrave/Aacute/Acircumflex/Atilde/Adieresis/Aring/AE 224 | /Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute/Icircumflex 225 | /Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis 226 | /multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn 227 | /germandbls/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla 228 | /egrave/eacute/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis 229 | /eth/ntilde/ograve/oacute/ocircumflex/otilde/odieresis/divide/oslash 230 | /ugrave/uacute/ucircumflex/udieresis/yacute/thorn/ydieresis]def 231 | /Times-Italic@0 ENC0/Times-Italic RE/Times-Bold@0 ENC0/Times-Bold RE 232 | /Times-Roman@0 ENC0/Times-Roman RE 233 | %%EndSetup 234 | %%Page: 1 1 235 | %%BeginPageSetup 236 | BP 237 | %%EndPageSetup 238 | /F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 239 | E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10.95/Times-Bold@0 SF 240 | -.219(NA)72 84 S(ME).219 E/F2 10/Times-Bold@0 SF(tlgu)108 96 Q F0 2.5 241 | 2.5 G(on)-2.5 E -.15(ve)-.4 G(rt beta code TLG and PHI CD-R).15 E 242 | (OM txt \214les to Unicode)-.4 E F1(SYNOPSIS)72 124.8 Q F2(tlgu)108 243 | 136.8 Q F0([)2.5 E/F3 10/Times-Italic@0 SF(options)2.73 E F0 2.5(][)2.77 244 | G F3(input_\214le).01 E F0 2.5(][)2.68 G F3(output_\214le).23 E F0(]) 245 | 2.68 E F1(DESCRIPTION)72 165.6 Q F2(tlgu)108 177.6 Q F0 1.347(will con) 246 | 3.847 F -.15(ve)-.4 G 1.346(rt an).15 F F3(input_\214le)3.846 E F0 1.346 247 | (from Thesaurus Linguae Graeca \(TLG\) and P)3.846 F 1.346 248 | (ackard Humanities Institute)-.15 F .396 249 | (\(PHI\) representation to a Unicode \(UTF-8\))108 189.6 R F3 250 | (output_\214le)2.896 E F0 5.396(.I)C(f)-5.396 E F3(input_\214le)2.896 E 251 | F0 .396(is not speci\214ed, standard input will be)2.896 F 3.316 252 | (read. 
If)108 201.6 R F3(output_\214le)3.316 E F0 .816 253 | (is not speci\214ed, the Unicode te)3.316 F .815 254 | (xt is directed to standard output.)-.15 F .815(The TLG/PHI repre-)5.815 255 | F(sentation consists of)108 213.6 Q F2(beta-code)2.5 E F0(te)2.5 E 256 | (xt and)-.15 E F2(citation)2.5 E F0(information.)2.5 E F1(OPTIONS)72 257 | 242.4 Q F2108 254.4 Q F0 .218 258 | (inserts a form feed and citation information \(le)144 254.4 R -.15(ve) 259 | -.25 G .218(ls a, b, c, d\) on e).15 F -.15(ve)-.25 G .219 260 | (ry "book" citation change.).15 F(By)5.219 E(def)144 266.4 Q 261 | (ault the program will output line feeds only \(see also)-.1 E F2 262 | 2.5 E F0(\).)A F2108 283.2 Q F0(observ)144 283.2 Q 263 | (es paging instructions.)-.15 E(By def)5 E 264 | (ault the program will output line feeds only)-.1 E(.)-.65 E F2108 265 | 300 Q F0 .424(primarily Roman te)144 300 R .424(xt \(PHI\). Some TLG te) 266 | -.15 F .424(xts, notably doccan1.txt and doccan2.txt are mostly ro-)-.15 267 | F .034(man te)144 312 R .034(xts lacking e)-.15 F .034 268 | (xplicit language change codes.)-.15 F .035 269 | (Setting this option will force a change to roman)5.035 F(te)144 324 Q 270 | (xt after each citation block is encountered.)-.15 E F2108 340.8 Q 271 | F0(highest-le)144 340.8 Q -.15(ve)-.25 G 2.5(lr).15 G 272 | (eference citation is included before each te)-2.5 E(xt line \(v-le)-.15 273 | E -.15(ve)-.25 G(l\)).15 E F2108 357.6 Q F0 274 | (reference citation is included before each te)144 357.6 Q 275 | (xt line \(w-le)-.15 E -.15(ve)-.25 G(l\)).15 E F2108 374.4 Q F0 276 | (reference citation is included before each te)144 374.4 Q 277 | (xt line \(x-le)-.15 E -.15(ve)-.25 G(l\)).15 E F2108 391.2 Q F0 278 | (reference citation is included before each te)144 391.2 Q 279 | (xt line \(y-le)-.15 E -.15(ve)-.25 G(l\)).15 E F2108 408 Q F0(lo) 280 | 144 408 Q(west-le)-.25 E -.15(ve)-.25 G 2.5(lr).15 G 281 | (eference citation is included before each te)-2.5 E(xt line \(z-le)-.15 282 | E 
-.15(ve)-.25 G(l\).).15 E F2(\255Z )-.25 E F0 .767(an arbitrary combination of citation info\ 284 | rmation is included before each te)144 436.8 R .766 285 | (xt line; see also -e op-)-.15 F .741 286 | (tion e.g. "%A/%B/%x/%y/%z\\t" will output the contents of the A, B)144 287 | 448.8 R F2 .742(citation description)3.242 F F0(le)3.242 E -.15(ve)-.25 288 | G(ls,).15 E(follo)144 460.8 Q(wed by)-.25 E(x, y)5 E 2.5(,z)-.65 G F2 289 | (citation r)A(efer)-.18 E(ence)-.18 E F0(le)2.5 E -.15(ve)-.25 G 290 | (ls, follo).15 E(wed by a T)-.25 E(AB character)-.93 E(.)-.55 E F2 291 | (\255e )108 477.6 Q F0 1.148 292 | (if there is no citation information for a citation le)144 489.6 R -.15 293 | (ve)-.25 G 3.648(ld).15 G 1.148(e\214ned with the -Z option abo)-3.648 F 294 | -.15(ve)-.15 G 3.648(,as).15 G(ingle)-3.648 E 1.131 295 | (right-hand slash is substituted by def)144 501.6 R 1.131 296 | (ault; you may de\214ne an)-.1 F 3.631(ys)-.15 G 1.132 297 | (tring with this option e.g. "-" or)-3.631 F("[NONE]" are v)144 513.6 Q 298 | (alid inputs)-.25 E F2108 542.4 Q F0 299 | (inserts blank space \(a tab\) before each and e)144 542.4 Q -.15(ve) 300 | -.25 G(ry line.).15 E F2108 559.2 Q F0(compact format; v)144 559.2 301 | Q 2.5(,w)-.65 G 2.5(,xc)-3.15 G(itations are inserted as the)-2.5 E 2.5 302 | (yc)-.15 G(hange at the be)-2.5 E(ginning of each section.)-.15 E F2 303 | 108 576 Q F0(compact format; w)144 576 Q 2.5(,x)-.65 G 2.5(,yc) 304 | -2.5 G(itations are inserted as the)-2.5 E 2.5(yc)-.15 G 305 | (hange at the be)-2.5 E(ginning of each section.)-.15 E F2108 306 | 592.8 Q F0 .199(no spaces; line ends and h)144 592.8 R .199 307 | (yphens before an ID code are remo)-.05 F -.15(ve)-.15 G 2.699(dw).15 G 308 | .198(hile h)-2.699 F .198(yphens and spaces before)-.05 F 309 | (page and column ends are \(still\) retained.)144 604.8 Q F2108 310 | 633.6 Q F0(citation deb)144 633.6 Q(ug information is output.)-.2 E F2 311 | 108 650.4 Q F0(special code deb)144 650.4 Q 312 | (ug information 
is output.)-.2 E F2108 667.2 Q F0 313 | (block processing information is output \(v)144 667.2 Q(erbose\).)-.15 E 314 | F2108 684 Q F0 -.2(vo)144 684 S .416(wels with acute accent are o\ 315 | utput using the Unicode 0x0370 codes rather than the 0x1F00 ones)-.05 F 316 | (for compatibility with most current \(as of 2020\) k)144 696 Q -.15(ey) 317 | -.1 G(board encoders.).15 E F2108 712.8 Q F0 .264(each w)144 712.8 318 | R .263(ork \(book\) is output as a separate \214le in the form output_\ 319 | \214le-xxx.txt; if an output \214le is not)-.1 F 320 | (speci\214ed, this option has no ef)144 724.8 Q(fect.)-.25 E -1.11(Ve)72 321 | 768 S(rsion 1.9)1.11 E 199.835(27-July-2021 1)161.785 F 0 Cg EP 322 | %%Page: 2 2 323 | %%BeginPageSetup 324 | BP 325 | %%EndPageSetup 326 | /F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 327 | E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10.95/Times-Bold@0 SF 328 | (HIST)72 84 Q(OR)-.197 E 2.738(YA)-.383 G(ND INTENDED USE)-2.738 E F0 329 | .44(The purpose of)108 96 R/F2 10/Times-Bold@0 SF(tlgu)2.94 E F0 .441(i\ 330 | s to translate binary TLG/PHI-format \214les into readable and editable\ 331 | te)2.94 F 2.941(xt. 
It)-.15 F .441(is based)2.941 F .749(on an earlier\ 332 | program written in 80x86 assembly language \(1996\) outputting codes f\ 333 | or a home-made font)108 108 R .058(which used the pre)108 120 R -.25(va) 334 | -.25 G .058(lent hellenic font encodings of that time complemented by d\ 335 | ead accent characters - not).25 F -.15(ve)108 132 S(ry attracti).15 E 336 | -.15(ve)-.25 G 2.5(,b).15 G(ut readable.)-2.7 E 1.771(Then came Unicode\ 337 | and a plethora of accented character glyphs; Polytonic fonts are alrea\ 338 | dy a)108 156 R -.25(va)-.2 G(ilable).25 E 1.37 339 | (\(Cardo, Gentium, Athena, Athenian, Porson\); ne)108 168 R 3.87(wf)-.25 340 | G 1.37(onts are being created and older fonts are being e)-3.87 F(x-) 341 | -.15 E 1.04(panded as special-use code points are included in the Unico\ 342 | de de\214nition \(musical symbols, other special)108 180 R 2.646 343 | (symbols\). A)108 192 R .146(notable ef)2.646 F .146 344 | (fort since this note w)-.25 F .146 345 | (as originally drafted is that of the Greek F)-.1 F .146(ont Society) 346 | -.15 F 2.647(,n)-.65 G .647 -.25(ow f)-2.647 H(ea-).25 E 347 | (turing a great, and e)108 204 Q 348 | (xpanding, selection of open polytonic fonts.)-.15 E 1.841 349 | (So, at this point in time,)108 228 R F2(tlgu)4.341 E F0 1.841(will cru\ 350 | nch a \214le which has been formatted according to the published)4.341 F 351 | .435(TLG/PHI format and produce codes for most glyphs generally a)108 352 | 240 R -.25(va)-.2 G 2.936(ilable. 
No).25 F .436 353 | (attempt has been made to in-)2.936 F .697(troduce multi-character sequ\ 354 | ences or formatting codes \(font changes\).)108 252 R .696 355 | (If a code has not been de\214ned, the)5.697 F .788 356 | (program will output the respecti)108 264 R 1.088 -.15(ve ")-.25 H .788 357 | (code f).15 F .788(amily" glyph.)-.1 F -1.1(Yo)5.788 G 3.288(um)1.1 G 358 | .788(ay use the)-3.288 F F23.288 E F0 .789 359 | (option to check such codes)3.288 F(ag)108 276 Q 360 | (ainst the published beta code de\214nition.)-.05 E .163(July 2005 - T) 361 | 108 300 R(ro)-.35 E 2.663(yA)-.1 G 2.663(.G)-2.663 G(rif)-2.663 E .163 362 | (\214tts \(scribe, crosswire or)-.25 F .162(g\) contrib)-.18 F .162 363 | (uted the arbitrary citation output code and added)-.2 F(per)108 312 Q 364 | (-line processing of the input \214le.)-.2 E 365 | (April 2006 - Final sigma will no)108 336 Q 2.5(wb)-.25 G 2.5(eo)-2.5 G 366 | (utput at end-of-line \(!\) from free-form input te)-2.5 E 367 | (xt \(thank you Jan\).)-.15 E 368 | (October 2011 - stdout is used if output_\214le is not speci\214ed.)108 369 | 360 Q(No)108 384 Q -.15(ve)-.15 G(mber 2011 - citations \(v).15 E 2.5 370 | (,w)-.65 G 2.5(,x)-3.15 G 2.5(\)a)-2.5 G 2.5(tt)-2.5 G 371 | (he start of section changes \(e-book option\))-2.5 E .124(May 2012 -) 372 | 108 408 R .124(Nick White \(nick white, durham ac uk\) re)5.124 F .124 373 | (vised the input ar)-.25 F .124 374 | (guments to use tlgu as a \214lter; stdin)-.18 F 375 | (is used if input_\214le is not speci\214ed)108 420 Q 376 | (May 2020 - Alternate output codes for v)108 444 Q -.25(ow)-.2 G 377 | (els with acute accent \(-U option\)).25 E 378 | (July 2021 - Corrections to citation code)108 468 Q F1(EXAMPLES)72 484.8 379 | Q F2 .363(./tlgu -r DOCCAN2.TXT doccanu.txt)108 496.8 R F0 -.35(Tr)2.863 380 | G .363(anslate the TLG canon to a unicode te).35 F .363 381 | (xt \214le. 
Note the use of the)-.15 F F2(-r)108 508.8 Q F0 382 | (option \(this \214le e)2.5 E(xpects Roman as the def)-.15 E 383 | (ault font\).)-.1 E F2(./tlgu -x -y -z TLG1799.TXT tlg1799u.txt)108 384 | 525.6 Q F0 1.666(Generate a continuous \214le with the te)144 537.6 R 385 | 1.667(xts of granpa Euclides. A)-.15 F -.25(va)-.74 G 1.667 386 | (ilable citations \(-x -y -z\) are).25 F(Book//demonstratio/line as sho) 387 | 144 549.6 Q(wn in the respecti)-.25 E .3 -.15(ve ")-.25 H 388 | (cit" \214eld of doccan2.txt.).15 E F2 389 | (./tlgu -b -B TLG1799.TXT tlg1799u.txt)108 566.4 Q F0 .267 390 | (Generate the same te)144 578.4 R .267(xts, this time with a page feed \ 391 | and book citation information on the \214rst page)-.15 F 392 | (of each book and a tab before each line \(use with OOo v)144 590.4 Q 393 | (ersions earlier than 1.1.4\).)-.15 E F2 394 | (./tlgu -C TLG1799.TXT tlg1799u.txt)108 607.2 Q F0(See ho)144 619.2 Q 395 | 2.5(wt)-.25 G(he citation information changes within each TLG block.) 396 | -2.5 E F2(./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt) 397 | 108 636 Q F0 .52(Check out the symbols used in a w)144 648 R 3.02 398 | (ork. Book)-.1 F .521(and x, y)3.02 F 3.021(,zr)-.65 G .521 399 | (eferences are printed on a separate line)-3.021 F .191(for each symbol\ 400 | . 
Sort / grep the output to locate speci\214c symbols of interest; sa) 401 | 144 660 R .49 -.15(ve i)-.2 H 2.69(na\214).15 G .19(le for later)-2.69 F 402 | (use.)144 672 Q F2(./tlgu -W TLG0006.TXT tlg0006u)108 688.8 Q F0 -.4(Wi) 403 | 144 700.8 S(ll produce separate \214les for each w).4 E 404 | (ork, named tlg006u-001.txt etc.)-.1 E -1.11(Ve)72 768 S(rsion 1.9)1.11 405 | E 199.835(27-July-2021 2)161.785 F 0 Cg EP 406 | %%Page: 3 3 407 | %%BeginPageSetup 408 | BP 409 | %%EndPageSetup 410 | /F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 411 | E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10/Times-Bold@0 SF(./tlg\ 412 | u -Z "%A/%B/%D/%c/%d/%Z/%x/%y/%z\\t" -e "-" chr0010.txt chr0010u.txt)108 413 | 84 Q F0 -.4(Wi)144 96 S .506(ll generate a \214le with citation descrip\ 414 | tion \(A, B, D, Z\) and citation reference \(c, d, x, y).4 F 3.006(,z) 415 | -.65 G 3.006(\)l)-3.006 G -.25(ev)-3.006 G(-).25 E .31 416 | (els, separated by "/" follo)144 108 R .31(wed by a T)-.25 F .31 417 | (AB character and the respecti)-.93 F .61 -.15(ve t)-.25 H -.15(ex).15 G 418 | 2.81(t. Blank).15 F .31(citation elements)2.81 F 7.23(will be \214lled \ 419 | with a single "-" e.g. 
Asia/Smyrna/1222-1223 ac/IGChAs/Asia Min)144 120 420 | R([Chr]/88/-/2A/7p1 [T)144 132 Q(AB] inscription te)-.93 E(xt etc.)-.15 421 | E F1(./tlgu -r -N -X LA)108 148.8 Q(T0448.TXT LA)-.95 E(T0448.xx.TXT) 422 | -.95 E F0 .75(will produce a compact v)144 160.8 R .75 423 | (ersion of the Gaius Iulius Caesar te)-.15 F .749 424 | (xts with v and x citations printed as)-.15 F(the)144 172.8 Q 3.03(yc) 425 | -.15 G .53(hange; similarly)-3.03 F(,)-.65 E F1 .531(./tlgu -r -N -Y LA) 426 | 3.031 F .531(T2150.TXT LA)-.95 F(T2150.yy)-.95 E(.TXT)-.7 E F0 .531 427 | (will produce a compact)3.031 F -.15(ve)144 184.8 S(rsion of Zeno').15 E 428 | 2.5(st)-.55 G -.15(ex)-2.5 G(ts.).15 E/F2 10.95/Times-Bold@0 SF(POST)72 429 | 201.6 Q(-PR)-1.007 E(OCESSING EXAMPLES)-.329 E F0 3.569(Iu)108 213.6 S 430 | 1.069(se the OpenOf)-3.569 F(\214ce/LibreOf)-.25 E 1.069 431 | (\214ce suite for most of my w)-.25 F 3.569(ork. This)-.1 F -.15(ex) 432 | 3.569 G 1.069(ample sho).15 F 1.068(ws one of man)-.25 F 3.568(yp)-.15 G 433 | (ossible)-3.568 E -.1(wa)108 225.6 S 434 | (ys of using the search and replace f).1 E 435 | (acility to create a readable v)-.1 E(ersion of the Suda le)-.15 E 436 | (xicon.)-.15 E F1(./tlgu -B TLG4085.TXT tlg4085u.txt)108 242.4 Q F0 2.5 437 | (AU)144 254.4 S(nicode \214le with the te)-2.5 E(xt is created)-.15 E F1 438 | (Open the generated \214le with Openof\214ce/Libr)108 271.2 Q 439 | (eOf\214ce:)-.18 E F0(File | Open | Filename: tlg4085u.txt, File T)144 440 | 283.2 Q(ype: T)-.8 E -.15(ex)-.7 G 2.5(tE).15 G 441 | (ncoded \255\255 Press Open)-2.5 E .274(The ASCII Filter Options windo) 442 | 144 307.2 R 2.774(wa)-.25 G .274 443 | (ppears. Select the Unicode \(UTF-8\) character set and a proper)-2.774 444 | F(Unicode font installed in your machine \(e.g. Cardo\).)144 319.2 Q 445 | (Press OK.)5 E F1(Replace angle brack)108 336 Q(ets with expanded text) 446 | -.1 E F0(Le)144 348 Q 1.367(xicon terms are enclosed in . 
The)-.1 F 1.366 448 | (actual beta codes indicate the use of e)3.867 F(x-)-.15 E(panded te)144 449 | 360 Q(xt for emphasis.)-.15 E(Select Edit | Find & Replace.)5 E(The)5 E 450 | F1(Find & Replace)2.5 E F0(windo)2.5 E 2.5(wa)-.25 G(ppears.)-2.5 E .467 451 | (In the)144 384 R F1(Sear)2.967 E .468(ch F)-.18 F(or)-.25 E F0 .468 452 | (\214eld, type the follo)2.968 F .468(wing e)-.25 F(xpression:)-.15 E F1 453 | (<[^<>]*>)2.968 E F0 .468(This means "\214nd an)2.968 F 2.968(yc)-.15 G 454 | (haracters)-2.968 E(between angle brack)144 396 Q 455 | (ets, not including angle brack)-.1 E(ets".)-.1 E .769(In the)144 420 R 456 | F1 .769(Replace W)3.269 F(ith)-.18 E F0(windo)3.269 E 3.269(wi)-.25 G 457 | .769(nsert a single ampersand:)-3.269 F F1(&)3.269 E F0 .769 458 | (This means that we need to)3.269 F F1(add)3.268 E F0(for)3.268 E(-)-.2 459 | E 1.223(matting information \(this case\) or additional te)144 432 R 460 | 1.224(xt to the te)-.15 F 1.224(xt found.)-.15 F(Press)6.224 E F1(Mor) 461 | 6.224 E 3.724(eO)-.18 G(ptions)-3.724 E F0(,)A F1 -.25(Fo)3.724 G -.37 462 | (r-).25 G(mat...)144 444 Q F0(and select the)2.5 E F1 -.2(Po)2.5 G 463 | (sition).2 E F0(tab; select Spacing Expanded by 2.0 points.)2.5 E 464 | (Press OK.)5 E(Check the)144 468 Q F1(Regular Expr)2.5 E(essions)-.18 E 465 | F0(box and press)2.5 E F1(Replace All)2.5 E F0(.)A -1.1(Yo)144 492 S 2.5 466 | (um)1.1 G(ay no)-2.5 E 2.5(wr)-.25 G(eplace the angle brack)-2.5 E 467 | (ets with nothings.)-.1 E(Repeat the abo)144 516 Q .3 -.15(ve p)-.15 H 468 | (rocedure for titles enclosed in {braces}.).15 E(Write a macro...)5 E F1 469 | (Other useful inf)108 532.8 Q(ormation)-.25 E F0 .318 470 | (If you are using your w)144 544.8 R .317(ordprocessor with a locale se\ 471 | tting other than Hellenic \(el_GR\), the follo)-.1 F(w-)-.25 E .771 472 | (ing in)144 556.8 R -.2(vo)-.4 G .771 473 | (cation with the desired character classi\214cation may pro).2 F 1.072 474 | -.15(ve u)-.15 H .772(seful for the occasional poly-).15 F 
475 | (tonic editing:)144 568.8 Q F1(LC_CTYPE=el_GR.UTF-8 /usr/bin/sof\214ce) 476 | 144 592.8 Q F0(\(or)2.5 E F1(/opt/libr)2.5 E(eof\214ce3.4/pr)-.18 E 477 | (ogram/sof\214ce)-.18 E F0(\).)2.5 E 2.5(Ip)144 616.8 S(ut my def)-2.5 E 478 | (ault locale and k)-.1 E -.15(ey)-.1 G(board de\214nitions in my).15 E 479 | F1(.bashr)2.5 E(c)-.18 E F0(or)2.5 E F1(.pr)2.5 E(o\214le)-.18 E F0(:)A 480 | F1(export LC_CTYPE=el_GR.UTF-8)144 640.8 Q 481 | (setxkbmap us,el ,polytonic -option gr)144 652.8 Q 482 | (p:ctrl_shift_toggle -option gr)-.1 E(p_led:scr)-.1 E(oll)-.18 E F0 483 | (This w)144 676.8 Q(ay multi-lingual te)-.1 E(xt can be entered;)-.15 E 484 | -.1(ke)5 G(yboard layout switching is done by pressing)-.05 E 485 | (Ctrl/Shift; alternate k)144 688.8 Q -.15(ey)-.1 G 486 | (board layout is indicated by the Scroll Lock light on the k).15 E -.15 487 | (ey)-.1 G(board.).15 E -1.11(Ve)72 768 S(rsion 1.9)1.11 E 199.835 488 | (27-July-2021 3)161.785 F 0 Cg EP 489 | %%Page: 4 4 490 | %%BeginPageSetup 491 | BP 492 | %%EndPageSetup 493 | /F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 494 | E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10.95/Times-Bold@0 SF 495 | (FUR)72 84 Q(THER DEVELOPMENT)-.438 E F0 -1.1(Yo)108 96 S 2.5(um)1.1 G 496 | (ay not lik)-2.5 E 2.5(et)-.1 G 497 | (he character output for a speci\214c code.)-2.5 E(Check out the)5 E/F2 498 | 10/Times-Bold@0 SF(tlgcodes.h)2.5 E F0(\214le containing the spe-)2.5 E 499 | (cial symbol and punctuation codes and select one to suit you better)108 500 | 108 Q 5(.I)-.55 G 2.5(tw)-5 G(ill probably be a while before the)-2.5 E 501 | (beta to Unicode correspondence settles do)108 120 Q(wn.)-.25 E 502 | (Drop me a line, if you need a ne)108 144 Q 2.5(wf)-.25 G 503 | (eature; let me kno)-2.5 E 2.5(wi)-.25 G 2.5(fy)-2.5 G 504 | (ou do \214nd an interesting applications that others)-2.5 E 505 | (can pro\214t from.)108 156 Q F1(REFERENCES)72 184.8 Q F0(There are se) 506 | 108 196.8 Q -.15(ve)-.25 G(ral te).15 E 
507 | (xts describing the internal representation of)-.15 E F2(PHI)2.5 E F0 508 | (and)2.5 E F2(TLG)2.5 E F0(te)2.5 E(xt, ID data, citation data)-.15 E 509 | (and inde)108 208.8 Q 2.5<788c>-.15 G 2.5(les. The)-2.5 F 510 | (originator of this format is the P)2.5 E(ackard Humanities Institute.) 511 | -.15 E(The TLG is maintained)5 E(by UCI \255 see)108 220.8 Q F2(www)2.5 512 | E(.tlg)-.7 E(.uci.edu)-.15 E F0 2.52.5 G 513 | (here you may \214nd the latest v)-2.5 E(ersions of the)-.15 E F2 514 | (TLG Beta Code Manual)2.5 E F0(and the)108 232.8 Q F2 515 | (TLG Beta Code Quick Refer)2.5 E(ence Guide)-.18 E F0(.)A 516 | (Unicode consortium \()108 256.8 Q F2(www)A(.unicode.or)-.7 E(g)-.1 E F0 517 | 2.5(\)p)C 518 | (ublications pertaining to the codi\214cation of characters used in)-2.5 519 | E(Hellenic literature, scienti\214c and musical te)108 268.8 Q(xts.)-.15 520 | E(The OpenOf)108 292.8 Q(\214ce/Libreof)-.25 E(\214ce suite in its v) 521 | -.25 E(arious editions \()-.25 E F2(www)A(.openof\214ce.or)-.7 E(g)-.1 E 522 | F0 5(-a)2.5 G(pache.or)-5 E(g,)-.18 E F2(www)2.5 E(.libr)-.7 E(eof-)-.18 523 | E(\214ce.or)108 304.8 Q(g)-.1 E F0(,)A F2(www)2.5 E(.neoof\214ce.or)-.7 524 | E(g)-.1 E F0 2.5(\)i)C(ncludes a w)-2.5 E 525 | (ord processor that you can use to load, process and create ne)-.1 E(w) 526 | -.25 E(polytonic te)108 316.8 Q(xts.)-.15 E(Greek F)108 340.8 Q 527 | (ont Society:)-.15 E F2(www)2.5 E(.gr)-.7 E(eekf)-.18 E(ontsociety)-.25 528 | E(.gr)-.7 E F1(COPYRIGHT)72 369.6 Q F0(Cop)108 381.6 Q(yright \(C\) 200\ 529 | 4, 2005, 2011, 2013, 2020, 2021 Dimitri Marinakis \(dm, ssa gr\).)-.1 E 530 | (This \214le is part of tlgu which is free softw)108 405.6 Q 531 | (are; you can redistrib)-.1 E 532 | (ute it and/or modify it under the terms of the)-.2 E 533 | (GNU General Public License \(v)108 417.6 Q 534 | (ersion 2\) as published by the Free Softw)-.15 E(are F)-.1 E 535 | (oundation.)-.15 E(tlgu is distrib)108 441.6 Q 536 | (uted in the hope that it will be useful, b)-.2 
E(ut WITHOUT ANY W)-.2 E 537 | (ARRANTY)-1.2 E 2.5(;w)-.92 G(ithout e)-2.5 E -.15(ve)-.25 G 2.5(nt).15 538 | G(he)-2.5 E(implied w)108 453.6 Q(arranty of MERCHANT)-.1 E 539 | (ABILITY or FITNESS FOR A P)-.93 E(AR)-.92 E(TICULAR PURPOSE.)-.6 E 540 | (See the)5 E(GNU General Public License for more details.)108 465.6 Q 541 | -1.1(Yo)108 489.6 S 2.5(us)1.1 G(hould ha)-2.5 E .3 -.15(ve r)-.2 H 542 | (ecei).15 E -.15(ve)-.25 G 2.5(dac).15 G(op)-2.5 E 2.5(yo)-.1 G 2.5(ft) 543 | -2.5 G 544 | (he GNU General Public License along with this program; if not, write) 545 | -2.5 E(to the Free Softw)108 501.6 Q(are F)-.1 E 546 | (oundation, Inc., 51 Franklin St, Fifth Floor)-.15 E 2.5(,B)-.4 G 547 | (oston, MA)-2.5 E 2.5(02110-1301 USA)5 F -1.11(Ve)72 768 S(rsion 1.9) 548 | 1.11 E 199.835(27-July-2021 4)161.785 F 0 Cg EP 549 | %%Trailer 550 | end 551 | %%EOF 552 | -------------------------------------------------------------------------------- /tlgu.c: -------------------------------------------------------------------------------- 1 | /* tlgu: Translates TLG (D) / PHI text files to Unicode text 2 | * 3 | * Copyright (C) 2004, 2005, 2011, 2013, 2020, 2021 Dimitri Marinakis 4 | * 5 | * This program is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU General Public License Version 2 7 | * as published by the Free Software Foundation. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | * 18 | * Usage: 19 | * tlgu [options] [beta_code_file] [unicode_text_file] 20 | * 21 | * Options: 22 | * -r -- primarily Roman text (e.g. Canon, PHI); default betastate = ROMAN, reset on every ID code 23 | * -v -w -x -y -z -- work reference citations are printed in the form xxx.xxx...xxx 24 | * -Z -- use reference and description citation codes in string 25 | * reference a-z, description A-Z also special codes \t(ab) \n(new line) \r(eturn) 26 | * e.g. "%A/%Z/%v/%w/%y/%z\t" 27 | * -e -- companion to -Z above; a string to put out when a defined citation slot 28 | * is empty (e.g. "NONE", or "-") 29 | * 30 | * -b -- books are preceded by a page feed and description citations are printed 31 | * -p -- pagination is observed, otherwise book lines are printed continuously 32 | * -B -- output blank space (tab) after each new line (beginning of line) 33 | * 34 | * -X -- citations are printed as they change (v, w, x) 35 | * -Y -- citations are printed as they change (w, x, y) 36 | * -N -- no spaces - line ends, and hyphens before an ID code, are removed 37 | * 38 | * -C -- citation debug information is printed 39 | * -S -- special code debug information is printed 40 | * -T -- bracket debug information is printed 41 | * -V -- processing debug information is printed 42 | * -W -- multiple output files, one for each work 43 | * 44 | * Returns: exit code 1 if unsuccessful 45 | * 46 | * Make: gcc tlgu.c -o tlgu 47 | * 48 | * History: This is a re-write of a DOS program (tlgft.asm) written several 49 | * years ago to translate Hellenic texts distributed on the TLG CD-ROM from 50 | * "beta code" to something readable, editable and printable. 
51 | * 52 | * Contributors: Troy Griffitts (tg), Nick White (nw) 53 | * 54 | * Pointers / References: 55 | * TLG Project - www.tlg.uci.edu 56 | * PHI CD ROM Format Description, Packard Humanities Institute, 19 April 1992 57 | * Beta code reference - Text versions: tlgbeta.txt, tlgcode.txt, BCM2004.pdf (23-Jun-2004) 58 | * 59 | * ID locator reference - Text version tlgcodes.txt 60 | * 61 | * 14-Jun-2001 dm -- c port: ELOT-928 with custom dead-accent codes 62 | * 14-Jun-2004 dm -- Unicode 63 | * 26-Jun-2004 dm -- Command-line options 64 | * 26-Feb-2005 dm -- Output file separation (-W option) 65 | * 06-Mar-2005 dm -- Latin accent characters added (without parentheses) 66 | * 02-Aug-2005 tg -- Free-form citations (options -Z, -e) and per-line processing 67 | * 22-Apr-2006 dm -- Includes to make gcc (4.x) happy, final sigma fix for free text 68 | * 02-Oct-2011 dm -- Output written to stdout if an output file name is not provided 69 | * 16-Oct-2011 dm -- Code correction for lower case phi 70 | * 20-Nov-2011 dm -- stop processing gracefully when writing to stdout 71 | * 20-Nov-2011 dm -- e-book type citations (options -X -Y) 72 | * 01-May-2012 nw -- reading from stdin 73 | * 17-Mar-2013 dm -- minor error handling corrections 74 | * 15-May-2020 dm -- vowels with acute accent may be output with U0370 codes (option -U), additional code points 75 | * 18-May-2020 dm -- rudimentary quasi bracket handling 76 | * 27-Jul-2021 dm -- citation handling corrections 77 | */ 78 | 79 | #include "tlgu.h" 80 | #include "tlgcodes.h" 81 | 82 | /****************** PROTOTYPES FROM THE TOP DOWN *******************/ 83 | 84 | int tlgu (char * input_file, char * output_file); 85 | void output_utf2(int ucode); 86 | void output_utf(int ucode); 87 | void output_string(char *outstr); 88 | int process_beta (int input_count); 89 | void beta_code(int input_count); 90 | int id_code(int input_count); 91 | void store_accents(unsigned char bufferchar); 92 | const char *resolve_cite_format(const char 
*cformat); 93 | 94 | /****************** PROGRAM VERSION INFORMATION *******************/ 95 | char *prog_version="1.9"; 96 | 97 | /****************** COMMAND LINE OPTIONS **************************/ 98 | int opt_roman = 0; 99 | int opt_page = 0; 100 | int opt_blank = 0; 101 | int opt_acit = 0; 102 | int opt_bcit = 0; 103 | int opt_ccit = 0; 104 | int opt_dcit = 0; 105 | int opt_cit_id = 0; /* combines a, b, c */ 106 | int opt_vcit = 0; 107 | int opt_wcit = 0; 108 | int opt_xcit = 0; 109 | int opt_ycit = 0; 110 | int opt_cprefix = 0; 111 | char cformat[253]; 112 | int opt_ecit_blank = 0; 113 | char ecite[253]; 114 | int opt_zcit = 0; 115 | int opt_verbose = 0; 116 | int opt_debug_bracket = 0; 117 | int opt_debug_cit = 0; 118 | int opt_debug_special = 0; 119 | int opt_multiple = 0; 120 | int opt_ebook_cit_x = 0; 121 | int opt_ebook_cit_y = 0; 122 | int opt_nospace = 0; 123 | int opt_U370 = 0; 124 | 125 | /****************** GLOBAL VARIABLES *******************************/ 126 | 127 | int iptr = 0; /* input buffer pointer, reset before every read */ 128 | int optr = 0; /* output buffer pointer, reset after every write */ 129 | unsigned char input_buffer[INRECSIZE]; 130 | unsigned char output_buffer[OUTRECSIZE]; 131 | #define MAXFILELEN 256 132 | int prev_cit_w = 0; 133 | int prev_cit_x = 0; 134 | int prev_cit_y = 0; 135 | /************ GLOBAL BETA CODE PROCESSING VARIABLES **************/ 136 | 137 | unsigned int outcode; 138 | int betastate; /* translation state machine */ 139 | int previous_state; /* needed for symbol translations */ 140 | int start_new_line = 0; /* needed for symbol translations */ 141 | int book_change = 0; /* needed for symbol translations */ 142 | int accents; /* holds accent combinations */ 143 | char *accented_chars = "AEHIOUWR"; 144 | char *accent_chars = ")(+/\\=|"; 145 | char *latin_accent_chars = "+/\\=|"; 146 | char *escape_codes = "$&%\"@#^[]<>{}"; 147 | char *punctuation_codes = " .,:;_\"%{}$&"; /* used by which_sigma */ 148 | char 
previous_bcit[52][32]; /* holds previous work (book) citation */ 149 | 150 | /****************** GLOBAL DESCRIPTOR VARIABLES *****************/ 151 | 152 | /* 153 | Space is reserved for descriptive data as follows: 154 | 155 | citations, binary component -- z, y, x, w, v, n (1 to 16383) 156 | citations, ascii component -- a-z (1 to 15 characters + null, only a-d, n, v-z are actually used) 157 | descriptors, binary component -- a-z (1 to 16383) 158 | descriptors, ascii component -- a-z (1 to 31 characters + null) 159 | 160 | Citation data --- 161 | a - author citation 162 | b - work citation 163 | c - preferred abbreviation for the work 164 | d - preferred abbreviation for the author 165 | 166 | n - if present signifies a document within a work 167 | when it changes, v-z are nulled but are then independent 168 | if n is not present, a change in an upper level nulls out the rest 169 | 170 | v-z hierarchical citation levels, high to low 171 | 172 | v 173 | w 174 | x - (chapter) 175 | y - (verse) (book) 176 | z - line 177 | 178 | Descriptors --- 179 | 180 | z - comment sequence number within a work (PHI) 181 | 182 | In the common data structures below, citations will hold the first 26 positions (0-25) 183 | while descriptors will hold the next 26 positions. 184 | 185 | The maximum number of characters in string citations is 31 (+ null byte) 186 | */ 187 | #define MAX_CITATION 32 188 | int icitation[52]; 189 | char citation[52][MAX_CITATION]; 190 | int id_level; /* holds translated current id level as an index to ID arrays */ 191 | int id_char; /* holds the pointer for the ascii part of the ID arrays */ 192 | int id_command; /* holds the current instruction for ID handling */ 193 | int id_process; /* if non-zero, command must be processed */ 194 | 195 | 196 | /****************** HANDLE ARGUMENTS AND SYNTAX *******************/ 197 | 198 | void usage_info(void) 199 | { 200 | printf("\ntlgu: TLG/PHI beta code file to Unicode translator ver. 
%s\n", prog_version); 201 | printf("\ntlgu: Copyright (C) 2004, 2005, 2011, 2013, 2020, 2021 Dimitri Marinakis"); 202 | printf("\ntlgu: This program is free software; you are encouraged to redistribute it under"); 203 | printf("\ntlgu: the terms of the GNU General Public License (version 2).\n"); 204 | printf("\ntlgu: This program comes with ABSOLUTELY NO WARRANTY. See the GNU General Public"); 205 | printf("\ntlgu: License in the file named `COPYING' for more details.\n"); 206 | printf("\ntlgu: Syntax: tlgu [options] [beta_code_file] [unicode_text_file]\n"); 207 | printf("\ntlgu: -r -- primarily Roman text (e.g. Canon, PHI); default betastate = ROMAN, reset on every ID code"); 208 | printf("\ntlgu: -v -w -x -y -z -- work reference citations are printed in the form xxx.xxx...xxx"); 209 | printf("\ntlgu: -Z -- use reference and description citation codes in string"); 210 | printf("\ntlgu: reference a-z, description A-Z also special codes \\t(ab) \\n(new line) \\r(eturn)"); 211 | printf("\ntlgu: e.g. \"%%A/%%Z/%%v/%%w/%%y/%%z\\t\" \n"); 212 | printf("\ntlgu: -e -- e.g. 
\"[NONE]\" instead of default \"\""); 213 | printf("\ntlgu: -X -- citations are printed as they change (v, w, x)"); 214 | printf("\ntlgu: -Y -- citations are printed as they change (w, x, y)"); 215 | printf("\ntlgu: -N -- no spaces - line ends, and hyphens before an ID code, are removed"); 216 | printf("\n"); 217 | printf("\ntlgu: -b -- books are preceded by a page feed and description citations are printed"); 218 | printf("\ntlgu: -p -- pagination is observed, otherwise book lines are printed continuously"); 219 | printf("\ntlgu: -B -- output blank space (tab) at the beginning of each line"); 220 | printf("\n"); 221 | printf("\ntlgu: -C -- citation debug information is printed"); 222 | printf("\ntlgu: -S -- special code debug information is printed"); 223 | printf("\ntlgu: -V -- processing debug information is printed"); 224 | printf("\ntlgu: -U -- output acute accents as tonoi in the Greek and Coptic Unicode block (U+0370 ff.)"); 225 | printf("\ntlgu: -W -- multiple output files, one for each work (book); output filename must be specified"); 226 | printf("\n"); 227 | } 228 | 229 | int main(int argc, char * argv[]) 230 | { 231 | unsigned char ucc; /* test variable */ 232 | int idx; 233 | 234 | if (sizeof(ucc) != 1) { 235 | printf("\ntlgu: I need 8-bit characters to work\n"); 236 | exit(1); 237 | } 238 | 239 | while(argc > 1 && argv[1][0] == '-') { 240 | switch(argv[1][1]) { 241 | case 'U': 242 | opt_U370 = 1; 243 | break; 244 | case 'N': 245 | opt_nospace = 1; 246 | break; 247 | case 'W': 248 | opt_multiple = 1; 249 | break ; 250 | case 'V': 251 | opt_verbose = 1; 252 | break ; 253 | case 'S': 254 | opt_debug_special = 1; 255 | break ; 256 | case 'T': 257 | opt_debug_bracket = 1; 258 | break ; 259 | case 'C': 260 | opt_debug_cit = 1; 261 | break ; 262 | case 'X': 263 | opt_ebook_cit_x = 1; 264 | opt_ebook_cit_y = 0; 265 | break; 266 | case 'Y': 267 | opt_ebook_cit_y = 1; 268 | opt_ebook_cit_x = 0; 269 | break; 270 | case 'B': 271 | opt_blank = 1; 272 | break ; 273 
| case 'p': 274 | opt_page = 1; 275 | break ; 276 | case 'r': 277 | opt_roman = 1; 278 | break ; 279 | case 'a': 280 | opt_acit = 1; 281 | opt_cit_id = 1; 282 | break ; 283 | case 'b': 284 | opt_bcit = 1; 285 | opt_cit_id = 1; 286 | break ; 287 | case 'c': 288 | opt_ccit = 1; 289 | opt_cit_id = 1; 290 | break ; 291 | case 'd': 292 | opt_dcit = 1; 293 | opt_cit_id = 1; 294 | break ; 295 | case 'v': 296 | opt_vcit = 1; 297 | break ; 298 | case 'w': 299 | opt_wcit = 1; 300 | break ; 301 | case 'x': 302 | opt_xcit = 1; 303 | break; 304 | case 'y': 305 | opt_ycit = 1; 306 | break ; 307 | case 'z': 308 | opt_zcit = 1; 309 | break; 310 | case 'e': 311 | opt_ecit_blank = 1; 312 | strcpy(ecite, argv[1]); 313 | argc-- ; 314 | argv++ ; 315 | break; 316 | case 'Z': 317 | opt_cprefix = 1; 318 | strcpy(cformat, argv[1]); 319 | argc-- ; 320 | argv++ ; 321 | break; 322 | default: 323 | usage_info() ; 324 | exit(0) ; 325 | } 326 | argc-- ; 327 | argv++ ; 328 | } 329 | 330 | switch(argc) { 331 | case 1: 332 | return tlgu("", ""); 333 | break; 334 | case 2: 335 | return tlgu(argv[1], ""); 336 | break; 337 | default: 338 | return tlgu(argv[1], argv[2]); 339 | } 340 | 341 | } 342 | 343 | 344 | /****************** FILE READ-WRITE LOOP **************************/ 345 | 346 | int tlgu(char *input_file, char *output_file) 347 | { 348 | int i; /* counter */ 349 | int j; /* counter */ 350 | int infile; /* input file descriptor */ 351 | int outfile;/* output file descriptor */ 352 | 353 | int icnt; /* input file bytes read in input buffer */ 354 | int ocnt; /* output file bytes written */ 355 | int bytes_to_process; /* bytes read minus bytes already processed */ 356 | 357 | int wehaveinput; /* flag for while */ 358 | int beta_return; /* process beta return code */ 359 | 360 | char new_file[MAXFILELEN]; 361 | struct stat filestat; 362 | 363 | /* Open input and output files 364 | */ 365 | // infile = open(input_file, O_RDONLY); 366 | if (strlen(input_file) == 0) { 367 | infile = STDIN_FILENO; 
368 | } else { 369 | infile = open(input_file, O_RDONLY); 370 | } 371 | 372 | if (infile < 0) { 373 | perror("\ntlgu: input file open"); 374 | return(1); 375 | } else { 376 | if (strlen(output_file) == 0) { 377 | outfile = STDOUT_FILENO; 378 | } else { 379 | if (strlen(output_file) < MAXFILELEN-5) { 380 | strcpy(new_file, output_file); 381 | } else { 382 | printf("\ntlgu: output filename too long - exiting\n"); 383 | return(1); 384 | } 385 | outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC); 386 | } 387 | if (outfile < 0) { 388 | perror("\ntlgu: output file create"); 389 | close(infile); 390 | return(1); 391 | } 392 | } 393 | 394 | /* Initialize citation 395 | * and descriptor indicators 396 | */ 397 | id_level = 0; 398 | for (i = 0; i < 52; i++) { 399 | icitation[i] = 0; 400 | for (j = 0; j < MAX_CITATION; j++) { 401 | citation[i][j]=0; 402 | } 403 | } 404 | 405 | /* Initialize beta processing defaults 406 | * e.g. The TLG Canon needs ROMAN as default 407 | * Hellenic should be reset at each ID CODE 408 | */ 409 | if (opt_roman) betastate = ROMAN; 410 | else betastate = HELLENIC; 411 | 412 | /* Read, process and write file blocks, 413 | * Optionally create one file per book (-W), unless no output file name is specified 414 | * Change file mode (equivalent to chmod 644 output_file), 415 | * and return. 
416 | * Note: Local deblocking usually yields higher speeds 417 | */ 418 | wehaveinput = 1; 419 | while (wehaveinput) { 420 | /* Read and process beta code in input_buffer */ 421 | icnt = read(infile, input_buffer, sizeof(input_buffer)); 422 | if (icnt == 0) wehaveinput = 0; 423 | 424 | iptr = 0; 425 | while ((icnt > 0) && (iptr < icnt)) { 426 | bytes_to_process = icnt - iptr; 427 | beta_return = process_beta(bytes_to_process); 428 | 429 | /* Write processed data and reset output buffer pointer */ 430 | if (optr > 0) { 431 | ocnt = write(outfile, output_buffer, optr); 432 | optr = 0; 433 | if (ocnt < 0) { 434 | perror("\ntlgu output file write"); 435 | wehaveinput = 0; 436 | } 437 | } else if (beta_return != -2) { /* no more bytes to write, no book change request */ 438 | if (opt_verbose) printf("\ntlgu: no more bytes to write"); 439 | wehaveinput = 0; /* signal no more input */ 440 | } 441 | if ((beta_return == -2) && (outfile != STDOUT_FILENO)) { 442 | /* book change request, close current file and open a new one */ 443 | if (opt_verbose) printf("\ntlgu: book change request: %s", previous_bcit[1]); 444 | if (close(outfile)) return(1); 445 | if (chmod(new_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) return(1); 446 | 447 | /* request file information and delete zero-length files 448 | */ 449 | stat(new_file, &filestat); 450 | if (filestat.st_size == 0) unlink(new_file); 451 | 452 | sprintf(new_file, "%s-%s.txt", output_file, previous_bcit[1]); 453 | outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC); 454 | if (outfile < 0) { 455 | perror("\ntlgu: new_file create"); 456 | close(infile); 457 | return(1); 458 | } 459 | } 460 | } 461 | } 462 | 463 | /* Close input and output files, 464 | * make output file readable 465 | */ 466 | close(infile); 467 | 468 | if (opt_verbose) printf("\ntlgu: processing complete\n"); 469 | if (outfile == STDOUT_FILENO) printf("\n"); 470 | 471 | if (close(outfile)) { 472 | perror("\ntlgu output file close"); 473 | return(1); 474 | } 
475 | if (outfile != STDOUT_FILENO) { 476 | if (chmod(new_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) { 477 | perror("\ntlgu output file chmod"); 478 | return(1); 479 | } 480 | } 481 | return(0); 482 | } 483 | 484 | /****************** PROCESSING *************************************/ 485 | 486 | /* process_beta: 487 | * Processes bytes in 488 | * Returns: -1 for EOF, -2 for book change 489 | * Changes: iptr 490 | */ 491 | int process_beta (int input_count) 492 | { 493 | unsigned char inchar; 494 | unsigned int outcode; 495 | int processing; 496 | int iptr_max; /* holds the calculated maximum input pointer value */ 497 | int return_code; /* id_code and beta_code bytes written; error if negative */ 498 | char outstring[511]; 499 | char nstring[253]; 500 | 501 | return_code = 0; 502 | /* A beta code stream includes two kinds of data: 503 | * ID data - always has the high bit set. 504 | * Text data - always has the high bit reset. 505 | */ 506 | processing = 1; 507 | iptr_max = iptr + input_count; 508 | if (opt_verbose) printf("\n\ntlgu: process_beta - %d bytes, iptr = %4.4x, iptr_max = %4.4x", input_count, iptr, iptr_max); 509 | while (processing) { 510 | if ((iptr < INRECSIZE) && (iptr < iptr_max)) { 511 | inchar = input_buffer[iptr++]; 512 | if (optr < OUTRECSIZE) { 513 | if (inchar == 0) { 514 | /* do nothing for null characters */ 515 | } else if (inchar > 0x7F) { 516 | /* ID data - decrement input pointer before processing */ 517 | --iptr; 518 | 519 | /* Reset beta decoding state if roman option specified */ 520 | if (opt_roman) betastate = ROMAN; 521 | 522 | /* Process ID code */ 523 | return_code = id_code(input_count); 524 | if (return_code == -1) { 525 | if (opt_verbose) printf("\ntlgu: EOF while processing id code"); 526 | processing = 0; 527 | } else if (return_code == -2) { 528 | if (opt_verbose) printf("\ntlgu: book change request"); 529 | processing = 0; 530 | } 531 | start_new_line = 1; 532 | } else { 533 | /* text data < 0x80 - decrement input 
pointer before processing */ 534 | --iptr; 535 | if (start_new_line) { 536 | /* Write info on (book) citation change */ 537 | if (book_change) { 538 | if (opt_cit_id) { 539 | sprintf(outstring, "\n\f[%s] ", citation[0]); 540 | output_string(outstring); 541 | sprintf(outstring, "[%s] ", citation[1]); 542 | output_string(outstring); 543 | sprintf(outstring, "[%s] ", citation[2]); 544 | output_string(outstring); 545 | sprintf(outstring, "[%s]\n", citation[3]); 546 | output_string(outstring); 547 | } 548 | book_change = 0; 549 | } 550 | if (opt_nospace) { 551 | outstring[0] = 0x0; 552 | } 553 | else { 554 | sprintf(outstring, "\n"); 555 | } 556 | 557 | if (opt_blank) 558 | strcat(outstring, "\t"); 559 | else if (opt_cprefix) { 560 | strcat(outstring, resolve_cite_format(cformat)); 561 | } 562 | else if (opt_ebook_cit_x) { 563 | if (prev_cit_x != icitation[23]) { 564 | prev_cit_x = icitation[23]; 565 | if (prev_cit_w != icitation[22]) { 566 | prev_cit_w = icitation[22]; 567 | sprintf(nstring, "\n[%d.%d] ", icitation[22], icitation[23]); 568 | } 569 | else { 570 | sprintf(nstring, "\n[%d] ", icitation[23]); 571 | } 572 | strcat(outstring, nstring); 573 | } 574 | } 575 | else if (opt_ebook_cit_y) { 576 | if (prev_cit_y != icitation[24]) { 577 | prev_cit_y = icitation[24]; 578 | if (prev_cit_x != icitation[23]) { 579 | prev_cit_x = icitation[23]; 580 | sprintf(nstring, "\n[%d.%d] ", icitation[23], icitation[24]); 581 | } 582 | else { 583 | sprintf(nstring, "\n[%d] ", icitation[24]); 584 | } 585 | strcat(outstring, nstring); 586 | } 587 | } 588 | else if (opt_vcit || opt_wcit || opt_xcit || opt_ycit || opt_zcit) { 589 | if (opt_vcit) { 590 | if (icitation[21] == 0) sprintf(nstring, "%s.", citation[21]); 591 | else sprintf(nstring, "%d%s.", icitation[21], citation[21]); 592 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 593 | strcat(outstring, nstring); 594 | } 595 | if (opt_wcit) { 596 | if (icitation[22] == 0) sprintf(nstring, "%s.", citation[22]); 597 | 
else sprintf(nstring, "%d%s.", icitation[22], citation[22]); 598 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 599 | strcat(outstring, nstring); 600 | } 601 | if (opt_xcit) { 602 | if (icitation[23] == 0) sprintf(nstring, "%s.", citation[23]); 603 | else sprintf(nstring, "%d%s.", icitation[23], citation[23]); 604 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 605 | strcat(outstring, nstring); 606 | } 607 | if (opt_ycit) { 608 | if (icitation[24] == 0) sprintf(nstring, "%s.", citation[24]); 609 | else sprintf(nstring, "%d%s.", icitation[24], citation[24]); 610 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 611 | strcat(outstring, nstring); 612 | } 613 | if (opt_zcit) { 614 | if (icitation[25] == 0) sprintf(nstring, "%s.", citation[25]); 615 | else sprintf(nstring, "%d%s", icitation[25], citation[25]); 616 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 617 | strcat(outstring, nstring); 618 | } 619 | /* Separate text from citation using a tab character */ 620 | strcat(outstring, "\t"); 621 | } 622 | if (input_buffer[iptr] < 0x80) { 623 | /* Print only if not followed by another ID byte */ 624 | output_string(outstring); 625 | } 626 | start_new_line = 0; 627 | if (opt_roman) betastate = ROMAN; 628 | else betastate = HELLENIC; 629 | } 630 | beta_code(input_count); 631 | } 632 | } else { 633 | /* Output size is greater than input -- intermediate write */ 634 | printf("\ntlgu: FIXME -- DATA LOSS: ERROR output size iptr - %x optr - %x", iptr, optr); 635 | processing = 0; 636 | } 637 | } else { 638 | /* Finished processing all input */ 639 | processing = 0; 640 | } 641 | } /* end while processing*/ 642 | 643 | if (opt_verbose) printf("\ntlgu: iptr - %4.4x, optr - %4.4x ", iptr, optr); 644 | return return_code; 645 | } 646 | /****************** LIBRARY FUNCTIONS ******************************/ 647 | /* get_accents: 648 | * gets accents in 649 | * Returns: number of accents found or zero 650 | * Changes: 
accents, iptr 651 | */ 652 | int get_accents(void) 653 | { 654 | unsigned char bufferchar; 655 | int processing = 1; 656 | int number_of_accents = 0; 657 | 658 | accents = 0; 659 | 660 | while (processing) { 661 | if (iptr < INRECSIZE) { 662 | bufferchar = input_buffer[iptr++]; 663 | if (betastate == ROMAN) { 664 | if (strchr(latin_accent_chars, bufferchar)) { 665 | store_accents(bufferchar); 666 | number_of_accents++; 667 | } else { 668 | --iptr; 669 | processing = 0; 670 | } 671 | } else if (strchr(accent_chars, bufferchar)) { 672 | store_accents(bufferchar); 673 | number_of_accents++; 674 | } else { 675 | --iptr; 676 | processing = 0; 677 | } 678 | } else { 679 | processing = 0; 680 | } 681 | } 682 | return number_of_accents; 683 | } 684 | 685 | /* store_accents: 686 | * Stores accent character passed as a parameter to 687 | * 0 00 00 --- 0 00 00 no accent 688 | * | | | 689 | * | | ---- 01 psili, 10 dasia, 11 dialytika 690 | * | ------- 01 varia, 10 oxia, 11 perispomeni 691 | * ----------- 1 ypogegrammeni 692 | * Changes: accents 693 | * Caveat: currently only ORs new accent... 
expects an all-zero accent variable 694 | * 695 | * 15-May-2020 dm -- single acute accent option (Unicode U0370 code block) 696 | */ 697 | void store_accents(unsigned char bufferchar) 698 | { 699 | switch (bufferchar) 700 | { 701 | case ')': 702 | accents = accents | 1; 703 | break; 704 | case '(': 705 | accents = accents | 2; 706 | break; 707 | case '+': 708 | accents = accents | 3; 709 | break; 710 | case '\\': 711 | accents = accents | 4; 712 | break; 713 | case '/': 714 | accents = accents | 8; 715 | break; 716 | case '=': 717 | accents = accents | 0xc; 718 | break; 719 | case '|': 720 | accents = accents | 0x10; 721 | break; 722 | default: 723 | break; 724 | } 725 | accents &= 0x1f; 726 | } 727 | 728 | /* mod_accents: 729 | * 730 | * Part of the U0370 character output option: 731 | * 732 | * If accents are acute, diaeresis (dialytica) or a combination of the two 733 | * which is used as an index to the accented character tables is 734 | * modified for pointing to the alternate character 735 | * 736 | * Changes: accents 737 | * 738 | * 15-May 2020 dm 739 | */ 740 | void mod_accents(void) 741 | { 742 | if (accents == 8) accents = 0x20; 743 | else if (accents == 3) accents = 0x21; 744 | else if (accents == 11) accents = 0x22; 745 | } 746 | 747 | /* output_accents: 748 | * Input: 749 | * 0 00 00 --- 0 00 00 no accent 750 | * | | | 751 | * | | ---- 01 psili, 10 dasia, 11 dialytika 752 | * | ------- 01 varia, 10 oxia, 11 perispomeni 753 | * ----------- 1 ypogegrammeni 754 | * Changes: optr (output_utf) 755 | */ 756 | void output_accents(void) 757 | { 758 | int paccents; 759 | 760 | paccents = accents & 3; 761 | if (paccents == 1) 762 | output_utf(PSILI); 763 | else if (paccents == 2) 764 | output_utf(DASIA); 765 | else if (paccents == 3) 766 | output_utf(DIALYTIKA); 767 | 768 | paccents = (accents & 0xc) >> 2; 769 | if (paccents == 1) 770 | output_utf(VARIA); 771 | else if (paccents == 2) 772 | output_utf(OXIA); 773 | else if (paccents == 3) { 774 | if (betastate == 
ROMAN) 775 | output_utf(CARET); 776 | else 777 | output_utf(PERISPOMENI); 778 | } 779 | paccents = accents & 0x10; 780 | if (paccents) 781 | output_utf(YPOGEGRAMMENI); 782 | } 783 | 784 | 785 | /* getnum: 786 | * Collects a non-zero number from the current position. 787 | * Returns: an integer or zero if no number found, -1 on end of buffer 788 | * Changes: iptr 789 | * 22-Apr-2006 dm - dropped unsigned attribute from bufferchar and modnumber 790 | */ 791 | int getnum(void) 792 | { 793 | #define MAXNUMBERS 32 794 | char bufferchar; 795 | char modnumber[MAXNUMBERS]; /* symbol or font modifier number string */ 796 | int imodnumber = 0; /* index to modnumber */ 797 | int convnumber = 0; /* converted modnumber string */ 798 | int processing = 1; 799 | 800 | modnumber[0] = 0; 801 | 802 | while (processing) { 803 | if ( (iptr < INRECSIZE) && (imodnumber < MAXNUMBERS) ) { 804 | bufferchar = input_buffer[iptr++]; 805 | if (isdigit(bufferchar)) { 806 | modnumber[imodnumber++] = bufferchar; 807 | } else { 808 | --iptr; 809 | modnumber[imodnumber] = 0; 810 | sscanf(modnumber, "%d", &convnumber); 811 | processing = 0; 812 | } 813 | } else { 814 | convnumber = -1; 815 | processing = 0; 816 | } 817 | } 818 | if (convnumber < 0) perror("\ndid not complete number\n"); 819 | return convnumber; 820 | } 821 | 822 | /* output_utf2: 823 | * Converts the input code into a UTF-8 byte sequence in output_buffer 824 | * Changes: optr, output_buffer 825 | * NOTE: this is a duplicate of output_utf (in order to avoid recursion) 826 | */ 827 | void output_utf2(int ucode) 828 | { 829 | if ((optr+3) > OUTRECSIZE) { 830 | perror("\noptr out of range"); 831 | } else if (ucode == 0){ 832 | /* do nothing */ 833 | } else if (ucode < 0x80) { 834 | output_buffer[optr++] = ucode; 835 | } else if (ucode < 0x800) { 836 | output_buffer[optr++] = (ucode >> 6) | 0xc0; 837 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 838 | } else if (ucode <= 0xffff) { 839 | output_buffer[optr++] = ((ucode & 0xf000) >> 12) 
| 0xe0; 840 | output_buffer[optr++] = ((ucode & 0x0fc0) >> 6) | 0x80; 841 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 842 | } else if (ucode <= 0x10ffff) { 843 | output_buffer[optr++] = ((ucode & 0x1C0000) >> 18) | 0xF0; 844 | output_buffer[optr++] = ((ucode & 0x03f000) >> 12) | 0x80; 845 | output_buffer[optr++] = ((ucode & 0x000fc0) >> 6) | 0x80; 846 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 847 | } else { 848 | /* higher codes are not defined for UTF-8*/ 849 | } 850 | } 851 | 852 | /* output_utf: 853 | * Converts the input code into a UTF-8 byte sequence in output_buffer 854 | * Changes: optr, output_buffer 855 | */ 856 | void output_utf(int ucode) 857 | { 858 | if ((optr+3) > OUTRECSIZE) { 859 | perror("\noptr out of range"); 860 | } else if (ucode == 0){ 861 | /* do nothing */ 862 | } else if (ucode < 0x80) { 863 | output_buffer[optr++] = ucode; 864 | } else if (ucode < 0x800) { 865 | output_buffer[optr++] = (ucode >> 6) | 0xc0; 866 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 867 | } else if (ucode <= 0xffff) { 868 | output_buffer[optr++] = ((ucode & 0xf000) >> 12) | 0xe0; 869 | output_buffer[optr++] = ((ucode & 0x0fc0) >> 6) | 0x80; 870 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 871 | } else if (ucode <= 0x10ffff) { 872 | output_buffer[optr++] = ((ucode & 0x1C0000) >> 18) | 0xF0; 873 | output_buffer[optr++] = ((ucode & 0x03f000) >> 12) | 0x80; 874 | output_buffer[optr++] = ((ucode & 0x000fc0) >> 6) | 0x80; 875 | output_buffer[optr++] = (ucode & 0x3f) | 0x80; 876 | } else { 877 | /* higher codes are not defined for UTF-8*/ 878 | } 879 | 880 | /* Output combining codes for brackets */ 881 | if (quasi_bracket_code) output_utf2(quasi_bracket_code); 882 | } 883 | 884 | /* output_string: 885 | * Calls output_utf to write a string in 886 | * Returns: the number of bytes written 887 | * Changes: optr, output_buffer 888 | */ 889 | void output_string(char *outstr) 890 | { 891 | int nextchar; 892 | int cnt; 893 | 894 | for (cnt = 0; cnt < 
strlen(outstr); cnt++) { 895 | output_utf(outstr[cnt]); 896 | } 897 | } 898 | 899 | /* handle_escape_codes: 900 | * Formatting and character output based on escape codes: $&%"@#^[]<>{} 901 | * Input: escape code, optional number 902 | * Changes: optr, output_buffer 903 | */ 904 | void handle_escape_codes(unsigned char beta, int number) 905 | { 906 | int temp = 0; 907 | 908 | switch (beta) 909 | { 910 | case '$': 911 | betastate = HELLENIC; 912 | accents = 0; 913 | break; 914 | case '&': 915 | betastate = ROMAN; 916 | accents = 0; 917 | break; 918 | case '%': 919 | if (opt_debug_special) printf("%%%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 920 | if (number < MAX_PUNCTUATION) 921 | output_utf(punctuation[number]); 922 | break; 923 | case '\"': 924 | if (opt_debug_special) printf("\"%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 925 | if (number < MAX_QUOTATION) { 926 | if (quotation_open[number]) { 927 | output_utf(quotation_close_symbol[number]); 928 | quotation_open[number] = 0; 929 | } else { 930 | output_utf(quotation_open_symbol[number]); 931 | quotation_open[number] = 1; 932 | } 933 | } 934 | break; 935 | case '@': 936 | /* FIXME: If citations are active, paging should be disabled */ 937 | if (opt_debug_special) printf("@%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 938 | /* Page formats -- FIXME: incomplete */ 939 | if (number == 0) { 940 | output_utf(0x20); 941 | output_utf(0x20); 942 | } else if (number == 1) { 943 | if (opt_page) output_utf(0xc); 944 | //FIXME: reinstate else output_utf(0xa); 945 | } //fixme: reinstate else output_utf(0xa); 946 | break; 947 | case '#': 948 | if (opt_debug_special) printf("#%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 949 | if (number < MAX_TEXT_SYMBOLS) { 950 | output_utf(text_symbols[number]); 951 | } 952 | break; 953 | case '^': 954 | /* 
quarter-spaces: will output at least one space */ 955 | if (number > 0) temp = number / 4; 956 | while (temp >= 0) { 957 | output_utf(0x20); 958 | temp--; 959 | } 960 | break; 961 | case '[': 962 | if (opt_debug_bracket) printf("[%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 963 | if (number < MAX_BRACKET) { 964 | output_utf(bracket_open_symbol[number]); 965 | } 966 | break; 967 | case ']': 968 | if (opt_debug_bracket) printf("]%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 969 | if (number < MAX_BRACKET) { 970 | output_utf(bracket_close_symbol[number]); 971 | } 972 | break; 973 | case '<': 974 | if (opt_debug_bracket) printf("<%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 975 | if (number < MAX_QUASI_BRACKET) { 976 | if (number == 0 || number == 1 || number == 3 || number == 4 || number == 5 \ 977 | || number == 8 || number == 17 || number == 18) { 978 | quasi_bracket_code = quasi_bracket_open_symbol[number]; 979 | } else { 980 | output_utf(quasi_bracket_open_symbol[number]); 981 | } 982 | } 983 | break; 984 | case '>': 985 | if (opt_debug_bracket) printf(">%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 986 | if (number < MAX_QUASI_BRACKET) { 987 | if (quasi_bracket_code) { 988 | quasi_bracket_code = 0; /* stop outputting combining codes */ 989 | } else { 990 | output_utf(quasi_bracket_close_symbol[number]); 991 | } 992 | } 993 | break; 994 | case '{': 995 | if (opt_debug_bracket) printf("{%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 996 | if (number < MAX_NON_TEXT) { 997 | output_utf(non_text_open_symbol[number]); 998 | 999 | } 1000 | break; 1001 | case '}': 1002 | if (opt_debug_bracket) printf("{%d -- %s %d.%d.%d\n", number, citation[1], icitation[23], icitation[24], icitation[25]); 1003 | if (number < MAX_NON_TEXT) { 1004 | 
output_utf(non_text_close_symbol[number]); 1005 | } 1006 | break; 1007 | default: 1008 | break; 1009 | } 1010 | } 1011 | 1012 | /* which_sigma: 1013 | * Tries to decide on which sigma form to use. 1014 | * Input: index of input_buffer (iptr) after the sigma 1015 | * Returns: output character code 1016 | * 22-Apr-2006 dm -- nextcode less than space will now produce a final sigma, as well 1017 | */ 1018 | int which_sigma(int nextptr) 1019 | { 1020 | int scanning; 1021 | int nextcode; 1022 | /* If the next character is a hyphen, it is a medial sigma 1023 | * Otherwise, a few characters are examined in the input buffer: 1024 | * if an alphabetic character is found before we hit a space, or 1025 | * other punctuation character, it is a medial sigma 1026 | * otherwise it is a final sigma (there is one exception in 4085 - POS(.)) 1027 | */ 1028 | if (input_buffer[iptr] == '-') 1029 | return(SIGMEDIAL); 1030 | else { 1031 | scanning = 10; 1032 | while(scanning) { 1033 | nextcode = input_buffer[nextptr++]; 1034 | if (isalpha(nextcode)) 1035 | return(SIGMEDIAL); 1036 | if ((nextcode > 0x7f) || (nextcode < 0x20)) 1037 | return(SIGFINAL); 1038 | if (strchr(punctuation_codes, nextcode)) 1039 | return(SIGFINAL); 1040 | scanning--; 1041 | } 1042 | return(SIGMEDIAL); 1043 | } 1044 | } 1045 | 1046 | /* beta_code: 1047 | * Processes characters in and 1048 | * writes processed output to output_buffer> 1049 | * Changes: optr, output_buffer 1050 | * 27-Nov-2011 hyphen handling at the end of a line (opt_nospace) 1051 | */ 1052 | void beta_code(int input_count) 1053 | { 1054 | int processing; 1055 | int input_pointer_max; 1056 | unsigned char betachar; 1057 | unsigned int outputchar; 1058 | int tmp; 1059 | int tmp_iptr; 1060 | 1061 | input_pointer_max = iptr + input_count; 1062 | processing = 1; 1063 | 1064 | while (processing) { 1065 | if ( (iptr < INRECSIZE) && (iptr < input_pointer_max) ) { 1066 | betachar = input_buffer[iptr++]; 1067 | 1068 | /* Skip hyphen if next character is ID 
data or spaces */ 1069 | if ((betachar == '-') && (opt_nospace)) { 1070 | tmp_iptr = iptr; 1071 | while (input_buffer[iptr] == 0x20) { 1072 | iptr++; 1073 | } 1074 | if (input_buffer[iptr] > 0x7F) { 1075 | betachar = input_buffer[iptr++]; 1076 | } else { 1077 | /* Not a space, not an ID code, restore pointer */ 1078 | iptr=tmp_iptr; 1079 | } 1080 | } 1081 | if ((betachar > 0x7F)) { 1082 | /* ID data found - restore pointer and stop processing*/ 1083 | --iptr; 1084 | processing = 0; 1085 | } else { 1086 | outputchar = 0; 1087 | 1088 | if (strchr(escape_codes, betachar)) { 1089 | /* Handle escape codes */ 1090 | handle_escape_codes(betachar, getnum()); 1091 | } else if (betastate == HELLENIC && betachar == '*') { 1092 | /* Handle Hellenic uppercase character */ 1093 | get_accents(); 1094 | betachar = input_buffer[iptr++]; 1095 | if (accents == 0) get_accents(); //FIXME: handle suffix accents differently 1096 | if (strchr(accented_chars, betachar)) { 1097 | if (opt_U370) mod_accents(); 1098 | switch (betachar) { 1099 | case 'A': 1100 | outputchar = Alpha[accents]; 1101 | break; 1102 | case 'E': 1103 | outputchar = Epsilon[accents]; 1104 | break; 1105 | case 'H': 1106 | outputchar = Eta[accents]; 1107 | break; 1108 | case 'I': 1109 | outputchar = Iota[accents]; 1110 | break; 1111 | case 'O': 1112 | outputchar = Omicron[accents]; 1113 | break; 1114 | case 'U': 1115 | outputchar = Ypsilon[accents]; 1116 | break; 1117 | case 'W': 1118 | outputchar = Omega[accents]; 1119 | break; 1120 | case 'R': 1121 | outputchar = Rho[accents]; 1122 | break; 1123 | default: 1124 | break; 1125 | } 1126 | } else if (betachar == 'S') { 1127 | tmp = getnum(); 1128 | if (tmp == 3) outputchar = SIGLUNATEUPPER; 1129 | else outputchar = SIGMEDIALUPPER; 1130 | } else if (isalpha(betachar)) { 1131 | /* not an accented character */ 1132 | outputchar = hellenic[betachar]; 1133 | } else { 1134 | outputchar = hellenic[betachar - 0x20]; 1135 | } 1136 | if (outputchar == 0) outputchar = 
hellenic[betachar]; /* error condition */ 1137 | output_utf(outputchar); 1138 | } else if (betastate == HELLENIC && isalpha(betachar)) { 1139 | /* Handle hellenic lower case: 1140 | * Get default character and then try to pin accents 1141 | */ 1142 | if (strchr(accented_chars, betachar)) { 1143 | get_accents(); 1144 | if (opt_U370) mod_accents(); 1145 | switch (betachar) { 1146 | case 'A': 1147 | outputchar = alpha[accents]; 1148 | break; 1149 | case 'E': 1150 | outputchar = epsilon[accents]; 1151 | break; 1152 | case 'H': 1153 | outputchar = eta[accents]; 1154 | break; 1155 | case 'I': 1156 | outputchar = iota[accents]; 1157 | break; 1158 | case 'O': 1159 | outputchar = omicron[accents]; 1160 | break; 1161 | case 'U': 1162 | outputchar = ypsilon[accents]; 1163 | break; 1164 | case 'W': 1165 | outputchar = omega[accents]; 1166 | break; 1167 | case 'R': 1168 | outputchar = rho[accents]; 1169 | break; 1170 | default: 1171 | break; 1172 | } 1173 | } else if (betachar == 'S') { 1174 | tmp = getnum(); 1175 | if (tmp == 1) outputchar = SIGMEDIAL; 1176 | else if (tmp == 2)outputchar = SIGFINAL; 1177 | else if (tmp == 3) outputchar = SIGLUNATE; 1178 | if (outputchar == 0) { 1179 | outputchar = which_sigma(iptr); 1180 | } 1181 | } 1182 | 1183 | if (outputchar == 0) outputchar = hellenic[betachar - 0x20]; 1184 | output_utf(outputchar); 1185 | } else if (betastate == ROMAN && isalpha(betachar)) { 1186 | /* Handle Roman characters */ 1187 | //FIXME: need to process roman characters 1188 | if (isalpha(betachar)) get_accents(); 1189 | outputchar = betachar; 1190 | output_utf(outputchar); 1191 | /* ROMAN uses combining accent forms */ 1192 | output_accents(); 1193 | } else { 1194 | //FIXME: placeholder 1195 | if (betachar != '`') outputchar = betachar; 1196 | if (betachar == '_') outputchar = 0x2014; /* EM DASH */ 1197 | if (betachar == ':') outputchar = 0x00b7; /* Ano teleia */ 1198 | output_utf(outputchar); 1199 | } 1200 | } 1201 | } else { 1202 | /* Requested number of 
characters have been processed 1203 | * or no more characters available in buffer 1204 | */ 1205 | processing = 0; 1206 | } 1207 | } 1208 | } 1209 | 1210 | 1211 | const char *resolve_cite_format(const char *cformat) { 1212 | static char *outbuf[511]; 1213 | char nstring[253]; 1214 | *outbuf = 0; 1215 | const char *c; 1216 | for (c = cformat; *c; c++) { 1217 | if (*c == '%') { 1218 | const char c2 = *(c+1); 1219 | signed char cstart = -1; 1220 | if ((c2 >= 'a') && (c2 <= 'z')) { 1221 | cstart = c2 - 'a'; 1222 | } 1223 | else if ((c2 >= 'A') && (c2 <= 'Z')) { 1224 | cstart = 26 + (c2 - 'A'); 1225 | } 1226 | else if (c2 == '%') { 1227 | *nstring = '%'; nstring[1] = 0; strcat((char *)outbuf, nstring); 1228 | } 1229 | else { 1230 | fprintf(stderr, "unknown escape sequence: %%%c\n", c2); 1231 | } 1232 | c++; //skip both our '%' and following character (by loop inc); 1233 | 1234 | if (cstart > 20) { 1235 | if (icitation[cstart] == 0) sprintf(nstring, "%s",citation[cstart]); 1236 | else sprintf(nstring, "%d%s", icitation[cstart], citation[cstart]); 1237 | if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite); 1238 | strcat((char *)outbuf, nstring); 1239 | } 1240 | else if (cstart > -1) { 1241 | if (!citation[cstart] || !citation[cstart][0]) { 1242 | if (opt_ecit_blank) strcat((char *)outbuf, ecite); 1243 | } 1244 | else { 1245 | strcat((char *)outbuf, citation[cstart]); 1246 | } 1247 | } 1248 | } 1249 | else if (*c == '\\') { 1250 | switch (*(c+1)) { 1251 | case 't': strcat((char *)outbuf, "\t"); break; 1252 | case 'n': strcat((char *)outbuf, "\n"); break; 1253 | case 'r': strcat((char *)outbuf, "\r"); break; 1254 | default: *nstring = *(c+1); nstring[1] = 0; strcat((char *)outbuf, nstring); break; 1255 | } 1256 | c++; //skip both our '%' and following character (by loop inc); 1257 | } 1258 | else { 1259 | *nstring = *c; nstring[1] = 0; strcat((char *)outbuf, nstring); 1260 | } 1261 | } 1262 | return (char *)outbuf; 1263 | } 1264 | 1265 | 1266 | /* id_code: 1267 | * 
points to the next character in the to process; 1268 | * points to the next empty = 0xF0) { 1295 | /* 1296 | * Special code handling 1297 | */ 1298 | switch (idchar) 1299 | { 1300 | case 0xF0: /* EOF */ 1301 | return_code = -1; /* indicate EOF */ 1302 | processing = 0; 1303 | break; 1304 | case 0xFE: /* End of block -- block is padded with nulls */ 1305 | while (!input_buffer[iptr] && iptr= 0xE0) { 1326 | /* 1327 | * Escape code handling 1328 | * The byte following an escape code is an ID byte 1329 | * Citation IDs can only be 0=a, 1=b, 2=c and 4=d 1330 | */ 1331 | if (opt_debug_cit) printf("tlgu: Escape %x", idchar); 1332 | id_command = idchar & 0xF; /* get "command" nybble */ 1333 | idchar = input_buffer[iptr++] & 0x7F; /* get ID level byte */ 1334 | if (idchar >= 97) { /* descriptors hold the upper part of the array */ 1335 | id_level = idchar - 97 + 26; /* create an index offset */ 1336 | if (id_level > 51) {id_level = 51;} /* default to z */ 1337 | } else { 1338 | /* For escape codes, citation IDs can only be a=0, b=1, c=2 and d=4*/ 1339 | id_level = idchar & 7; 1340 | if (id_level == 4) {id_level = 3;} /* adjust d level */ 1341 | } 1342 | if (opt_debug_cit) printf(" ID level: %d\n", id_level); 1343 | id_process = 1; /* command must be processed */ 1344 | } else if ((idchar >= 0x80) && (id_process == 0)) { 1345 | id_command = idchar & 0xF; /* get command first */ 1346 | /* create a case number 0 to 5, corresponding to 0x8_ - 0xD_ */ 1347 | scratch = (idchar >> 4) & 0x7; 1348 | if (opt_debug_cit) printf("tlgu: IDchar %x case %x\n", idchar, scratch); 1349 | 1350 | switch (scratch) 1351 | { 1352 | case 0: 1353 | id_level = 25; /* z */ 1354 | id_process = 1; /* command must be processed */ 1355 | break; 1356 | case 1: 1357 | id_level = 24; /* y */ 1358 | id_process = 1; /* command must be processed */ 1359 | break; 1360 | case 2: 1361 | id_level = 23; /* x */ 1362 | id_process = 1; /* command must be processed */ 1363 | break; 1364 | case 3: 1365 | id_level = 22; 
/* w */ 1366 | id_process = 1; /* command must be processed */ 1367 | break; 1368 | case 4: 1369 | id_level = 21; /* v */ 1370 | id_process = 1; /* command must be processed */ 1371 | break; 1372 | case 5: 1373 | id_level = 13; /* n */ 1374 | id_process = 1; /* command must be processed */ 1375 | break; 1376 | default: 1377 | break; 1378 | } 1379 | 1380 | } 1381 | if (id_process) { 1382 | switch (id_command) 1383 | { 1384 | case 0: 1385 | /* increment the last character of the ID string 1386 | * at this level, if present, else increment 1387 | * the numeric value 1388 | */ 1389 | scratch = strlen(citation[id_level]); 1390 | if (scratch > 0) { 1391 | citation[id_level][scratch-1]++; 1392 | } else { 1393 | icitation[id_level]++; /* increment numeric ID */ 1394 | } 1395 | break; 1396 | case 1: 1397 | icitation[id_level] = 1; /* literal value */ 1398 | citation[id_level][0] = 0x0; 1399 | break; 1400 | case 2: 1401 | icitation[id_level] = 2; /* literal value */ 1402 | citation[id_level][0] = 0x0; 1403 | break; 1404 | case 3: 1405 | icitation[id_level] = 3; /* literal value */ 1406 | citation[id_level][0] = 0x0; 1407 | break; 1408 | case 4: 1409 | icitation[id_level] = 4; /* literal value */ 1410 | citation[id_level][0] = 0x0; 1411 | break; 1412 | case 5: 1413 | icitation[id_level] = 5; /* literal value */ 1414 | citation[id_level][0] = 0x0; 1415 | break; 1416 | case 6: 1417 | icitation[id_level] = 6; /* literal value */ 1418 | citation[id_level][0] = 0x0; 1419 | break; 1420 | case 7: 1421 | icitation[id_level] = 7; /* literal value */ 1422 | citation[id_level][0] = 0x0; 1423 | break; 1424 | case 8: 1425 | idchar = input_buffer[iptr++]; /* 7 bit binary value */ 1426 | icitation[id_level] = idchar & 0x7F; 1427 | citation[id_level][0] = 0x0; 1428 | break; 1429 | case 9: 1430 | idchar = input_buffer[iptr++]; /* 7 bit binary value */ 1431 | icitation[id_level] = idchar & 0x7F; 1432 | idchar = input_buffer[iptr++]; /* single character */ 1433 | citation[id_level][0] = idchar 
& 0x7F; 1434 | citation[id_level][1] = 0; 1435 | break; 1436 | case 0xa: 1437 | idchar = input_buffer[iptr++]; /* 7 bit binary value */ 1438 | icitation[id_level] = idchar & 0x7F; 1439 | for (id_char=0; id_char < 31; id_char++) { 1440 | idchar = input_buffer[iptr++]; /* string */ 1441 | if (idchar == 0xFF) { 1442 | citation[id_level][id_char] = 0; /* end of string */ 1443 | break; 1444 | } else { 1445 | citation[id_level][id_char] = idchar & 0x7F; 1446 | } 1447 | } 1448 | break; 1449 | case 0xb: 1450 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1451 | scratch = (idchar & 0x7F) << 7; /* shift upper */ 1452 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1453 | idchar &= 0x7F; /* mask sign bit */ 1454 | scratch = scratch | idchar; /* combine */ 1455 | icitation[id_level] = scratch; 1456 | citation[id_level][0] = 0x0; 1457 | break; 1458 | case 0xc: 1459 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1460 | scratch = (idchar & 0x7F) << 7; /* shift upper */ 1461 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1462 | idchar &= 0x7F; /* mask sign bit */ 1463 | scratch = scratch | idchar; /* combine */ 1464 | icitation[id_level] = scratch; 1465 | idchar = input_buffer[iptr++]; /* single character */ 1466 | citation[id_level][0] = idchar & 0x7F; 1467 | citation[id_level][1] = 0x0; /* end of string */ 1468 | break; 1469 | case 0xd: 1470 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1471 | scratch = (idchar & 0x7F) << 7; /* shift upper */ 1472 | idchar = input_buffer[iptr++]; /* 14 bit binary value */ 1473 | idchar &= 0x7F; /* mask sign bit */ 1474 | scratch = scratch | idchar; /* combine */ 1475 | icitation[id_level] = scratch; 1476 | for (id_char=0; id_char < 31; id_char++) { 1477 | idchar = input_buffer[iptr++]; /* string */ 1478 | if (idchar == 0xFF) { 1479 | citation[id_level][id_char] = 0x0; /* end of string */ 1480 | break; 1481 | } else { 1482 | citation[id_level][id_char] = idchar & 0x7F; 1483 | } 1484 | } 
1485 | break; 1486 | case 0xe: 1487 | /* same binary value, single character */ 1488 | idchar = input_buffer[iptr++]; /* single character */ 1489 | citation[id_level][0] = idchar & 0x7F; 1490 | citation[id_level][1] = 0x0; /* end of string */ 1491 | break; 1492 | case 0xf: 1493 | icitation[id_level] = 0; /* no binary value */ 1494 | for (id_char=0; id_char < 31; id_char++) { 1495 | idchar = input_buffer[iptr++]; /* string */ 1496 | if (idchar == 0xFF) { 1497 | citation[id_level][id_char] = 0x0; /* end of string */ 1498 | break; 1499 | } else { 1500 | citation[id_level][id_char] = idchar & 0x7F; 1501 | } 1502 | } 1503 | 1504 | /* Keep tab of book changes, optionally split into books */ 1505 | if (id_level == 1) { 1506 | if (strncmp(citation[1], previous_bcit[1], 31)) { 1507 | if (opt_multiple) { 1508 | /* Signal outer loop to stop 1509 | * after processing citation change 1510 | */ 1511 | return_code = -2; 1512 | processing = 0; 1513 | if (opt_verbose) printf("\ntlgu: book citation: %s, previous: %s", citation[1], previous_bcit[1]); 1514 | } 1515 | strncpy(previous_bcit[1], citation[1], 31); 1516 | previous_bcit[1][31] = 0; 1517 | } 1518 | book_change = 1; 1519 | } 1520 | break; 1521 | default: 1522 | printf("tlgu: Unknown id_command: %x, iptr %x\n", id_command, iptr); 1523 | break; 1524 | } 1525 | if (opt_debug_cit) printf("tlgu: Command: %x ID level: %d, Binary: %d, ASCII: %s iptr++ %x\n",\ 1526 | id_command, id_level,icitation[id_level], citation[id_level], iptr); 1527 | 1528 | /* Adjust lower citation levels - 1529 | */ 1530 | switch (id_level) 1531 | { 1532 | /* a or b level changes; 1533 | * lower citation levels are set to zero / null 1534 | * descriptor ID levels are set to null 1535 | */ 1536 | case 0: 1537 | case 1: 1538 | icitation[13] = 0; 1539 | citation[13][0] = 0x0; 1540 | icitation[21] = 0; 1541 | citation[21][0] = 0x0; 1542 | icitation[22] = 0; 1543 | citation[22][0] = 0x0; 1544 | icitation[23] = 0; 1545 | citation[23][0] = 0x0; 1546 | icitation[24] 
= 0; 1547 | citation[24][0] = 0x0; 1548 | icitation[25] = 0; 1549 | citation[25][0] = 0x0; 1550 | for (scratch = 26; scratch < 52; scratch++) { 1551 | citation[scratch][0] = 0x0; 1552 | } 1553 | break; 1554 | case 13: 1555 | /* n level change; 1556 | * set all other citation levels to zero / null 1557 | */ 1558 | icitation[21] = 0; 1559 | citation[21][0] = 0x0; 1560 | icitation[22] = 0; 1561 | citation[22][0] = 0x0; 1562 | icitation[23] = 0; 1563 | citation[23][0] = 0x0; 1564 | icitation[24] = 0; 1565 | citation[24][0] = 0x0; 1566 | icitation[25] = 0; 1567 | citation[25][0] = 0x0; 1568 | break; 1569 | case 21: 1570 | /* v level and lower change; 1571 | * set lower citation levels to 1, null citation strings 1572 | */ 1573 | icitation[22] = 1; 1574 | citation[22][0] = 0x0; 1575 | case 22: 1576 | icitation[23] = 1; 1577 | citation[23][0] = 0x0; 1578 | case 23: 1579 | icitation[24] = 1; 1580 | citation[24][0] = 0x0; 1581 | case 24: 1582 | icitation[25] = 1; 1583 | citation[25][0] = 0x0; 1584 | case 25: 1585 | outcode = 0; 1586 | break; 1587 | default: 1588 | break; 1589 | } 1590 | } /* id_process */ 1591 | 1592 | if (outcode) { 1593 | output_utf(outcode); 1594 | } 1595 | 1596 | } else { 1597 | --iptr; /* output buffer full - restore pointer and return */ 1598 | processing = 0; 1599 | } 1600 | } /* ID data processing */ 1601 | } else { /* Finished processing all input */ 1602 | processing = 0; 1603 | } 1604 | } /* while processing loop */ 1605 | return return_code; 1606 | } 1607 | --------------------------------------------------------------------------------