├── .ecrc ├── .editorconfig ├── .github └── workflows │ ├── manpage.yml │ ├── mega-linter.yml │ └── test.yml ├── .gitignore ├── .mega-linter.yml ├── CONTRIBUTING ├── COPYRIGHT ├── ChangeLog ├── INSTALL ├── Makefile ├── README ├── README-development.md ├── detex.1 ├── detex.h ├── detex.l ├── release.sh ├── test.pl ├── test ├── comments-correct.txt ├── comments.tex ├── correct.txt ├── in.tex ├── noinclude-correct.txt ├── noinclude.tex ├── nouns-correct.txt ├── nouns.tex ├── part-2.tex ├── part.tex ├── unterminated-verb-eol.tex ├── unterminated-verb-nofinal.tex ├── unterminated-verb-pipe.tex ├── unterminated-verb.tex ├── unterminated.tex ├── with-srcloc-correct.txt ├── with-srcloc.tex ├── words-correct.txt └── words.tex └── valgrind.sh /.ecrc: -------------------------------------------------------------------------------- 1 | { 2 | "Exclude": [ 3 | "test/", 4 | "\\.o/" 5 | ], 6 | "Disable": { 7 | "IndentSize": true, 8 | "MaxLineLength": true 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | [*] 3 | charset = utf-8 4 | end_of_line = lf 5 | indent_style = space 6 | indent_size = 4 7 | insert_final_newline = true 8 | max_line_length = 120 9 | trim_trailing_whitespace = true 10 | 11 | [*.yml] 12 | indent_size = 2 13 | 14 | [{Makefile,README,*.pl,*.sh,*.l,*.h}] 15 | indent_style = tab 16 | 17 | [{*.txt,*.tex}] 18 | insert_final_newline = false 19 | -------------------------------------------------------------------------------- /.github/workflows/manpage.yml: -------------------------------------------------------------------------------- 1 | name: Manpage 2 | on: 3 | push: 4 | paths: 5 | - 'detex.1' 6 | 7 | jobs: 8 | manpage: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Setup dependencies 14 | run: sudo apt install mandoc 15 | - run: mandoc -T lint detex.1 16 | 
-------------------------------------------------------------------------------- /.github/workflows/mega-linter.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # MegaLinter GitHub Action configuration file 3 | # More info at https://megalinter.io 4 | name: MegaLinter 5 | 6 | on: 7 | # Trigger mega-linter at every push. Action will also be visible from Pull Requests to main 8 | push: # Comment this line to trigger action only on pull-requests (not recommended if you don't pay for GH Actions) 9 | # pull_request: 10 | # branches: [master, main] 11 | 12 | # env: # Comment env block if you don't want to apply fixes 13 | # Apply linter fixes configuration 14 | # APPLY_FIXES: all # When active, APPLY_FIXES must also be defined as environment variable (in github/workflows/mega-linter.yml or other CI tool) 15 | # APPLY_FIXES_EVENT: pull_request # Decide which event triggers application of fixes in a commit or a PR (pull_request, push, all) 16 | # APPLY_FIXES_MODE: commit # If APPLY_FIXES is used, defines if the fixes are directly committed (commit) or posted in a PR (pull_request) 17 | 18 | concurrency: 19 | group: ${{ github.ref }}-${{ github.workflow }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | megalinter: 24 | name: MegaLinter 25 | runs-on: ubuntu-latest 26 | # permissions: 27 | # Give the default GITHUB_TOKEN write permission to commit and push, comment issues & post new PR 28 | # Remove the ones you do not need 29 | # contents: write 30 | # issues: write 31 | # pull-requests: write 32 | steps: 33 | # Git Checkout 34 | - name: Checkout Code 35 | uses: actions/checkout@v3 36 | with: 37 | token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} 38 | fetch-depth: 0 # If you use VALIDATE_ALL_CODEBASE = true, you can remove this line to improve performances 39 | 40 | # MegaLinter 41 | - name: MegaLinter 42 | id: ml 43 | # You can override MegaLinter flavor used to have faster performances 44 | # More info at 
https://megalinter.io/flavors/ 45 | uses: oxsecurity/megalinter@v7 46 | env: 47 | # All available variables are described in documentation 48 | # https://megalinter.io/configuration/ 49 | VALIDATE_ALL_CODEBASE: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Validates all source when push on main, else just the git diff with main. Override with true if you always want to lint all sources 50 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 51 | # ADD YOUR CUSTOM ENV VARIABLES HERE OR DEFINE THEM IN A FILE .mega-linter.yml AT THE ROOT OF YOUR REPOSITORY 52 | # DISABLE: COPYPASTE,SPELL # Uncomment to disable copy-paste and spell checks 53 | 54 | # Upload MegaLinter artifacts 55 | - name: Archive production artifacts 56 | if: success() || failure() 57 | uses: actions/upload-artifact@v3 58 | with: 59 | name: MegaLinter reports 60 | path: | 61 | megalinter-reports 62 | mega-linter.log 63 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: push 3 | 4 | jobs: 5 | test: 6 | 7 | # https://github.com/actions/runner-images 8 | strategy: 9 | matrix: 10 | include: 11 | - os: ubuntu-latest 12 | compiler: gcc 13 | valgrind: true 14 | - os: ubuntu-latest 15 | compiler: clang 16 | valgrind: true 17 | - os: ubuntu-20.04 18 | compiler: gcc 19 | valgrind: false 20 | - os: ubuntu-20.04 21 | compiler: clang 22 | valgrind: false 23 | 24 | runs-on: ${{ matrix.os }} 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Setup dependencies 29 | run: sudo apt install libfl-dev 30 | - name: Set compiler 31 | run: | 32 | export CC=${{matrix.compiler}} 33 | $CC --version 34 | - name: Compile and test with valgrind 35 | if: ${{ matrix.valgrind == true }} 36 | run: | 37 | CFLAGS=-g make 38 | sudo apt install valgrind 39 | ./test.pl --valgrind 40 | - name: Compile and test without valgrind 41 | if: ${{ 
matrix.valgrind != true }} 42 | run: | 43 | make 44 | ./test.pl 45 | - name: Test packaging 46 | if: ${{ matrix.valgrind != true }} 47 | run: make package 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | delatex 2 | detex 3 | detex.c 4 | detex.o 5 | *.tar.bz2 6 | -------------------------------------------------------------------------------- /.mega-linter.yml: -------------------------------------------------------------------------------- 1 | # ENABLE: 2 | # - ACTION 3 | # - EDITORCONFIG 4 | # - REPOSITORY 5 | # - YAML 6 | 7 | DISABLE: 8 | - C 9 | - CPP 10 | - LATEX 11 | - SPELL 12 | 13 | DISABLE_ERRORS_LINTERS: 14 | - MAKEFILE_CHECKMAKE 15 | - REPOSITORY_CHECKOV 16 | - REPOSITORY_KICS 17 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | Before submitting pull requests please run './valgrind.sh' and make sure tests pass. 2 | You will need Valgrind (http://valgrind.org/) installed. 3 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 1986-2007 Purdue University 3 | All rights reserved. 
4 | 5 | Developed by: Daniel Trinkle 6 | Department of Computer Science, Purdue University 7 | http://www.cs.purdue.edu/ 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining 10 | a copy of this software and associated documentation files (the 11 | "Software"), to deal with the Software without restriction, including 12 | without limitation the rights to use, copy, modify, merge, publish, 13 | distribute, sublicense, and/or sell copies of the Software, and to 14 | permit persons to whom the Software is furnished to do so, subject to 15 | the following conditions: 16 | 17 | o Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimers. 19 | 20 | o Redistributions in binary form must reproduce the above copyright 21 | notice, this list of conditions and the following disclaimers in the 22 | documentation and/or other materials provided with the distribution. 23 | 24 | o Neither the names of Daniel Trinkle, Purdue University, nor the 25 | names of its contributors may be used to endorse or promote products 26 | derived from this Software without specific prior written 27 | permission. 28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 32 | IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR 33 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 34 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 35 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
36 | 37 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | April 26, 1986 2 | 3 | Modified -- June 4, 1986 4 | Changed so that it automatically recognizes LaTeX source and ignores several 5 | environment modes such as array. 6 | 7 | 8 | Modified (Version 2.0) -- June 30, 1984 9 | Now handles white space in sequences like "\begin { document }". Detex is not 10 | as easily confused by such things as display mode ends and begins that don't 11 | match up. 12 | 13 | 14 | Modified -- September 19, 1986 15 | Added the "-e " option to ignore specified LaTeX 16 | environments. 17 | 18 | 19 | Modified -- June 30, 1987 20 | Added the "-n" no-follow option, to allow detex to ignore \input and \include 21 | commands. Also changed the algorithm for locating the input files. It now 22 | interprets the "." more reasonably (i.e. it is not always the beginning of an 23 | extension). 24 | 25 | 26 | Modified -- December 15, 1988 27 | Added handling of verbatim environment in LaTeX mode and added it to the list 28 | of modes ignored by default. Because of limitations with lex, it was 29 | necessary to shorten the names of some of the existing start states before 30 | adding a new one (ugh). 31 | 32 | 33 | Modified -- January 3, 1988 34 | Added ignore of \$ inside $$ (math) pair. 35 | 36 | 37 | Modified (Version 2.2) -- June 25, 1990 38 | Control sequences are no longer replaced by space, but just removed. This 39 | means accents no longer cause output words to be broken. The "-c" option was 40 | added to show the arguments of \cite, \ref, and \pageref macros. This is 41 | useful when using something like style on the output. 42 | 43 | 44 | Modified (Version 2.3) -- September 7, 1990 45 | Fixed the handling of Ctl mode a little better and added an exception 46 | for \index on suggestions from kcb@hss.caltech.edu (KC Border). 
Also 47 | changed the value for DEFAULTINPUTS to coincide with a local change. 48 | 49 | 50 | Modified -- February 10, 1991 51 | Added -t option to force TeX mode even when seeing the "\begin{document}" 52 | string in the input. 53 | 54 | 55 | Modified -- February 23, 1991 56 | Based on suggestions from pinard@iro.umontreal.ca (Francois Pinard), I 57 | added support for the SysV string routines (-DUSG), added defines for 58 | the flex lexical scanner (-DFLEX_SCANNER), changed NULL to '\0' when 59 | using it as a character (his sys defined NULL as (void *)0), changed 60 | the Makefile to use ${CC} instead of cc, and added comments about the 61 | new compile time options. 62 | 63 | 64 | Modified (Version 2.4) -- September 2, 1992 65 | Corrected the way CITEBEGIN worked. Due to serious braindeath I had 66 | the condition wrong in the if test. It should be (fLatex && !fCite). 67 | This solves the problem a couple people reported with amstex style 68 | \ref entries. 69 | 70 | Added a preprocessing sed(1) command to replace all the long, easy to 71 | read state names with short two letter state names (SA-S?) so that lex 72 | won't overflow and I don't have to keep shortening the state names 73 | every time I add one. If a state is added, it must also be added to 74 | states.sed (order is important) along with its unique S? replacement. 75 | 76 | Added \pagestyle, \setcounter, and \verb handling from 77 | K.Sattar@cs.exeter.ac.uk (Khalid Sattar). Also allows invocation as 78 | "delatex" to force LaTeX mode. 79 | 80 | Applied patches from queinnec@cenatls.cena.dgac.fr (Philippe Queinnec) 81 | to handle nested {}s in state (\bibitem, \cite, \index). 82 | 83 | Added special ligature handling (\aa, \ae, \oe, \l, \o, \i, \j, \ss) 84 | at the suggestion of gwp@dido.caltech.edu (G. W. Pigman II). 85 | 86 | Cleaned up the comments on detex.h, added mathmatica to DEFAULTENV. 
87 | 88 | 89 | Modified (Version 2.5) -- January 28, 1993 90 | Leading spaces in macros are no longer stripped. This means 91 | "foo\footnote{ bar}" comes out as "foo bar" instead of "foobar". 92 | 93 | Fixed special ligature handling so it works in cases like {\ss} instead of 94 | just when followed by a space. 95 | 96 | 97 | Modified (Version 2.6) -- July 30, 1993 98 | Added OS/2 port from hankedr@mail.auburn.edu (Darrel R Hankerson). 99 | 100 | Added handling of leading and trailing ':' in TEXINPUTS per the latest 101 | TeX as suggested by jnp@tfl.dk (J|rgen N|rgaard). 102 | 103 | Changed the way the input path is constructed in SetInputPaths() so we 104 | never try to modify a constant string. 105 | 106 | Changed the way the ignore environment list is constructed in 107 | SetEnvIgnore() so we never try to modify a constant string. 108 | 109 | Changed the USG define to HAVE_STRING_H. 110 | 111 | Fixed the states.sed script so it only replaces "Input" in the correct 112 | places. I would like to use the \< \> word separator patterns but 113 | they are not supported by all versions of sed. This at least works. 114 | 115 | Changed the detex.c target in the Makefile to use a temporary file 116 | because I experienced problems (segmentation fault) with lex on 117 | Solaris 2.1 when input was from stdin. 118 | 119 | 120 | Modified (Version 2.7) -- September 10, 1997 121 | Removed line breaks in detex.l between a few patterns and actions. It 122 | appears that flex is no longer able to handle this. Thanks to Anthony 123 | Harris and Marty Leisner 124 | for reporting this. 125 | 126 | 127 | Porting notes -- March 30, 1992 128 | When using gcc, or compiling on a NeXT, you should compile with 129 | -fwritable-strings. With the change to SetInputPaths() in 2.6 this 130 | should no longer be necessary. 131 | 132 | On a NeXT, it has been reported that lex replaces the '\0' with NULL, 133 | and then the compiler complains about it. 
I think this is an old bug 134 | that is no longer applicable. 135 | 136 | July 30, 1993 137 | The flex scanner generator does not work because it does not handle 138 | input buffering the same way as lex. I don't know of any reasonable 139 | way to rewrite detex to get around this problem. 140 | 141 | May 25, 1995 142 | According to alain@ia1.u-strasbg.fr (Alain Ketterlin), using flex 143 | allows 8-bit characters to be handled correctly. 144 | 145 | Modified (Version 2.8) -- January 1, 2008 146 | Added NCSA/University of Illinois Open Source License to facilitate 147 | free redistribution. 148 | 149 | (Version 2.8.1) -- October 2, 2008 150 | First version of OpenDetex. 151 | Making program compile on modern UNIX systems; many fixes in LaTeX 152 | code handling (quotes, figure and table environments, headings and 153 | many more). 154 | 155 | (Version 2.8.2) -- February, 2016 156 | Improved ANSI C compatibility. Used kpse_find_file() and 157 | kpse_program_name(). Handle `\kern', allow `em/ex'for dimensions, 158 | handle glue specs. Added the never-interactive option to prevent flex 159 | from creating a problematic prototype for isatty(). Renamed 160 | ERROR -> my_ERROR to avoid a conflict with WIN32 headers. Used binary 161 | mode for output file (Peter Breitenlohner and Werner Lemberg) 162 | 163 | Insert a space in place of right curly brace (Eric Miotto). 164 | 165 | Provide 'make uninstall'. Install man page when running 'make install'. 166 | 167 | (Version 2.8.3) -- February, 2018 168 | Added -r option that replaces math environments with nouns and verbs in a way that keeps sentences readable. Thanks to Sophie Huiberts. 169 | 170 | Changed Makefile to fix a Mac OS X problem where frequently an empty output file was produced. 171 | 172 | Removed states.sed that modified detex.l to avoid Lex memory overrun - no longer needed on current machines. Added detex.c to the distributed package to allow compiling OpenDetex without having Flex. 
173 | 174 | Removed 'mathmatica' from the default list of environments whose contents are ignored - 'mathmatica' is not supported by TeX. Added more ignored environments: algorithm, align, smallmatrix and several more. 175 | 176 | Fixed compilation on Cygwin. 177 | 178 | Removed special files for OS/2 - they required compilers that no longer exist. OpenDetex should be able to be compiled using GCC for OS/2. 179 | 180 | (Version 2.8.4) -- July, 2018 181 | 182 | Stopped generating additional spaces after commands (#41). Thanks to Martin Nowack. 183 | 184 | Fixed space missing in output when two newlines follow a macro (#42). Thanks to sgerwk. 185 | 186 | Started treating newlines in inline math as spaces (#45). Thanks to sgerwk. 187 | 188 | (Version 2.8.5) -- January, 2019 189 | 190 | Fixed man page on macOS (#47). Thanks to FX Coudert. 191 | 192 | Added -v option for showing version. 193 | 194 | (Version 2.8.6) -- January, 2020 195 | 196 | Fixed buffer overflow (CVE-2019-19601 #60). Thanks to Akira Kakuto, Norbert Preining and TeX Live team. 197 | 198 | (Version 2.8.7) -- June, 2020 199 | 200 | Reduced size of .tar.bz2 package by excluding object file added by mistake. 201 | 202 | Handle forcing placement with ! (#62). Thanks to schrc3b6. 203 | 204 | Added option to print source information with -1 (#64). Thanks to Victor Cacciari Miraldo. 205 | 206 | (Version 2.8.8) -- June, 2020 207 | 208 | Fixed compilation on old compilers supported by TeX Live (#65). Thanks to Akira Kakuto and Hironobu Yamashita. 209 | 210 | (Version 2.8.9) -- December, 2020 211 | 212 | Include escaped percent sign in output (#70). Thanks to Max Leonhardt. 213 | 214 | (Version 2.8.10) -- December, 2023 215 | 216 | Fixes to man page. Thanks to kberry and Hilmar Preusse. 217 | 218 | (Version 2.8.11) -- December, 2023 219 | 220 | Fixed segmentation fault on unterminated \verb. 
221 | 222 | (Version 2.8.12) -- UNRELEASED 223 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | To build the program, you need: 2 | * 'make' program 3 | * a C compiler (for example, Clang or GCC) 4 | * 'lex' implementation (for example, flex) 5 | 6 | Debian/Ubuntu users can install those by executing: 7 | sudo apt-get install make gcc flex 8 | 9 | To compile the program, simply execute 10 | make 11 | 12 | To install the program, execute 13 | sudo make install 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 1986-2007 Purdue University 2 | # All rights reserved. 3 | # 4 | # Developed by: Daniel Trinkle 5 | # Department of Computer Science, Purdue University 6 | # http://www.cs.purdue.edu/ 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining 9 | # a copy of this software and associated documentation files (the 10 | # "Software"), to deal with the Software without restriction, including 11 | # without limitation the rights to use, copy, modify, merge, publish, 12 | # distribute, sublicense, and/or sell copies of the Software, and to 13 | # permit persons to whom the Software is furnished to do so, subject to 14 | # the following conditions: 15 | # 16 | # o Redistributions of source code must retain the above copyright 17 | # notice, this list of conditions and the following disclaimers. 18 | # 19 | # o Redistributions in binary form must reproduce the above copyright 20 | # notice, this list of conditions and the following disclaimers in the 21 | # documentation and/or other materials provided with the distribution. 
22 | # 23 | # o Neither the names of Daniel Trinkle, Purdue University, nor the 24 | # names of its contributors may be used to endorse or promote products 25 | # derived from this Software without specific prior written 26 | # permission. 27 | # 28 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 31 | # IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR 32 | # ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 33 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 34 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 35 | # 36 | # 37 | # Makefile for detex and delatex 38 | # 39 | # Detex is a program to remove TeX and LaTeX constructs from text source. 40 | 41 | UNAME_S := $(shell uname -s) 42 | 43 | # Installation directory 44 | # 45 | DESTDIR = /usr/local/bin 46 | 47 | # Specify your favorite compiler 48 | # 49 | #CC = gcc 50 | 51 | # Compile time flags, just uncomment the necessary lines 52 | # Some say GNU make does not correctly handle += -- you may have to use := 53 | # 54 | DEFS = 55 | # 56 | # Add -traditional for GNU cc on ISC 386/ix system and possibly others 57 | # (reported by pinard@iro.umontreal.ca) 58 | # 59 | #DEFS += ${DEFS} -traditional 60 | # 61 | # Add -DHAVE_STRING_H for the SysV string manipulation routines 62 | # 63 | #DEFS += ${DEFS} -DHAVE_STRING_H 64 | # 65 | # Add -DMAXPATHLEN= if it is not defined in /usr/include/sys/param.h 66 | # 67 | #DEFS += ${DEFS} -DMAXPATHLEN=1024 68 | # 69 | # Add -DNO_MALLOC_DECL if your system does not like the malloc() declaration 70 | # in detex.l (reported by pinard@iro.umontreal.ca) 71 | # 72 | #DEFS += ${DEFS} -DNO_MALLOC_DECL 73 | # 74 | CFLAGS += -O -DVERSION=\"${VERSION}\" ${DEFS} -Wall 75 | 76 | # Use your favorite lexical scanner 77 | # 78 | #LEX = lex 79 | LEX = 
flex 80 | 81 | #LFLAGS = -8 -C 82 | 83 | # scanner library 84 | # 85 | LEXLIB = -lfl 86 | ifeq ($(UNAME_S),Darwin) 87 | LEXLIB = -ll 88 | endif 89 | 90 | LPR = lpr -p 91 | 92 | # Program names 93 | # 94 | PROGS = detex delatex 95 | 96 | # Header files 97 | # 98 | HDR = detex.h 99 | 100 | # Sources 101 | # 102 | SRC = detex.l 103 | 104 | # Objects for various programs 105 | # 106 | D_OBJ = detex.o 107 | 108 | VERSION = 2.8.12-SNAPSHOT 109 | 110 | all: ${PROGS} 111 | 112 | detex: ${D_OBJ} 113 | ${CC} ${CFLAGS} -o $@ ${D_OBJ} ${LEXLIB} 114 | 115 | delatex: detex 116 | cp detex delatex 117 | 118 | detex.c: detex.l 119 | ${LEX} ${LFLAGS} detex.l 120 | mv lex.yy.c detex.c 121 | 122 | man-page: 123 | troff -man detex.1 124 | 125 | # If you want detex available as delatex, uncomment the last two lines of 126 | # this target 127 | install: all 128 | rm -f ${DESTDIR}/detex 129 | install -c -m 775 -g staff -s detex ${DESTDIR} 130 | install detex.1 /usr/local/share/man/man1 131 | # rm -f ${DESTDIR}/delatex 132 | # ln ${DESTDIR}/detex ${DESTDIR}/delatex 133 | 134 | uninstall: 135 | rm -f ${DESTDIR}/detex 136 | rm -f ${DESTDIR}/delatex 137 | rm -f /usr/local/share/man/man1/detex.* 138 | 139 | clean: 140 | -rm -f a.out core *.s *.o ERRS errs .,* .emacs_[0-9]* 141 | -rm -f ${PROGS} xxx.l lex.yy.c detex.c 142 | -rm -f *.tar.bz2 143 | 144 | print: ${HDR} ${SRC} 145 | ${LPR} Makefile ${HDR} ${SRC} 146 | 147 | test: all 148 | ./test.pl 149 | 150 | run: delatex 151 | ./delatex in > out.txt 152 | 153 | package: clean detex.c 154 | tar cjfv opendetex-${VERSION}.tar.bz2 --exclude='*.o' ChangeLog COPYRIGHT detex.* INSTALL Makefile README 155 | 156 | # Dependencies 157 | # 158 | detex.c: detex.h 159 | detex.c: detex.l 160 | 161 | inpdf: 162 | cd test && xelatex -interaction=nonstopmode in.tex || echo there were errors 163 | rm -f test/*.toc test/*.log test/*.aux 164 | mv test/in.pdf . 
165 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | OpenDetex - Version 2.8.12 UNRELEASED 2 | 3 | OpenDetex is a program to remove TeX constructs from a text file. It recognizes 4 | the \input command. 5 | 6 | This program assumes it is dealing with LaTeX input if it sees the string 7 | "\begin{document}" in the text. It recognizes the \include and \includeonly 8 | commands. 9 | 10 | This directory contains the following files: 11 | 12 | README - you're looking at it. 13 | 14 | ChangeLog - detailed history of changes done in different versions of the program 15 | 16 | COPYRIGHT - information about program authors and license 17 | 18 | INSTALL - how to compile the program 19 | 20 | Makefile - makefile for generating detex 21 | 22 | detex.1 - troff source for the detex manual page. 23 | Assuming you have the -man macros, use "make man-page" to 24 | generate it. 25 | 26 | detex.c - C code generated from detex.l distributed to allow 27 | building from source without having Flex installed 28 | 29 | detex.h - Various global definitions. These should be modified to suit 30 | the local installation. 31 | 32 | detex.l - Lex/Flex and C source for the detex program. 33 | 34 | 35 | This software package is distributed under the NCSA/University of 36 | Illinois Open Source License. 37 | 38 | OpenDetex was created by Piotr Kubowicz from version 2.8 of Detex 39 | program by Daniel Trinkle, obtained from 40 | http://www.cs.purdue.edu/homes/trinkle/detex/ 41 | 42 | The current home of OpenDetex is GitHub: 43 | https://github.com/pkubowicz/opendetex 44 | You can check for the latest version, submit bug reports or patches, or 45 | ask to become a team member there. 
46 | -------------------------------------------------------------------------------- /README-development.md: -------------------------------------------------------------------------------- 1 | ## Coding conventions 2 | 3 | Use tabs for indentation; however, try to minimize changes and don't re-format lines that aren't part of your task. The project has a long history and when most of the code was created, there were many authors but no coding conventions. 4 | 5 | ## Building 6 | 7 | Compiling: 8 | ``` 9 | make 10 | ``` 11 | 12 | Running tests: 13 | ``` 14 | make test 15 | ``` 16 | 17 | It is recommended that you install Valgrind and run tests using it (as it is done in Travis CI): 18 | ``` 19 | ./test.pl --valgrind 20 | ``` 21 | 22 | Also recommended: check if compilation is not broken on the oldest compilers supported by TeX Live (again, if you don't do this, it will be checked on CI): 23 | ``` 24 | make clean all DEFS='-std=iso9899:199409' 25 | ``` 26 | 27 | After pushing, check cross-platform compilation results on commits list on GitHub or directly on [Travis CI](https://travis-ci.org/pkubowicz/opendetex). 28 | 29 | ## Releasing a new version 30 | 31 | 1. Make sure you have GnuPG configured in order to sign commits 32 | 2. Run `./release.sh` which will 33 | - create a commit releasing the current version and tag it 34 | - create a `.tar.bz2` file with the released version 35 | - create a commit starting work on the next version 36 | 3. git-push using the instruction printed by the script 37 | 4. On GitHub go to Code → Releases → Draft a new release 38 | - attach the `.tar.bz2` file 39 | -------------------------------------------------------------------------------- /detex.1: -------------------------------------------------------------------------------- 1 | .TH DETEX 1 "August 12, 1993" "Purdue University" 2 | .SH NAME 3 | detex \- a filter to strip \fITeX\fP commands from a .tex file. 
4 | .SH SYNOPSIS 5 | .B detex 6 | [ \fB\-clnstw\fR ] [ \fB\-e\fI environment-list\fR ] 7 | [ \fIfilename\fR[.tex] ... ] 8 | .SH DESCRIPTION 9 | .I Detex 10 | reads each file in sequence, removes all comments and 11 | .I TeX 12 | control sequences 13 | and writes the remainder on the standard output. 14 | All text in math mode and display mode is removed. 15 | By default, 16 | .I detex 17 | follows \einput commands. 18 | If a file cannot be opened, a warning message is 19 | printed and the command is ignored. 20 | If the 21 | .B \-n 22 | option is used, no \einput or \einclude commands will be processed. 23 | This allows single file processing. 24 | If no input file is given on the command line, 25 | .I detex 26 | reads from standard input. 27 | .PP 28 | If the magic sequence ``\ebegin{document}'' appears in the text, 29 | .I detex 30 | assumes it is dealing with 31 | .I LaTeX 32 | source and 33 | .I detex 34 | recognizes additional constructs used in 35 | .IR LaTeX . 36 | These include the \einclude and \eincludeonly commands. 37 | The 38 | .B \-l 39 | option can be used to force 40 | .I LaTeX 41 | mode and the 42 | .B \-t 43 | option can be used to force 44 | .I TeX 45 | mode regardless of input content. 46 | .PP 47 | Text in various environment modes of 48 | .I LaTeX 49 | is ignored. The default modes are array, eqnarray, equation, longtable, 50 | picture, tabular and verbatim. The 51 | .B \-e 52 | option can be used to specify a comma separated 53 | .I environment-list 54 | of environments to ignore. The list replaces the defaults so specifying an 55 | empty list effectively causes no environments to be ignored. 56 | .PP 57 | The 58 | .B \-c 59 | option can be used in 60 | .I LaTeX 61 | mode to have detex echo the arguments to \ecite, 62 | \eref, and \epageref macros. This can be useful when sending the output to 63 | a style checker. 64 | .PP 65 | .I Detex 66 | assumes the standard character classes are being used for 67 | .IR TeX . 
68 | .I Detex 69 | allows white space between control sequences 70 | and magic characters like `{' when recognizing things like 71 | .I LaTeX 72 | environments. 73 | .PP 74 | The 75 | .B \-r 76 | option tries to naively replace $..$, $$..$$, \e(..\e) and \e[..\e] 77 | with nouns and verbs (in particular, "noun" and "verbs") 78 | in a way that keeps sentences readable. 79 | .PP 80 | If the 81 | .B \-w 82 | flag is given, the output is a word list, one `word' (string of two or more 83 | letters and apostrophes beginning with a letter) 84 | per line, and all other characters ignored. 85 | Without \fB\-w\fR the output follows the original, 86 | with the deletions mentioned above. Newline characters are 87 | preserved where possible 88 | so that the lines of output match the input as closely as possible. 89 | .PP 90 | The 91 | .B \-1 92 | option will prefix each printed line with `filename:linenumber:` indicating 93 | where that line is coming from in terms of the original (La)TeX document. 94 | .PP 95 | The TEXINPUTS environment variable is used to find \einput and \einclude 96 | files. Like \fITeX\fP, it interprets a leading or trailing `:' as the default 97 | TEXINPUTS. 98 | It does \fInot\fP support the `//' directory expansion magic sequence. 99 | .PP 100 | Detex now handles the basic \fITeX\fP ligatures as a special case, replacing the 101 | ligatures with acceptable character substitutes. This eliminates 102 | spelling errors introduced by merely removing them. The ligatures are 103 | \eaa, \eae, \eoe, \ess, \eo, \el (and their upper-case 104 | equivalents). The special "dotless" characters \ei and \ej are also 105 | replaced with i and j respectively. 106 | .PP 107 | Note that previous versions of 108 | .I detex 109 | would replace control sequences with a space character to prevent words 110 | from running together. 111 | However, this caused accents in the middle of words to break words, generating 112 | "spelling errors" that were not desirable. 
113 | Therefore, the new version merely removes these accents. 114 | The old functionality can be essentially duplicated by using the 115 | .B \-s 116 | option. 117 | .SH SEE ALSO 118 | .BR tex (1) 119 | .SH DIAGNOSTICS 120 | Nesting of \einput is allowed but the number of open files must not 121 | exceed the system's limit on the number of simultaneously open files. 122 | .I Detex 123 | ignores unrecognized option characters after printing a warning message. 124 | .SH AUTHOR 125 | Originally written by Daniel Trinkle, Computer Science Department, 126 | Purdue University. 127 | .PP 128 | Maintained by Piotr Kubowicz. 129 | .SH BUGS 130 | .I Detex 131 | is not a 132 | .I TeX 133 | interpreter (it essentially reads the input with a (f)lex program), so 134 | it is easily confused by some constructs. Most errors result in too much 135 | rather than too little output. 136 | .PP 137 | Running \fILaTeX\fR 138 | source without a ``\ebegin{document}'' 139 | through \fIdetex\fR may produce 140 | errors. 141 | .PP 142 | Suggestions for improvements are (mildly) encouraged. 143 | -------------------------------------------------------------------------------- /detex.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1986-2007 Purdue University 3 | * All rights reserved.
4 | * 5 | * Developed by: Daniel Trinkle 6 | * Department of Computer Science, Purdue University 7 | * http://www.cs.purdue.edu/ 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining 10 | * a copy of this software and associated documentation files (the 11 | * "Software"), to deal with the Software without restriction, including 12 | * without limitation the rights to use, copy, modify, merge, publish, 13 | * distribute, sublicense, and/or sell copies of the Software, and to 14 | * permit persons to whom the Software is furnished to do so, subject to 15 | * the following conditions: 16 | * 17 | * o Redistributions of source code must retain the above copyright 18 | * notice, this list of conditions and the following disclaimers. 19 | * 20 | * o Redistributions in binary form must reproduce the above copyright 21 | * notice, this list of conditions and the following disclaimers in the 22 | * documentation and/or other materials provided with the distribution. 23 | * 24 | * o Neither the names of Daniel Trinkle, Purdue University, nor the 25 | * names of its contributors may be used to endorse or promote products 26 | * derived from this Software without specific prior written 27 | * permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 32 | * IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR 33 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 34 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 35 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
36 | */ 37 | 38 | /****** 39 | ** The following parameters should be modified as necessary 40 | ** MAXINCLIST - maximum number of files allowed in an \includeonly list 41 | ** 42 | ** DEFAULTINPUTS - this should be the same as the default TEXINPUTS 43 | ** CHPATHSEP - the path separator character in TEXINPUTS 44 | ** MAXINPUTPATHS - (arbitrary) maximum number of separate paths in TEXINPUTS 45 | ** 46 | ** DEFAULTENV - comma-separated list of LaTeX environments ignored by default 47 | ** CHENVSEP - the list separator character in the ignore environment list 48 | ** MAXENVS - maximum number of environments listed in the ignore list 49 | ** CCHMAXENV - maximum count of characters in an environment name (LaTeX) 50 | ******/ 51 | 52 | #define MAXINCLIST 40 53 | 54 | #ifndef KPATHSEA 55 | #ifdef OS2 56 | #define DEFAULTINPUTS ".;/emtex/texinput" 57 | #define CHPATHSEP ';' 58 | #else 59 | #define DEFAULTINPUTS ".:/usr/local/tex/inputs" 60 | #define CHPATHSEP ':' 61 | #endif 62 | #endif 63 | 64 | #define MAXINPUTPATHS 10 65 | 66 | #define DEFAULTENV "algorithm,align,array,bmatrix,displaymath,eqnarray,equation,floatfig,floating,longtable,picture,pmatrix,psfrags,pspicture,smallmatrix,smallpmatrix,tabular,tikzpicture,verbatim,vmatrix,wrapfigure" 67 | #define CHENVSEP ',' 68 | #define MAXENVS 25 69 | #define CCHMAXENV 100 70 | 71 | /****** 72 | ** These probably should not be changed 73 | ******/ 74 | 75 | #define CHOPT '-' 76 | #define CHCITEOPT 'c' 77 | #define CHENVOPT 'e' 78 | #define CHLATEXOPT 'l' 79 | #define CHNOFOLLOWOPT 'n' 80 | #define CHSPACEOPT 's' 81 | #define CHTEXOPT 't' 82 | #define CHWORDOPT 'w' 83 | #define CHSRCLOC '1' 84 | #define CHREPLACE 'r' 85 | #define CHVERSIONOPT 'v' 86 | 87 | #define my_ERROR -1 88 | -------------------------------------------------------------------------------- /detex.l: -------------------------------------------------------------------------------- 1 | %option never-interactive 2 | %option nounput 3 | 4 | %top{ 5 | /* 6 | * Copyright (c) 1986-2007 Purdue
University 7 | * All rights reserved. 8 | * 9 | * Developed by: Daniel Trinkle 10 | * Department of Computer Science, Purdue University 11 | * http://www.cs.purdue.edu/ 12 | * 13 | * Permission is hereby granted, free of charge, to any person obtaining 14 | * a copy of this software and associated documentation files (the 15 | * "Software"), to deal with the Software without restriction, including 16 | * without limitation the rights to use, copy, modify, merge, publish, 17 | * distribute, sublicense, and/or sell copies of the Software, and to 18 | * permit persons to whom the Software is furnished to do so, subject to 19 | * the following conditions: 20 | * 21 | * o Redistributions of source code must retain the above copyright 22 | * notice, this list of conditions and the following disclaimers. 23 | * 24 | * o Redistributions in binary form must reproduce the above copyright 25 | * notice, this list of conditions and the following disclaimers in the 26 | * documentation and/or other materials provided with the distribution. 27 | * 28 | * o Neither the names of Daniel Trinkle, Purdue University, nor the 29 | * names of its contributors may be used to endorse or promote products 30 | * derived from this Software without specific prior written 31 | * permission. 32 | * 33 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 34 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 35 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 36 | * IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR 37 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 38 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 39 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
40 | */ 41 | 42 | 43 | /* 44 | * detex [-e environment-list] [-c] [-l] [-n] [-r] [-s] [-t] [-v] [-w] [-1] [file[.tex] ] 45 | * 46 | * This program is used to remove TeX or LaTeX constructs from a text 47 | * file. 48 | */ 49 | 50 | #include "detex.h" 51 | 52 | #ifdef KPATHSEA 53 | 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | 62 | #ifdef HAVE_SYS_PARAM_H 63 | #include 64 | #endif 65 | 66 | #else /* KPATHSEA */ 67 | 68 | #ifdef HAVE_STRING_H 69 | #include 70 | #else 71 | #include 72 | #define strrchr rindex 73 | #endif 74 | 75 | #ifdef HAVE_LIMITS_H 76 | #include 77 | #else 78 | #include 79 | #ifndef PATH_MAX /* for old BSD */ 80 | #define PATH_MAX MAXPATHLEN 81 | #endif 82 | #endif /* HAVE_LIMITS_H */ 83 | 84 | #ifdef OS2 85 | #include 86 | #endif 87 | 88 | #ifdef WIN32 89 | #include 90 | #include 91 | #endif 92 | 93 | #endif /* KPATHSEA */ 94 | } 95 | 96 | %{ 97 | #undef IGNORE 98 | #undef ECHO 99 | 100 | #define LaBEGIN if (fLatex) BEGIN 101 | #define IGNORE Ignore() 102 | #define INCRLINENO IncrLineNo() 103 | #define ECHO Echo() 104 | #define NOUN if (fSpace && !fWord && !fReplace) putchar(' '); else {if (fReplace) printf("noun");} 105 | #define VERBNOUN if (fReplace) printf(" verbs noun"); /* puts a verb and a noun to make grammar checking work */ 106 | #define SPACE if (!fWord) putchar(' ') 107 | #define NEWLINE LineBreak() 108 | #define LATEX fLatex=!fForcetex 109 | #define KILLARGS(x) cArgs=x; LaBEGIN LaMacro 110 | #define STRIPARGS(x) cArgs=x; LaBEGIN LaMacro2 111 | #define CITE(x) if (fLatex && !fCite) { KILLARGS(x); } /* braces: KILLARGS is two statements, both must be skipped when -c asks for the args to be echoed */ 112 | 113 | /* avoid gratuitous gcc warning from passing -D for this on command line */ 114 | #ifndef NO_MALLOC_DECL 115 | #define NO_MALLOC_DECL 116 | #endif 117 | 118 | void LineBreak(); 119 | void Ignore(); 120 | void IncrLineNo(); 121 | void Echo(); 122 | void AddInclude(char *sbFile); 123 | void ErrorExit(const char *sb1); 124 | void UsageExit(void); 125 | void VersionExit(void); 126 | void
IncludeFile(char *sbFile); 127 | void InputFile(char *sbFile); 128 | void SetEnvIgnore(const char *sbEnvList); 129 | #ifndef KPATHSEA 130 | void SetInputPaths(void); 131 | #endif 132 | void Warning(const char *sb1, const char *sb2); 133 | int BeginEnv(const char *sbEnv); 134 | int EndEnv(const char *sbEnv); 135 | int InList(char *sbFile); 136 | int SeparateList(char *sbList, char *rgsbList[] ,char chSep, int csbMax); 137 | FILE *TexOpen(char *sbFile); 138 | char *SafeMalloc(int cch, const char *sbMessage); 139 | #ifndef KPATHSEA 140 | char *getenv(); 141 | #ifndef NO_MALLOC_DECL 142 | char *malloc(); 143 | #endif 144 | #ifdef OS2 145 | void yyless(int); 146 | void OS2UsageExit(void); 147 | #endif 148 | #endif /* KPATHSEA */ 149 | 150 | char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */ 151 | char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */ 152 | char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */ 153 | char sbCurrentIgnoredEnv[CCHMAXENV]; /* current environment being ignored */ 154 | char *sbProgName; /* name we were invoked with */ 155 | #ifndef NOFILE /* might already be defined by an included header */ 156 | #define NOFILE 256 157 | #endif 158 | FILE *rgfp[NOFILE+1]; /* stack of input/include files */ 159 | int cfp = 0; /* count of files in stack */ 160 | int cOpenBrace = 0; /* count of `{' (NOTE(review): remainder of this comment is missing -- presumably it named the start conditions that use it) */ 161 | int cArgs = 0; /* argument count, set by KILLARGS/STRIPARGS */ 162 | int csbEnvIgnore; /* count of environments ignored */ 163 | int csbIncList = 0; /* count of includeonly files */ 164 | int csbInputPaths; /* count of input paths */ 165 | int fLatex = 0; /* flag to indicate delatex (LaTeX mode) */ 166 | int fWord = 0; /* flag for -w option */ 167 | int fFollow = 1; /* flag to follow input/include */ 168 | int fCite = 0; /* flag to echo \cite and \ref args */ 169 | int fSpace = 0; /* flag to replace \cs with space */ 170 | int fForcetex = 0; /* flag to inhibit latex mode */ 171 | int fSrcLoc = 0; /* flag to display source location of original file */ 172 | int fShowPictures
= 0; /* flag to show picture names */ 173 | int fReplace = 0; /* flag to replace environments with "noun" */ 174 | 175 | int currBracesLevel = 0; 176 | int footnoteLevel = -100; 177 | 178 | #ifdef FLEX_SCANNER 179 | /* flex has contexts for buffers that need to be switched when file changes 180 | * otherwise output contains imported files in reverse order. Weird, but 181 | * true. 182 | */ 183 | YY_BUFFER_STATE rgsb[NOFILE + 1]; /* flex context stack */ 184 | char* fFileNames[NOFILE + 1]; /* names of the buffers in context stack */ 185 | int fFileLines[NOFILE + 1]; /* line number in each of the context files */ 186 | int fIsColumn0 = 1; /* Are we at the beginning of a line? */ 187 | int csb = 0; /* depth of flex context stack */ 188 | #endif /* FLEX_SCANNER */ 189 | 190 | %} 191 | 192 | S [ \t\n]* 193 | W [a-zA-Z]+ 194 | N [+-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+)) 195 | U pt|pc|in|bp|cm|mm|dd|cc|sp 196 | P [a-zA-Z!]+ 197 | HU {U}|em 198 | HD {S}(({N}{S}{HU})|(\\{W})){S} 199 | HG {HD}(plus{HD})?(minus{HD})? 200 | VU {U}|ex 201 | VD {S}(({N}{S}{VU})|(\\{W})){S} 202 | VG {VD}(plus{VD})?(minus{VD})? 203 | Z \*? 204 | VERBSYMBOL =|\\leq|\\geq|\\in|>|<|\\subseteq|\\subset|\\supset|\\sim|\\neq|\\mapsto 205 | 206 | %Start Define Display IncludeOnly Input Math Normal Control 207 | %Start LaBegin LaDisplay LaEnd LaEnv LaFormula LaInclude LaSubfile 208 | %Start LaMacro LaOptArg LaMacro2 LaOptArg2 LaVerbatim 209 | %start LaBreak LaPicture 210 | 211 | %% 212 | "%".* /* ignore comments */ {INCRLINENO;} 213 | 214 | "\\begin"{S}"{"{S}"document"{S}"}""\n"* {LATEX; IGNORE;} 215 | 216 | "\\begin" /* environment start */ {LaBEGIN LaBegin; IGNORE;} 217 | 218 | {S}"{"{S}"verbatim"{S}"}" { if (BeginEnv("verbatim")) 219 | BEGIN LaEnv; 220 | else 221 | BEGIN LaVerbatim; 222 | IGNORE; 223 | } 224 | 225 | "\\end"{S}"{"{S}"verbatim"{S}"}" /* verbatim mode */ {BEGIN Normal; IGNORE;} 226 | [^\\]+ ECHO; 227 | .
ECHO; 228 | 229 | {S}"{"{S}"minipage"{S}"}" { KILLARGS(1); 230 | if (BeginEnv("minipage")) 231 | BEGIN LaEnv; 232 | else 233 | BEGIN LaMacro; /* Normal; */ 234 | IGNORE; 235 | } 236 | 237 | {S}"{"{S}"table"{S}"}"{S}"["{P}"]" { 238 | if (BeginEnv("table")) 239 | BEGIN LaEnv; 240 | else 241 | BEGIN Normal; 242 | IGNORE; 243 | } 244 | 245 | {S}"{"{S}"figure"{S}"}"{S}"["{P}"]" { 246 | if (BeginEnv("figure")) 247 | BEGIN LaEnv; 248 | else 249 | BEGIN Normal; 250 | IGNORE; 251 | } 252 | 253 | {W} { if (BeginEnv(yytext)) 254 | BEGIN LaEnv; 255 | else 256 | BEGIN Normal; 257 | IGNORE; 258 | } 259 | /*"\n" NEWLINE;*/ 260 | . {INCRLINENO;} 261 | 262 | "\\end" /* absorb some environments */ {LaBEGIN LaEnd; IGNORE;} 263 | "\n"+ ;/*NEWLINE;*/ 264 | . {INCRLINENO;} 265 | 266 | {W} /* end environment */ { if (EndEnv(yytext)) 267 | BEGIN Normal; 268 | IGNORE; 269 | } 270 | "}" {BEGIN LaEnv; IGNORE;} 271 | /*"\n" NEWLINE;*/ 272 | . {INCRLINENO;} 273 | 274 | "\\kern"{HD} ; 275 | "\\vskip"{VG} ; 276 | "\\vspace"{Z}{S}"{"{VG}"}" ; 277 | "\\hskip"{HG} ; 278 | "\\hspace"{Z}{S}"{"{HG}"}" ; 279 | "\\addvspace"{S}"{"{VG}"}" ; 280 | "{"{N}"pt}" ; /* hack to fix \begin{minipage}{300pt}: silently drop a brace group that holds only a number followed by "pt" */ 281 | 282 | "\\newlength" { KILLARGS(1); } 283 | "\\setlength" { KILLARGS(2); } 284 | "\\addtolength" { KILLARGS(2); } 285 | "\\settowidth" { KILLARGS(2); } 286 | "\\settoheight" { KILLARGS(2); } 287 | "\\settodepth" { KILLARGS(2); } 288 | "\\newsavebox" { KILLARGS(1); } 289 | "\\sbox" { KILLARGS(1); } 290 | "\\savebox" { KILLARGS(2); } 291 | "\\usebox" { KILLARGS(1); } 292 | "\\raisebox" { STRIPARGS(2); } 293 | "\\parbox" { KILLARGS(1); } 294 | "\\scalebox" { STRIPARGS(2); } 295 | "\\resizebox"{Z} { KILLARGS(2); } 296 | "\\reflectbox" ; 297 | "\\rotatebox" { KILLARGS(1); } 298 | "\\includegraphics"[^{]* { LaBEGIN LaPicture; } 299 | 300 | "{" ; 301 | [^{}]+ { if(fShowPictures) { printf("", yytext); } } 302 | "\}"{S}"\n"+ { BEGIN Normal; INCRLINENO; } 303 | "\}" BEGIN Normal; 304 | 305 | "\\definecolor" {
KILLARGS(3); } 306 | "\\color" { KILLARGS(1); } 307 | "\\textcolor" { KILLARGS(2); } 308 | "\\colorbox" { KILLARGS(2); } 309 | "\\fcolorbox" { KILLARGS(3); } 310 | "\\pagecolor" { KILLARGS(1); } 311 | "\\foilhead" { STRIPARGS(1); } 312 | "\\addfontfeature" { KILLARGS(1); } 313 | "\\thispagestyle" { KILLARGS(1); } 314 | "\\addcontentsline" { KILLARGS(3); } 315 | 316 | "\\part"{Z} ;/*NEWLINE;*/ 317 | "\\chapter"{Z} ;/*NEWLINE;*/ 318 | "\\section"{Z} ;/*NEWLINE;*/ 319 | "\\subsection"{Z} ;/*NEWLINE;*/ 320 | "\\subsubsection"{Z} ;/*NEWLINE;*/ 321 | "\\paragraph"{Z} ;/*NEWLINE;*/ 322 | "\\subparagraph"{Z} ;/*NEWLINE;*/ 323 | 324 | "\\bibitem" /* ignore args */ {KILLARGS(1); IGNORE;} 325 | "\\bibliography" /* of these \cs */ {KILLARGS(1); IGNORE;} 326 | "\\bibstyle" {KILLARGS(1); IGNORE;} 327 | " "?"\\cite" {KILLARGS(1);} /* kill space before */ 328 | "\\documentstyle" {LATEX; KILLARGS(1); IGNORE;} 329 | "\\documentclass" {LATEX; KILLARGS(1); IGNORE;} 330 | "\\usepackage" {KILLARGS(1); IGNORE;} 331 | "\\end" {KILLARGS(1); IGNORE;} 332 | "\\hypersetup" {KILLARGS(1);} 333 | "\\index" {KILLARGS(1);} 334 | /*"\\footnote" {KILLARGS(1); SPACE;}*/ 335 | "\\label" {KILLARGS(1); IGNORE;} 336 | "\\nameref" {CITE(1); IGNORE;} 337 | "\\pageref" {CITE(1); IGNORE;} 338 | "\\pagestyle" {KILLARGS(1); IGNORE;} 339 | "\\ref" {CITE(1); IGNORE;} 340 | "\\setcounter" {KILLARGS(2); IGNORE;} 341 | "\\addtocounter" {KILLARGS(2); IGNORE;} 342 | "\\newcounter" { KILLARGS(1); } 343 | "\\stepcounter" { KILLARGS(1); /* \stepcounter{counter} takes exactly one argument */ } 344 | 345 | "\\fontspec" {KILLARGS(1);} 346 | 347 | "\\footnote"(\[([^\]])+\])?"{" { 348 | putchar('('); 349 | footnoteLevel = currBracesLevel; 350 | ++currBracesLevel; 351 | } 352 | "\\verb" /* ignore \verb...
*/ { 353 | if (fLatex) { 354 | int verbchar, c; /* int, not char: input() returns an int, so EOF stays distinguishable from data bytes */ 355 | verbchar = input(); 356 | if (verbchar < ' ') { 357 | /* would be nice to include input filenames and line numbers */ 358 | ErrorExit("\\verb not complete before eof"); 359 | } 360 | while ((c = input()) != verbchar && c != '\n' && c != EOF && c != 0) { 361 | putchar(c); 362 | } 363 | if (c != verbchar) { 364 | ErrorExit("\\verb not complete before eof"); 365 | } 366 | } 367 | } 368 | 369 | "\\newcommand" { LATEX; KILLARGS(2); } 370 | "\\renewcommand" { LATEX; KILLARGS(2); } 371 | "\\newenvironment" { LATEX; KILLARGS(3); } 372 | 373 | "\\def" /* ignore def begin */ {BEGIN Define; IGNORE;} 374 | "{" BEGIN Normal; 375 | "\n" NEWLINE; 376 | . ; 377 | 378 | "\\(" /* formula mode */ {LaBEGIN LaFormula; NOUN;} 379 | "\\)" BEGIN Normal; 380 | "\n" NEWLINE; 381 | {VERBSYMBOL} VERBNOUN; 382 | . ; 383 | 384 | "\\[" /* display mode */ {LaBEGIN LaDisplay; NOUN;} 385 | "\\]" BEGIN Normal; 386 | "\n" NEWLINE; 387 | {VERBSYMBOL} VERBNOUN; 388 | . ; 389 | 390 | "$$" /* display mode */ {BEGIN Display; NOUN;} 391 | "$$" BEGIN Normal; 392 | "\n" NEWLINE; 393 | {VERBSYMBOL} VERBNOUN; 394 | . ; 395 | 396 | "$" /* math mode */ {BEGIN Math; NOUN;} 397 | "$" BEGIN Normal; 398 | "\n" ; 399 | "\\$" ; 400 | {VERBSYMBOL} VERBNOUN; 401 | . ; 402 | 403 | "\\include" /* process files */ {LaBEGIN LaInclude; IGNORE;} 404 | [^{ \t\n}]+ { IncludeFile(yytext); 405 | BEGIN Normal; 406 | } 407 | "\n"+ NEWLINE; 408 | . ; 409 | 410 | "\\includeonly" {BEGIN IncludeOnly; IGNORE;} 411 | [^{ \t,\n}]+ AddInclude(yytext); 412 | "}" { if (csbIncList == 0) 413 | rgsbIncList[csbIncList++] = '\0'; 414 | BEGIN Normal; 415 | } 416 | "\n"+ NEWLINE; 417 | . ; 418 | 419 | "\\subfile" /* process files */ {LaBEGIN LaSubfile; IGNORE;} 420 | [^{ \t\n}]+ { IncludeFile(yytext); 421 | BEGIN Normal; 422 | } 423 | "\n"+ NEWLINE; 424 | . ; 425 | 426 | "\\input" {BEGIN Input; IGNORE;} 427 |