├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── Makefile.in ├── NEWS ├── README ├── aclocal.m4 ├── autogen.sh ├── compile ├── config.guess ├── config.sub ├── configure ├── configure.ac ├── depcomp ├── install-sh ├── ltmain.sh ├── m4 ├── ac_cxx_namespaces.m4 ├── google_namespace.m4 ├── python.m4 └── stl_namespace.m4 ├── missing ├── mkinstalldirs ├── packages ├── deb.sh ├── deb │ ├── README │ ├── changelog │ ├── compat │ ├── control │ ├── copyright │ ├── docs │ ├── libstreamhtmlparser-dev.dirs │ ├── libstreamhtmlparser-dev.install │ ├── libstreamhtmlparser0.dirs │ ├── libstreamhtmlparser0.install │ └── rules ├── rpm.sh └── rpm │ └── rpm.spec └── src ├── config.h.in ├── htmlparser.c ├── htmlparser_fsm.config ├── htmlparser_fsm.h ├── jsparser.c ├── jsparser_fsm.config ├── jsparser_fsm.h ├── py_streamhtmlparser.c ├── statemachine.c ├── streamhtmlparser ├── htmlparser.h ├── htmlparser_cpp.h.in ├── jsparser.h └── statemachine.h ├── tests ├── generate_fsm_c_test.c ├── generate_fsm_test.sh ├── htmlparser_cpp_test.cc ├── htmlparser_test.c ├── jsparser_test.c ├── py_streamhtmlparser_test.py ├── statemachine_test.c ├── statemachine_test_fsm.config ├── statemachine_test_fsm.h └── testdata │ ├── regtest │ ├── cdata.html │ ├── comments.html │ ├── context.html │ ├── javascript_attribute.html │ ├── javascript_block.html │ ├── javascript_regexp.html │ ├── position.html │ ├── reset.html │ ├── simple.html │ └── tags.html │ ├── sample_fsm.c │ └── sample_fsm.config └── tools ├── fsm_config.py └── generate_fsm.py /AUTHORS: -------------------------------------------------------------------------------- 1 | opensource@google.com 2 | 3 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006, Google Inc. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Thu Mar 19 20:20:46 2009 Google Inc. 
2 | 3 | * streamhtmlparser: initial release 4 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Installation Instructions 2 | ************************* 3 | 4 | Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free 5 | Software Foundation, Inc. 6 | 7 | This file is free documentation; the Free Software Foundation gives 8 | unlimited permission to copy, distribute and modify it. 9 | 10 | Basic Installation 11 | ================== 12 | 13 | These are generic installation instructions. 14 | 15 | The `configure' shell script attempts to guess correct values for 16 | various system-dependent variables used during compilation. It uses 17 | those values to create a `Makefile' in each directory of the package. 18 | It may also create one or more `.h' files containing system-dependent 19 | definitions. Finally, it creates a shell script `config.status' that 20 | you can run in the future to recreate the current configuration, and a 21 | file `config.log' containing compiler output (useful mainly for 22 | debugging `configure'). 23 | 24 | It can also use an optional file (typically called `config.cache' 25 | and enabled with `--cache-file=config.cache' or simply `-C') that saves 26 | the results of its tests to speed up reconfiguring. (Caching is 27 | disabled by default to prevent problems with accidental use of stale 28 | cache files.) 29 | 30 | If you need to do unusual things to compile the package, please try 31 | to figure out how `configure' could check whether to do them, and mail 32 | diffs or instructions to the address given in the `README' so they can 33 | be considered for the next release. If you are using the cache, and at 34 | some point `config.cache' contains results you don't want to keep, you 35 | may remove or edit it. 
36 | 37 | The file `configure.ac' (or `configure.in') is used to create 38 | `configure' by a program called `autoconf'. You only need 39 | `configure.ac' if you want to change it or regenerate `configure' using 40 | a newer version of `autoconf'. 41 | 42 | The simplest way to compile this package is: 43 | 44 | 1. `cd' to the directory containing the package's source code and type 45 | `./configure' to configure the package for your system. If you're 46 | using `csh' on an old version of System V, you might need to type 47 | `sh ./configure' instead to prevent `csh' from trying to execute 48 | `configure' itself. 49 | 50 | Running `configure' takes awhile. While running, it prints some 51 | messages telling which features it is checking for. 52 | 53 | 2. Type `make' to compile the package. 54 | 55 | 3. Optionally, type `make check' to run any self-tests that come with 56 | the package. 57 | 58 | 4. Type `make install' to install the programs and any data files and 59 | documentation. 60 | 61 | 5. You can remove the program binaries and object files from the 62 | source code directory by typing `make clean'. To also remove the 63 | files that `configure' created (so you can compile the package for 64 | a different kind of computer), type `make distclean'. There is 65 | also a `make maintainer-clean' target, but that is intended mainly 66 | for the package's developers. If you use it, you may have to get 67 | all sorts of other programs in order to regenerate files that came 68 | with the distribution. 69 | 70 | Compilers and Options 71 | ===================== 72 | 73 | Some systems require unusual options for compilation or linking that the 74 | `configure' script does not know about. Run `./configure --help' for 75 | details on some of the pertinent environment variables. 76 | 77 | You can give `configure' initial values for configuration parameters 78 | by setting variables in the command line or in the environment. 
Here 79 | is an example: 80 | 81 | ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix 82 | 83 | *Note Defining Variables::, for more details. 84 | 85 | Compiling For Multiple Architectures 86 | ==================================== 87 | 88 | You can compile the package for more than one kind of computer at the 89 | same time, by placing the object files for each architecture in their 90 | own directory. To do this, you must use a version of `make' that 91 | supports the `VPATH' variable, such as GNU `make'. `cd' to the 92 | directory where you want the object files and executables to go and run 93 | the `configure' script. `configure' automatically checks for the 94 | source code in the directory that `configure' is in and in `..'. 95 | 96 | If you have to use a `make' that does not support the `VPATH' 97 | variable, you have to compile the package for one architecture at a 98 | time in the source code directory. After you have installed the 99 | package for one architecture, use `make distclean' before reconfiguring 100 | for another architecture. 101 | 102 | Installation Names 103 | ================== 104 | 105 | By default, `make install' installs the package's commands under 106 | `/usr/local/bin', include files under `/usr/local/include', etc. You 107 | can specify an installation prefix other than `/usr/local' by giving 108 | `configure' the option `--prefix=PREFIX'. 109 | 110 | You can specify separate installation prefixes for 111 | architecture-specific files and architecture-independent files. If you 112 | pass the option `--exec-prefix=PREFIX' to `configure', the package uses 113 | PREFIX as the prefix for installing programs and libraries. 114 | Documentation and other data files still use the regular prefix. 115 | 116 | In addition, if you use an unusual directory layout you can give 117 | options like `--bindir=DIR' to specify different values for particular 118 | kinds of files. 
Run `configure --help' for a list of the directories 119 | you can set and what kinds of files go in them. 120 | 121 | If the package supports it, you can cause programs to be installed 122 | with an extra prefix or suffix on their names by giving `configure' the 123 | option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 124 | 125 | Optional Features 126 | ================= 127 | 128 | Some packages pay attention to `--enable-FEATURE' options to 129 | `configure', where FEATURE indicates an optional part of the package. 130 | They may also pay attention to `--with-PACKAGE' options, where PACKAGE 131 | is something like `gnu-as' or `x' (for the X Window System). The 132 | `README' should mention any `--enable-' and `--with-' options that the 133 | package recognizes. 134 | 135 | For packages that use the X Window System, `configure' can usually 136 | find the X include and library files automatically, but if it doesn't, 137 | you can use the `configure' options `--x-includes=DIR' and 138 | `--x-libraries=DIR' to specify their locations. 139 | 140 | Specifying the System Type 141 | ========================== 142 | 143 | There may be some features `configure' cannot figure out automatically, 144 | but needs to determine by the type of machine the package will run on. 145 | Usually, assuming the package is built to be run on the _same_ 146 | architectures, `configure' can figure that out, but if it prints a 147 | message saying it cannot guess the machine type, give it the 148 | `--build=TYPE' option. TYPE can either be a short name for the system 149 | type, such as `sun4', or a canonical name which has the form: 150 | 151 | CPU-COMPANY-SYSTEM 152 | 153 | where SYSTEM can have one of these forms: 154 | 155 | OS KERNEL-OS 156 | 157 | See the file `config.sub' for the possible values of each field. If 158 | `config.sub' isn't included in this package, then this package doesn't 159 | need to know the machine type. 
160 | 161 | If you are _building_ compiler tools for cross-compiling, you should 162 | use the option `--target=TYPE' to select the type of system they will 163 | produce code for. 164 | 165 | If you want to _use_ a cross compiler, that generates code for a 166 | platform different from the build platform, you should specify the 167 | "host" platform (i.e., that on which the generated programs will 168 | eventually be run) with `--host=TYPE'. 169 | 170 | Sharing Defaults 171 | ================ 172 | 173 | If you want to set default values for `configure' scripts to share, you 174 | can create a site shell script called `config.site' that gives default 175 | values for variables like `CC', `cache_file', and `prefix'. 176 | `configure' looks for `PREFIX/share/config.site' if it exists, then 177 | `PREFIX/etc/config.site' if it exists. Or, you can set the 178 | `CONFIG_SITE' environment variable to the location of the site script. 179 | A warning: not all `configure' scripts look for a site script. 180 | 181 | Defining Variables 182 | ================== 183 | 184 | Variables not defined in a site shell script can be set in the 185 | environment passed to `configure'. However, some packages may run 186 | configure again during the build, and the customized values of these 187 | variables may be lost. In order to avoid this problem, you should set 188 | them in the `configure' command line, using `VAR=value'. For example: 189 | 190 | ./configure CC=/usr/local2/bin/gcc 191 | 192 | causes the specified `gcc' to be used as the C compiler (unless it is 193 | overridden in the site shell script). Here is another example: 194 | 195 | /bin/bash ./configure CONFIG_SHELL=/bin/bash 196 | 197 | Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent 198 | configuration-related scripts to be executed by `/bin/bash'. 199 | 200 | `configure' Invocation 201 | ====================== 202 | 203 | `configure' recognizes the following options to control how it operates.
204 | 205 | `--help' 206 | `-h' 207 | Print a summary of the options to `configure', and exit. 208 | 209 | `--version' 210 | `-V' 211 | Print the version of Autoconf used to generate the `configure' 212 | script, and exit. 213 | 214 | `--cache-file=FILE' 215 | Enable the cache: use and save the results of the tests in FILE, 216 | traditionally `config.cache'. FILE defaults to `/dev/null' to 217 | disable caching. 218 | 219 | `--config-cache' 220 | `-C' 221 | Alias for `--cache-file=config.cache'. 222 | 223 | `--quiet' 224 | `--silent' 225 | `-q' 226 | Do not print messages saying which checks are being made. To 227 | suppress all normal output, redirect it to `/dev/null' (any error 228 | messages will still be shown). 229 | 230 | `--srcdir=DIR' 231 | Look for the package's source code in directory DIR. Usually 232 | `configure' can determine that directory automatically. 233 | 234 | `configure' also accepts some other, not widely useful, options. Run 235 | `configure --help' for more details. 236 | 237 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ## Process this file with automake to produce Makefile.in 2 | 3 | # Make sure that when we re-make ./configure, we get the macros we need 4 | ACLOCAL_AMFLAGS = -I m4 5 | 6 | # This is so we can #include 7 | AM_CPPFLAGS = -I$(top_srcdir)/src 8 | 9 | # These are good warnings to turn on by default 10 | if GCC 11 | AM_CXXFLAGS = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare 12 | endif 13 | 14 | # The -no-undefined flag allows libtool to generate shared libraries for 15 | # Cygwin and MinGW. LIBSTDCXX_LA_LINKER_FLAG is used to fix a Solaris bug. 
16 | AM_LDFLAGS = -no-undefined $(LIBSTDCXX_LA_LINKER_FLAG) 17 | 18 | googleincludedir = $(includedir)/$(PACKAGE) 19 | 20 | ## The .h files you want to install (that is, .h files that people 21 | ## who install this package can include in their own applications.) 22 | googleinclude_HEADERS = \ 23 | src/streamhtmlparser/statemachine.h \ 24 | src/streamhtmlparser/jsparser.h \ 25 | src/streamhtmlparser/htmlparser.h \ 26 | src/streamhtmlparser/htmlparser_cpp.h 27 | 28 | noinst_HEADERS = \ 29 | src/streamhtmlparser/htmlparser_cpp.h.in 30 | 31 | docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION) 32 | ## This is for HTML and other documentation you want to install. 33 | ## Add your documentation files (in doc/) in addition to these 34 | ## top-level boilerplate files. Also add a TODO file if you have one. 35 | dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README 36 | 37 | lib_LTLIBRARIES = libstreamhtmlparser.la 38 | 39 | if ENABLE_PYTHON 40 | 41 | pyexec_LTLIBRARIES = py_streamhtmlparser.la 42 | 43 | endif ENABLE_PYTHON 44 | 45 | ## unittests you want to run when people type 'make check'. 46 | ## TESTS is for binary unittests, check_SCRIPTS for script-based unittests. 47 | ## TESTS_ENVIRONMENT sets environment variables for when you run unittest, 48 | ## but it only seems to take effect for *binary* unittests (argh!) 
49 | TESTS = 50 | TESTS_ENVIRONMENT = TESTDATA_PATH=$(top_srcdir)/src/tests/testdata 51 | check_SCRIPTS = src/tests/generate_fsm_test_sh 52 | # Every time you add a unittest to check_SCRIPTS, add it here too 53 | noinst_SCRIPTS = src/tests/generate_fsm_test.sh 54 | 55 | TESTDATA = 56 | 57 | ## vvvv RULES TO MAKE THE LIBRARIES, BINARIES, AND UNITTESTS 58 | 59 | TESTS += statemachine_test 60 | statemachine_test_SOURCES = src/tests/statemachine_test.c \ 61 | src/statemachine.c \ 62 | src/tests/statemachine_test_fsm.h 63 | 64 | TESTS += jsparser_test 65 | jsparser_test_SOURCES = src/tests/jsparser_test.c \ 66 | src/statemachine.c \ 67 | src/jsparser.c 68 | 69 | TESTS += htmlparser_test 70 | htmlparser_test_SOURCES = src/tests/htmlparser_test.c \ 71 | src/statemachine.c \ 72 | src/jsparser.c \ 73 | src/htmlparser.c 74 | 75 | htmlparser_test_LDADD = libstreamhtmlparser.la 76 | 77 | TESTS += htmlparser_cpp_test 78 | htmlparser_cpp_test_SOURCES = src/tests/htmlparser_cpp_test.cc \ 79 | src/streamhtmlparser/htmlparser_cpp.h 80 | htmlparser_cpp_test_LDADD = libstreamhtmlparser.la 81 | TESTDATA += \ 82 | src/tests/testdata/regtest/cdata.html \ 83 | src/tests/testdata/regtest/comments.html \ 84 | src/tests/testdata/regtest/context.html \ 85 | src/tests/testdata/regtest/javascript_attribute.html \ 86 | src/tests/testdata/regtest/javascript_block.html \ 87 | src/tests/testdata/regtest/javascript_regexp.html \ 88 | src/tests/testdata/regtest/position.html \ 89 | src/tests/testdata/regtest/reset.html \ 90 | src/tests/testdata/regtest/simple.html \ 91 | src/tests/testdata/regtest/tags.html 92 | 93 | TESTS += generate_fsm_c_test 94 | generate_fsm_c_test_SOURCES = src/tests/generate_fsm_c_test.c 95 | generate_fsm_c_test_DEPENDENCIES = src/tests/testdata/sample_fsm.c 96 | 97 | src/tests/generate_fsm_test_sh: src/tests/generate_fsm_test.sh \ 98 | src/tests/testdata/sample_fsm.config \ 99 | src/tests/testdata/sample_fsm.c \ 100 | src/tools/generate_fsm.py 101 | 
$(top_srcdir)/src/tests/generate_fsm_test.sh $(top_srcdir) 102 | 103 | TESTDATA += \ 104 | src/tests/testdata/sample_fsm.config \ 105 | src/tests/testdata/sample_fsm.c 106 | 107 | if ENABLE_PYTHON 108 | 109 | check_SCRIPTS += src/tests/py_streamhtmlparser_test_py 110 | noinst_SCRIPTS += src/tests/py_streamhtmlparser_test.py 111 | 112 | src/tests/py_streamhtmlparser_test_py: src/tests/py_streamhtmlparser_test.py 113 | export $(TESTS_ENVIRONMENT); \ 114 | PYTHONPATH=".libs:$(PYTHONPATH)" \ 115 | $(top_srcdir)/src/tests/py_streamhtmlparser_test.py 116 | 117 | endif ENABLE_PYTHON 118 | 119 | libstreamhtmlparser_la_SOURCES = src/statemachine.c \ 120 | src/jsparser.c \ 121 | src/htmlparser.c \ 122 | src/htmlparser_fsm.h \ 123 | src/jsparser_fsm.h 124 | 125 | STREAMHTMLPARSER_SYMBOLS='^htmlparser_' 126 | 127 | libstreamhtmlparser_la_CFLAGS = -DNDEBUG $(AM_CFLAGS) 128 | libstreamhtmlparser_la_LDFLAGS = -export-symbols-regex $(STREAMHTMLPARSER_SYMBOLS) 129 | 130 | if ENABLE_PYTHON 131 | 132 | PYTHON_CFLAGS = -I/usr/include/python$(PYTHON_VERSION) -fpic 133 | PYTHON_LDFLAGS = -avoid-version -module 134 | 135 | py_streamhtmlparser_la_SOURCES = src/py_streamhtmlparser.c 136 | py_streamhtmlparser_la_LIBADD = libstreamhtmlparser.la 137 | py_streamhtmlparser_la_LDFLAGS = $(PYTHON_LDFLAGS) 138 | py_streamhtmlparser_la_CFLAGS = $(PYTHON_CFLAGS) 139 | 140 | endif ENABLE_PYTHON 141 | 142 | ## ^^^^ END OF RULES TO MAKE THE LIBRARIES, BINARIES, AND UNITTESTS 143 | 144 | 145 | ## This should always include $(TESTS), but may also include other 146 | ## binaries that you compile but don't want automatically installed. 147 | noinst_PROGRAMS = $(TESTS) 148 | 149 | rpm: dist-gzip packages/rpm.sh packages/rpm/rpm.spec 150 | @cd packages && ./rpm.sh ${PACKAGE} ${VERSION} 151 | 152 | deb: dist-gzip packages/deb.sh packages/deb/* 153 | @cd packages && ./deb.sh ${PACKAGE} ${VERSION} 154 | 155 | dist-hook: 156 | test ! 
-e "$(distdir)/vsprojects" \ 157 | || chmod -R u+w $(distdir)/*.sln $(distdir)/vsprojects/ 158 | 159 | EXTRA_DIST = packages/rpm.sh packages/rpm/rpm.spec packages/deb.sh packages/deb \ 160 | autogen.sh $(SCRIPTS) src/solaris/libstdc++.la $(TESTDATA) \ 161 | src/tools/generate_fsm.py src/tools/fsm_config.py \ 162 | src/htmlparser_fsm.config src/jsparser_fsm.config \ 163 | src/tests/statemachine_test_fsm.config \ 164 | src/tests/testdata/sample_fsm.c \ 165 | $(BUILT_SOURCES) 166 | 167 | # These files are auto-generated via generate_fsm.py. Since we don't want 168 | # to require python to build streamhtmlparser, we include these in the dist 169 | src/htmlparser_fsm.h: src/tools/generate_fsm.py src/tools/fsm_config.py src/htmlparser_fsm.config 170 | $(top_srcdir)/src/tools/generate_fsm.py $(top_srcdir)/src/htmlparser_fsm.config > $@ 171 | 172 | src/jsparser_fsm.h: src/tools/generate_fsm.py src/tools/fsm_config.py src/jsparser_fsm.config 173 | $(top_srcdir)/src/tools/generate_fsm.py $(top_srcdir)/src/jsparser_fsm.config > $@ 174 | 175 | libtool: $(LIBTOOL_DEPS) 176 | $(SHELL) ./config.status --recheck 177 | 178 | EXTRA_DIST += libtool 179 | 180 | src/tests/statemachine_test_fsm.h: src/tools/generate_fsm.py src/tools/fsm_config.py src/tests/statemachine_test_fsm.config 181 | $(top_srcdir)/src/tools/generate_fsm.py $(top_srcdir)/src/tests/statemachine_test_fsm.config > $@ 182 | 183 | # This is a required hack for auto-generated .h files: cf the automake info pages 184 | # NOTE: since we put these .h files in BUILT_SOURCES, we don't need to put 185 | # them in as deps for the binaries that use them. In fact, it's important 186 | # that we don't; instead we put them in EXTRA_DIST. This means, in practice, 187 | # they'll go at the end of the distribution tarfile, which means they'll 188 | # have a later timestamp than the .config files that generate them, which 189 | # means users won't try to rebuild them. Ah, the joys of automake.
190 | BUILT_SOURCES = src/htmlparser_fsm.h \ 191 | src/jsparser_fsm.h \ 192 | src/tests/statemachine_test_fsm.h 193 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/streamhtmlparser/52bf6f7bc0566bdd5a8066740b67bcb4f1ae4fb3/NEWS -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Copyright 2008 Google Inc. All Rights Reserved. 2 | 3 | This directory contains an implementation of an html context scanner with no 4 | lookahead. Its purpose is to scan an html stream and provide context 5 | information at any point within the input stream. An example of a user of this 6 | scanner would be an auto escaping templating system, which would require 7 | html context information at very specific points within the html stream. The 8 | implementation is based on a simplified state machine of HTML 4.01. 9 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Before using, you should figure out all the .m4 macros that your 4 | # configure.ac script needs and make sure they exist in the m4/ 5 | # directory. 6 | # 7 | # These are the files that this script might edit: 8 | # aclocal.m4 configure Makefile.in src/config.h.in \ 9 | # depcomp config.guess config.sub install-sh missing mkinstalldirs \ 10 | # ltmain.sh 11 | # 12 | # Here's a command you can run to see what files aclocal will import: 13 | # aclocal -I m4 --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p' 14 | 15 | set -ex 16 | rm -rf autom4te.cache 17 | 18 | trap 'rm -f aclocal.m4.tmp' EXIT 19 | 20 | # Use version 1.9 of aclocal and automake if available.
21 | ACLOCAL=aclocal-1.9 22 | if test -z `which "$ACLOCAL"`; then 23 | ACLOCAL=aclocal 24 | fi 25 | 26 | AUTOMAKE=automake-1.9 27 | if test -z `which "$AUTOMAKE"`; then 28 | AUTOMAKE=automake 29 | fi 30 | 31 | # glibtoolize is used for Mac OS X 32 | LIBTOOLIZE=libtoolize 33 | if test -z `which "$LIBTOOLIZE"`; then 34 | LIBTOOLIZE=glibtoolize 35 | fi 36 | 37 | # aclocal tries to overwrite aclocal.m4 even if the contents haven't 38 | # changed, which is annoying when the file is not open for edit (in 39 | # p4). We work around this by writing to a temp file and just 40 | # updating the timestamp if the file hasn't changed. 41 | "$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp 42 | if cmp aclocal.m4.tmp aclocal.m4; then 43 | touch aclocal.m4 # pretend that we regenerated the file 44 | rm -f aclocal.m4.tmp 45 | else 46 | mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails 47 | fi 48 | 49 | grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f 50 | autoconf -f -W all,no-obsolete 51 | autoheader -f -W all 52 | "$AUTOMAKE" -a -c -f -W all 53 | 54 | rm -rf autom4te.cache 55 | exit 0 56 | -------------------------------------------------------------------------------- /compile: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # Wrapper for compilers which do not understand `-c -o'. 3 | 4 | scriptversion=2005-05-14.22 5 | 6 | # Copyright (C) 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc. 7 | # Written by Tom Tromey . 8 | # 9 | # This program is free software; you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation; either version 2, or (at your option) 12 | # any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program; if not, write to the Free Software 21 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 22 | 23 | # As a special exception to the GNU General Public License, if you 24 | # distribute this file as part of a program that contains a 25 | # configuration script generated by Autoconf, you may include it under 26 | # the same distribution terms that you use for the rest of that program. 27 | 28 | # This file is maintained in Automake, please report 29 | # bugs to or send patches to 30 | # . 31 | 32 | case $1 in 33 | '') 34 | echo "$0: No command. Try \`$0 --help' for more information." 1>&2 35 | exit 1; 36 | ;; 37 | -h | --h*) 38 | cat <<\EOF 39 | Usage: compile [--help] [--version] PROGRAM [ARGS] 40 | 41 | Wrapper for compilers which do not understand `-c -o'. 42 | Remove `-o dest.o' from ARGS, run PROGRAM with the remaining 43 | arguments, and rename the output as expected. 44 | 45 | If you are trying to build a whole package this is not the 46 | right script to run: please start by reading the file `INSTALL'. 47 | 48 | Report bugs to . 49 | EOF 50 | exit $? 51 | ;; 52 | -v | --v*) 53 | echo "compile $scriptversion" 54 | exit $? 55 | ;; 56 | esac 57 | 58 | ofile= 59 | cfile= 60 | eat= 61 | 62 | for arg 63 | do 64 | if test -n "$eat"; then 65 | eat= 66 | else 67 | case $1 in 68 | -o) 69 | # configure might choose to run compile as `compile cc -o foo foo.c'. 70 | # So we strip `-o arg' only if arg is an object. 
71 | eat=1 72 | case $2 in 73 | *.o | *.obj) 74 | ofile=$2 75 | ;; 76 | *) 77 | set x "$@" -o "$2" 78 | shift 79 | ;; 80 | esac 81 | ;; 82 | *.c) 83 | cfile=$1 84 | set x "$@" "$1" 85 | shift 86 | ;; 87 | *) 88 | set x "$@" "$1" 89 | shift 90 | ;; 91 | esac 92 | fi 93 | shift 94 | done 95 | 96 | if test -z "$ofile" || test -z "$cfile"; then 97 | # If no `-o' option was seen then we might have been invoked from a 98 | # pattern rule where we don't need one. That is ok -- this is a 99 | # normal compilation that the losing compiler can handle. If no 100 | # `.c' file was seen then we are probably linking. That is also 101 | # ok. 102 | exec "$@" 103 | fi 104 | 105 | # Name of file we expect compiler to create. 106 | cofile=`echo "$cfile" | sed -e 's|^.*/||' -e 's/\.c$/.o/'` 107 | 108 | # Create the lock directory. 109 | # Note: use `[/.-]' here to ensure that we don't use the same name 110 | # that we are using for the .o file. Also, base the name on the expected 111 | # object file name, since that is what matters with a parallel build. 112 | lockdir=`echo "$cofile" | sed -e 's|[/.-]|_|g'`.d 113 | while true; do 114 | if mkdir "$lockdir" >/dev/null 2>&1; then 115 | break 116 | fi 117 | sleep 1 118 | done 119 | # FIXME: race condition here if user kills between mkdir and trap. 120 | trap "rmdir '$lockdir'; exit 1" 1 2 15 121 | 122 | # Run the compile. 123 | "$@" 124 | ret=$? 
125 | 126 | if test -f "$cofile"; then 127 | mv "$cofile" "$ofile" 128 | elif test -f "${cofile}bj"; then 129 | mv "${cofile}bj" "$ofile" 130 | fi 131 | 132 | rmdir "$lockdir" 133 | exit $ret 134 | 135 | # Local Variables: 136 | # mode: shell-script 137 | # sh-indentation: 2 138 | # eval: (add-hook 'write-file-hooks 'time-stamp) 139 | # time-stamp-start: "scriptversion=" 140 | # time-stamp-format: "%:y-%02m-%02d.%02H" 141 | # time-stamp-end: "$" 142 | # End: 143 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | ## Process this file with autoconf to produce configure. 2 | ## In general, the safest way to proceed is to run ./autogen.sh 3 | 4 | # make sure we're interpreted by some minimal autoconf 5 | AC_PREREQ(2.57) 6 | 7 | AC_INIT(streamhtmlparser, 0.1, opensource@google.com) 8 | # The argument here is just something that should be in the current directory 9 | # (for sanity checking) 10 | AC_CONFIG_SRCDIR(README) 11 | AM_INIT_AUTOMAKE 12 | AM_CONFIG_HEADER(src/config.h) 13 | 14 | # Checks for programs. 15 | AC_PROG_CC 16 | AC_PROG_CXX 17 | AC_PROG_CPP 18 | AM_CONDITIONAL(GCC, test "$GCC" = yes) # let the Makefile know if we're gcc 19 | AC_PROG_LIBTOOL 20 | AM_PATH_PYTHON([2.2], [have_python=yes], [have_python=no]) 21 | 22 | # Populate $host_cpu, $host_os, etc. 23 | AC_CANONICAL_HOST 24 | # Checks for header files. 25 | AC_HEADER_STDC 26 | AC_HEADER_STDBOOL 27 | case $host_os in 28 | *mingw*) 29 | # Disabling fast install keeps libtool from creating wrapper scripts 30 | # around the executables it builds. Such scripts have caused failures on 31 | # MinGW. Using this option means an extra link step is executed during 32 | # "make install". 33 | AC_DISABLE_FAST_INSTALL 34 | ;; 35 | esac 36 | 37 | # Checks for typedefs, structures, and compiler characteristics. 
38 | AC_C_CONST 39 | AC_C_INLINE 40 | AC_TYPE_SIZE_T 41 | 42 | # Checks for library functions. 43 | AC_FUNC_MALLOC 44 | AC_CHECK_FUNCS([strcasecmp strtol strstr]) 45 | AC_CHECK_FUNCS([memchr strchr]) 46 | 47 | AC_SUBST(LIBTOOL_DEPS) 48 | 49 | # Find out what namespace 'normal' STL code lives in, and also what namespace 50 | # the user wants our classes to be defined in 51 | AC_CXX_STL_NAMESPACE 52 | AC_DEFINE_GOOGLE_NAMESPACE(streamhtmlparser) 53 | 54 | # Python configure flag 55 | AC_ARG_ENABLE([python], 56 | [AS_HELP_STRING([--enable-python], 57 | [build python bindings])], 58 | [], 59 | [enable_python="$have_python"]) 60 | 61 | AM_CONDITIONAL(ENABLE_PYTHON, test "$enable_python" = yes) 62 | 63 | if test "$have_python" != yes -a "$enable_python" = yes; then 64 | AC_MSG_ERROR(Python not found or too old) 65 | fi 66 | 67 | # Solaris 10 6/06 has a bug where /usr/sfw/lib/libstdc++.la is empty. 68 | # If so, we replace it with our own version. 69 | LIBSTDCXX_LA_LINKER_FLAG= 70 | if test -f /usr/sfw/lib/libstdc++.la && ! test -s /usr/sfw/lib/libstdc++.la 71 | then 72 | LIBSTDCXX_LA_LINKER_FLAG='-L$(top_srcdir)/src/solaris' 73 | fi 74 | AC_SUBST(LIBSTDCXX_LA_LINKER_FLAG) 75 | 76 | AC_SUBST(ac_google_namespace) 77 | AC_SUBST(ac_google_start_namespace) 78 | AC_SUBST(ac_google_end_namespace) 79 | 80 | # Write generated configuration file 81 | AC_CONFIG_FILES([Makefile \ 82 | src/streamhtmlparser/htmlparser_cpp.h \ 83 | ]) 84 | AC_OUTPUT 85 | -------------------------------------------------------------------------------- /install-sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # install - install a program, script, or datafile 3 | 4 | scriptversion=2005-05-14.22 5 | 6 | # This originates from X11R5 (mit/util/scripts/install.sh), which was 7 | # later released in X11R6 (xc/config/util/install.sh) with the 8 | # following copyright and license. 
9 | # 10 | # Copyright (C) 1994 X Consortium 11 | # 12 | # Permission is hereby granted, free of charge, to any person obtaining a copy 13 | # of this software and associated documentation files (the "Software"), to 14 | # deal in the Software without restriction, including without limitation the 15 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 16 | # sell copies of the Software, and to permit persons to whom the Software is 17 | # furnished to do so, subject to the following conditions: 18 | # 19 | # The above copyright notice and this permission notice shall be included in 20 | # all copies or substantial portions of the Software. 21 | # 22 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 26 | # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- 27 | # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | # 29 | # Except as contained in this notice, the name of the X Consortium shall not 30 | # be used in advertising or otherwise to promote the sale, use or other deal- 31 | # ings in this Software without prior written authorization from the X Consor- 32 | # tium. 33 | # 34 | # 35 | # FSF changes to this file are in the public domain. 36 | # 37 | # Calling this script install-sh is preferred over install.sh, to prevent 38 | # `make' implicit rules from creating a file called install from it 39 | # when there is no Makefile. 40 | # 41 | # This script is compatible with the BSD install script, but was written 42 | # from scratch. It can only install one file at a time, a restriction 43 | # shared with many OS's install programs. 
44 | 45 | # set DOITPROG to echo to test this script 46 | 47 | # Don't use :- since 4.3BSD and earlier shells don't like it. 48 | doit="${DOITPROG-}" 49 | 50 | # put in absolute paths if you don't have them in your path; or use env. vars. 51 | 52 | mvprog="${MVPROG-mv}" 53 | cpprog="${CPPROG-cp}" 54 | chmodprog="${CHMODPROG-chmod}" 55 | chownprog="${CHOWNPROG-chown}" 56 | chgrpprog="${CHGRPPROG-chgrp}" 57 | stripprog="${STRIPPROG-strip}" 58 | rmprog="${RMPROG-rm}" 59 | mkdirprog="${MKDIRPROG-mkdir}" 60 | 61 | chmodcmd="$chmodprog 0755" 62 | chowncmd= 63 | chgrpcmd= 64 | stripcmd= 65 | rmcmd="$rmprog -f" 66 | mvcmd="$mvprog" 67 | src= 68 | dst= 69 | dir_arg= 70 | dstarg= 71 | no_target_directory= 72 | 73 | usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE 74 | or: $0 [OPTION]... SRCFILES... DIRECTORY 75 | or: $0 [OPTION]... -t DIRECTORY SRCFILES... 76 | or: $0 [OPTION]... -d DIRECTORIES... 77 | 78 | In the 1st form, copy SRCFILE to DSTFILE. 79 | In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. 80 | In the 4th, create DIRECTORIES. 81 | 82 | Options: 83 | -c (ignored) 84 | -d create directories instead of installing files. 85 | -g GROUP $chgrpprog installed files to GROUP. 86 | -m MODE $chmodprog installed files to MODE. 87 | -o USER $chownprog installed files to USER. 88 | -s $stripprog installed files. 89 | -t DIRECTORY install into DIRECTORY. 90 | -T report an error if DSTFILE is a directory. 91 | --help display this help and exit. 92 | --version display version info and exit. 
93 | 94 | Environment variables override the default commands: 95 | CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG 96 | " 97 | 98 | while test -n "$1"; do 99 | case $1 in 100 | -c) shift 101 | continue;; 102 | 103 | -d) dir_arg=true 104 | shift 105 | continue;; 106 | 107 | -g) chgrpcmd="$chgrpprog $2" 108 | shift 109 | shift 110 | continue;; 111 | 112 | --help) echo "$usage"; exit $?;; 113 | 114 | -m) chmodcmd="$chmodprog $2" 115 | shift 116 | shift 117 | continue;; 118 | 119 | -o) chowncmd="$chownprog $2" 120 | shift 121 | shift 122 | continue;; 123 | 124 | -s) stripcmd=$stripprog 125 | shift 126 | continue;; 127 | 128 | -t) dstarg=$2 129 | shift 130 | shift 131 | continue;; 132 | 133 | -T) no_target_directory=true 134 | shift 135 | continue;; 136 | 137 | --version) echo "$0 $scriptversion"; exit $?;; 138 | 139 | *) # When -d is used, all remaining arguments are directories to create. 140 | # When -t is used, the destination is already specified. 141 | test -n "$dir_arg$dstarg" && break 142 | # Otherwise, the last argument is the destination. Remove it from $@. 143 | for arg 144 | do 145 | if test -n "$dstarg"; then 146 | # $@ is not empty: it contains at least $arg. 147 | set fnord "$@" "$dstarg" 148 | shift # fnord 149 | fi 150 | shift # arg 151 | dstarg=$arg 152 | done 153 | break;; 154 | esac 155 | done 156 | 157 | if test -z "$1"; then 158 | if test -z "$dir_arg"; then 159 | echo "$0: no input file specified." >&2 160 | exit 1 161 | fi 162 | # It's OK to call `install-sh -d' without argument. 163 | # This can happen when creating conditional directories. 164 | exit 0 165 | fi 166 | 167 | for src 168 | do 169 | # Protect names starting with `-'. 
170 | case $src in 171 | -*) src=./$src ;; 172 | esac 173 | 174 | if test -n "$dir_arg"; then 175 | dst=$src 176 | src= 177 | 178 | if test -d "$dst"; then 179 | mkdircmd=: 180 | chmodcmd= 181 | else 182 | mkdircmd=$mkdirprog 183 | fi 184 | else 185 | # Waiting for this to be detected by the "$cpprog $src $dsttmp" command 186 | # might cause directories to be created, which would be especially bad 187 | # if $src (and thus $dsttmp) contains '*'. 188 | if test ! -f "$src" && test ! -d "$src"; then 189 | echo "$0: $src does not exist." >&2 190 | exit 1 191 | fi 192 | 193 | if test -z "$dstarg"; then 194 | echo "$0: no destination specified." >&2 195 | exit 1 196 | fi 197 | 198 | dst=$dstarg 199 | # Protect names starting with `-'. 200 | case $dst in 201 | -*) dst=./$dst ;; 202 | esac 203 | 204 | # If destination is a directory, append the input filename; won't work 205 | # if double slashes aren't ignored. 206 | if test -d "$dst"; then 207 | if test -n "$no_target_directory"; then 208 | echo "$0: $dstarg: Is a directory" >&2 209 | exit 1 210 | fi 211 | dst=$dst/`basename "$src"` 212 | fi 213 | fi 214 | 215 | # This sed command emulates the dirname command. 216 | dstdir=`echo "$dst" | sed -e 's,/*$,,;s,[^/]*$,,;s,/*$,,;s,^$,.,'` 217 | 218 | # Make sure that the destination directory exists. 219 | 220 | # Skip lots of stat calls in the usual case. 221 | if test ! -d "$dstdir"; then 222 | defaultIFS=' 223 | ' 224 | IFS="${IFS-$defaultIFS}" 225 | 226 | oIFS=$IFS 227 | # Some sh's can't handle IFS=/ for some reason. 228 | IFS='%' 229 | set x `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` 230 | shift 231 | IFS=$oIFS 232 | 233 | pathcomp= 234 | 235 | while test $# -ne 0 ; do 236 | pathcomp=$pathcomp$1 237 | shift 238 | if test ! -d "$pathcomp"; then 239 | $mkdirprog "$pathcomp" 240 | # mkdir can fail with a `File exist' error in case several 241 | # install-sh are creating the directory concurrently. This 242 | # is OK. 
243 | test -d "$pathcomp" || exit 244 | fi 245 | pathcomp=$pathcomp/ 246 | done 247 | fi 248 | 249 | if test -n "$dir_arg"; then 250 | $doit $mkdircmd "$dst" \ 251 | && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ 252 | && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ 253 | && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ 254 | && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } 255 | 256 | else 257 | dstfile=`basename "$dst"` 258 | 259 | # Make a couple of temp file names in the proper directory. 260 | dsttmp=$dstdir/_inst.$$_ 261 | rmtmp=$dstdir/_rm.$$_ 262 | 263 | # Trap to clean up those temp files at exit. 264 | trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 265 | trap '(exit $?); exit' 1 2 13 15 266 | 267 | # Copy the file name to the temp name. 268 | $doit $cpprog "$src" "$dsttmp" && 269 | 270 | # and set any options; do chmod last to preserve setuid bits. 271 | # 272 | # If any of these fail, we abort the whole thing. If we want to 273 | # ignore errors from any of these, just make sure not to ignore 274 | # errors from the above "$doit $cpprog $src $dsttmp" command. 275 | # 276 | { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ 277 | && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ 278 | && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ 279 | && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && 280 | 281 | # Now rename the file to the real destination. 282 | { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ 283 | || { 284 | # The rename failed, perhaps because mv can't rename something else 285 | # to itself, or perhaps because mv is so ancient that it does not 286 | # support -f. 287 | 288 | # Now remove or move aside any old file at destination location. 289 | # We try this two ways since rm can't unlink itself on some 290 | # systems and the destination file might be busy for other 291 | # reasons. 
In this case, the final cleanup might fail but the new 292 | # file should still install successfully. 293 | { 294 | if test -f "$dstdir/$dstfile"; then 295 | $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ 296 | || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ 297 | || { 298 | echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 299 | (exit 1); exit 1 300 | } 301 | else 302 | : 303 | fi 304 | } && 305 | 306 | # Now rename the file to the real destination. 307 | $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" 308 | } 309 | } 310 | fi || { (exit 1); exit 1; } 311 | done 312 | 313 | # The final little trick to "correctly" pass the exit status to the exit trap. 314 | { 315 | (exit 0); exit 0 316 | } 317 | 318 | # Local variables: 319 | # eval: (add-hook 'write-file-hooks 'time-stamp) 320 | # time-stamp-start: "scriptversion=" 321 | # time-stamp-format: "%:y-%02m-%02d.%02H" 322 | # time-stamp-end: "$" 323 | # End: 324 | -------------------------------------------------------------------------------- /m4/ac_cxx_namespaces.m4: -------------------------------------------------------------------------------- 1 | dnl @synopsis AC_CXX_NAMESPACES 2 | dnl 3 | dnl If the compiler can prevent names clashes using namespaces, define 4 | dnl HAVE_NAMESPACES. 
5 | dnl 6 | dnl @category Cxx 7 | dnl @author Todd Veldhuizen 8 | dnl @author Luc Maisonobe 9 | dnl @version 2004-02-04 10 | dnl @license AllPermissive 11 | 12 | AC_DEFUN([AC_CXX_NAMESPACES], 13 | [AC_CACHE_CHECK(whether the compiler implements namespaces, 14 | ac_cv_cxx_namespaces, 15 | [AC_LANG_SAVE 16 | AC_LANG_CPLUSPLUS 17 | AC_TRY_COMPILE([namespace Outer { namespace Inner { int i = 0; }}], 18 | [using namespace Outer::Inner; return i;], 19 | ac_cv_cxx_namespaces=yes, ac_cv_cxx_namespaces=no) 20 | AC_LANG_RESTORE 21 | ]) 22 | if test "$ac_cv_cxx_namespaces" = yes; then 23 | AC_DEFINE(HAVE_NAMESPACES,,[define if the compiler implements namespaces]) 24 | fi 25 | ]) 26 | -------------------------------------------------------------------------------- /m4/google_namespace.m4: -------------------------------------------------------------------------------- 1 | # Allow users to override the namespace we define our application's classes in 2 | # Arg $1 is the default namespace to use if --enable-namespace isn't present. 3 | 4 | # In general, $1 should be 'google', so we put all our exported symbols in a 5 | # unique namespace that is not likely to conflict with anyone else. However, 6 | # when it makes sense -- for instance, when publishing stl-like code -- you 7 | # may want to go with a different default, like 'std'. 8 | 9 | # We guarantee the invariant that GOOGLE_NAMESPACE starts with ::, 10 | # unless it's the empty string. Thus, it's always safe to do 11 | # GOOGLE_NAMESPACE::foo and be sure you're getting the foo that's 12 | # actually in the google namespace, and not some other namespace that 13 | # the namespace rules might kick in. 14 | 15 | AC_DEFUN([AC_DEFINE_GOOGLE_NAMESPACE], 16 | [google_namespace_default=[$1] 17 | AC_ARG_ENABLE(namespace, [ --enable-namespace=FOO to define these Google 18 | classes in the FOO namespace. --disable-namespace 19 | to define them in the global namespace. 
Default 20 | is to define them in namespace $1.], 21 | [case "$enableval" in 22 | yes) google_namespace="$google_namespace_default" ;; 23 | no) google_namespace="" ;; 24 | *) google_namespace="$enableval" ;; 25 | esac], 26 | [google_namespace="$google_namespace_default"]) 27 | if test -n "$google_namespace"; then 28 | ac_google_namespace="::$google_namespace" 29 | ac_google_start_namespace="namespace $google_namespace {" 30 | ac_google_end_namespace="}" 31 | else 32 | ac_google_namespace="" 33 | ac_google_start_namespace="" 34 | ac_google_end_namespace="" 35 | fi 36 | AC_DEFINE_UNQUOTED(GOOGLE_NAMESPACE, $ac_google_namespace, 37 | Namespace for Google classes) 38 | AC_DEFINE_UNQUOTED(_START_GOOGLE_NAMESPACE_, $ac_google_start_namespace, 39 | Puts following code inside the Google namespace) 40 | AC_DEFINE_UNQUOTED(_END_GOOGLE_NAMESPACE_, $ac_google_end_namespace, 41 | Stops putting the code inside the Google namespace) 42 | ]) 43 | -------------------------------------------------------------------------------- /m4/python.m4: -------------------------------------------------------------------------------- 1 | ## ------------------------ -*- Autoconf -*- 2 | ## Python file handling 3 | ## From Andrew Dalke 4 | ## Updated by James Henstridge 5 | ## ------------------------ 6 | # Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 7 | # Free Software Foundation, Inc. 8 | # 9 | # This file is free software; the Free Software Foundation 10 | # gives unlimited permission to copy and/or distribute it, 11 | # with or without modifications, as long as this notice is preserved. 12 | 13 | # AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) 14 | # --------------------------------------------------------------------------- 15 | # Adds support for distributing Python modules and packages. To 16 | # install modules, copy them to $(pythondir), using the python_PYTHON 17 | # automake variable. 
To install a package with the same name as the 18 | # automake package, install to $(pkgpythondir), or use the 19 | # pkgpython_PYTHON automake variable. 20 | # 21 | # The variables $(pyexecdir) and $(pkgpyexecdir) are provided as 22 | # locations to install python extension modules (shared libraries). 23 | # Another macro is required to find the appropriate flags to compile 24 | # extension modules. 25 | # 26 | # If your package is configured with a different prefix to python, 27 | # users will have to add the install directory to the PYTHONPATH 28 | # environment variable, or create a .pth file (see the python 29 | # documentation for details). 30 | # 31 | # If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will 32 | # cause an error if the version of python installed on the system 33 | # doesn't meet the requirement. MINIMUM-VERSION should consist of 34 | # numbers and dots only. 35 | AC_DEFUN([AM_PATH_PYTHON], 36 | [ 37 | dnl Find a Python interpreter. Python versions prior to 1.5 are not 38 | dnl supported because the default installation locations changed from 39 | dnl $prefix/lib/site-python in 1.4 to $prefix/lib/python1.5/site-packages 40 | dnl in 1.5. 41 | m4_define_default([_AM_PYTHON_INTERPRETER_LIST], 42 | [python python2 python2.5 python2.4 python2.3 python2.2 dnl 43 | python2.1 python2.0 python1.6 python1.5]) 44 | 45 | m4_if([$1],[],[ 46 | dnl No version check is needed. 47 | # Find any Python interpreter. 48 | if test -z "$PYTHON"; then 49 | AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :) 50 | fi 51 | am_display_PYTHON=python 52 | ], [ 53 | dnl A version check is needed. 54 | if test -n "$PYTHON"; then 55 | # If the user set $PYTHON, use it and don't search something else. 
56 | AC_MSG_CHECKING([whether $PYTHON version >= $1]) 57 | AM_PYTHON_CHECK_VERSION([$PYTHON], [$1], 58 | [AC_MSG_RESULT(yes)], 59 | [AC_MSG_ERROR(too old)]) 60 | am_display_PYTHON=$PYTHON 61 | else 62 | # Otherwise, try each interpreter until we find one that satisfies 63 | # VERSION. 64 | AC_CACHE_CHECK([for a Python interpreter with version >= $1], 65 | [am_cv_pathless_PYTHON],[ 66 | for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do 67 | test "$am_cv_pathless_PYTHON" = none && break 68 | AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break]) 69 | done]) 70 | # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON. 71 | if test "$am_cv_pathless_PYTHON" = none; then 72 | PYTHON=: 73 | else 74 | AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON]) 75 | fi 76 | am_display_PYTHON=$am_cv_pathless_PYTHON 77 | fi 78 | ]) 79 | 80 | if test "$PYTHON" = :; then 81 | dnl Run any user-specified action, or abort. 82 | m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])]) 83 | else 84 | 85 | dnl Query Python for its version number. Getting [:3] seems to be 86 | dnl the best way to do this; it's what "site.py" does in the standard 87 | dnl library. 88 | 89 | AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version], 90 | [am_cv_python_version=`$PYTHON -c "import sys; print sys.version[[:3]]"`]) 91 | AC_SUBST([PYTHON_VERSION], [$am_cv_python_version]) 92 | 93 | dnl Use the values of $prefix and $exec_prefix for the corresponding 94 | dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made 95 | dnl distinct variables so they can be overridden if need be. However, 96 | dnl general consensus is that you shouldn't need this ability. 97 | 98 | AC_SUBST([PYTHON_PREFIX], ['${prefix}']) 99 | AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}']) 100 | 101 | dnl At times (like when building shared libraries) you may want 102 | dnl to know which OS platform Python thinks this is. 
103 | 104 | AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform], 105 | [am_cv_python_platform=`$PYTHON -c "import sys; print sys.platform"`]) 106 | AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform]) 107 | 108 | 109 | dnl Set up 4 directories: 110 | 111 | dnl pythondir -- where to install python scripts. This is the 112 | dnl site-packages directory, not the python standard library 113 | dnl directory like in previous automake betas. This behavior 114 | dnl is more consistent with lispdir.m4 for example. 115 | dnl Query distutils for this directory. distutils does not exist in 116 | dnl Python 1.5, so we fall back to the hardcoded directory if it 117 | dnl doesn't work. 118 | AC_CACHE_CHECK([for $am_display_PYTHON script directory], 119 | [am_cv_python_pythondir], 120 | [am_cv_python_pythondir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(0,0,prefix='$PYTHON_PREFIX')" 2>/dev/null || 121 | echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`]) 122 | AC_SUBST([pythondir], [$am_cv_python_pythondir]) 123 | 124 | dnl pkgpythondir -- $PACKAGE directory under pythondir. Was 125 | dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is 126 | dnl more consistent with the rest of automake. 127 | 128 | AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE]) 129 | 130 | dnl pyexecdir -- directory for installing python extension modules 131 | dnl (shared libraries) 132 | dnl Query distutils for this directory. distutils does not exist in 133 | dnl Python 1.5, so we fall back to the hardcoded directory if it 134 | dnl doesn't work. 
135 | AC_CACHE_CHECK([for $am_display_PYTHON extension module directory], 136 | [am_cv_python_pyexecdir], 137 | [am_cv_python_pyexecdir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(1,0,prefix='$PYTHON_EXEC_PREFIX')" 2>/dev/null || 138 | echo "${PYTHON_EXEC_PREFIX}/lib/python${PYTHON_VERSION}/site-packages"`]) 139 | AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir]) 140 | 141 | dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE) 142 | 143 | AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE]) 144 | 145 | dnl Run any user-specified action. 146 | $2 147 | fi 148 | 149 | ]) 150 | 151 | 152 | # AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) 153 | # --------------------------------------------------------------------------- 154 | # Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION. 155 | # Run ACTION-IF-FALSE otherwise. 156 | # This test uses sys.hexversion instead of the string equivalent (first 157 | # word of sys.version), in order to cope with versions such as 2.2c1. 158 | # hexversion has been introduced in Python 1.5.2; it's probably not 159 | # worth to support older versions (1.5.1 was released on October 31, 1998). 160 | AC_DEFUN([AM_PYTHON_CHECK_VERSION], 161 | [prog="import sys, string 162 | # split strings by '.' and convert to numeric. Append some zeros 163 | # because we need at least 4 digits for the hex conversion. 
164 | minver = map(int, string.split('$2', '.')) + [[0, 0, 0]] 165 | minverhex = 0 166 | for i in xrange(0, 4): minverhex = (minverhex << 8) + minver[[i]] 167 | sys.exit(sys.hexversion < minverhex)" 168 | AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])]) 169 | -------------------------------------------------------------------------------- /m4/stl_namespace.m4: -------------------------------------------------------------------------------- 1 | # We check what namespace stl code like vector expects to be executed in 2 | 3 | AC_DEFUN([AC_CXX_STL_NAMESPACE], 4 | [AC_CACHE_CHECK( 5 | what namespace STL code is in, 6 | ac_cv_cxx_stl_namespace, 7 | [AC_REQUIRE([AC_CXX_NAMESPACES]) 8 | AC_LANG_SAVE 9 | AC_LANG_CPLUSPLUS 10 | AC_TRY_COMPILE([#include <vector>], 11 | [vector<int> t; return 0;], 12 | ac_cv_cxx_stl_namespace=none) 13 | AC_TRY_COMPILE([#include <vector>], 14 | [std::vector<int> t; return 0;], 15 | ac_cv_cxx_stl_namespace=std) 16 | AC_LANG_RESTORE]) 17 | if test "$ac_cv_cxx_stl_namespace" = none; then 18 | AC_DEFINE(STL_NAMESPACE,, 19 | [the namespace where STL code like vector<> is defined]) 20 | fi 21 | if test "$ac_cv_cxx_stl_namespace" = std; then 22 | AC_DEFINE(STL_NAMESPACE,std, 23 | [the namespace where STL code like vector<> is defined]) 24 | fi 25 | ]) 26 | -------------------------------------------------------------------------------- /missing: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # Common stub for a few missing GNU programs while installing. 3 | 4 | scriptversion=2005-06-08.21 5 | 6 | # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005 7 | # Free Software Foundation, Inc. 8 | # Originally by Fran,cois Pinard , 1996. 9 | 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation; either version 2, or (at your option) 13 | # any later version. 
14 | 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program; if not, write to the Free Software 22 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 23 | # 02110-1301, USA. 24 | 25 | # As a special exception to the GNU General Public License, if you 26 | # distribute this file as part of a program that contains a 27 | # configuration script generated by Autoconf, you may include it under 28 | # the same distribution terms that you use for the rest of that program. 29 | 30 | if test $# -eq 0; then 31 | echo 1>&2 "Try \`$0 --help' for more information" 32 | exit 1 33 | fi 34 | 35 | run=: 36 | 37 | # In the cases where this matters, `missing' is being run in the 38 | # srcdir already. 39 | if test -f configure.ac; then 40 | configure_ac=configure.ac 41 | else 42 | configure_ac=configure.in 43 | fi 44 | 45 | msg="missing on your system" 46 | 47 | case "$1" in 48 | --run) 49 | # Try to run requested program, and just exit if it succeeds. 50 | run= 51 | shift 52 | "$@" && exit 0 53 | # Exit code 63 means version mismatch. This often happens 54 | # when the user tries to use an ancient version of a tool on 55 | # a file that requires a minimum version. In this case 56 | # we should proceed as if the program had been absent, or 57 | # if --run hadn't been passed. 58 | if test $? = 63; then 59 | run=: 60 | msg="probably too old" 61 | fi 62 | ;; 63 | 64 | -h|--h|--he|--hel|--help) 65 | echo "\ 66 | $0 [OPTION]... PROGRAM [ARGUMENT]... 67 | 68 | Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an 69 | error status if there is no known handling for PROGRAM. 
70 | 71 | Options: 72 | -h, --help display this help and exit 73 | -v, --version output version information and exit 74 | --run try to run the given command, and emulate it if it fails 75 | 76 | Supported PROGRAM values: 77 | aclocal touch file \`aclocal.m4' 78 | autoconf touch file \`configure' 79 | autoheader touch file \`config.h.in' 80 | automake touch all \`Makefile.in' files 81 | bison create \`y.tab.[ch]', if possible, from existing .[ch] 82 | flex create \`lex.yy.c', if possible, from existing .c 83 | help2man touch the output file 84 | lex create \`lex.yy.c', if possible, from existing .c 85 | makeinfo touch the output file 86 | tar try tar, gnutar, gtar, then tar without non-portable flags 87 | yacc create \`y.tab.[ch]', if possible, from existing .[ch] 88 | 89 | Send bug reports to ." 90 | exit $? 91 | ;; 92 | 93 | -v|--v|--ve|--ver|--vers|--versi|--versio|--version) 94 | echo "missing $scriptversion (GNU Automake)" 95 | exit $? 96 | ;; 97 | 98 | -*) 99 | echo 1>&2 "$0: Unknown \`$1' option" 100 | echo 1>&2 "Try \`$0 --help' for more information" 101 | exit 1 102 | ;; 103 | 104 | esac 105 | 106 | # Now exit if we have it, but it failed. Also exit now if we 107 | # don't have it and --version was passed (most likely to detect 108 | # the program). 109 | case "$1" in 110 | lex|yacc) 111 | # Not GNU programs, they don't have --version. 112 | ;; 113 | 114 | tar) 115 | if test -n "$run"; then 116 | echo 1>&2 "ERROR: \`tar' requires --run" 117 | exit 1 118 | elif test "x$2" = "x--version" || test "x$2" = "x--help"; then 119 | exit 1 120 | fi 121 | ;; 122 | 123 | *) 124 | if test -z "$run" && ($1 --version) > /dev/null 2>&1; then 125 | # We have it, but it failed. 126 | exit 1 127 | elif test "x$2" = "x--version" || test "x$2" = "x--help"; then 128 | # Could not run --version or --help. This is probably someone 129 | # running `$TOOL --version' or `$TOOL --help' to check whether 130 | # $TOOL exists and not knowing $TOOL uses missing. 
131 | exit 1 132 | fi 133 | ;; 134 | esac 135 | 136 | # If it does not exist, or fails to run (possibly an outdated version), 137 | # try to emulate it. 138 | case "$1" in 139 | aclocal*) 140 | echo 1>&2 "\ 141 | WARNING: \`$1' is $msg. You should only need it if 142 | you modified \`acinclude.m4' or \`${configure_ac}'. You might want 143 | to install the \`Automake' and \`Perl' packages. Grab them from 144 | any GNU archive site." 145 | touch aclocal.m4 146 | ;; 147 | 148 | autoconf) 149 | echo 1>&2 "\ 150 | WARNING: \`$1' is $msg. You should only need it if 151 | you modified \`${configure_ac}'. You might want to install the 152 | \`Autoconf' and \`GNU m4' packages. Grab them from any GNU 153 | archive site." 154 | touch configure 155 | ;; 156 | 157 | autoheader) 158 | echo 1>&2 "\ 159 | WARNING: \`$1' is $msg. You should only need it if 160 | you modified \`acconfig.h' or \`${configure_ac}'. You might want 161 | to install the \`Autoconf' and \`GNU m4' packages. Grab them 162 | from any GNU archive site." 163 | files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` 164 | test -z "$files" && files="config.h" 165 | touch_files= 166 | for f in $files; do 167 | case "$f" in 168 | *:*) touch_files="$touch_files "`echo "$f" | 169 | sed -e 's/^[^:]*://' -e 's/:.*//'`;; 170 | *) touch_files="$touch_files $f.in";; 171 | esac 172 | done 173 | touch $touch_files 174 | ;; 175 | 176 | automake*) 177 | echo 1>&2 "\ 178 | WARNING: \`$1' is $msg. You should only need it if 179 | you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. 180 | You might want to install the \`Automake' and \`Perl' packages. 181 | Grab them from any GNU archive site." 182 | find . -type f -name Makefile.am -print | 183 | sed 's/\.am$/.in/' | 184 | while read f; do touch "$f"; done 185 | ;; 186 | 187 | autom4te) 188 | echo 1>&2 "\ 189 | WARNING: \`$1' is needed, but is $msg. 
190 | You might have modified some files without having the 191 | proper tools for further handling them. 192 | You can get \`$1' as part of \`Autoconf' from any GNU 193 | archive site." 194 | 195 | file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` 196 | test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` 197 | if test -f "$file"; then 198 | touch $file 199 | else 200 | test -z "$file" || exec >$file 201 | echo "#! /bin/sh" 202 | echo "# Created by GNU Automake missing as a replacement of" 203 | echo "# $ $@" 204 | echo "exit 0" 205 | chmod +x $file 206 | exit 1 207 | fi 208 | ;; 209 | 210 | bison|yacc) 211 | echo 1>&2 "\ 212 | WARNING: \`$1' $msg. You should only need it if 213 | you modified a \`.y' file. You may need the \`Bison' package 214 | in order for those modifications to take effect. You can get 215 | \`Bison' from any GNU archive site." 216 | rm -f y.tab.c y.tab.h 217 | if [ $# -ne 1 ]; then 218 | eval LASTARG="\${$#}" 219 | case "$LASTARG" in 220 | *.y) 221 | SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` 222 | if [ -f "$SRCFILE" ]; then 223 | cp "$SRCFILE" y.tab.c 224 | fi 225 | SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` 226 | if [ -f "$SRCFILE" ]; then 227 | cp "$SRCFILE" y.tab.h 228 | fi 229 | ;; 230 | esac 231 | fi 232 | if [ ! -f y.tab.h ]; then 233 | echo >y.tab.h 234 | fi 235 | if [ ! -f y.tab.c ]; then 236 | echo 'main() { return 0; }' >y.tab.c 237 | fi 238 | ;; 239 | 240 | lex|flex) 241 | echo 1>&2 "\ 242 | WARNING: \`$1' is $msg. You should only need it if 243 | you modified a \`.l' file. You may need the \`Flex' package 244 | in order for those modifications to take effect. You can get 245 | \`Flex' from any GNU archive site." 246 | rm -f lex.yy.c 247 | if [ $# -ne 1 ]; then 248 | eval LASTARG="\${$#}" 249 | case "$LASTARG" in 250 | *.l) 251 | SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` 252 | if [ -f "$SRCFILE" ]; then 253 | cp "$SRCFILE" lex.yy.c 254 | fi 255 | ;; 256 | esac 257 | fi 258 | if [ ! 
-f lex.yy.c ]; then 259 | echo 'main() { return 0; }' >lex.yy.c 260 | fi 261 | ;; 262 | 263 | help2man) 264 | echo 1>&2 "\ 265 | WARNING: \`$1' is $msg. You should only need it if 266 | you modified a dependency of a manual page. You may need the 267 | \`Help2man' package in order for those modifications to take 268 | effect. You can get \`Help2man' from any GNU archive site." 269 | 270 | file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` 271 | if test -z "$file"; then 272 | file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` 273 | fi 274 | if [ -f "$file" ]; then 275 | touch $file 276 | else 277 | test -z "$file" || exec >$file 278 | echo ".ab help2man is required to generate this page" 279 | exit 1 280 | fi 281 | ;; 282 | 283 | makeinfo) 284 | echo 1>&2 "\ 285 | WARNING: \`$1' is $msg. You should only need it if 286 | you modified a \`.texi' or \`.texinfo' file, or any other file 287 | indirectly affecting the aspect of the manual. The spurious 288 | call might also be the consequence of using a buggy \`make' (AIX, 289 | DU, IRIX). You might want to install the \`Texinfo' package or 290 | the \`GNU make' package. Grab either from any GNU archive site." 291 | # The file to touch is that specified with -o ... 292 | file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` 293 | if test -z "$file"; then 294 | # ... or it is the one specified with @setfilename ... 295 | infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` 296 | file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $infile` 297 | # ... or it is derived from the source name (dir/f.texi becomes f.info) 298 | test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info 299 | fi 300 | # If the file does not exist, the user really needs makeinfo; 301 | # let's fail without touching anything. 302 | test -f $file || exit 1 303 | touch $file 304 | ;; 305 | 306 | tar) 307 | shift 308 | 309 | # We have already tried tar in the generic part. 
310 | # Look for gnutar/gtar before invocation to avoid ugly error 311 | # messages. 312 | if (gnutar --version > /dev/null 2>&1); then 313 | gnutar "$@" && exit 0 314 | fi 315 | if (gtar --version > /dev/null 2>&1); then 316 | gtar "$@" && exit 0 317 | fi 318 | firstarg="$1" 319 | if shift; then 320 | case "$firstarg" in 321 | *o*) 322 | firstarg=`echo "$firstarg" | sed s/o//` 323 | tar "$firstarg" "$@" && exit 0 324 | ;; 325 | esac 326 | case "$firstarg" in 327 | *h*) 328 | firstarg=`echo "$firstarg" | sed s/h//` 329 | tar "$firstarg" "$@" && exit 0 330 | ;; 331 | esac 332 | fi 333 | 334 | echo 1>&2 "\ 335 | WARNING: I can't seem to be able to run \`tar' with the given arguments. 336 | You may want to install GNU tar or Free paxutils, or check the 337 | command line arguments." 338 | exit 1 339 | ;; 340 | 341 | *) 342 | echo 1>&2 "\ 343 | WARNING: \`$1' is needed, and is $msg. 344 | You might have modified some files without having the 345 | proper tools for further handling them. Check the \`README' file, 346 | it often tells you about the needed prerequisites for installing 347 | this package. You may also peek at any GNU archive site, in case 348 | some other package would contain this missing \`$1' program." 349 | exit 1 350 | ;; 351 | esac 352 | 353 | exit 0 354 | 355 | # Local variables: 356 | # eval: (add-hook 'write-file-hooks 'time-stamp) 357 | # time-stamp-start: "scriptversion=" 358 | # time-stamp-format: "%:y-%02m-%02d.%02H" 359 | # time-stamp-end: "$" 360 | # End: 361 | -------------------------------------------------------------------------------- /mkinstalldirs: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # mkinstalldirs --- make directory hierarchy 3 | 4 | scriptversion=2005-06-29.22 5 | 6 | # Original author: Noah Friedman 7 | # Created: 1993-05-16 8 | # Public domain. 9 | # 10 | # This file is maintained in Automake, please report 11 | # bugs to or send patches to 12 | # . 
13 | 14 | errstatus=0 15 | dirmode= 16 | 17 | usage="\ 18 | Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ... 19 | 20 | Create each directory DIR (with mode MODE, if specified), including all 21 | leading file name components. 22 | 23 | Report bugs to ." 24 | 25 | # process command line arguments 26 | while test $# -gt 0 ; do 27 | case $1 in 28 | -h | --help | --h*) # -h for help 29 | echo "$usage" 30 | exit $? 31 | ;; 32 | -m) # -m PERM arg 33 | shift 34 | test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } 35 | dirmode=$1 36 | shift 37 | ;; 38 | --version) 39 | echo "$0 $scriptversion" 40 | exit $? 41 | ;; 42 | --) # stop option processing 43 | shift 44 | break 45 | ;; 46 | -*) # unknown option 47 | echo "$usage" 1>&2 48 | exit 1 49 | ;; 50 | *) # first non-opt arg 51 | break 52 | ;; 53 | esac 54 | done 55 | 56 | for file 57 | do 58 | if test -d "$file"; then 59 | shift 60 | else 61 | break 62 | fi 63 | done 64 | 65 | case $# in 66 | 0) exit 0 ;; 67 | esac 68 | 69 | # Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and 70 | # mkdir -p a/c at the same time, both will detect that a is missing, 71 | # one will create a, then the other will try to create a and die with 72 | # a "File exists" error. This is a problem when calling mkinstalldirs 73 | # from a parallel make. We use --version in the probe to restrict 74 | # ourselves to GNU mkdir, which is thread-safe. 75 | case $dirmode in 76 | '') 77 | if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then 78 | echo "mkdir -p -- $*" 79 | exec mkdir -p -- "$@" 80 | else 81 | # On NextStep and OpenStep, the `mkdir' command does not 82 | # recognize any option. It will interpret all options as 83 | # directories to create, and then abort because `.' already 84 | # exists. 85 | test -d ./-p && rmdir ./-p 86 | test -d ./--version && rmdir ./--version 87 | fi 88 | ;; 89 | *) 90 | if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 && 91 | test ! 
-d ./--version; then 92 | echo "mkdir -m $dirmode -p -- $*" 93 | exec mkdir -m "$dirmode" -p -- "$@" 94 | else 95 | # Clean up after NextStep and OpenStep mkdir. 96 | for d in ./-m ./-p ./--version "./$dirmode"; 97 | do 98 | test -d $d && rmdir $d 99 | done 100 | fi 101 | ;; 102 | esac 103 | 104 | for file 105 | do 106 | case $file in 107 | /*) pathcomp=/ ;; 108 | *) pathcomp= ;; 109 | esac 110 | oIFS=$IFS 111 | IFS=/ 112 | set fnord $file 113 | shift 114 | IFS=$oIFS 115 | 116 | for d 117 | do 118 | test "x$d" = x && continue 119 | 120 | pathcomp=$pathcomp$d 121 | case $pathcomp in 122 | -*) pathcomp=./$pathcomp ;; 123 | esac 124 | 125 | if test ! -d "$pathcomp"; then 126 | echo "mkdir $pathcomp" 127 | 128 | mkdir "$pathcomp" || lasterr=$? 129 | 130 | if test ! -d "$pathcomp"; then 131 | errstatus=$lasterr 132 | else 133 | if test ! -z "$dirmode"; then 134 | echo "chmod $dirmode $pathcomp" 135 | lasterr= 136 | chmod "$dirmode" "$pathcomp" || lasterr=$? 137 | 138 | if test ! -z "$lasterr"; then 139 | errstatus=$lasterr 140 | fi 141 | fi 142 | fi 143 | fi 144 | 145 | pathcomp=$pathcomp/ 146 | done 147 | done 148 | 149 | exit $errstatus 150 | 151 | # Local Variables: 152 | # mode: shell-script 153 | # sh-indentation: 2 154 | # eval: (add-hook 'write-file-hooks 'time-stamp) 155 | # time-stamp-start: "scriptversion=" 156 | # time-stamp-format: "%:y-%02m-%02d.%02H" 157 | # time-stamp-end: "$" 158 | # End: 159 | -------------------------------------------------------------------------------- /packages/deb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # This takes one commandline argument, the name of the package. If no 4 | # name is given, then we'll end up just using the name associated with 5 | # an arbitrary .tar.gz file in the rootdir. That's fine: there's probably 6 | # only one. 
7 | # 8 | # Run this from the 'packages' directory, just under rootdir 9 | 10 | ## Set LIB to lib if exporting a library, empty-string else 11 | LIB= 12 | #LIB=lib 13 | 14 | PACKAGE="$1" 15 | VERSION="$2" 16 | 17 | # We can only build Debian packages, if the Debian build tools are installed 18 | if [ \! -x /usr/bin/debuild ]; then 19 | echo "Cannot find /usr/bin/debuild. Not building Debian packages." 1>&2 20 | exit 0 21 | fi 22 | 23 | # Double-check we're in the packages directory, just under rootdir 24 | if [ \! -r ../Makefile -a \! -r ../INSTALL ]; then 25 | echo "Must run $0 in the 'packages' directory, under the root directory." 1>&2 26 | echo "Also, you must run \"make dist\" before running this script." 1>&2 27 | exit 0 28 | fi 29 | 30 | # Find the top directory for this package 31 | topdir="${PWD%/*}" 32 | 33 | # Find the tar archive built by "make dist" 34 | archive="${PACKAGE}-${VERSION}" 35 | archive_with_underscore="${PACKAGE}_${VERSION}" 36 | if [ -z "${archive}" ]; then 37 | echo "Cannot find ../$PACKAGE*.tar.gz. Run \"make dist\" first." 1>&2 38 | exit 0 39 | fi 40 | 41 | # Create a pristine directory for building the Debian package files 42 | trap 'rm -rf '`pwd`/tmp'; exit $?' EXIT SIGHUP SIGINT SIGTERM 43 | 44 | rm -rf tmp 45 | mkdir -p tmp 46 | cd tmp 47 | 48 | # Debian has very specific requirements about the naming of build 49 | # directories, and tar archives. It also wants to write all generated 50 | # packages to the parent of the source directory. We accommodate these 51 | # requirements by building directly from the tar file. 52 | ln -s "${topdir}/${archive}.tar.gz" "${LIB}${archive}.orig.tar.gz" 53 | # Some version of debuilder want foo.orig.tar.gz with _ between versions. 
54 | ln -s "${topdir}/${archive}.tar.gz" "${LIB}${archive_with_underscore}.orig.tar.gz" 55 | tar zfx "${LIB}${archive}.orig.tar.gz" 56 | [ -n "${LIB}" ] && mv "${archive}" "${LIB}${archive}" 57 | cd "${LIB}${archive}" 58 | # This is one of those 'specific requirements': where the deb control files live 59 | cp -a "packages/deb" "debian" 60 | 61 | # Now, we can call Debian's standard build tool 62 | debuild -uc -us 63 | cd ../.. # get back to the original top-level dir 64 | 65 | # We'll put the result in a subdirectory that's named after the OS version 66 | # we've made this .deb file for. 67 | destdir="debian-$(cat /etc/debian_version 2>/dev/null || echo UNKNOWN)" 68 | 69 | rm -rf "$destdir" 70 | mkdir -p "$destdir" 71 | mv $(find tmp -mindepth 1 -maxdepth 1 -type f) "$destdir" 72 | 73 | echo 74 | echo "The Debian package files are located in $PWD/$destdir" 75 | -------------------------------------------------------------------------------- /packages/deb/README: -------------------------------------------------------------------------------- 1 | Copyright 2008 Google Inc. All Rights Reserved. 2 | 3 | This directory contains an implementation of an html context scanner with no 4 | lookahead. Its purpose is to scan an html stream and provide context 5 | information at any point within the input stream. An example of a user of this 6 | scanner would be an auto escaping templating system, which would require 7 | html context information at very specific points within the html stream. The 8 | implementation is based on a simplified state machine of HTML4.1. 9 | -------------------------------------------------------------------------------- /packages/deb/changelog: -------------------------------------------------------------------------------- 1 | streamhtmlparser (0.1-1) unstable; urgency=low 2 | 3 | * Initial release. 4 | 5 | -- Google Inc. 
Thu, 19 Mar 2009 21:22:41 +0100 6 | -------------------------------------------------------------------------------- /packages/deb/compat: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /packages/deb/control: -------------------------------------------------------------------------------- 1 | Source: streamhtmlparser 2 | Section: libdevel 3 | Priority: optional 4 | Maintainer: Google Inc. 5 | Standards-Version: 3.6.1 6 | 7 | Package: libstreamhtmlparser-dev 8 | Section: libdevel 9 | Architecture: any 10 | Depends: libstreamhtmlparser0 (= ${Source-Version}) 11 | Description: Implementation of an html context scanner with no lookahead. Its 12 | purpose is to scan an html stream and provide context information at any point 13 | within the input stream. An example of a user of this scanner would be an auto 14 | escaping templating system, which would require html context information at 15 | very specific points within the html stream. The implementation is based on a 16 | simplified state machine of HTML4.1. 17 | 18 | Package: libstreamhtmlparser0 19 | Section: libs 20 | Architecture: any 21 | Description: Implementation of an html context scanner with no lookahead. Its 22 | purpose is to scan an html stream and provide context information at any point 23 | within the input stream. An example of a user of this scanner would be an auto 24 | escaping templating system, which would require html context information at 25 | very specific points within the html stream. The implementation is based on a 26 | simplified state machine of HTML4.1. 27 | -------------------------------------------------------------------------------- /packages/deb/copyright: -------------------------------------------------------------------------------- 1 | This package was debianized by Google Inc. on 2 | 19 March 2009. 
3 | 4 | It was downloaded from http://code.google.com/ 5 | 6 | Upstream Author: opensource@google.com 7 | 8 | Copyright (c) 2009, Google Inc. 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are 13 | met: 14 | 15 | * Redistributions of source code must retain the above copyright 16 | notice, this list of conditions and the following disclaimer. 17 | * Redistributions in binary form must reproduce the above 18 | copyright notice, this list of conditions and the following disclaimer 19 | in the documentation and/or other materials provided with the 20 | distribution. 21 | * Neither the name of Google Inc. nor the names of its 22 | contributors may be used to endorse or promote products derived from 23 | this software without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
36 | -------------------------------------------------------------------------------- /packages/deb/docs: -------------------------------------------------------------------------------- 1 | AUTHORS 2 | COPYING 3 | ChangeLog 4 | INSTALL 5 | NEWS 6 | README 7 | -------------------------------------------------------------------------------- /packages/deb/libstreamhtmlparser-dev.dirs: -------------------------------------------------------------------------------- 1 | usr/lib 2 | usr/include/streamhtmlparser 3 | -------------------------------------------------------------------------------- /packages/deb/libstreamhtmlparser-dev.install: -------------------------------------------------------------------------------- 1 | usr/include/streamhtmlparser/* 2 | usr/lib/lib*.so 3 | usr/lib/lib*.a 4 | usr/lib/lib*.la 5 | debian/tmp/usr/include/streamhtmlparser/* 6 | debian/tmp/usr/lib/lib*.so 7 | debian/tmp/usr/lib/lib*.a 8 | debian/tmp/usr/lib/lib*.la 9 | -------------------------------------------------------------------------------- /packages/deb/libstreamhtmlparser0.dirs: -------------------------------------------------------------------------------- 1 | usr/lib 2 | -------------------------------------------------------------------------------- /packages/deb/libstreamhtmlparser0.install: -------------------------------------------------------------------------------- 1 | usr/lib/lib*.so.* 2 | debian/tmp/usr/lib/lib*.so.* 3 | -------------------------------------------------------------------------------- /packages/deb/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # This file was originally written by Joey Hess and Craig Small. 5 | # As a special exception, when this file is copied by dh-make into a 6 | # dh-make output file, you may use that output file without restriction. 
7 | # This special exception was added by Craig Small in version 0.37 of dh-make. 8 | 9 | # Uncomment this to turn on verbose mode. 10 | #export DH_VERBOSE=1 11 | 12 | 13 | # These are used for cross-compiling and for saving the configure script 14 | # from having to guess our platform (since we know it already) 15 | DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) 16 | DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) 17 | 18 | 19 | CFLAGS = -Wall -g 20 | 21 | ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) 22 | CFLAGS += -O0 23 | else 24 | CFLAGS += -O2 25 | endif 26 | ifeq (,$(findstring nostrip,$(DEB_BUILD_OPTIONS))) 27 | INSTALL_PROGRAM += -s 28 | endif 29 | 30 | # shared library versions, option 1 31 | #version=2.0.5 32 | #major=2 33 | # option 2, assuming the library is created as src/.libs/libfoo.so.2.0.5 or so 34 | version=`ls src/.libs/lib*.so.* | \ 35 | awk '{if (match($$0,/[0-9]+\.[0-9]+\.[0-9]+$$/)) print substr($$0,RSTART)}'` 36 | major=`ls src/.libs/lib*.so.* | \ 37 | awk '{if (match($$0,/\.so\.[0-9]+$$/)) print substr($$0,RSTART+4)}'` 38 | 39 | config.status: configure 40 | dh_testdir 41 | # Add here commands to configure the package. 42 | CFLAGS="$(CFLAGS)" ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --mandir=\$${prefix}/share/man --infodir=\$${prefix}/share/info 43 | 44 | 45 | build: build-stamp 46 | build-stamp: config.status 47 | dh_testdir 48 | 49 | # Add here commands to compile the package. 50 | $(MAKE) 51 | 52 | touch build-stamp 53 | 54 | clean: 55 | dh_testdir 56 | dh_testroot 57 | rm -f build-stamp 58 | 59 | # Add here commands to clean up after the build process. 
60 | -$(MAKE) distclean 61 | ifneq "$(wildcard /usr/share/misc/config.sub)" "" 62 | cp -f /usr/share/misc/config.sub config.sub 63 | endif 64 | ifneq "$(wildcard /usr/share/misc/config.guess)" "" 65 | cp -f /usr/share/misc/config.guess config.guess 66 | endif 67 | 68 | 69 | dh_clean 70 | 71 | install: build 72 | dh_testdir 73 | dh_testroot 74 | dh_clean -k 75 | dh_installdirs 76 | 77 | # Add here commands to install the package into debian/tmp 78 | $(MAKE) install DESTDIR=$(CURDIR)/debian/tmp 79 | 80 | 81 | # Build architecture-independent files here. 82 | binary-indep: build install 83 | # We have nothing to do by default. 84 | 85 | # Build architecture-dependent files here. 86 | binary-arch: build install 87 | dh_testdir 88 | dh_testroot 89 | dh_installchangelogs ChangeLog 90 | dh_installdocs 91 | dh_installexamples 92 | dh_install --sourcedir=debian/tmp 93 | # dh_installmenu 94 | # dh_installdebconf 95 | # dh_installlogrotate 96 | # dh_installemacsen 97 | # dh_installpam 98 | # dh_installmime 99 | # dh_installinit 100 | # dh_installcron 101 | # dh_installinfo 102 | dh_installman 103 | dh_link 104 | dh_strip 105 | dh_compress 106 | dh_fixperms 107 | # dh_perl 108 | # dh_python 109 | dh_makeshlibs 110 | dh_installdeb 111 | dh_shlibdeps 112 | dh_gencontrol 113 | dh_md5sums 114 | dh_builddeb 115 | 116 | binary: binary-indep binary-arch 117 | .PHONY: build clean binary-indep binary-arch binary install 118 | -------------------------------------------------------------------------------- /packages/rpm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | # Run this from the 'packages' directory, just under rootdir 4 | 5 | # We can only build rpm packages, if the rpm build tools are installed 6 | if [ \! -x /usr/bin/rpmbuild ] 7 | then 8 | echo "Cannot find /usr/bin/rpmbuild. Not building an rpm." 
1>&2 9 | exit 0 10 | fi 11 | 12 | # Check the commandline flags 13 | PACKAGE="$1" 14 | VERSION="$2" 15 | fullname="${PACKAGE}-${VERSION}" 16 | archive=../$fullname.tar.gz 17 | 18 | if [ -z "$1" -o -z "$2" ] 19 | then 20 | echo "Usage: $0 " 1>&2 21 | exit 0 22 | fi 23 | 24 | # Double-check we're in the packages directory, just under rootdir 25 | if [ \! -r ../Makefile -a \! -r ../INSTALL ] 26 | then 27 | echo "Must run $0 in the 'packages' directory, under the root directory." 1>&2 28 | echo "Also, you must run \"make dist\" before running this script." 1>&2 29 | exit 0 30 | fi 31 | 32 | if [ \! -r "$archive" ] 33 | then 34 | echo "Cannot find $archive. Run \"make dist\" first." 1>&2 35 | exit 0 36 | fi 37 | 38 | # Create the directory where the input lives, and where the output should live 39 | RPM_SOURCE_DIR="/tmp/rpmsource-$fullname" 40 | RPM_BUILD_DIR="/tmp/rpmbuild-$fullname" 41 | 42 | trap 'rm -rf $RPM_SOURCE_DIR $RPM_BUILD_DIR; exit $?' EXIT SIGHUP SIGINT SIGTERM 43 | 44 | rm -rf "$RPM_SOURCE_DIR" "$RPM_BUILD_DIR" 45 | mkdir "$RPM_SOURCE_DIR" 46 | mkdir "$RPM_BUILD_DIR" 47 | 48 | cp "$archive" "$RPM_SOURCE_DIR" 49 | 50 | # rpmbuild -- as far as I can tell -- asks the OS what CPU it has. 51 | # This may differ from what kind of binaries gcc produces. dpkg 52 | # does a better job of this, so if we can run 'dpkg --print-architecture' 53 | # to get the build CPU, we use that in preference of the rpmbuild 54 | # default. 
55 | target=`dpkg --print-architecture 2>/dev/null` # "" if dpkg isn't found 56 | if [ -n "$target" ] 57 | then 58 | target=" --target $target" 59 | fi 60 | 61 | rpmbuild -bb rpm/rpm.spec $target \ 62 | --define "NAME $PACKAGE" \ 63 | --define "VERSION $VERSION" \ 64 | --define "_sourcedir $RPM_SOURCE_DIR" \ 65 | --define "_builddir $RPM_BUILD_DIR" \ 66 | --define "_rpmdir $RPM_SOURCE_DIR" 67 | 68 | # We put the output in a directory based on what system we've built for 69 | destdir=rpm-unknown 70 | if [ -r /etc/issue ] 71 | then 72 | grep "Red Hat.*release 7" /etc/issue >/dev/null 2>&1 && destdir=rh7 73 | grep "Red Hat.*release 8" /etc/issue >/dev/null 2>&1 && destdir=rh8 74 | grep "Red Hat.*release 9" /etc/issue >/dev/null 2>&1 && destdir=rh9 75 | if grep Fedora /etc/issue >/dev/null; then 76 | destdir=fc`grep Fedora /etc/issue | cut -d' ' -f 4`; 77 | fi 78 | fi 79 | 80 | rm -rf "$destdir" 81 | mkdir -p "$destdir" 82 | # We want to get not only the main package but devel etc, hence the middle * 83 | mv "$RPM_SOURCE_DIR"/*/"${PACKAGE}"-*"${VERSION}"*.rpm "$destdir" 84 | 85 | echo 86 | echo "The rpm package file(s) are located in $PWD/$destdir" 87 | -------------------------------------------------------------------------------- /packages/rpm/rpm.spec: -------------------------------------------------------------------------------- 1 | %define RELEASE 1 2 | %define rel %{?CUSTOM_RELEASE} %{!?CUSTOM_RELEASE:%RELEASE} 3 | %define prefix /usr 4 | 5 | Name: %NAME 6 | Summary: Streaming html context scanner in C 7 | Version: %VERSION 8 | Release: %rel 9 | Group: Development/Libraries 10 | URL: http://code.google.com/p/streamhtmlparser/ 11 | License: BSD 12 | Vendor: Google 13 | Packager: Google Inc. 14 | Source: http://%{NAME}.googlecode.com/files/%{NAME}-%{VERSION}.tar.gz 15 | Distribution: Redhat 7 and above. 16 | Buildroot: %{_tmppath}/%{name}-root 17 | Prefix: %prefix 18 | 19 | %description 20 | Implementation of an html context scanner with no lookahead.
Its purpose is to 21 | scan an html stream and provide context information at any point within the 22 | input stream. An example of a user of this scanner would be an auto escaping 23 | templating system, which would require html context information at very 24 | specific points within the html stream. The implementation is based on a 25 | simplified state machine of HTML4.1. 26 | 27 | %package devel 28 | Summary: Streaming html context scanner in C 29 | Group: Development/Libraries 30 | Requires: %{NAME} = %{VERSION} 31 | 32 | %description devel 33 | Implementation of an html context scanner with no lookahead. Its purpose is to 34 | scan an html stream and provide context information at any point within the 35 | input stream. An example of a user of this scanner would be an auto escaping 36 | templating system, which would require html context information at very 37 | specific points within the html stream. The implementation is based on a 38 | simplified state machine of HTML4.1. 39 | 40 | %changelog 41 | * Thu Mar 19 2009 42 | - First draft 43 | 44 | %prep 45 | %setup 46 | 47 | %build 48 | %configure 49 | make 50 | 51 | %install 52 | rm -rf $RPM_BUILD_ROOT 53 | make DESTDIR=$RPM_BUILD_ROOT install 54 | 55 | %clean 56 | rm -rf $RPM_BUILD_ROOT 57 | 58 | %files 59 | %defattr(-,root,root) 60 | 61 | ## Mark all installed files within /usr/share/doc/{package name} as 62 | ## documentation (e.g. README, Changelog).
This depends on the following 63 | ## two lines appearing in Makefile.am: 64 | ## docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION) 65 | ## dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README 66 | %docdir %{prefix}/share/doc/%{NAME}-%{VERSION} 67 | %{prefix}/share/doc/%{NAME}-%{VERSION}/* 68 | ## This captures the rest of your documentation; the stuff in doc/ 69 | ## %doc doc/* 70 | 71 | %{_libdir}/* 72 | 73 | %files devel 74 | %defattr(-,root,root) 75 | %{prefix}/include/streamhtmlparser 76 | -------------------------------------------------------------------------------- /src/config.h.in: -------------------------------------------------------------------------------- 1 | /* src/config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Namespace for Google classes */ 4 | #undef GOOGLE_NAMESPACE 5 | 6 | /* Define to 1 if you have the <dlfcn.h> header file. */ 7 | #undef HAVE_DLFCN_H 8 | 9 | /* Define to 1 if you have the <inttypes.h> header file. */ 10 | #undef HAVE_INTTYPES_H 11 | 12 | /* Define to 1 if your system has a GNU libc compatible `malloc' function, and 13 | to 0 otherwise. */ 14 | #undef HAVE_MALLOC 15 | 16 | /* Define to 1 if you have the `memchr' function. */ 17 | #undef HAVE_MEMCHR 18 | 19 | /* Define to 1 if you have the <memory.h> header file. */ 20 | #undef HAVE_MEMORY_H 21 | 22 | /* define if the compiler implements namespaces */ 23 | #undef HAVE_NAMESPACES 24 | 25 | /* Define to 1 if stdbool.h conforms to C99. */ 26 | #undef HAVE_STDBOOL_H 27 | 28 | /* Define to 1 if you have the <stdint.h> header file. */ 29 | #undef HAVE_STDINT_H 30 | 31 | /* Define to 1 if you have the <stdlib.h> header file. */ 32 | #undef HAVE_STDLIB_H 33 | 34 | /* Define to 1 if you have the `strcasecmp' function. */ 35 | #undef HAVE_STRCASECMP 36 | 37 | /* Define to 1 if you have the `strchr' function. */ 38 | #undef HAVE_STRCHR 39 | 40 | /* Define to 1 if you have the <strings.h> header file. */ 41 | #undef HAVE_STRINGS_H 42 | 43 | /* Define to 1 if you have the <string.h> header file.
*/ 44 | #undef HAVE_STRING_H 45 | 46 | /* Define to 1 if you have the `strstr' function. */ 47 | #undef HAVE_STRSTR 48 | 49 | /* Define to 1 if you have the `strtol' function. */ 50 | #undef HAVE_STRTOL 51 | 52 | /* Define to 1 if you have the <sys/stat.h> header file. */ 53 | #undef HAVE_SYS_STAT_H 54 | 55 | /* Define to 1 if you have the <sys/types.h> header file. */ 56 | #undef HAVE_SYS_TYPES_H 57 | 58 | /* Define to 1 if you have the <unistd.h> header file. */ 59 | #undef HAVE_UNISTD_H 60 | 61 | /* Define to 1 if the system has the type `_Bool'. */ 62 | #undef HAVE__BOOL 63 | 64 | /* Name of package */ 65 | #undef PACKAGE 66 | 67 | /* Define to the address where bug reports for this package should be sent. */ 68 | #undef PACKAGE_BUGREPORT 69 | 70 | /* Define to the full name of this package. */ 71 | #undef PACKAGE_NAME 72 | 73 | /* Define to the full name and version of this package. */ 74 | #undef PACKAGE_STRING 75 | 76 | /* Define to the one symbol short name of this package. */ 77 | #undef PACKAGE_TARNAME 78 | 79 | /* Define to the version of this package. */ 80 | #undef PACKAGE_VERSION 81 | 82 | /* Define to 1 if you have the ANSI C header files. */ 83 | #undef STDC_HEADERS 84 | 85 | /* the namespace where STL code like vector<> is defined */ 86 | #undef STL_NAMESPACE 87 | 88 | /* Version number of package */ 89 | #undef VERSION 90 | 91 | /* Stops putting the code inside the Google namespace */ 92 | #undef _END_GOOGLE_NAMESPACE_ 93 | 94 | /* Puts following code inside the Google namespace */ 95 | #undef _START_GOOGLE_NAMESPACE_ 96 | 97 | /* Define to empty if `const' does not conform to ANSI C. */ 98 | #undef const 99 | 100 | /* Define to `__inline__' or `__inline' if that's what the C compiler 101 | calls it, or to nothing if 'inline' is not supported under any name. */ 102 | #ifndef __cplusplus 103 | #undef inline 104 | #endif 105 | 106 | /* Define to rpl_malloc if the replacement function should be used. */ 107 | #undef malloc 108 | 109 | /* Define to `unsigned' if <sys/types.h> does not define.
*/ 110 | #undef size_t 111 | -------------------------------------------------------------------------------- /src/htmlparser_fsm.config: -------------------------------------------------------------------------------- 1 | # Copyright 2008 Google Inc. All Rights Reserved. 2 | # Author: falmeida@google.com (Filipe Almeida) 3 | 4 | # TODO(falmeida): Add more descriptive names to the states and drop the 5 | # abbreviations. 6 | # TODO(falmeida): Reorder the states so that it's easier to read. 7 | # TODO(falmeida): Support CDATA blocks in the form: ', '>') 15 | condition('=', '=') 16 | 17 | # TODO(falmeida): This is not the correct expression. tag and attribute names 18 | # can only consist of alpha character. 19 | condition('id', 'A-Za-z0-9_:-') 20 | condition('idtag', 'A-Za-z0-9/_:-') 21 | 22 | # Whitespace according to: http://www.w3.org/TR/html401/struct/text.html#h-9.1 23 | condition('space', ' \t\n\r') 24 | condition('!', '!') 25 | condition('q', '\'') 26 | condition('dq', '\"') 27 | condition('/', '/') 28 | condition('*', '*') 29 | condition('-', '-') 30 | condition('?', '?') 31 | condition('lf', '\n') 32 | condition('quote', '\\') 33 | 34 | # TODO(falmeida): This default rule is a hack and shouldn't be here. 35 | condition('default', '[:default:]') 36 | 37 | state(name = 'text', 38 | external = 'text', 39 | transitions = [ 40 | ['<', 'tag_start'], 41 | ['default', 'text'] 42 | ]) 43 | 44 | # When we found the < character in text. 45 | # Tag opening is defined in the HTML5 draft here: 46 | # http://www.whatwg.org/specs/web-apps/current-work/#tag-open-state 47 | # We don't exactly follow this and are much more loose in order to mimic the way 48 | # the major browsers behave. 49 | state(name = 'tag_start', 50 | external = 'tag', 51 | transitions = [ 52 | ['idtag', 'tag_name'], 53 | ['?', 'pi'], 54 | ['!', 'declaration_start'], 55 | ['<', 'tag_start'], 56 | ['default', 'text'] 57 | ]) 58 | 59 | # Name of the tag. Includes the closing tag character '/'. 
60 | state(name = 'tag_name', 61 | external = 'tag', 62 | transitions = [ 63 | ['idtag', 'tag_name'], 64 | ['space', 'tag_space'], 65 | ['>', 'tag_close'] 66 | ]) 67 | 68 | # HTML declaration and comment parsing 69 | # 70 | # We don't expose declaration state because at this point we only want to 71 | # ensure that we are parsing them correctly so we don't get out of sync. 72 | # This is specifically made for DOCTYPE declarations and won't work if DTD's 73 | # are defined inside the declaration. 74 | # The HTML5 spec says we should specificly look for the string '', 'text'], 86 | ['default', 'declaration_body'] 87 | ]) 88 | 89 | # Inside a declaration. Ie: ' 90 | state(name = 'declaration_body', 91 | external = 'text', 92 | transitions = [ 93 | ['>', 'text'], 94 | ['default', 'declaration_body'] 95 | ]) 96 | 97 | # Got '' 106 | state(name = 'comment_body', 107 | external = 'comment', 108 | transitions = [ 109 | ['-', 'comment_dash'], 110 | ['default', 'comment_body'] 111 | ]) 112 | 113 | # Got '-' inside a comment. 114 | state(name = 'comment_dash', 115 | external = 'comment', 116 | transitions = [ 117 | ['-', 'comment_dash_dash'], 118 | ['default', 'comment_body'] 119 | ]) 120 | 121 | # Got '--' inside a comment. 122 | state(name = 'comment_dash_dash', 123 | external = 'comment', 124 | transitions = [ 125 | ['-', 'comment_dash_dash'], 126 | ['>', 'text'], 127 | ['default', 'comment_body'] 128 | ]) 129 | 130 | # XML Processing instruction parsing according to: 131 | # http://www.w3.org/TR/REC-xml/#sec-pi 132 | # 133 | # Everything between the characters is considered to be part of the 134 | # processing instruction. 135 | state(name = 'pi', 136 | external = 'text', 137 | transitions = [ 138 | ['?', 'pi_may_end'], 139 | ['default', 'pi'] 140 | ]) 141 | 142 | state(name = 'pi_may_end', 143 | external = 'text', 144 | transitions = [ 145 | ['>', 'text'], 146 | ['default', 'pi'] 147 | ]) 148 | 149 | # Whitespace between tag name, attributes. 
150 | state(name = 'tag_space', 151 | external = 'tag', 152 | transitions = [ 153 | ['>', 'tag_close'], 154 | ['space', 'tag_space'], 155 | ['id', 'attr'], 156 | ['/', 'tag_space'] 157 | ]) 158 | 159 | state(name = 'tag_close', 160 | external = 'text', 161 | transitions = [ 162 | ['<', 'tag_start'], 163 | ['default', 'text'] 164 | ]) 165 | 166 | # Name of the attribute. 167 | state(name = 'attr', 168 | external = 'attr', 169 | transitions = [ 170 | ['id', 'attr'], 171 | ['>', 'tag_close'], 172 | ['/', 'tag_space'], 173 | ['=', 'value'], 174 | ['space', 'attr_space'] 175 | ]) 176 | 177 | # After the attribute name. 178 | state(name = 'attr_space', 179 | external = 'attr', 180 | transitions = [ 181 | ['>', 'tag_close'], 182 | ['space', 'attr_space'], 183 | ['id', 'attr'], 184 | ['/', 'tag_space'], 185 | ['=', 'value'] 186 | ]) 187 | 188 | # Expecting a value, after attribute= 189 | state(name = 'value', 190 | external = 'value', 191 | transitions = [ 192 | ['q', 'value_q_start'], 193 | ['dq', 'value_dq_start'], 194 | ['space', 'value'], 195 | ['>', 'tag_close'], 196 | ['default', 'value_text'] 197 | ]) 198 | 199 | # Unquoted attribute value. 200 | state(name = 'value_text', 201 | external = 'value', 202 | transitions = [ 203 | ['>', 'tag_close'], 204 | ['space', 'tag_space'], 205 | ['default', 'value_text'] 206 | ]) 207 | 208 | # First character of a single quoted attribute value. 209 | state(name = 'value_q_start', 210 | external = 'value', 211 | transitions = [ 212 | ['q', 'tag_space'], 213 | ['default', 'value_q'] 214 | ]) 215 | 216 | # In the middle of a single quoted attribute value. 217 | state(name = 'value_q', 218 | external = 'value', 219 | transitions = [ 220 | ['q', 'tag_space'], 221 | ['default', 'value_q'] 222 | ]) 223 | 224 | # First character of a double quoted attribute value. 
225 | state(name = 'value_dq_start', 226 | external = 'value', 227 | transitions = [ 228 | ['dq', 'tag_space'], 229 | ['default', 'value_dq'] 230 | ]) 231 | 232 | # In the middle of a double quoted attribute value. 233 | state(name = 'value_dq', 234 | external = 'value', 235 | transitions = [ 236 | ['dq', 'tag_space'], 237 | ['default', 'value_dq'] 238 | ]) 239 | 240 | # CDATA escaping text spans. 241 | # TODO(falmeida): These states should go after cdata_text. 242 | 243 | # Got '', 'cdata_text'], 281 | ['default', 'cdata_comment_body'] 282 | ]) 283 | 284 | # CDATA processing 285 | # 286 | # To simplify the code, we treat RCDATA and CDATA sections the same since the 287 | # differences between them don't affect the context we are in. 288 | state(name = 'cdata_text', 289 | external = 'text', 290 | transitions = [ 291 | ['<', 'cdata_lt'], 292 | ['default', 'cdata_text'] 293 | ]) 294 | 295 | # Possible beginning of the closing tag. 296 | state(name = 'cdata_lt', 297 | external = 'text', 298 | transitions = [ 299 | ['/', 'cdata_may_close'], 300 | ['!', 'cdata_comment_start'], 301 | ['default', 'cdata_text'] 302 | ]) 303 | 304 | # If we encounter ', 'text'], 311 | ['space', 'tag_space'], 312 | ['default', 'cdata_text'] 313 | ]) 314 | 315 | # The next states are used for specialized parser modes. 316 | state(name = 'js_file', 317 | external = 'js_file', 318 | transitions = [ 319 | ['default', 'js_file'] 320 | ]) 321 | 322 | # TODO(falmeida): Having css_file and js_file as the external name doesn't make 323 | # sense. This should instead be text and the js/css state be 324 | # returned by # in_js() and in_css(). 
325 | state(name = 'css_file', 326 | external = 'css_file', 327 | transitions = [ 328 | ['default', 'css_file'] 329 | ]) 330 | 331 | state(name = 'null', 332 | external = 'text', 333 | transitions = [ 334 | ['default', 'null'] 335 | ]) 336 | 337 | -------------------------------------------------------------------------------- /src/jsparser_fsm.config: -------------------------------------------------------------------------------- 1 | # Copyright 2008 Google Inc. All Rights Reserved. 2 | # Author: falmeida@google.com (Filipe Almeida) 3 | 4 | name = 'jsparser' 5 | 6 | comment = 'Simplified finite state machine for tracking of javascript states' 7 | 8 | condition('q', '\''), 9 | condition('dq', '\"'), 10 | condition('/', '/'), 11 | condition('*', '*'), 12 | condition('[', '['), 13 | condition(']', ']'), 14 | condition('lf', '\n'), 15 | condition('backslash', '\\'), 16 | condition('default', '[:default:]') 17 | 18 | # Main javascript body. 19 | state(name = 'js_text', 20 | external = 'text', 21 | transitions = [ 22 | ['q', 'js_q'], 23 | ['dq', 'js_dq'], 24 | ['/', 'js_slash'], 25 | ['default', 'js_text'] 26 | ]) 27 | 28 | # Single quoted string literal. 29 | state(name = 'js_q', 30 | external = 'q', 31 | transitions = [ 32 | ['backslash', 'js_q_e'], 33 | ['q', 'js_text'], 34 | ['default', 'js_q'] 35 | ]) 36 | 37 | # Javascript escaped character in a single quoted string literal. 38 | state(name = 'js_q_e', 39 | external = 'q', 40 | transitions = [ 41 | ['default', 'js_q'] 42 | ]) 43 | 44 | # Double quoted string literal 45 | state(name = 'js_dq', 46 | external = 'dq', 47 | transitions = [ 48 | ['backslash', 'js_dq_e'], 49 | ['dq', 'js_text'], 50 | ['default', 'js_dq'] 51 | ]) 52 | 53 | # Javascript escaped character in a double quoted string literal. 54 | state(name = 'js_dq_e', 55 | external = 'dq', 56 | transitions = [ 57 | ['default', 'js_dq'] 58 | ]) 59 | 60 | # Possible start of a javascript comment. 
61 | state(name = 'js_slash', 62 | external = 'text', 63 | transitions = [ 64 | ['/', 'js_comment_ln'], 65 | ['*', 'js_comment_ml'], 66 | ['default', 'js_text'] 67 | ]) 68 | 69 | # Possible start of a regular expression literal. 70 | # 71 | # The state diagram does not reach this state directly. When js_slash is 72 | # reached, the function enter_state_js_slash() is called, which checks if the 73 | # last token belongs to the set of tokens that can precede a regular 74 | # expression, in which case it changes the state to js_regexp_slash. 75 | # 76 | # For more information please read the comments in 77 | # jsparser.c:enter_state_js_slash(). 78 | state(name = 'js_regexp_slash', 79 | external = 'text', 80 | transitions = [ 81 | ['/', 'js_comment_ln'], 82 | ['*', 'js_comment_ml'], 83 | ['backslash', 'js_regexp_e'], 84 | ['[', 'js_regexp_bracket'], 85 | ['default', 'js_regexp'] 86 | ]) 87 | 88 | # Regular expression literal. 89 | state(name = 'js_regexp', 90 | external = 'regexp', 91 | transitions = [ 92 | ['backslash', 'js_regexp_e'], 93 | ['[', 'js_regexp_bracket'], 94 | ['/', 'js_text'], 95 | ['default', 'js_regexp'] 96 | ]) 97 | 98 | # Regexp bracket expression 99 | state(name = 'js_regexp_bracket', 100 | external = 'regexp', 101 | transitions = [ 102 | ['backslash', 'js_regexp_bracket_e'], 103 | [']', 'js_regexp'], 104 | ['default', 'js_regexp_bracket'] 105 | ]) 106 | 107 | # Backslash escaped regexp bracket expression 108 | state(name = 'js_regexp_bracket_e', 109 | external = 'regexp', 110 | transitions = [ 111 | ['default', 'js_regexp_bracket'] 112 | ]) 113 | 114 | # Escaped regular expression char. 115 | state(name = 'js_regexp_e', 116 | external = 'regexp', 117 | transitions = [ 118 | ['default', 'js_regexp'] 119 | ]) 120 | 121 | # Start of a single line javascript comment (//). 
122 | state(name = 'js_comment_ln', 123 | external = 'comment', 124 | transitions = [ 125 | ['lf', 'js_comment_after'], 126 | ['default', 'js_comment_ln'] 127 | ]) 128 | 129 | # Start of a multiline javascript comment (/*). 130 | state(name = 'js_comment_ml', 131 | external = 'comment', 132 | transitions = [ 133 | ['*', 'js_comment_ml_close'], 134 | ['default', 'js_comment_ml'] 135 | ]) 136 | 137 | # Close of a multiline javascript comment (*/). 138 | state(name = 'js_comment_ml_close', 139 | external = 'comment', 140 | transitions = [ 141 | ['/', 'js_comment_after'], 142 | ['default', 'js_comment_ml'] 143 | ]) 144 | 145 | # Ending character of a javascript comment. 146 | # It can either be a '/' in the case of a multiline comment, or a line 147 | # terminator in the case of a single line comment. 148 | # This is needed so we don't insert the '/' or the new line character into the 149 | # ring buffer. 150 | state(name = 'js_comment_after', 151 | external = 'text', 152 | transitions = [ 153 | ['q', 'js_q'], 154 | ['dq', 'js_dq'], 155 | ['/', 'js_slash'], 156 | ['default', 'js_text'] 157 | ]) 158 | -------------------------------------------------------------------------------- /src/streamhtmlparser/htmlparser_cpp.h.in: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2008, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. 
nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | // 30 | // --- 31 | // Author: Filipe Almeida 32 | // 33 | // c++ bindings for htmlparser. 
34 | 35 | #ifndef STREAMHTMLPARSER_HTMLPARSER_CPP_H__ 36 | #define STREAMHTMLPARSER_HTMLPARSER_CPP_H__ 37 | 38 | #include 39 | #include 40 | extern "C" { 41 | #include 42 | #include 43 | } 44 | 45 | @ac_google_start_namespace@ 46 | 47 | class JavascriptParser { 48 | public: 49 | enum State { 50 | STATE_TEXT = JSPARSER_STATE_TEXT, 51 | STATE_Q = JSPARSER_STATE_Q, 52 | STATE_DQ = JSPARSER_STATE_DQ, 53 | STATE_REGEXP = JSPARSER_STATE_REGEXP, 54 | STATE_COMMENT = JSPARSER_STATE_COMMENT, 55 | }; 56 | }; 57 | 58 | class HtmlParser { 59 | public: 60 | 61 | /* html states */ 62 | enum State { 63 | STATE_TEXT = HTMLPARSER_STATE_TEXT, 64 | STATE_TAG = HTMLPARSER_STATE_TAG, 65 | STATE_ATTR = HTMLPARSER_STATE_ATTR, 66 | STATE_VALUE = HTMLPARSER_STATE_VALUE, 67 | STATE_COMMENT = HTMLPARSER_STATE_COMMENT, 68 | STATE_JS_FILE = HTMLPARSER_STATE_JS_FILE, 69 | STATE_CSS_FILE = HTMLPARSER_STATE_CSS_FILE, 70 | STATE_ERROR = HTMLPARSER_STATE_ERROR 71 | }; 72 | 73 | /* attribute types */ 74 | enum AttributeType { 75 | ATTR_NONE = HTMLPARSER_ATTR_NONE, 76 | ATTR_REGULAR = HTMLPARSER_ATTR_REGULAR, 77 | ATTR_URI = HTMLPARSER_ATTR_URI, 78 | ATTR_JS = HTMLPARSER_ATTR_JS, 79 | ATTR_STYLE = HTMLPARSER_ATTR_STYLE 80 | }; 81 | 82 | /* Parser modes */ 83 | enum Mode { 84 | MODE_HTML = HTMLPARSER_MODE_HTML, 85 | MODE_JS = HTMLPARSER_MODE_JS, 86 | MODE_CSS = HTMLPARSER_MODE_CSS, 87 | MODE_HTML_IN_TAG = HTMLPARSER_MODE_HTML_IN_TAG 88 | }; 89 | 90 | HtmlParser() { 91 | parser_ = htmlparser_new(); 92 | assert(parser_ != NULL); 93 | }; 94 | 95 | /* Parses the input html stream and returns the finishing state. 96 | * 97 | * Returns HtmlParser::STATE_ERROR if unable to parse the input. If 98 | * htmlparser_parse() is called after an error situation was encountered 99 | * the behaviour is unspecified. At this point, Reset() or ResetMode() 100 | * can be called to reset the state so it can be used to parse a new file. 
101 | */ 102 | int Parse(const char *str, int len) { 103 | return htmlparser_parse(parser_, str, len); 104 | }; 105 | 106 | int Parse(const std::string &str) { 107 | return Parse(str.c_str(), static_cast(str.length())); 108 | }; 109 | 110 | /* Returns the current state the parser is in */ 111 | int state() const { 112 | return htmlparser_state(parser_); 113 | }; 114 | 115 | /* Returns the current tag or NULL if not available. 116 | * 117 | * There is no stack implemented because we currently don't have a need for 118 | * it, which means tag names are tracked only one level deep. 119 | * 120 | * This is better understood by looking at the following example: 121 | * 122 | * 123 | * [tag=b] 124 | * 125 | * [tag=i] 126 | * 127 | * [tag=NULL] 128 | * 129 | * 130 | * The tag is correctly filled inside the tag itself and before any new 131 | * inner tag is closed, at which point the tag will be set to NULL. 132 | * 133 | * For our current purposes this is not a problem, but we may implement a 134 | * tag tracking stack in the future for completeness. 135 | */ 136 | const char *tag() const { 137 | return htmlparser_tag(parser_); 138 | } 139 | 140 | /* Returns the current attribute name if inside an attribute name or an 141 | * attribute value. Returns NULL otherwise. */ 142 | const char *attribute() const { 143 | return htmlparser_attr(parser_); 144 | } 145 | 146 | /* Returns the contents of the current attribute value. */ 147 | const char *value() const { 148 | return htmlparser_value(parser_); 149 | } 150 | 151 | /* Returns true if inside javascript. This can be a javascript block, a 152 | * javascript attribute value or the parser may just be in javascript mode 153 | * (HtmlParser::MODE_JS) */ 154 | bool InJavascript() const { 155 | return static_cast(htmlparser_in_js(parser_)); 156 | } 157 | 158 | /* Returns true if the parser is currently inside a CSS construct. 
159 | * 160 | * Currently this can be either a STYLE tag, a STYLE attribute or the fact 161 | * that the parser was reset using MODE_CSS using ResetMode(). 162 | */ 163 | bool InCss() const { 164 | return static_cast(htmlparser_in_css(parser_)); 165 | } 166 | 167 | /* Returns true if the current attribute is quoted */ 168 | bool IsAttributeQuoted() const { 169 | return static_cast(htmlparser_is_attr_quoted(parser_)); 170 | } 171 | 172 | /* Returns true if the parser is inside a js string literal. 173 | */ 174 | bool IsJavascriptQuoted() const { 175 | return static_cast(htmlparser_is_js_quoted(parser_)); 176 | } 177 | 178 | /* Returns the index within the current value or -1 if the parser is not 179 | * inside an attribute value */ 180 | int ValueIndex() const { 181 | return htmlparser_value_index(parser_); 182 | } 183 | 184 | /* Returns true if this is the first character of a url inside an attribute. 185 | * 186 | * This function can be used by an html sanitizer or auto escaping system as 187 | * a hint that it should validate the url for a whitelist of protocol 188 | * handlers and for well-formedness, or that it should just escape a 189 | * component of it. 190 | * 191 | * For attributes that expect a url this will return true if we are at the 192 | * first character of the attribute, but for the special case of a meta 193 | * redirect tag some analysis is made in order to verify if we are at the 194 | * start of a url or not. 195 | * 196 | * For any other attributes, the result will always be false. 197 | * 198 | */ 199 | bool IsUrlStart() const { 200 | return htmlparser_is_url_start(parser_); 201 | } 202 | 203 | /* Returns the current attribute type. 
204 | * 205 | * The attribute type can be one of: 206 | * ATTR_NONE - not inside an attribute 207 | * ATTR_REGULAR - Inside a normal attribute 208 | * ATTR_URI - Inside an attribute that accepts a uri 209 | * ATTR_JS - Inside a javascript attribute 210 | * ATTR_STYLE - Inside a css style attribute 211 | * */ 212 | int AttributeType() const { 213 | return htmlparser_attr_type(parser_); 214 | } 215 | 216 | /* Return the current line number. */ 217 | int line_number() const { 218 | return htmlparser_get_line_number(parser_); 219 | } 220 | 221 | /* Set the current line number. */ 222 | void set_line_number(int line) { 223 | return htmlparser_set_line_number(parser_, line); 224 | } 225 | 226 | /* Return the current column number. */ 227 | int column_number() const { 228 | return htmlparser_get_column_number(parser_); 229 | } 230 | 231 | /* Set the current column number. */ 232 | void set_column_number(int column) { 233 | return htmlparser_set_column_number(parser_, column); 234 | } 235 | 236 | /* Retrieve a human readable error message in case an error occurred. 237 | * 238 | * NULL is returned if the parser didn't encounter an error. 239 | */ 240 | const char *GetErrorMessage() { 241 | return htmlparser_get_error_msg(parser_); 242 | } 243 | 244 | /* Returns the current state the javascript parser is in. 245 | * 246 | * Should only be used for testing. 247 | */ 248 | int javascript_state() const { 249 | return htmlparser_js_state(parser_); 250 | }; 251 | 252 | /* Resets the parser to its initial state and changes the parser mode. 253 | * 254 | * Internal state (tag name, attribute name, state of statemachine) is 255 | * reset as though the object was just created. 256 | * 257 | * Available modes: 258 | * MODE_HTML - Parses html text 259 | * MODE_JS - Parses javascript files 260 | * MODE_CSS - Parses CSS files. No parsing is actually done 261 | * but InCss() always returns true. 262 | * MODE_HTML_IN_TAG - Parses an attribute list inside a tag. 
To 263 | * be used in a template expanded in the 264 | * following context: 265 | */ 266 | void ResetMode(enum Mode mode) { 267 | return htmlparser_reset_mode(parser_, mode); 268 | } 269 | 270 | /* Resets the parser to its initial state and to the default mode, which is 271 | * MODE_HTML. 272 | * 273 | * All internal context like tag name, attribute name or the state of the 274 | * statemachine are reset to their original values as if the object was just 275 | * created. 276 | */ 277 | void Reset() { 278 | return htmlparser_reset(parser_); 279 | } 280 | 281 | /* Invoked when text is inserted by the caller. 282 | * 283 | * Should be called before a template directive that expands to content is 284 | * found. This changes the current state by following the default rule, 285 | * ensuring we stay in sync with the template. 286 | * 287 | * Returns true if template directives are accepted for this state and 288 | * false if they are not, which should result in an error condition. 289 | * 290 | * Right now the only case being handled are unquoted attribute values and 291 | * it always returns true. In the future we can handle more cases and 292 | * restrict the states where we allow template directives by returning false 293 | * for those. 294 | */ 295 | bool InsertText() { 296 | return static_cast(htmlparser_insert_text(parser_)); 297 | } 298 | 299 | /* Copies the context of the HtmlParser object referenced in source to the 300 | * current object. 
301 | */ 302 | void CopyFrom(const HtmlParser *source) { 303 | assert(this != source); 304 | assert(source != NULL); 305 | htmlparser_copy(parser_, source->parser_); 306 | } 307 | 308 | ~HtmlParser() { 309 | htmlparser_delete(parser_); 310 | }; 311 | 312 | 313 | private: 314 | htmlparser_ctx *parser_; 315 | HtmlParser(const HtmlParser&); // disallow copy 316 | void operator=(const HtmlParser&); // and assign 317 | 318 | }; 319 | 320 | @ac_google_end_namespace@ 321 | 322 | #endif // STREAMHTMLPARSER_HTMLPARSER_CPP_H__ 323 | -------------------------------------------------------------------------------- /src/streamhtmlparser/jsparser.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Google Inc. 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | */ 33 | 34 | #ifndef STREAMHTMLPARSER_JSPARSER_H 35 | #define STREAMHTMLPARSER_JSPARSER_H 36 | 37 | #include 38 | 39 | /* Size of the ring buffer used to lookup the last token in the javascript 40 | * stream. The size is pretty much arbitrary at this point but must be bigger 41 | * than the biggest token we want to lookup plus 3: Two delimiters plus an empty 42 | * ring buffer slot. */ 43 | #define JSPARSER_RING_BUFFER_SIZE 18 44 | 45 | enum js_state_external_enum { 46 | JSPARSER_STATE_TEXT, 47 | JSPARSER_STATE_Q, 48 | JSPARSER_STATE_DQ, 49 | JSPARSER_STATE_REGEXP, 50 | JSPARSER_STATE_COMMENT 51 | }; 52 | 53 | /* Stores the context of the javascript parser. 54 | * 55 | * If this structure is changed, jsparser_new(), jsparser_copy() and 56 | * jsparser_reset() should be updated accordingly. 57 | */ 58 | typedef struct jsparser_ctx_s { 59 | 60 | /* Reference to the statemachine context. */ 61 | statemachine_ctx *statemachine; 62 | 63 | /* Reference to the statemachine definition. 64 | * 65 | * It should be readonly and contain the same values across jsparser 66 | * instances. 67 | */ 68 | /* TODO(falmeida): Change statemachine_def to const. */ 69 | statemachine_definition *statemachine_def; 70 | 71 | /* Index to the start of the buffer. */ 72 | int buffer_start; 73 | 74 | /* Index the current writing position (end of the buffer plus one). 
*/ 75 | int buffer_end; 76 | 77 | /* Ring buffer used to lookup the last token. */ 78 | char buffer[JSPARSER_RING_BUFFER_SIZE]; 79 | 80 | } jsparser_ctx; 81 | 82 | 83 | void jsparser_reset(jsparser_ctx *ctx); 84 | jsparser_ctx *jsparser_new(void); 85 | 86 | /* Returns a pointer to a context which is a duplicate of the jsparser src. 87 | */ 88 | jsparser_ctx *jsparser_duplicate(jsparser_ctx *src); 89 | 90 | /* Copies the context of the jsparser pointed to by src to the jsparser dst. 91 | */ 92 | void jsparser_copy(jsparser_ctx *dst, jsparser_ctx *src); 93 | int jsparser_state(jsparser_ctx *ctx); 94 | int jsparser_parse(jsparser_ctx *ctx, const char *str, int size); 95 | 96 | void jsparser_delete(jsparser_ctx *ctx); 97 | 98 | /** 99 | * Ring buffer functions. 100 | * 101 | * These functions are only exported for testing and should not be called from 102 | * outside of jsparser.c in production code. 103 | */ 104 | 105 | /* Appends a character to the ring buffer. 106 | * 107 | * Sequences of whitespaces and newlines are folded into one character. 108 | */ 109 | void jsparser_buffer_append_chr(jsparser_ctx *js, char chr); 110 | 111 | /* Appends a string to the ring buffer. 112 | * 113 | * Sequences of whitespaces and newlines are folded into one character. 114 | */ 115 | void jsparser_buffer_append_str(jsparser_ctx *js, const char *str); 116 | 117 | /* Returns the last appended character and removes it from the buffer. If the 118 | * buffer is empty, then it returns ASCII 0 ('\0'). 119 | */ 120 | char jsparser_buffer_pop(jsparser_ctx *js); 121 | 122 | /* Returns the value of the character at a certain index in the buffer or an 123 | * ASCII 0 ('\0') character if the index extends beyond the size of the 124 | * buffer, either because we don't have as many characters in the buffer, or 125 | * because the index points to a place bigger than the size of the buffer. 
126 | * 127 | * Index positions must be negative, where -1 is the last character appended to 128 | * the buffer. 129 | */ 130 | char jsparser_buffer_get(jsparser_ctx *js, int pos); 131 | 132 | /* Sets the value of the character at a certain index in the buffer. Returns 133 | * true if the write was successful or false if there was an attempt to write 134 | * outside of the buffer boundaries. 135 | * 136 | * Index positions are negative, where -1 is the last character appended to the 137 | * buffer. Using positive integers for the index will result in undefined 138 | * behaviour. 139 | */ 140 | int jsparser_buffer_set(jsparser_ctx *js, int pos, char value); 141 | 142 | /* Copies a slice of the buffer to the string pointed to by output. start and 143 | * end are the indexes of the sliced region. If the start argument extends 144 | * beyond the beginning of the buffer, the slice will only contain characters 145 | * starting from the beginning of the buffer. 146 | */ 147 | void jsparser_buffer_slice(jsparser_ctx *js, char *buffer, int start, int end); 148 | 149 | /* Copy the last javascript identifier or keyword found in the buffer to the 150 | * string pointed to by identifier. 151 | */ 152 | int jsparser_buffer_last_identifier(jsparser_ctx *js, char *identifier); 153 | 154 | 155 | #define jsparser_parse_chr(a,b) jsparser_parse(a, &(b), 1); 156 | #ifdef __cplusplus 157 | #define jsparser_parse_str(a,b) jsparser_parse(a, b, \ 158 | static_cast(strlen(b))); 159 | #else 160 | #define jsparser_parse_str(a,b) jsparser_parse(a, b, (int)strlen(b)); 161 | #endif 162 | 163 | #endif /* STREAMHTMLPARSER_JSPARSER_H */ 164 | -------------------------------------------------------------------------------- /src/streamhtmlparser/statemachine.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Google Inc. 2 | * All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | */ 33 | 34 | #ifndef STREAMHTMLPARSER_STATEMACHINE_H 35 | #define STREAMHTMLPARSER_STATEMACHINE_H 36 | 37 | /* TODO(falmeida): I'm not sure about these limits, but since right now we only 38 | * have 24 states it should be fine */ 39 | 40 | enum { 41 | STATEMACHINE_ERROR = 127 42 | }; 43 | 44 | #define STATEMACHINE_RECORD_BUFFER_SIZE 256 45 | 46 | #define STATEMACHINE_MAX_STR_ERROR 80 47 | 48 | struct statemachine_ctx_s; 49 | 50 | typedef void(*state_event_function)(struct statemachine_ctx_s *, int, char, 51 | int); 52 | 53 | typedef struct statemachine_definition_s { 54 | int num_states; 55 | const int* const* transition_table; 56 | 57 | /* Array containing the name of the states as a C string. 58 | * This field is optional and if not in use it should be set to NULL. 59 | */ 60 | const char* const* state_names; 61 | state_event_function *in_state_events; 62 | state_event_function *enter_state_events; 63 | state_event_function *exit_state_events; 64 | } statemachine_definition; 65 | 66 | typedef struct statemachine_ctx_s { 67 | int current_state; 68 | int next_state; 69 | statemachine_definition *definition; 70 | char current_char; 71 | 72 | /* Current line number. */ 73 | int line_number; 74 | 75 | /* Current column number. */ 76 | int column_number; 77 | char record_buffer[STATEMACHINE_RECORD_BUFFER_SIZE]; 78 | size_t record_pos; 79 | 80 | /* True if we are recording the stream to record_buffer. */ 81 | int recording; 82 | 83 | /* In case there was an error (we are in state STATEMACHINE_ERROR), it will 84 | * contain a human readable description of the error. 85 | */ 86 | char error_msg[STATEMACHINE_MAX_STR_ERROR]; 87 | 88 | /* Storage space for the layer above. */ 89 | void *user; 90 | } statemachine_ctx; 91 | 92 | /* Populates the statemachine definition. 93 | * 94 | * Receives a transition table and an optional array of state names. It uses 95 | * this data to populate the state machine definition. 
96 | * 97 | * The transition table structure is a list of lists of ints (int **). The 98 | * outer list indexes the source state and the inner list contains the 99 | * destination state for each of the possible input characters: 100 | * 101 | * const int* const* transitions[source][input] == destination. 102 | * 103 | * The optional argument state_names points to a list of strings containing 104 | * human readable state names. These strings are used when reporting error 105 | * messages. 106 | */ 107 | void statemachine_definition_populate(statemachine_definition *def, 108 | const int* const* transition_table, 109 | const char* const* state_names); 110 | 111 | void statemachine_in_state(statemachine_definition *def, int st, 112 | state_event_function func); 113 | void statemachine_enter_state(statemachine_definition *def, int st, 114 | state_event_function func); 115 | void statemachine_exit_state(statemachine_definition *def, int st, 116 | state_event_function func); 117 | 118 | statemachine_definition *statemachine_definition_new(int states); 119 | void statemachine_definition_delete(statemachine_definition *def); 120 | 121 | int statemachine_get_state(statemachine_ctx *ctx); 122 | void statemachine_set_state(statemachine_ctx *ctx, int state); 123 | 124 | void statemachine_start_record(statemachine_ctx *ctx); 125 | const char *statemachine_stop_record(statemachine_ctx *ctx); 126 | const char *statemachine_record_buffer(statemachine_ctx *ctx); 127 | 128 | /* Returns the number of characters currently stored in the record buffer. 129 | */ 130 | static inline size_t statemachine_record_length(statemachine_ctx *ctx) { 131 | return ctx->record_pos + 1; 132 | } 133 | 134 | /* Return the current line number. */ 135 | static inline int statemachine_get_line_number(statemachine_ctx *ctx) { 136 | return ctx->line_number; 137 | } 138 | 139 | /* Set the current line number. 
*/ 140 | static inline void statemachine_set_line_number(statemachine_ctx *ctx, 141 | int line) { 142 | ctx->line_number = line; 143 | } 144 | 145 | /* Return the current column number. */ 146 | static inline int statemachine_get_column_number(statemachine_ctx *ctx) { 147 | return ctx->column_number; 148 | } 149 | 150 | /* Set the current column number. */ 151 | static inline void statemachine_set_column_number(statemachine_ctx *ctx, 152 | int column) { 153 | ctx->column_number = column; 154 | } 155 | 156 | 157 | /* Retrieve a human readable error message in case an error occurred. 158 | * 159 | * NULL is returned if the parser didn't encounter an error. 160 | */ 161 | static inline const char *statemachine_get_error_msg(statemachine_ctx *ctx) { 162 | if (ctx->next_state == STATEMACHINE_ERROR) { 163 | return ctx->error_msg; 164 | } else { 165 | return NULL; 166 | } 167 | } 168 | 169 | /* Reset the statemachine. 170 | * 171 | * The state is set to the initialization values. This includes setting the 172 | * state to the default state (0), stopping recording and setting the line 173 | * number to 1. 174 | */ 175 | void statemachine_reset(statemachine_ctx *ctx); 176 | 177 | /* Initializes a new statemachine. Receives a statemachine definition object 178 | * that should have been initialized with statemachine_definition_new() and a 179 | * user reference to be used by the caller. 180 | * 181 | * Returns NULL if initialization fails. 182 | * 183 | * Initialization failure is fatal, and if this function fails it may not 184 | * deallocate all previously allocated memory. 
192 | */ 193 | statemachine_ctx *statemachine_duplicate(statemachine_ctx *ctx, 194 | statemachine_definition *def, 195 | void *user); 196 | 197 | /* Copies the context of the statemachine pointed to by src to the statemachine 198 | * provided by dst. 199 | * The statemachine definition and the user pointer have to be provided since 200 | * these references are not owned by the statemachine itself. 201 | */ 202 | void statemachine_copy(statemachine_ctx *dst, 203 | statemachine_ctx *src, 204 | statemachine_definition *def, 205 | void *user); 206 | 207 | int statemachine_parse(statemachine_ctx *ctx, const char *str, int size); 208 | 209 | void statemachine_delete(statemachine_ctx *ctx); 210 | 211 | 212 | /***** 213 | * The following functions are only exported for testing purposes and should 214 | * be treated as private. */ 215 | 216 | 217 | /* Encode the character as an escaped C string. 218 | * 219 | * Encode the character chr into the string output. Writes at most len 220 | * characters to the output string but makes sure output is NULL terminated. 221 | */ 222 | void statemachine_encode_char(char chr, char *output, size_t len); 223 | 224 | #endif /* STREAMHTMLPARSER_STATEMACHINE_H */ 225 | -------------------------------------------------------------------------------- /src/tests/generate_fsm_c_test.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Google Inc. 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 
10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | * 33 | * Validate that sample_fsm.c compiles. 
34 | */ 35 | 36 | 37 | #include "config.h" 38 | #include 39 | #include 40 | #include 41 | 42 | enum states { 43 | STRINGPARSER_STATE_TEXT, 44 | STRINGPARSER_STATE_STRING 45 | }; 46 | 47 | #include "tests/testdata/sample_fsm.c" 48 | 49 | int main() 50 | { 51 | (void)stringparser_states_internal_names; 52 | (void)stringparser_state_transitions; 53 | printf("DONE.\n"); 54 | exit(0); 55 | } 56 | -------------------------------------------------------------------------------- /src/tests/generate_fsm_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright (c) 2008, Google Inc. 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following disclaimer 14 | # in the documentation and/or other materials provided with the 15 | # distribution. 16 | # * Neither the name of Google Inc. nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # --- 33 | # Author: Filipe Almeida 34 | 35 | die() { 36 | echo "Test failed: $@" 1>&2 37 | exit 1 38 | } 39 | # Source root: first argument, falling back to the TEST_SRCDIR environment variable. 40 | TEST_SRCDIR=${1:-$TEST_SRCDIR} 41 | 42 | # Find input files 43 | INPUT_FILE="$TEST_SRCDIR/src/tests/testdata/sample_fsm.config" 44 | OUTPUT_FILE="$TEST_SRCDIR/src/tests/testdata/sample_fsm.c" 45 | GENERATE_FSM="$TEST_SRCDIR/src/tools/generate_fsm.py" 46 | 47 | EXPECTED="`cat $OUTPUT_FILE`" 48 | if [ -z "$EXPECTED" ]; then die "Error reading $OUTPUT_FILE"; fi 49 | 50 | # Let's make sure the script works with python2.2 and above 51 | for PYTHON in "" "python2.2" "python2.3" "python2.4"; do 52 | GENERATED="`$PYTHON $GENERATE_FSM $INPUT_FILE`" 53 | if [ -z "$GENERATED" ]; then die "Error running $GENERATE_FSM"; fi 54 | 55 | if [ "$EXPECTED" != "$GENERATED" ]; then 56 | echo "Test failed ($PYTHON $GENERATE_FSM $INPUT_FILE)" 1>&2 57 | echo "-- EXPECTED --" 1>&2 58 | echo "$EXPECTED" 1>&2 59 | echo "-- GENERATED --" 1>&2 60 | echo "$GENERATED" 1>&2 61 | echo "--" 1>&2 62 | exit 1 63 | fi 64 | done 65 | 66 | echo "PASS" 67 | -------------------------------------------------------------------------------- /src/tests/htmlparser_test.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2008, Google Inc. 2 | * All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | */ 33 | 34 | #include "config.h" 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #include 41 | 42 | /* Taken from google templates */ 43 | 44 | #define ASSERT(cond) do { \ 45 | if (!(cond)) { \ 46 | printf("%s: %d: ASSERT FAILED: %s\n", __FILE__, __LINE__, \ 47 | #cond); \ 48 | assert(cond); \ 49 | exit(1); \ 50 | } \ 51 | } while (0) 52 | 53 | #define ASSERT_STREQ(a, b) do { \ 54 | if (strcmp((a), (b))) { \ 55 | printf("%s: %d: ASSERT FAILED: '%s' != '%s'\n", __FILE__, __LINE__, \ 56 | (a), (b)); \ 57 | assert(!strcmp((a), (b))); \ 58 | exit(1); \ 59 | } \ 60 | } while (0) 61 | 62 | #define ASSERT_STRSTR(text, substr) do { \ 63 | if (!strstr((text), (substr))) { \ 64 | printf("%s: %d: ASSERT FAILED: '%s' not in '%s'\n", \ 65 | __FILE__, __LINE__, (substr), (text)); \ 66 | assert(strstr((text), (substr))); \ 67 | exit(1); \ 68 | } \ 69 | } while (0) 70 | 71 | 72 | /* Process a string using entityfilter_process(). */ 73 | void entityfilter_process_str(entityfilter_ctx *filter, const char *in, 74 | char *out) 75 | { 76 | out[0] = '\0'; 77 | while (*in) { 78 | strcat(out, entityfilter_process(filter, *in)); 79 | ++in; 80 | } 81 | } 82 | 83 | /* Verify that the code still builds with a C compiler. */ 84 | void test_c_build() 85 | { 86 | htmlparser_ctx *html; 87 | 88 | #ifdef __cplusplus 89 | printf("C build test compiled in c++ mode."); 90 | exit(1); 91 | #endif /* __cplusplus */ 92 | 93 | html = htmlparser_new(); 94 | htmlparser_delete(html); 95 | printf("DONE.\n"); 96 | } 97 | 98 | /* Entity filter tests. 
*/ 99 | void test_entityfilter() 100 | { 101 | entityfilter_ctx *filter = entityfilter_new(); 102 | char buffer[256]; 103 | 104 | entityfilter_process_str(filter, "test", buffer); 105 | ASSERT_STREQ("test", buffer); 106 | 107 | entityfilter_process_str(filter, "testtest", buffer); 108 | ASSERT_STREQ("testtest", buffer); 109 | 110 | entityfilter_process_str(filter, "test'test", buffer); 111 | ASSERT_STREQ("test'test", buffer); 112 | 113 | entityfilter_process_str(filter, "test'test", buffer); 114 | ASSERT_STREQ("test'test", buffer); 115 | 116 | entityfilter_process_str(filter, "test'test", buffer); 117 | ASSERT_STREQ("test'test", buffer); 118 | 119 | entityfilter_process_str(filter, "test'test", buffer); 120 | ASSERT_STREQ("test'test", buffer); 121 | 122 | entityfilter_process_str(filter, "AAAA", buffer); 123 | ASSERT_STREQ("AAAA", buffer); 124 | 125 | entityfilter_process_str(filter, "AA A A", buffer); 126 | ASSERT_STREQ("AAAA", buffer); 127 | 128 | entityfilter_process_str(filter, "test&invalid;test", buffer); 129 | ASSERT_STREQ("test&invalid;test", buffer); 130 | 131 | entityfilter_process_str(filter, "test&invalid;01234567890123456789", buffer); 132 | ASSERT_STREQ("test&invalid;01234567890123456789", buffer); 133 | 134 | entityfilter_process_str(filter, "test&incomplete01234567890123456789", buffer); 135 | ASSERT_STREQ("test&incomplete01234567890123456789", buffer); 136 | 137 | entityfilter_process_str(filter, "test&012345;big", buffer); 138 | ASSERT_STREQ("test&012345;big", buffer); 139 | 140 | entityfilter_process_str(filter, "test&0123456;big", buffer); 141 | ASSERT_STREQ("test&0123456;big", buffer); 142 | 143 | entityfilter_process_str(filter, "test&01234567;big", buffer); 144 | ASSERT_STREQ("test&01234567;big", buffer); 145 | 146 | entityfilter_process_str(filter, "test&012345678;big", buffer); 147 | ASSERT_STREQ("test&012345678;big", buffer); 148 | 149 | entityfilter_process_str(filter, "test&0123456789;big", buffer); 150 | 
ASSERT_STREQ("test&0123456789;big", buffer); 151 | 152 | entityfilter_process_str(filter, "test&01234567890;big", buffer); 153 | ASSERT_STREQ("test&01234567890;big", buffer); 154 | 155 | entityfilter_process_str(filter, "test&012345678901;big", buffer); 156 | ASSERT_STREQ("test&012345678901;big", buffer); 157 | 158 | entityfilter_process_str(filter, "test& & & & & & & & & & &", buffer); 159 | ASSERT_STREQ("test& & & & & & & & & & ", buffer); 160 | 161 | entityfilter_delete(filter); 162 | } 163 | 164 | void test_position() 165 | { 166 | htmlparser_ctx *html; 167 | html = htmlparser_new(); 168 | 169 | ASSERT(htmlparser_get_line_number(html) == 1); 170 | 171 | htmlparser_parse_str(html, "\n\n"); 172 | ASSERT(htmlparser_get_line_number(html) == 3); 173 | ASSERT(htmlparser_get_column_number(html) == 1); 174 | 175 | htmlparser_parse_str(html, "

blah

"); 176 | ASSERT(htmlparser_get_line_number(html) == 3); 177 | ASSERT(htmlparser_get_column_number(html) == 14); 178 | 179 | htmlparser_parse_str(html, "

blah

\n\n\n\n\n"); 180 | ASSERT(htmlparser_get_line_number(html) == 8); 181 | ASSERT(htmlparser_get_column_number(html) == 1); 182 | 183 | htmlparser_set_line_number(html, 2); 184 | ASSERT(htmlparser_get_line_number(html) == 2); 185 | ASSERT(htmlparser_get_column_number(html) == 1); 186 | 187 | htmlparser_parse_str(html, "\n\n"); 188 | ASSERT(htmlparser_get_line_number(html) == 4); 189 | ASSERT(htmlparser_get_column_number(html) == 1); 190 | 191 | htmlparser_set_column_number(html, 4); 192 | htmlparser_parse_str(html, "

blah

"); 193 | ASSERT(htmlparser_get_line_number(html) == 4); 194 | ASSERT(htmlparser_get_column_number(html) == 17); 195 | 196 | htmlparser_parse_str(html, "

blah

\n\n\n\n\n"); 197 | ASSERT(htmlparser_get_line_number(html) == 9); 198 | 199 | htmlparser_reset(html); 200 | ASSERT(htmlparser_get_line_number(html) == 1); 201 | 202 | htmlparser_parse_str(html, "- \n - \n - \n - \n - \n"); 203 | ASSERT(htmlparser_get_line_number(html) == 6); 204 | 205 | htmlparser_reset(html); 206 | htmlparser_parse_str(html, "- \n\r - \n - \r - \r\n - \r - \n\r"); 207 | ASSERT(htmlparser_get_line_number(html) == 5); 208 | 209 | htmlparser_parse_str(html, "\n\n
\n\n"); 214 | ASSERT(htmlparser_get_line_number(html) == 9); 215 | ASSERT(htmlparser_get_column_number(html) == 8); 216 | 217 | htmlparser_delete(html); 218 | } 219 | 220 | int main(int argc, char **argv) 221 | { 222 | test_c_build(); 223 | test_entityfilter(); 224 | test_position(); 225 | printf("DONE.\n"); 226 | return 0; 227 | } 228 | -------------------------------------------------------------------------------- /src/tests/jsparser_test.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2008, Google Inc. 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | */ 33 | 34 | #include "config.h" 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #include 41 | 42 | /* Taken from google templates */ 43 | 44 | #define ASSERT(cond) do { \ 45 | if (!(cond)) { \ 46 | printf("%s: %d: ASSERT FAILED: %s\n", __FILE__, __LINE__, \ 47 | #cond); \ 48 | assert(cond); \ 49 | exit(1); \ 50 | } \ 51 | } while (0) 52 | 53 | #define ASSERT_STREQ(a, b) do { \ 54 | if (strcmp((a), (b))) { \ 55 | printf("%s: %d: ASSERT FAILED: '%s' != '%s'\n", __FILE__, __LINE__, \ 56 | (a), (b)); \ 57 | assert(!strcmp((a), (b))); \ 58 | exit(1); \ 59 | } \ 60 | } while (0) 61 | 62 | #define ASSERT_STRSTR(text, substr) do { \ 63 | if (!strstr((text), (substr))) { \ 64 | printf("%s: %d: ASSERT FAILED: '%s' not in '%s'\n", \ 65 | __FILE__, __LINE__, (substr), (text)); \ 66 | assert(strstr((text), (substr))); \ 67 | exit(1); \ 68 | } \ 69 | } while (0) 70 | 71 | 72 | /* Tests for jsparser_buffer_get(). 
*/ 73 | void test_buffer_get() 74 | { 75 | jsparser_ctx *js; 76 | js = jsparser_new(); 77 | 78 | ASSERT(jsparser_buffer_get(js, -1) == '\0'); 79 | ASSERT(jsparser_buffer_get(js, -2) == '\0'); 80 | ASSERT(jsparser_buffer_get(js, -3) == '\0'); 81 | 82 | jsparser_buffer_append_chr(js, 'a'); 83 | ASSERT(jsparser_buffer_get(js, -1) == 'a'); 84 | ASSERT(jsparser_buffer_get(js, -2) == '\0'); 85 | 86 | 87 | jsparser_buffer_append_chr(js, 'b'); 88 | ASSERT(jsparser_buffer_get(js, -1) == 'b'); 89 | ASSERT(jsparser_buffer_get(js, -2) == 'a'); 90 | ASSERT(jsparser_buffer_get(js, -3) == '\0'); 91 | 92 | jsparser_buffer_append_str(js, "1234567890"); 93 | ASSERT(jsparser_buffer_get(js, -1) == '0'); 94 | ASSERT(jsparser_buffer_get(js, -2) == '9'); 95 | ASSERT(jsparser_buffer_get(js, -3) == '8'); 96 | 97 | jsparser_buffer_append_str(js, "ABCDEGHIJKLMN"); 98 | ASSERT(jsparser_buffer_get(js, -1) == 'N'); 99 | ASSERT(jsparser_buffer_get(js, -2) == 'M'); 100 | ASSERT(jsparser_buffer_get(js, -3) == 'L'); 101 | ASSERT(jsparser_buffer_get(js, -200) == '\0'); 102 | 103 | jsparser_delete(js); 104 | } 105 | 106 | /* Tests for jsparser_buffer_set(). */ 107 | void test_buffer_set() 108 | { 109 | jsparser_ctx *js; 110 | js = jsparser_new(); 111 | 112 | ASSERT(jsparser_buffer_set(js, -1, 'a') == 0); 113 | ASSERT(jsparser_buffer_set(js, -2, 'b') == 0); 114 | ASSERT(jsparser_buffer_set(js, -3, 'c') == 0); 115 | 116 | jsparser_buffer_append_chr(js, 'a'); 117 | ASSERT(jsparser_buffer_set(js, -1, 'b') != 0); 118 | ASSERT(jsparser_buffer_get(js, -1) == 'b'); 119 | 120 | jsparser_delete(js); 121 | } 122 | 123 | /* Tests for jsparser_buffer_pop(). 
*/ 124 | void test_buffer_pop() 125 | { 126 | jsparser_ctx *js; 127 | js = jsparser_new(); 128 | 129 | ASSERT(jsparser_buffer_pop(js) == '\0'); 130 | 131 | jsparser_buffer_append_str(js, "012345"); 132 | ASSERT(jsparser_buffer_pop(js) == '5'); 133 | ASSERT(jsparser_buffer_pop(js) == '4'); 134 | ASSERT(jsparser_buffer_pop(js) == '3'); 135 | ASSERT(jsparser_buffer_pop(js) == '2'); 136 | ASSERT(jsparser_buffer_pop(js) == '1'); 137 | ASSERT(jsparser_buffer_pop(js) == '0'); 138 | ASSERT(jsparser_buffer_pop(js) == '\0'); 139 | 140 | jsparser_buffer_append_str(js, "ABCDEGHIJKLMN"); 141 | jsparser_buffer_append_str(js, "ABCDEGHIJKLMN"); 142 | jsparser_buffer_append_str(js, "012345"); 143 | ASSERT(jsparser_buffer_pop(js) == '5'); 144 | ASSERT(jsparser_buffer_pop(js) == '4'); 145 | ASSERT(jsparser_buffer_pop(js) == '3'); 146 | ASSERT(jsparser_buffer_pop(js) == '2'); 147 | ASSERT(jsparser_buffer_pop(js) == '1'); 148 | ASSERT(jsparser_buffer_pop(js) == '0'); 149 | ASSERT(jsparser_buffer_pop(js) == 'N'); 150 | 151 | jsparser_delete(js); 152 | } 153 | 154 | /* Tests for jsparser_buffer_last_identifier(). 
*/ 155 | void test_buffer_last_identifier() 156 | { 157 | jsparser_ctx *js; 158 | char buffer[256]; 159 | js = jsparser_new(); 160 | 161 | jsparser_buffer_append_str(js, "abc"); 162 | jsparser_buffer_last_identifier(js, buffer); 163 | ASSERT_STREQ("abc", buffer); 164 | 165 | jsparser_buffer_append_str(js, "abc"); 166 | jsparser_buffer_last_identifier(js, buffer); 167 | ASSERT_STREQ("abcabc", buffer); 168 | 169 | jsparser_buffer_append_str(js, " abc2"); 170 | jsparser_buffer_last_identifier(js, buffer); 171 | ASSERT_STREQ("abc2", buffer); 172 | 173 | jsparser_buffer_append_str(js, " abc3 "); 174 | jsparser_buffer_last_identifier(js, buffer); 175 | ASSERT_STREQ("abc3", buffer); 176 | 177 | jsparser_buffer_append_str(js, " abc4 "); 178 | jsparser_buffer_last_identifier(js, buffer); 179 | ASSERT_STREQ("abc4", buffer); 180 | 181 | jsparser_buffer_append_str(js, " abc5 "); 182 | jsparser_buffer_last_identifier(js, buffer); 183 | ASSERT_STREQ("abc5", buffer); 184 | 185 | jsparser_buffer_append_str(js, "test testtesttest"); 186 | jsparser_buffer_last_identifier(js, buffer); 187 | ASSERT_STREQ("testtesttest", buffer); 188 | 189 | jsparser_buffer_append_str(js, "01234567890123456789"); 190 | jsparser_buffer_last_identifier(js, buffer); 191 | ASSERT_STREQ("34567890123456789", buffer); 192 | 193 | jsparser_delete(js); 194 | } 195 | 196 | /* Tests for jsparser_buffer_slice(). 
*/ 197 | void test_buffer_slice() 198 | { 199 | jsparser_ctx *js; 200 | char buffer[256]; 201 | js = jsparser_new(); 202 | 203 | jsparser_buffer_append_str(js, "test"); 204 | jsparser_buffer_slice(js, buffer, -4, -1); 205 | ASSERT_STREQ("test", buffer); 206 | 207 | jsparser_buffer_slice(js, buffer, -10, -1); 208 | ASSERT_STREQ("test", buffer); 209 | 210 | jsparser_buffer_append_str(js, " test2"); 211 | jsparser_buffer_slice(js, buffer, -5, -1); 212 | ASSERT_STREQ("test2", buffer); 213 | 214 | jsparser_buffer_slice(js, buffer, -9, -1); 215 | ASSERT_STREQ("est test2", buffer); 216 | 217 | jsparser_buffer_slice(js, buffer, -10, -1); 218 | ASSERT_STREQ("test test2", buffer); 219 | 220 | jsparser_buffer_slice(js, buffer, -100, -1); 221 | ASSERT_STREQ("test test2", buffer); 222 | 223 | jsparser_buffer_append_str(js, " \n\r test3 \n\r "); 224 | jsparser_buffer_slice(js, buffer, -6, -1); 225 | ASSERT_STREQ("test3 ", buffer); 226 | 227 | jsparser_buffer_slice(js, buffer, -10, -1); 228 | ASSERT_STREQ("st2 test3 ", buffer); 229 | 230 | jsparser_buffer_slice(js, buffer, -12, -1); 231 | ASSERT_STREQ("test2 test3 ", buffer); 232 | 233 | jsparser_buffer_slice(js, buffer, -17, -1); 234 | ASSERT_STREQ("test test2 test3 ", buffer); 235 | 236 | jsparser_buffer_slice(js, buffer, -100, -1); 237 | ASSERT_STREQ("test test2 test3 ", buffer); 238 | 239 | jsparser_buffer_append_str(js, "0123456789"); 240 | jsparser_buffer_append_str(js, "0123456789"); 241 | jsparser_buffer_append_str(js, "0123456789"); 242 | jsparser_buffer_append_str(js, "0123456789"); 243 | jsparser_buffer_append_str(js, "0123456789"); 244 | 245 | jsparser_buffer_slice(js, buffer, -10, -1); 246 | ASSERT_STREQ("0123456789", buffer); 247 | 248 | jsparser_buffer_append_str(js, " "); 249 | jsparser_buffer_append_str(js, " "); 250 | jsparser_buffer_append_str(js, " "); 251 | jsparser_buffer_append_str(js, "0123456789"); 252 | jsparser_buffer_slice(js, buffer, -11, -1); 253 | ASSERT_STREQ(" 0123456789", buffer); 254 | 255 | 
jsparser_buffer_slice(js, buffer, -13, -1); 256 | ASSERT_STREQ("89 0123456789", buffer); 257 | 258 | 259 | jsparser_delete(js); 260 | } 261 | 262 | /* Tests for combination of calls. */ 263 | void test_buffer_misc() 264 | { 265 | jsparser_ctx *js; 266 | char buffer[256]; 267 | js = jsparser_new(); 268 | 269 | jsparser_buffer_append_str(js, "012345 test test"); 270 | jsparser_buffer_last_identifier(js, buffer); 271 | ASSERT_STREQ("test", buffer); 272 | 273 | ASSERT(jsparser_buffer_pop(js) == 't'); 274 | jsparser_buffer_last_identifier(js, buffer); 275 | ASSERT_STREQ("tes", buffer); 276 | 277 | jsparser_buffer_append_chr(js, 'X'); 278 | jsparser_buffer_last_identifier(js, buffer); 279 | ASSERT_STREQ("tesX", buffer); 280 | 281 | jsparser_buffer_set(js, -3, 'W'); 282 | jsparser_buffer_last_identifier(js, buffer); 283 | ASSERT_STREQ("tWsX", buffer); 284 | 285 | jsparser_buffer_append_chr(js, ' '); 286 | jsparser_buffer_last_identifier(js, buffer); 287 | ASSERT_STREQ("tWsX", buffer); 288 | 289 | jsparser_buffer_append_chr(js, '\n'); 290 | jsparser_buffer_last_identifier(js, buffer); 291 | ASSERT_STREQ("tWsX", buffer); 292 | 293 | jsparser_reset(js); 294 | 295 | ASSERT(jsparser_buffer_get(js, -1) == '\0'); 296 | 297 | jsparser_buffer_append_str(js, "0123456789"); 298 | jsparser_buffer_append_str(js, "0123456789"); 299 | ASSERT(jsparser_buffer_pop(js) == '9'); 300 | ASSERT(jsparser_buffer_pop(js) == '8'); 301 | ASSERT(jsparser_buffer_pop(js) == '7'); 302 | ASSERT(jsparser_buffer_pop(js) == '6'); 303 | ASSERT(jsparser_buffer_pop(js) == '5'); 304 | ASSERT(jsparser_buffer_pop(js) == '4'); 305 | ASSERT(jsparser_buffer_pop(js) == '3'); 306 | 307 | jsparser_delete(js); 308 | } 309 | 310 | void test_copy() 311 | { 312 | jsparser_ctx *js1; 313 | jsparser_ctx *js2; 314 | char buffer[256]; 315 | 316 | js1 = jsparser_new(); 317 | 318 | ASSERT(jsparser_state(js1) == JSPARSER_STATE_TEXT); 319 | 320 | jsparser_parse_str(js1, "var xpto='"); 321 | jsparser_buffer_slice(js1, buffer, -9, 
-1); 322 | 323 | /* We won't find the single quote in the buffer as we only record in the text 324 | * state. 325 | */ 326 | ASSERT_STREQ("var xpto=", buffer); 327 | ASSERT(jsparser_state(js1) == JSPARSER_STATE_Q); 328 | 329 | js2 = jsparser_duplicate(js1); 330 | ASSERT(jsparser_state(js2) == JSPARSER_STATE_Q); 331 | jsparser_parse_str(js1, "'; test()"); 332 | ASSERT(jsparser_state(js1) == JSPARSER_STATE_TEXT); 333 | ASSERT(jsparser_state(js2) == JSPARSER_STATE_Q); 334 | 335 | jsparser_buffer_slice(js1, buffer, -8, -1); 336 | ASSERT_STREQ("; test()", buffer); 337 | 338 | jsparser_buffer_slice(js2, buffer, -9, -1); 339 | ASSERT_STREQ("var xpto=", buffer); 340 | 341 | jsparser_delete(js1); 342 | jsparser_delete(js2); 343 | } 344 | 345 | int main(int argc, char **argv) 346 | { 347 | test_buffer_get(); 348 | test_buffer_set(); 349 | test_buffer_pop(); 350 | test_buffer_last_identifier(); 351 | test_buffer_slice(); 352 | test_buffer_misc(); 353 | test_copy(); 354 | printf("DONE.\n"); 355 | return 0; 356 | } 357 | -------------------------------------------------------------------------------- /src/tests/statemachine_test.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2008, Google Inc. 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. 
nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * --- 31 | * Author: Filipe Almeida 32 | */ 33 | 34 | #include "config.h" 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #include 41 | 42 | enum { 43 | SIMPLE_STATE_A, 44 | SIMPLE_STATE_B, 45 | SIMPLE_STATE_C, 46 | SIMPLE_STATE_D, 47 | SIMPLE_STATE_ERROR_TEST 48 | }; 49 | 50 | /* Include the test state machine definition. 
*/ 51 | #include 52 | 53 | /* Taken from google templates */ 54 | 55 | #define ASSERT(cond) do { \ 56 | if (!(cond)) { \ 57 | printf("%s: %d: ASSERT FAILED: %s\n", __FILE__, __LINE__, \ 58 | #cond); \ 59 | assert(cond); \ 60 | exit(1); \ 61 | } \ 62 | } while (0) 63 | 64 | #define ASSERT_STREQ(a, b) do { \ 65 | if (strcmp((a), (b))) { \ 66 | printf("%s: %d: ASSERT FAILED: '%s' != '%s'\n", __FILE__, __LINE__, \ 67 | (a), (b)); \ 68 | assert(!strcmp((a), (b))); \ 69 | exit(1); \ 70 | } \ 71 | } while (0) 72 | 73 | #define ASSERT_STRSTR(text, substr) do { \ 74 | if (!strstr((text), (substr))) { \ 75 | printf("%s: %d: ASSERT FAILED: '%s' not in '%s'\n", \ 76 | __FILE__, __LINE__, (substr), (text)); \ 77 | assert(strstr((text), (substr))); \ 78 | exit(1); \ 79 | } \ 80 | } while (0) 81 | 82 | 83 | #define NUM_STATES 10 84 | 85 | /* To simply the tests */ 86 | #define statemachine_parse_str(a,b) statemachine_parse(a, b, strlen(b)); 87 | 88 | /* Simple state machine test. */ 89 | int test_simple() 90 | { 91 | statemachine_definition *def; 92 | statemachine_ctx *sm; 93 | def = statemachine_definition_new(NUM_STATES); 94 | sm = statemachine_new(def, NULL); 95 | 96 | statemachine_definition_populate(def, simple_state_transitions, 97 | simple_states_internal_names); 98 | ASSERT(sm->current_state == SIMPLE_STATE_A); 99 | 100 | statemachine_parse(sm, "001", 3); 101 | ASSERT(sm->current_state == SIMPLE_STATE_B); 102 | 103 | statemachine_parse(sm, "001", 3); 104 | ASSERT(sm->current_state == SIMPLE_STATE_C); 105 | 106 | statemachine_parse(sm, "2", 1); 107 | ASSERT(sm->current_state == SIMPLE_STATE_B); 108 | 109 | statemachine_parse(sm, "11", 2); 110 | ASSERT(sm->current_state == SIMPLE_STATE_D); 111 | 112 | statemachine_delete(sm); 113 | return 0; 114 | } 115 | 116 | /* Tests error handling logic when we try to follow non existent transitions. 
*/ 117 | int test_error() 118 | { 119 | statemachine_definition *def; 120 | statemachine_ctx *sm; 121 | int res; 122 | 123 | def = statemachine_definition_new(NUM_STATES); 124 | sm = statemachine_new(def, NULL); 125 | 126 | statemachine_definition_populate(def, simple_state_transitions, 127 | NULL); 128 | ASSERT(sm->current_state == SIMPLE_STATE_A); 129 | 130 | ASSERT(statemachine_get_error_msg(sm) == NULL); 131 | 132 | res = statemachine_parse_str(sm, "00E"); 133 | ASSERT(sm->current_state == SIMPLE_STATE_ERROR_TEST); 134 | ASSERT(sm->current_state == res); 135 | 136 | res = statemachine_parse(sm, "3", 1); 137 | ASSERT(res == STATEMACHINE_ERROR); 138 | ASSERT_STREQ(statemachine_get_error_msg(sm), 139 | "Unexpected character '3'"); 140 | 141 | statemachine_reset(sm); 142 | ASSERT(statemachine_get_error_msg(sm) == NULL); 143 | 144 | statemachine_delete(sm); 145 | statemachine_definition_delete(def); /* not owned by sm; release before def is reassigned */ 146 | def = statemachine_definition_new(NUM_STATES); 147 | sm = statemachine_new(def, NULL); 148 | 149 | statemachine_definition_populate(def, simple_state_transitions, 150 | simple_states_internal_names); 151 | ASSERT(sm->current_state == SIMPLE_STATE_A); 152 | 153 | res = statemachine_parse_str(sm, "00E"); 154 | ASSERT(sm->current_state == SIMPLE_STATE_ERROR_TEST); 155 | ASSERT(sm->current_state == res); 156 | 157 | res = statemachine_parse(sm, "3", 1); 158 | ASSERT(res == STATEMACHINE_ERROR); 159 | ASSERT_STREQ(statemachine_get_error_msg(sm), 160 | "Unexpected character '3' in state 'error_test'"); 161 | 162 | statemachine_delete(sm); 163 | statemachine_definition_delete(def); /* release the second definition as well */ 164 | return 0; 165 | } 166 | 167 | /* Tests statemachine_start_record() and statemachine_stop_record() logic. 
*/ 168 | 169 | int test_record() 170 | { 171 | statemachine_definition *def; 172 | statemachine_ctx *sm; 173 | const char *actual; 174 | char expected[STATEMACHINE_RECORD_BUFFER_SIZE]; 175 | int res; 176 | int counter; 177 | def = statemachine_definition_new(NUM_STATES); 178 | sm = statemachine_new(def, NULL); 179 | 180 | statemachine_definition_populate(def, simple_state_transitions, 181 | simple_states_internal_names); 182 | 183 | ASSERT(sm->current_state == SIMPLE_STATE_A); 184 | 185 | res = statemachine_parse_str(sm, "001"); 186 | ASSERT(sm->current_state == SIMPLE_STATE_B); 187 | ASSERT(sm->current_state == res); 188 | 189 | statemachine_start_record(sm); 190 | statemachine_parse_str(sm, "121212"); 191 | ASSERT_STREQ("121212", statemachine_stop_record(sm)); 192 | 193 | statemachine_parse_str(sm, "not recorded"); 194 | 195 | statemachine_start_record(sm); 196 | statemachine_parse_str(sm, "121212000"); 197 | ASSERT_STREQ("121212000", statemachine_stop_record(sm)); 198 | 199 | statemachine_start_record(sm); 200 | statemachine_parse_str(sm, "1234567890"); 201 | ASSERT_STREQ("1234567890", statemachine_record_buffer(sm)); 202 | 203 | statemachine_parse_str(sm, "test"); 204 | ASSERT_STREQ("1234567890test", statemachine_stop_record(sm)); 205 | 206 | statemachine_start_record(sm); 207 | 208 | /* Record 1000 chars + strlen("beginning-") */ 209 | statemachine_parse_str(sm, "beginning-"); 210 | for (counter = 0; counter < 100; counter++) { 211 | statemachine_parse_str(sm, "1234567890"); 212 | } 213 | 214 | /* Make sure we preserved the start of the buffer. */ 215 | ASSERT_STRSTR(statemachine_record_buffer(sm), "beginning-"); 216 | 217 | /* And make sure the size is what we expect. 
*/ 218 | ASSERT(STATEMACHINE_RECORD_BUFFER_SIZE - 1 == 219 | strlen(statemachine_stop_record(sm))); 220 | 221 | statemachine_start_record(sm); 222 | for (counter = 0; counter < 100; counter++) { 223 | statemachine_parse_str(sm, "0123456789ABCDEF"); 224 | } 225 | 226 | expected[0] = '\0'; 227 | /* Fill the buffer with a pattern 255 chars long (16 * 15 + 15). */ 228 | for (counter = 0; counter < 15; counter++) { 229 | strcat(expected, "0123456789ABCDEF"); 230 | } 231 | strcat(expected, "0123456789ABCDE"); 232 | actual = statemachine_stop_record(sm); 233 | ASSERT_STREQ(expected, actual); 234 | 235 | statemachine_delete(sm); 236 | return 0; 237 | } 238 | 239 | /* Test with characters outside of the ascii range */ 240 | int test_no_ascii() 241 | { 242 | statemachine_definition *def; 243 | statemachine_ctx *sm; 244 | def = statemachine_definition_new(NUM_STATES); 245 | sm = statemachine_new(def, NULL); 246 | 247 | statemachine_definition_populate(def, simple_state_transitions, 248 | simple_states_internal_names); 249 | 250 | ASSERT(sm->current_state == SIMPLE_STATE_A); 251 | 252 | statemachine_parse(sm, "\xf0\xf0\xf1", 3); 253 | ASSERT(sm->current_state == SIMPLE_STATE_B); 254 | 255 | statemachine_parse(sm, "\xf0\xf0\xf1", 3); 256 | ASSERT(sm->current_state == SIMPLE_STATE_C); 257 | 258 | statemachine_parse(sm, "\xf2", 1); 259 | ASSERT(sm->current_state == SIMPLE_STATE_B); 260 | 261 | statemachine_parse(sm, "\xf1\xf1", 2); 262 | ASSERT(sm->current_state == SIMPLE_STATE_D); 263 | 264 | statemachine_delete(sm); 265 | return 0; 266 | 267 | } 268 | 269 | int test_copy() 270 | { 271 | statemachine_definition *def; 272 | statemachine_ctx *sm1; 273 | statemachine_ctx *sm2; 274 | statemachine_ctx *sm3; 275 | def = statemachine_definition_new(NUM_STATES); 276 | sm1 = statemachine_new(def, NULL); 277 | 278 | statemachine_definition_populate(def, simple_state_transitions, 279 | simple_states_internal_names); 280 | 281 | ASSERT(sm1->current_state == SIMPLE_STATE_A); 282 | 283 | sm2 
= statemachine_duplicate(sm1, def, NULL); 284 | ASSERT(sm2->current_state == SIMPLE_STATE_A); 285 | 286 | statemachine_parse(sm1, "001", 3); 287 | ASSERT(sm1->current_state == SIMPLE_STATE_B); 288 | ASSERT(sm2->current_state == SIMPLE_STATE_A); 289 | 290 | 291 | statemachine_parse(sm1, "001", 3); 292 | statemachine_parse(sm2, "001", 3); 293 | ASSERT(sm1->current_state == SIMPLE_STATE_C); 294 | ASSERT(sm2->current_state == SIMPLE_STATE_B); 295 | 296 | sm3 = statemachine_duplicate(sm2, def, NULL); 297 | ASSERT(sm3->current_state == SIMPLE_STATE_B); 298 | 299 | statemachine_parse(sm1, "001", 3); 300 | statemachine_parse(sm2, "001", 3); 301 | statemachine_parse(sm3, "002", 3); 302 | ASSERT(sm1->current_state == SIMPLE_STATE_D); 303 | ASSERT(sm2->current_state == SIMPLE_STATE_C); 304 | ASSERT(sm3->current_state == SIMPLE_STATE_A); 305 | 306 | statemachine_delete(sm1); 307 | statemachine_delete(sm2); 308 | statemachine_delete(sm3); 309 | 310 | return 0; 311 | } 312 | 313 | /* Tests statemachine_encode_char(). 
/* Tests statemachine_encode_char().
 *
 * First checks the encoded form of a set of characters using a buffer that
 * is large enough, then checks that the encoding of '\xFF' is truncated to
 * fit when the reported buffer size grows from 1 to 5.
 */
int test_encode_char()
{
  char encoded_char[10];
  int i;
  int size;

  struct {
    char chr;
    const char *result;
  } encode_map[] = {
    { 'x', "x" },
    { '0', "0" },
    { '\n', "\\n" },
    { '\r', "\\r" },
    { '\t', "\\t" },
    { '\\', "\\\\" },
    { '\0', "\\x00" },
    { '\xF0', "\\xf0" },
    { '\0', NULL}  /* Sentinel: the walk stops at result == NULL. */
  };

  /* Expected output for '\xFF' when the buffer size is 1, 2, 3, 4 and 5. */
  const char *truncated[] = { "", "\\", "\\x", "\\xf", "\\xff" };

  i = 0;
  while (encode_map[i].result) {
    statemachine_encode_char(encode_map[i].chr, encoded_char,
                             sizeof(encoded_char) / sizeof(*encoded_char));
    ASSERT_STREQ(encoded_char, encode_map[i].result);
    i++;
  }

  for (size = 1; size <= 5; size++) {
    statemachine_encode_char('\xFF', encoded_char, size);
    ASSERT_STREQ(encoded_char, truncated[size - 1]);
  }

  return 0;
}

/* Runs every test of this file in order. A failing ASSERT exits the process,
 * so reaching the final printf means that all of them passed.
 */
int main(int argc, char **argv)
{
  test_simple();
  test_error();
  test_record();
  test_no_ascii();
  test_copy();
  test_encode_char();
  printf("DONE.\n");
  return 0;
}
5 | 6 | name = 'simple' 7 | 8 | comment = 'Simple state machine' 9 | 10 | condition('1', '1\xf1') 11 | condition('2', '2\xf2') 12 | condition('E', 'E') 13 | condition('default', '[:default:]') 14 | 15 | state(name = 'A', 16 | external = 'A', 17 | transitions = [ 18 | ['1', 'B'], 19 | ['E', 'error_test'], 20 | ['default', 'A'], 21 | ]) 22 | 23 | state(name = 'B', 24 | external = 'B', 25 | transitions = [ 26 | ['1', 'C'], 27 | ['2', 'A'], 28 | ['default', 'B'], 29 | ]) 30 | 31 | state(name = 'C', 32 | external = 'C', 33 | transitions = [ 34 | ['1', 'D'], 35 | ['2', 'B'], 36 | ['default', 'C'], 37 | ]) 38 | 39 | state(name = 'D', 40 | external = 'D', 41 | transitions = [ 42 | ['2', 'C'], 43 | ['default', 'D'], 44 | ]) 45 | 46 | state(name = 'error_test', 47 | external = 'error_test', 48 | transitions = [ 49 | ['2', 'A'], 50 | ]) 51 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/cdata.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | <?state state=text, tag=title ?> 9 | <h1> 10 | <?state state=text, tag=title ?> 11 | </h1> 12 | <!-- 13 | <?state state=text, tag=title ?> 14 | 15 | 16 | --> 17 | 18 | 19 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 49 | 50 | --> 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 70 | 71 | 72 | 73 | 74 | 79 | 80 | 81 | 82 | 83 | 90 | 91 | 92 | 93 | 95 | 100 | 101 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/comments.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | "http://www.w3.org/TR/html4/strict.dtd"> 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | test test --> 39 | 40 | ' 41 | 42 | 43 
| 44 | 46 | 47 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/context.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | '"> 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | blah= 68 | '"> 74 | 75 | xpto 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/javascript_attribute.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | test 12 | 13 | 14 | 15 | test 16 | 17 | 18 | 19 | test 20 | 21 | 22 | test 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/javascript_block.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 19 | 20 | 21 | 22 | 26 | 27 | //--> 28 | 29 | 30 | 31 | 36 | 37 | --> 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/javascript_regexp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 163 | 164 | 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/position.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/tests/testdata/regtest/reset.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | "> 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | <123 41 | ?> 42 | 43 | ?q=tt>test 44 | 45 | 46 | test 47 | test 48 | test 49 | test 50 | test 51 | test 53 | 54 | 55 | 56 | color> 57 | 58 | 
59 | color> 60 | 61 | 63 | 64 | alt> 66 | link 67 | 68 | 69 | > 70 | 71 | 72 | 74 | 75 | 77 | 78 | 79 |
80 | 81 |
82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 |
92 | 93 |
94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | <?state attr_type=regular?> 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 116 | 117 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | > 146 |
>
147 | < br >
148 | << > 149 | < alt=""> 150 | > 151 | <> 152 | 153 | 154 | 156 | <0 > 157 | <1 > 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /src/tests/testdata/sample_fsm.config: -------------------------------------------------------------------------------- 1 | # Copyright 2008 Google Inc. All Rights Reserved. 2 | # Author: falmeida@google.com (Filipe Almeida) 3 | 4 | name = 'stringparser' 5 | 6 | comment = 'Parses C style strings' 7 | 8 | condition('dq', '\\"'), 9 | condition('backslash', '\\\\'), 10 | condition('default', '[:default:]') 11 | 12 | # Outside a string 13 | state(name = 'text', 14 | external = 'text', 15 | transitions = [ 16 | ['dq', 'string'], 17 | ['default', 'text'] 18 | ]) 19 | 20 | # String literal 21 | state(name = 'string', 22 | external = 'string', 23 | transitions = [ 24 | ['backslash', 'string_escape'], 25 | ['dq', 'text'], 26 | ['default', 'string'] 27 | ]) 28 | 29 | # Escaped character in a string literal. Ignore the next character 30 | state(name = 'string_escape', 31 | external = 'string', 32 | transitions = [ 33 | ['default', 'string'] 34 | ]) 35 | 36 | -------------------------------------------------------------------------------- /src/tools/fsm_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2008, Google Inc. 4 | # All rights reserved. 
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ---
# Author: Filipe Almeida
#
# Create a state machine object based on a definition file.
#

__author__ = 'falmeida@google.com (Filipe Almeida)'


class OrderedDict(object):
    """Minimal dictionary that remembers the order in which keys were added.

    Only the operations this module needs are implemented: item get, set and
    delete, keys(), len(), membership tests and iteration over the keys.
    """

    # Define the minimum functionality we need for our application.
    # Easier would be to subclass from UserDict.DictMixin, and only
    # define __getitem__, __setitem__, __delitem__, and keys, but that's
    # not as portable.  We don't need to define much more, so we just do.

    def __init__(self):
        self._dict = {}  # key -> value
        self._keys = []  # keys, in the order they were first assigned

    def __getitem__(self, key):
        return self._dict[key]

    def __setitem__(self, key, value):
        # Assigning to an existing key keeps its original position.
        if key not in self._dict:
            self._keys.append(key)
        self._dict[key] = value

    def __delitem__(self, key):
        self._keys.remove(key)
        del self._dict[key]

    def keys(self):
        """Return a new list with the keys in insertion order."""
        return list(self._keys)

    # Below are all we have to define in addition to what DictMixin would need
    def __len__(self):
        return len(self._keys)

    def __contains__(self, key):
        # The previous implementation called self.has_key(), which this class
        # never defined, so every `key in ordered_dict` test raised
        # AttributeError.
        return key in self._dict

    def __iter__(self):
        # It's not as portable -- though it would be more space-efficient --
        # to do: for k in self.keys(): yield k
        return iter(self.keys())


class State(object):
    """Contains information about a specific state."""

    name = None  # Internal name of the state.
    external_name = None  # Name under which the state is exposed.

    def __init__(self):
        # Per-instance list of the Transition objects leaving this state; a
        # class-level list would be shared by every State.
        self.transitions = []


class Transition(object):
    """Contains information about a specific transition."""

    def __init__(self, condition, source, destination):
        self.condition = condition  # Name of the condition that triggers it.
        self.source = source  # Name of the state the transition leaves.
        self.destination = destination  # Name of the state it leads to.


class FSMConfig(object):
    """Container for the statemachine definition."""

    name = None  # Name of the state machine, set by Load().
    comment = None  # Optional comment of the definition, set by Load().

    def __init__(self):
        # All containers are created per instance so that two FSMConfig
        # objects never share (and corrupt) each other's definition.

        # Namespace in which the definition file is executed.
        self.sm = {}
        # List of all transitions, in the order they were declared.
        self.transitions = []
        # Mapping between the condition name and the bracket expression.
        self.conditions = {}
        # Ordered dictionary of states, keyed by state name.
        self.states = OrderedDict()

    def AddState(self, **dic):
        """Called from the definition file with the description of the state.

        Receives a dictionary and populates internal structures based on it.
        The dictionary is in the following format:

        {'name': state_name,
         'external': exposed state name,
         'transitions': [
             [condition, destination_state],
             [condition, destination_state]
           ]
        }

        Raises:
          KeyError: if one of the three entries is missing.
        """
        state = State()
        state.name = dic['name']
        state.external_name = dic['external']
        state.transitions = [
            Transition(condition, state.name, destination)
            for (condition, destination) in dic['transitions']]

        self.transitions.extend(state.transitions)
        self.states[state.name] = state

    def AddCondition(self, name, expression):
        """Called from the definition file with the definition of a condition.

        Receives the name of the condition and its expression.
        """
        self.conditions[name] = expression

    def Load(self, filename):
        """Load the state machine definition file.

        The definition file is executed as python code, so only trusted
        files must be loaded.

        In the definition file, which is based on the python syntax, the
        following variables and functions are defined.

          name: Name of the state machine. Must be alphanumeric.
          comment: Comment line on the generated file. Optional.
          condition(): A mapping between condition names and bracket
                       expressions.
          state(): Defines a state and its transitions. It accepts the
                   following attributes:
            name: name of the state
            external: exported name of the state. The exported name can be
                      used multiple times in order to create a super state.
            transitions: List of pairs containing the condition for the
                         transition and the destination state. Transitions
                         are ordered so if a default rule is used, it must be
                         the last one in the list.

        Example:

          name = 'c comment parser'

          condition('/', '/')
          condition('*', '*')
          condition('linefeed', '\\n')
          condition('default', '[:default:]')

          state(name = 'text',
                external = 'comment',
                transitions = [
                  [ '/', 'comment_start' ],
                  [ 'default', 'text' ]
                ])

          state(name = 'comment_start',
                external = 'comment',
                transitions = [
                  [ '/', 'comment_line' ],
                  [ '*', 'comment_multiline' ],
                  [ 'default', 'text' ]
                ])

          state(name = 'comment_line',
                external = 'comment',
                transitions = [
                  [ 'linefeed', 'text' ],
                  [ 'default', 'comment_line' ]
                ])

          state(name = 'comment_multiline',
                external = 'comment',
                transitions = [
                  [ '*', 'comment_multiline_close' ],
                  [ 'default', 'comment_multiline' ]
                ])

          state(name = 'comment_multiline_close',
                external = 'comment',
                transitions = [
                  [ '/', 'text' ],
                  [ 'default', 'comment_multiline' ]
                ])

        Args:
          filename: path of the definition file.

        Raises:
          KeyError: if the definition file does not set `name`.
          ValueError: if the name is not purely alphanumeric.
        """
        self.sm['state'] = self.AddState
        self.sm['condition'] = self.AddCondition

        # exec(compile(...)) is the spelling of execfile() that works on both
        # Python 2 and Python 3. The trailing newline keeps old Python 2
        # compilers happy when the file does not end with one.
        with open(filename) as config_file:
            source = config_file.read()
        exec(compile(source + '\n', filename, 'exec'), self.sm)

        self.name = self.sm['name']
        if not self.name.isalnum():
            raise ValueError('State machine name must consist of only '
                             'alphanumeric characters.')
        # The comment is optional: the generator falls back to a default
        # header when it is missing.
        self.comment = self.sm.get('comment')
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following disclaimer 14 | # in the documentation and/or other materials provided with the 15 | # distribution. 16 | # * Neither the name of Google Inc. nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | # --- 33 | # Author: Filipe Almeida 34 | # 35 | # Generate a C include file from a finite state machine definition. 36 | # 37 | # Right now the form is the one expected by htmlparser.cc so this file is pretty 38 | # tightly coupled with htmlparser.cc. 
__author__ = 'falmeida@google.com (Filipe Almeida)'

import sys

from fsm_config import FSMConfig


class FSMGenerateAbstract(object):
    """Base class of the generators that render a state machine definition."""

    def __init__(self, config):
        """Stores the configuration object the generator works on.

        Args:
          config: object holding the state machine definition. The generators
                  read its name, comment, states, transitions and conditions
                  attributes.
        """
        self._config = config

    def Generate(self):
        """Returns the generated FSM description for the specified language.

        Raises a TypeError, because abstract methods can not be called.

        Raises:
          TypeError
        """
        # The original code read self._class and self._function, which do not
        # exist, so callers got an AttributeError instead of the documented
        # TypeError.
        raise TypeError('Abstract method %s.%s called' %
                        (self.__class__.__name__, 'Generate'))


class FSMGenerateC(FSMGenerateAbstract):
    """Generate the C definition from a statemachine configuration object."""

    TABSTOP_ = 2

    def _Prefix(self):
        """Return a c declaration prefix."""
        return self._config.name.lower() + '_'

    def _StateInternalC(self, st):
        """Return the internal name of the state."""
        return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper())

    def _StateExternalC(self, st):
        """Return the external name of the state."""
        return '%sSTATE_%s' % (self._Prefix().upper(), st.upper())

    def _MakeTuple(self, data):
        """Converts data to a string representation of a C tuple."""
        return '{ %s }' % ', '.join(data)

    def _CharLabel(self, code):
        """Return the quoted, ascii only, representation of a character.

        Used for the comments of the transition table. On Python 2
        repr(chr(code)) is already ascii only; on Python 3 it can contain
        printable non ascii characters, which are escaped here so that the
        generated file is the same with both interpreters.

        Args:
          code: integer value of the character.

        Returns:
          String such as "'a'" or "'\\xf1'".
        """
        label = repr(chr(code))
        if not isinstance(label, bytes):  # Only true on Python 3.
            label = label.encode('ascii', 'backslashreplace').decode('ascii')
        return label

    def _CreateHeader(self):
        """Return the include file header comment."""
        out = []

        if self._config.comment:
            out.append('/* ' + self._config.comment)
        else:
            out.append('/* State machine definition for ' + self._config.name)
        out.append(' * Auto generated by generate_fsm.py. Please do not edit.')
        out.append(' */')

        return '\n'.join(out)

    def _ListToIndentedString(self, list):
        """Return the elements indented, one per line, separated by commas."""
        indented_list = [' ' + e for e in list]
        return ',\n'.join(indented_list)

    def _CreateEnum(self, name, data):
        """Return a c enum definition.

        Args:
          name: name of the enum.
          data: list with the names of the enumerators.
        """
        return 'enum %s {\n%s\n};\n' % (name,
                                        self._ListToIndentedString(data))

    def _CreateStructList(self, name, type, data):
        """Return a c flat list.

        Generic function to render a list in c in the form of a static const
        array.

        Args:
          name: name of the structure.
          type: type of the struct.
          data: contents of the struct as a list of elements

        Returns:
          String with the generated list.
        """
        return "static const %s %s[] = {\n%s\n};\n" % (
            type,
            name,
            self._ListToIndentedString(data))

    def _CreateStatesEnum(self):
        """Return the internal states enum.

        Returns:
          String containing a C enumeration of all the valid states.
        """
        names = [self._StateInternalC(state) for state in self._config.states]
        return self._CreateEnum(self._Prefix() + 'state_internal_enum', names)

    def _CreateStatesExternal(self):
        """Return a struct with a mapping from internal to external states."""
        states = self._config.states
        names = [self._StateExternalC(states[state_name].external_name)
                 for state_name in states]

        return self._CreateStructList(self._Prefix() + 'states_external',
                                      'int',
                                      names)

    def _CreateStatesInternalNames(self):
        """Return a struct mapping internal states to strings."""
        out = ['"' + state_name + '"' for state_name in self._config.states]

        return self._CreateStructList(self._Prefix() + 'states_internal_names',
                                      'char *',
                                      out)

    def _CreateNumStates(self):
        """Return a macro defining the number of states, plus one."""
        return "#define %s_NUM_STATES %s" % (self._config.name.upper(),
                                             str(len(self._config.states) + 1))

    def _ExpandBracketExpression(self, expression):
        """Expand ranges in a regexp bracket expression.

        Returns a string with the ranges in a bracket expression expanded.

        The bracket expression is similar to grep(1) or regular expression
        bracket expressions but it does not support the negation (^) modifier
        or named character classes like [:alpha:] or [:alnum:].

        The special character class [:default:] expands to the characters
        with the codes 0 to 254.

        For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'. A
        '-' with nothing after it is taken literally.

        Args:
          expression: A regexp bracket expression. Ie: 'A-Z0-9'.

        Returns:
          A string with the ranges in the bracket expression expanded.
        """

        def ExpandRange(start, end):
            """Return the list of characters between start and end, inclusive.

            Args:
              start: first character of the sequence.
              end: last character of the sequence.
            """
            return [chr(c) for c in range(ord(start), ord(end) + 1)]

        def ListNext(input_list):
            """Pop the first element of a list.

            Args:
              input_list: python list object.

            Returns:
              First element of the list or None if the list is empty.
            """
            if input_list:
                return input_list.pop(0)
            return None

        # Special case for the character class [:default:]
        # NOTE(review): character 255 is left out here and the transition
        # rows below are 255 entries wide as well -- confirm against the C
        # code that indexes the generated table before widening either.
        if expression == '[:default:]':
            return ''.join([chr(c) for c in range(0, 255)])

        out = []  # List containing the output
        chars = list(expression)  # Characters still to be processed.

        current = ListNext(chars)
        while current:
            following = ListNext(chars)
            if following == '-':
                range_end = ListNext(chars)
                if range_end:
                    out.extend(ExpandRange(current, range_end))
                else:
                    # Trailing '-': keep both characters literally.
                    out.append(current)
                    out.append('-')
                current = ListNext(chars)
            else:
                out.append(current)
                current = following

        return ''.join(out)

    def _CreateTransitionTable(self):
        """Return the state transition list.

        Returns a set of C structures that define the transition table for
        the state machine. This structure is a list of lists of ints
        (int **). The outer list indexes the source state and the inner list
        contains the destination state for each of the possible input
        characters:

        const int * const* transitions[source][input] == destination.

        The conditions are mapped from the conditions variable.

        Returns:
          String containing the generated transition table in a C struct.

        Raises:
          KeyError: if a transition uses a condition that was not defined.
        """
        out = []  # output list
        default_state = 'STATEMACHINE_ERROR'
        state_table = {}

        for state in self._config.states:
            state_table[state] = [default_state] * 255

        # We process the transitions in reverse order while updating the
        # table, so that for each character the first matching transition of
        # a state is the one that ends up in the table.
        for transition in reversed(self._config.transitions):
            condition = self._config.conditions[transition.condition]
            destination = self._StateInternalC(transition.destination)
            row = state_table[transition.source]
            for c in self._ExpandBracketExpression(condition):
                row[ord(c)] = destination

        # Create the inner lists which map input characters to destination
        # states.
        for state in self._config.states:
            transition_row = []
            for c in range(0, 255):
                transition_row.append(' /* %06s */ %s' %
                                      (self._CharLabel(c),
                                       state_table[state][c]))

            out.append(self._CreateStructList('%stransition_row_%s' %
                                              (self._Prefix(), state),
                                              'int',
                                              transition_row))
            out.append('\n')

        # Create the outer list, which maps source states to the inner lists.
        out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() +
                                                   'state_transitions'))

        row_list = [' %stransition_row_%s' %
                    (self._Prefix(), row) for row in self._config.states]
        out.append(',\n'.join(row_list))
        out.append('\n};\n')

        return ''.join(out)

    def Generate(self):
        """Prints the generated C include file for the statemachine to stdout."""
        # print() with a single parenthesized string behaves the same on
        # Python 2 and Python 3.
        print('\n'.join((self._CreateHeader(),
                         self._CreateNumStates(),
                         self._CreateStatesEnum(),
                         self._CreateStatesExternal(),
                         self._CreateStatesInternalNames(),
                         self._CreateTransitionTable())))


def main():
    """Generate the C include file for the config file named in sys.argv[1]."""
    if len(sys.argv) != 2:
        print("usage: generate_fsm.py config_file")
        sys.exit(1)

    config = FSMConfig()
    config.Load(sys.argv[1])

    gen = FSMGenerateC(config)
    gen.Generate()


if __name__ == "__main__":
    main()