├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── experiments.py ├── grammars ├── antlr │ ├── CSV.g4 │ ├── JSON.g4 │ ├── Markdown.g4 │ └── url.g4 └── tribble │ ├── csv.scala │ ├── json.scala │ ├── markdown.scala │ └── url.scala ├── luigi.cfg └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | tools/grammarinator 2 | tools/luigi 3 | 4 | # Created by https://www.gitignore.io/api/vim,python,jetbrains+iml,jetbrains+all 5 | # Edit at https://www.gitignore.io/?templates=vim,python,jetbrains+iml,jetbrains+all 6 | 7 | ### JetBrains+all ### 8 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 9 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 10 | 11 | # User-specific stuff 12 | .idea/**/workspace.xml 13 | .idea/**/tasks.xml 14 | .idea/**/usage.statistics.xml 15 | .idea/**/dictionaries 16 | .idea/**/shelf 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 
38 | # .idea/modules.xml 39 | # .idea/*.iml 40 | # .idea/modules 41 | # *.iml 42 | # *.ipr 43 | 44 | # CMake 45 | cmake-build-*/ 46 | 47 | # Mongo Explorer plugin 48 | .idea/**/mongoSettings.xml 49 | 50 | # File-based project format 51 | *.iws 52 | 53 | # IntelliJ 54 | out/ 55 | 56 | # mpeltonen/sbt-idea plugin 57 | .idea_modules/ 58 | 59 | # JIRA plugin 60 | atlassian-ide-plugin.xml 61 | 62 | # Cursive Clojure plugin 63 | .idea/replstate.xml 64 | 65 | # Crashlytics plugin (for Android Studio and IntelliJ) 66 | com_crashlytics_export_strings.xml 67 | crashlytics.properties 68 | crashlytics-build.properties 69 | fabric.properties 70 | 71 | # Editor-based Rest Client 72 | .idea/httpRequests 73 | 74 | # Android studio 3.1+ serialized cache file 75 | .idea/caches/build_file_checksums.ser 76 | 77 | ### JetBrains+all Patch ### 78 | # Ignores the whole .idea folder and all .iml files 79 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 80 | 81 | .idea/ 82 | 83 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 84 | 85 | *.iml 86 | modules.xml 87 | .idea/misc.xml 88 | *.ipr 89 | 90 | # Sonarlint plugin 91 | .idea/sonarlint 92 | 93 | ### JetBrains+iml ### 94 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 95 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 96 | 97 | # User-specific stuff 98 | 99 | # Generated files 100 | 101 | # Sensitive or high-churn files 102 | 103 | # Gradle 104 | 105 | # Gradle and Maven with auto-import 106 | # When using Gradle or Maven with auto-import, you should exclude module files, 107 | # since they will be recreated, and may cause churn. Uncomment if using 108 | # auto-import. 
109 | # .idea/modules.xml 110 | # .idea/*.iml 111 | # .idea/modules 112 | # *.iml 113 | # *.ipr 114 | 115 | # CMake 116 | 117 | # Mongo Explorer plugin 118 | 119 | # File-based project format 120 | 121 | # IntelliJ 122 | 123 | # mpeltonen/sbt-idea plugin 124 | 125 | # JIRA plugin 126 | 127 | # Cursive Clojure plugin 128 | 129 | # Crashlytics plugin (for Android Studio and IntelliJ) 130 | 131 | # Editor-based Rest Client 132 | 133 | # Android studio 3.1+ serialized cache file 134 | 135 | ### JetBrains+iml Patch ### 136 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 137 | 138 | 139 | ### Python ### 140 | # Byte-compiled / optimized / DLL files 141 | __pycache__/ 142 | *.py[cod] 143 | *$py.class 144 | 145 | # C extensions 146 | *.so 147 | 148 | # Distribution / packaging 149 | .Python 150 | build/ 151 | develop-eggs/ 152 | dist/ 153 | downloads/ 154 | eggs/ 155 | .eggs/ 156 | lib/ 157 | lib64/ 158 | parts/ 159 | sdist/ 160 | var/ 161 | wheels/ 162 | pip-wheel-metadata/ 163 | share/python-wheels/ 164 | *.egg-info/ 165 | .installed.cfg 166 | *.egg 167 | MANIFEST 168 | 169 | # PyInstaller 170 | # Usually these files are written by a python script from a template 171 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
172 | *.manifest 173 | *.spec 174 | 175 | # Installer logs 176 | pip-log.txt 177 | pip-delete-this-directory.txt 178 | 179 | # Unit test / coverage reports 180 | htmlcov/ 181 | .tox/ 182 | .nox/ 183 | .coverage 184 | .coverage.* 185 | .cache 186 | nosetests.xml 187 | coverage.xml 188 | *.cover 189 | .hypothesis/ 190 | .pytest_cache/ 191 | 192 | # Translations 193 | *.mo 194 | *.pot 195 | 196 | # Django stuff: 197 | *.log 198 | local_settings.py 199 | db.sqlite3 200 | db.sqlite3-journal 201 | 202 | # Flask stuff: 203 | instance/ 204 | .webassets-cache 205 | 206 | # Scrapy stuff: 207 | .scrapy 208 | 209 | # Sphinx documentation 210 | docs/_build/ 211 | 212 | # PyBuilder 213 | target/ 214 | 215 | # Jupyter Notebook 216 | .ipynb_checkpoints 217 | 218 | # IPython 219 | profile_default/ 220 | ipython_config.py 221 | 222 | # pyenv 223 | .python-version 224 | 225 | # pipenv 226 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 227 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 228 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 229 | # install all needed dependencies. 
230 | #Pipfile.lock 231 | 232 | # celery beat schedule file 233 | celerybeat-schedule 234 | 235 | # SageMath parsed files 236 | *.sage.py 237 | 238 | # Environments 239 | .env 240 | .venv 241 | env/ 242 | venv/ 243 | ENV/ 244 | env.bak/ 245 | venv.bak/ 246 | 247 | # Spyder project settings 248 | .spyderproject 249 | .spyproject 250 | 251 | # Rope project settings 252 | .ropeproject 253 | 254 | # mkdocs documentation 255 | /site 256 | 257 | # mypy 258 | .mypy_cache/ 259 | .dmypy.json 260 | dmypy.json 261 | 262 | # Pyre type checker 263 | .pyre/ 264 | 265 | ### Vim ### 266 | # Swap 267 | [._]*.s[a-v][a-z] 268 | [._]*.sw[a-p] 269 | [._]s[a-rt-v][a-z] 270 | [._]ss[a-gi-z] 271 | [._]sw[a-p] 272 | 273 | # Session 274 | Session.vim 275 | Sessionx.vim 276 | 277 | # Temporary 278 | .netrwhist 279 | *~ 280 | # Auto-generated tag files 281 | tags 282 | # Persistent undo 283 | [._]*.un~ 284 | 285 | # End of https://www.gitignore.io/api/vim,python,jetbrains+iml,jetbrains+all 286 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tools/tribble"] 2 | path = tools/tribble 3 | url = https://github.com/havrikov/tribble.git 4 | [submodule "tools/subjects"] 5 | path = tools/subjects 6 | url = https://github.com/havrikov/text-processing-java-projects.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Nikolas Havrikov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # K-Path Coverage Evaluation 2 | 3 | This is a replication package for the paper "Systematically Covering Input Structure" published at ASE 2019 ([preprint](https://havrikov.github.io/publications/ase19-preprint.pdf)). 4 | 5 | You can download the entire dataset as reported in the paper from [Google Drive](https://drive.google.com/open?id=1S_F5EWB0B5v8cxkTsXArvG0ViPs7wryS) (574MB download, 16GB uncompressed). 6 | 7 | You can also run the experiments yourself: 8 | 9 | ## Prerequisites 10 | You need Python `>= 3.6` and Java `>= 1.8`. 
11 | 12 | ## Building 13 | 14 | Clone this repo _with submodules_ (namely the input generator [tribble](https://github.com/havrikov/tribble) and the [subjects](https://github.com/havrikov/text-processing-java-projects)): 15 | 16 | ```bash 17 | git clone --recurse-submodules https://github.com/havrikov/covering-input-structure.git 18 | ``` 19 | 20 | The rest of these instructions assume you are in the cloned directory: 21 | 22 | ```bash 23 | cd covering-input-structure 24 | ``` 25 | 26 | Install python dependencies: 27 | 28 | ```bash 29 | pip3 install -r requirements.txt 30 | ``` 31 | 32 | ## Configuring 33 | 34 | Edit `luigi.cfg` to set the amount of RAM available and the experiment directory. 35 | 36 | ## Running 37 | 38 | Start the luigi daemon (installed as part of the prerequisites). 39 | 40 | ```bash 41 | luigid --background --pidfile tools/luigi/pid --logdir tools/luigi/logs --state-path tools/luigi/state 42 | ``` 43 | 44 | Run the experiments. (Substitute the number of CPUs below) 45 | 46 | ```bash 47 | python3 ./experiments.py --k-params "[1,2,3,5]" --runs 50 --workers <number-of-cpus> 48 | ``` 49 | 50 | Navigate to http://localhost:8082 to monitor the progress. 51 | 52 | > **NOTE** The process will take _a long time_ for a high number of runs, so preferably launch it on some sort of compute server. 53 | 54 | ## Inspecting Results 55 | 56 | After the experiment pipeline has finished, the results will be available as `report-50-runs.html` in your experiment directory. 
# --------------------------------------------------------------------------------
# /experiments.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Luigi pipeline for the k-path coverage experiments.

The tasks below build the input generators and test subjects, generate inputs
with tribble (k-path) and grammarinator, run every subject driver on the
generated inputs, post-process the coverage/exception reports and finally
render a notebook report.  The entry point is ``RenderNotebook``.
"""
import abc
import csv
import json
import logging
import os
import re
import subprocess
import sys
from collections import defaultdict, Counter
from functools import reduce
from pathlib import Path
from typing import List, Dict

import luigi
import nbformat as nbf
import pandas as pd
from nbconvert import HTMLExporter
from nbconvert.preprocessors import ExecutePreprocessor


class ExperimentConfig(luigi.Config):
    """Global experiment configuration (values come from the luigi configuration)."""

    experiment_dir: str = luigi.Parameter(description="The path to where all the experiments happen. (like ../k-path-experiments)")
    tree_depth: int = luigi.IntParameter(description="The maximum tree generation depth")
    # Per format: file suffix, the two grammar files, and a mapping of
    # driver name -> name of the original bytecode artifact.
    drivers: Dict[str, Dict] = luigi.DictParameter(description="Test subject configuration", default={
        "json": {
            "suffix": ".json",
            "tribble_grammar": "grammars/tribble/json.scala",
            "antlr_grammar": "grammars/antlr/JSON.g4",
            "drivers": {
                "argo": "argo-5.4",
                "fastjson": "fastjson-1.2.51",
                "genson": "genson-1.4",
                "gson": "gson-2.8.5",
                "jackson-databind": "jackson-databind-2.9.8",
                "json-flattener": "json-flattener-0.6.0",
                "json-java": "json-20180813",
                "json-simple": "json-simple-1.1.1",
                "json-simple-cliftonlabs": "json-simple-3.0.2",
                "minimal-json": "minimal-json-0.9.5",
                "pojo": "jsonschema2pojo-core-1.0.0",
            },
        },
        "csv": {
            "suffix": ".csv",
            "tribble_grammar": "grammars/tribble/csv.scala",
            "antlr_grammar": "grammars/antlr/CSV.g4",
            "drivers": {
                "commons-csv": "commons-csv-1.6",
                "jackson-dataformat-csv": "jackson-dataformat-csv-2.9.8",
                "jcsv": "jcsv-1.4.0",
                "sfm-csv": "sfm-csv-6.1.1",
                "simplecsv": "simplecsv-2.1",
                "super-csv": "super-csv-2.4.0",
            }
        },
        "url": {
            "suffix": ".txt",
            "tribble_grammar": "grammars/tribble/url.scala",
            "antlr_grammar": "grammars/antlr/url.g4",
            "drivers": {
                "autolink": "autolink-0.9.0",
                "galimatias": "galimatias-0.2.1",
                "jurl": "jurl-v0.3.0",
                "url-detector": "url-detector-0.1.17",
            }
        },
        "markdown": {
            "suffix": ".md",
            "tribble_grammar": "grammars/tribble/markdown.scala",
            "antlr_grammar": "grammars/antlr/Markdown.g4",
            "drivers": {
                "commonmark": "commonmark-0.11.0",
                "markdown4j": "markdown4j-2.2-cj-1.1",
                "txtmark": "txtmark-0.13",
            }
        }
    })


def root_dir() -> Path:
    """Return the configured experiment directory as a ``Path``."""
    return Path(ExperimentConfig().experiment_dir)


def depth() -> str:
    """Return the configured maximum tree depth as a string (for command lines)."""
    return str(ExperimentConfig().tree_depth)


class BuildGrammarinatorProducer(luigi.Task):
    """Run ``grammarinator-process`` on the ANTLR grammar of one format."""

    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    task_namespace = "prerequisites"

    def output(self):
        return luigi.LocalTarget(str(Path("tools") / "grammarinator" / self.format))

    def run(self):
        grammar = ExperimentConfig().drivers[self.format]["antlr_grammar"]
        with self.output().temporary_path() as out:
            subprocess.run(["grammarinator-process", "--no-actions", grammar, "-o", out], check=True)


class BuildTribble(luigi.Task):
    """Assemble the tribble jar with gradle inside ``tools/tribble``."""

    task_namespace = "prerequisites"

    def output(self):
        return luigi.LocalTarget(str(Path("tools") / "tribble" / "build" / "libs" / "tribble-0.1.jar"))

    def run(self):
        subprocess.run(["./gradlew", "assemble"], check=True, cwd=Path("tools") / "tribble")


class BuildSubject(luigi.Task):
    """Build one subject with gradle inside ``tools/subjects/<subject_name>``."""

    subject_name: str = luigi.Parameter(description="The name of the subject to build")
    task_namespace = "prerequisites"

    def output(self):
        return luigi.LocalTarget(str(Path("tools") / "subjects" / self.subject_name / "build" / "libs" / f"{self.subject_name}-subject.jar"))

    def run(self):
        subprocess.run(["../gradlew", "build"], check=True, cwd=Path("tools") / "subjects" / self.subject_name)


class DownloadOriginalBytecode(luigi.Task):
    """Run the ``downloadOriginalJar`` gradle task of one subject."""

    subject_name: str = luigi.Parameter(description="The name of the subject to build")
    subject_original_artifact: str = luigi.Parameter(description="The name of the original bytecode artifact of the subject")
    task_namespace = "prerequisites"

    def output(self):
        return luigi.LocalTarget(
            str(Path("tools") / "subjects" / self.subject_name / "build" / "original" / f"{self.subject_original_artifact}.jar"))

    def run(self):
        subprocess.run(["../gradlew", "downloadOriginalJar"], check=True, cwd=Path("tools") / "subjects" / self.subject_name)


class KPath(object):
    """Mixin adding the ``k`` parameter and the ``<k>-path`` tool name."""

    k: int = luigi.IntParameter(description="The k for the k-path coverage")

    def tool(self) -> str:
        return f"{self.k}-path"


class Grammarinator(KPath):
    """Mixin like ``KPath`` but reporting the tool name ``<k>-grammarinator``."""

    def tool(self) -> str:
        return f"{self.k}-grammarinator"


class GenerateKPath(KPath, luigi.Task):
    """Generate inputs for one format and run with the tribble jar in k-path mode."""

    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    run_num: int = luigi.IntParameter(description="The number of this run", positional=False)
    task_namespace = 'generation'
    resources = {'ram': 4}

    def requires(self):
        return BuildTribble()

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "generated-inputs" / self.format / self.tool() / f"run{self.run_num}"))

    def run(self):
        driver_info = ExperimentConfig().drivers[self.format]
        with self.output().temporary_path() as out:
            args = ["java",
                    "-Xss100m",
                    "-Xms256m",
                    # the JVM heap limit mirrors the 'ram' resource declared above
                    f'-Xmx{self.resources["ram"]}g',
                    "-server",
                    "-XX:ParallelGCThreads=2",
                    "-XX:CICompilerCount=2",
                    "-jar",
                    self.input().path,
                    f'--automaton-dir={str(root_dir() / "automaton-cache" / f"{self.k}" / f"run{self.run_num}")}',
                    "generate",
                    f'--suffix={driver_info["suffix"]}',
                    f"--out-dir={out}",
                    f'--grammar-file={driver_info["tribble_grammar"]}',
                    f"--mode={self.k}-path-{depth()}",
                    f"--heuristic={self.k}-path-coverage",
                    "--unfold-regexes",
                    "--merge-literals"
                    ]
            logging.info('Launching %s', ' '.join(args))
            subprocess.run(args, check=True, stdout=subprocess.DEVNULL)


class RunGrammarinator(Grammarinator, luigi.Task):
    """Generate inputs with ``grammarinator-generate``.

    The number of files to generate is read from the ``num_files`` column of
    the matching ``FairnessParameters`` output.
    """

    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    run_num: int = luigi.IntParameter(description="The number of this run", positional=False)
    task_namespace = 'generation'
    resources = {'ram': 4}

    def requires(self):
        return {"fairness": FairnessParameters(format=self.format, k=self.k, run_num=self.run_num),
                "producer": BuildGrammarinatorProducer(format=self.format)}

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "generated-inputs" / self.format / self.tool() / f"run{self.run_num}"))

    def run(self):
        driver_info = ExperimentConfig().drivers[self.format]
        with self.input()["fairness"].open("r") as f:
            row = next(csv.DictReader(f))
        # kept as a string: it is passed straight to the command line
        num_files = row["num_files"]
        producer_dir = Path(self.input()["producer"].path)
        grammar_name = Path(driver_info["antlr_grammar"]).stem
        with self.output().temporary_path() as out:
            args = ["grammarinator-generate",
                    "-l", str(producer_dir / f"{grammar_name}Unlexer.py"),
                    "-p", str(producer_dir / f"{grammar_name}Unparser.py"),
                    "-n", num_files,
                    "-o", str(Path(out) / f"file%d{driver_info['suffix']}"),
                    "-d", depth(),
                    "-c", "0.9",
                    "-t", "grammarinator.runtime.simple_space_transformer"
                    ]
            logging.debug("Launching %s", " ".join(args))
            subprocess.run(args, check=True, stdout=subprocess.DEVNULL)


class FairnessParameters(luigi.Task):
    """Summarize the files produced by ``GenerateKPath`` into a one-row CSV.

    The CSV has the columns
    ``num_files,min_size,avg_size,max_size,max_depth,grammar``.
    """

    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    run_num: int = luigi.IntParameter(description="The number of this run", positional=False)
    k: int = luigi.IntParameter(description="The k for the corresponding k-path algorithm", positional=False)
    task_namespace = "meta-data"
    # The group names were missing from the pattern (it did not compile); they
    # are restored from the ``match.group("size")`` / ``match.group("depth")``
    # calls in run().
    # NOTE(review): the ORDER of the two groups (depth first, then size) is an
    # assumption -- confirm against the file names tribble actually writes.
    regex = re.compile(r"_(?P<depth>\d+)_(?P<size>\d+)_")

    def requires(self):
        return GenerateKPath(format=self.format, run_num=self.run_num, k=self.k)

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "input-metadata" / self.format / f"k{self.k}" / f"run{self.run_num}.csv"))

    def run(self):
        driver_info = ExperimentConfig().drivers[self.format]
        sizes = []
        max_depth = 0
        num_files = 0
        for run_dir in self.deps():
            file_names = [f for f in os.listdir(run_dir.output().path) if f.startswith("file") and f.endswith(driver_info["suffix"])]
            # every generated file counts, even if its name carries no size/depth
            num_files += len(file_names)
            for file_name in file_names:
                match = self.regex.search(file_name)
                if match:
                    sizes.append(int(match.group("size")))
                    # named tree_depth so the module-level depth() is not shadowed
                    tree_depth = int(match.group("depth"))
                    if max_depth < tree_depth:
                        max_depth = tree_depth
        # Guard the degenerate cases: report 0 instead of leaking the
        # sys.maxsize sentinel or dividing by zero when nothing was found.
        min_size = min(sizes, default=0)
        max_size = max(sizes, default=0)
        avg_size = sum(sizes) // num_files if num_files else 0
        with self.output().open('w') as f:
            f.write(f'num_files,min_size,avg_size,max_size,max_depth,grammar\n{num_files},{min_size},{avg_size},{max_size},{max_depth},{driver_info["tribble_grammar"]}')


class RunDriver(luigi.Task):
    """Base task: run one subject jar on a directory of generated inputs.

    Subclasses provide ``tool()`` and a ``requires()`` returning a dict with
    the keys ``"subject"``, ``"original"`` and ``"inputs"``.
    """

    # Python 2 style metaclass declaration; Python 3 ignores this attribute.
    __metaclass__ = abc.ABCMeta
    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    driver_name: str = luigi.Parameter(description="The driver name", positional=False)
    run_num: int = luigi.IntParameter(description="The number of this run", positional=False)
    task_namespace = 'evaluation'
    resources = {'ram': 1}

    @abc.abstractmethod
    def tool(self) -> str:
        return ""

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "evaluation" / self.format / self.driver_name / "runs" / self.tool() / f"run{self.run_num}.csv"))

    def run(self):
        with self.output().temporary_path() as res:
            args = ["java",
                    "-Xss10m",
                    "-Xms256m",
                    f'-Xmx{self.resources["ram"]}g',
                    "-server",
                    "-XX:ParallelGCThreads=2",
                    "-XX:CICompilerCount=2",
                    "-jar",
                    self.input()["subject"].path,
                    "--ignore-exceptions",
                    "--log-exceptions",
                    # exception log path is derived from the (temporary) report path
                    str(Path(res).with_suffix(".exceptions.json")),
                    "--report-coverage",
                    res,
                    "--original-bytecode",
                    self.input()["original"].path,
                    self.input()["inputs"].path
                    ]
            logging.info('Launching %s', ' '.join(args))
            subprocess.run(args, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


class ExecuteKRun(KPath, RunDriver):
    """Run a driver on the inputs produced by ``GenerateKPath``."""

    def requires(self):
        original_name = ExperimentConfig().drivers[self.format]["drivers"][self.driver_name]
        return {"subject": BuildSubject(self.driver_name),
                "original": DownloadOriginalBytecode(self.driver_name, original_name),
                "inputs": GenerateKPath(format=self.format, run_num=self.run_num, k=self.k)}


class ExecuteGrammarinatorRun(Grammarinator, RunDriver):
    """Run a driver on the inputs produced by ``RunGrammarinator``."""

    def requires(self):
        original_name = ExperimentConfig().drivers[self.format]["drivers"][self.driver_name]
        return {"subject": BuildSubject(self.driver_name),
                "original": DownloadOriginalBytecode(self.driver_name, original_name),
                "inputs": RunGrammarinator(format=self.format, run_num=self.run_num, k=self.k)}


class MergeExceptions(luigi.Task):
    """Base task: merge the per-run exception logs of one driver into a CSV.

    Each (exception name, location) pair is counted at most once per run; the
    written rate is that count divided by ``runs``.
    """

    # Python 2 style metaclass declaration; Python 3 ignores this attribute.
    __metaclass__ = abc.ABCMeta
    driver_name: str = luigi.Parameter(description="The driver name", positional=False)
    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    runs: int = luigi.IntParameter(description="Number of runs to generate", positional=False)
    task_namespace = "postprocessing"

    @abc.abstractmethod
    def tool(self) -> str:
        return ""

    def output(self):
        return luigi.LocalTarget(
            str(root_dir() / "exceptions" / self.format / self.tool() / self.driver_name / f"exceptions-of-{self.runs}-runs.csv"))

    def run(self):
        all_exceptions = defaultdict(Counter)
        for f in self.input():
            run_exceptions = set()
            try:
                with open(str(Path(f.path).with_suffix(".exceptions.json"))) as path:
                    exc_run = json.load(path)
                    for exc in exc_run:
                        run_exceptions.add((exc["name"], exc["location"]))
            except FileNotFoundError:
                pass  # no exceptions were reported for this run
            for n, loc in run_exceptions:
                all_exceptions[n][loc] += 1
        with self.output().open("w") as out:
            w = csv.writer(out)
            w.writerow(["exception", "location", f"{self.tool()} detection rate"])
            for exc, run_exceptions in all_exceptions.items():
                for loc, count in run_exceptions.items():
                    w.writerow((exc, loc, count / self.runs))


class MergeKPathExceptions(KPath, MergeExceptions):
    """Merge the exception logs of all ``ExecuteKRun`` runs."""

    def requires(self):
        return [ExecuteKRun(format=self.format, run_num=run, driver_name=self.driver_name, k=self.k) for run in range(self.runs)]


class MergeGrammarinatorExceptions(Grammarinator, MergeExceptions):
    """Merge the exception logs of all ``ExecuteGrammarinatorRun`` runs."""

    def requires(self):
        return [ExecuteGrammarinatorRun(format=self.format, run_num=run, driver_name=self.driver_name, k=self.k) for run in range(self.runs)]


class CombineExceptionResults(luigi.Task):
    """Join the merged exception CSVs of every tool and driver of one format."""

    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    runs: int = luigi.IntParameter(description="Number of runs to generate", positional=False)
    k_params: List[int] = luigi.ListParameter(description="The k values for the k-path algorithm", positional=False)
    task_namespace = "postprocessing"

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "postprocessing" / "exceptions" / self.format / f"exceptions-over-{self.runs}-runs.csv"))

    def requires(self):
        drivers = ExperimentConfig().drivers[self.format]["drivers"]
        return {
            **{f"{k}-path": {name: MergeKPathExceptions(format=self.format, runs=self.runs, driver_name=name, k=k) for name in drivers.keys()} for k in self.k_params},
            **{f"{k}-grammarinator": {name: MergeGrammarinatorExceptions(format=self.format, runs=self.runs, driver_name=name, k=k) for name in drivers.keys()} for k in self.k_params},
        }

    def run(self):
        df = pd.DataFrame()
        for driver in ExperimentConfig().drivers[self.format]["drivers"].keys():
            dfs = [pd.read_csv(self.input()[f"{k}-path"][driver].path) for k in self.k_params]
            dfs.extend([pd.read_csv(self.input()[f"{k}-grammarinator"][driver].path) for k in self.k_params])
            # outer join keeps exceptions only some tools found; their missing rates become 0
            sf = reduce(lambda left, right: pd.merge(left, right, on=["exception", "location"], how="outer"), dfs).fillna(0)

            sf.insert(0, "subject", driver)
            sf.set_index(["subject", "exception", "location"], inplace=True)

            df = pd.concat((df, sf))
        with self.output().temporary_path() as res:
            df.to_csv(res, encoding='utf-8')


class ConcatenateRuns(luigi.Task):
    """Base task: concatenate the per-run report CSVs of one driver and tool.

    Adds the columns ``run``, ``tool``, ``format`` and ``subject``.
    """

    # Python 2 style metaclass declaration; Python 3 ignores this attribute.
    __metaclass__ = abc.ABCMeta
    runs: int = luigi.IntParameter(description="Number of runs to generate", positional=False)
    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    driver_name: str = luigi.Parameter(description="The driver name", positional=False)
    task_namespace = "postprocessing"

    @abc.abstractmethod
    def tool(self) -> str:
        return ""

    @abc.abstractmethod
    def requirement(self, number) -> luigi.Task:
        return

    def requires(self):
        return [self.requirement(run) for run in range(self.runs)]

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "postprocessing" / "concatenated" / self.format / self.driver_name / f"concat-of-{self.runs}-{self.tool()}-runs.csv"))

    def run(self):
        dfs = []
        for run in range(self.runs):
            df = pd.read_csv(self.input()[run].path)
            df["run"] = run
            dfs.append(df)
        dd = pd.concat(dfs)
        dd["tool"] = self.tool()
        dd["format"] = self.format
        dd["subject"] = self.driver_name
        with self.output().temporary_path() as res:
            dd.to_csv(res, encoding='utf-8', index=False)


class ConcatenateKRuns(KPath, ConcatenateRuns):
    """Concatenate the reports of all ``ExecuteKRun`` runs."""

    def requirement(self, number):
        return ExecuteKRun(format=self.format, run_num=number, driver_name=self.driver_name, k=self.k)


class ConcatenateGrammarinatorStats(Grammarinator, ConcatenateRuns):
    """Concatenate the reports of all ``ExecuteGrammarinatorRun`` runs."""

    def requirement(self, number) -> luigi.Task:
        return ExecuteGrammarinatorRun(format=self.format, run_num=number, driver_name=self.driver_name, k=self.k)


class ConcatDrivers(luigi.Task):
    """Concatenate the per-driver CSVs of every tool into one CSV per format."""

    runs: int = luigi.IntParameter(description="Number of runs to generate", positional=False)
    format: str = luigi.Parameter(description="The name of the format directory (e.g. json)", positional=False)
    k_params: List[int] = luigi.ListParameter(description="The k values for the k-path algorithm", positional=False)
    task_namespace = "orchestration"

    def requires(self):
        format_info = ExperimentConfig().drivers[self.format]
        drivers = format_info["drivers"].keys()
        return {
            **{f"{k}-path-detailed": {name: ConcatenateKRuns(format=self.format, k=k, runs=self.runs, driver_name=name) for name in drivers} for k in self.k_params},
            **{f"grammarinator-{k}-detailed": {name: ConcatenateGrammarinatorStats(format=self.format, k=k, runs=self.runs, driver_name=name) for name in drivers} for k in self.k_params},
        }

    def output(self):
        return luigi.LocalTarget(str(root_dir() / "postprocessing" / "concatenated" / self.format / f"concat-of-{self.runs}-{self.format}-runs.csv"))

    def run(self):
        dd = pd.concat((pd.read_csv(inp.output().path) for inp in self.deps()))
        with self.output().temporary_path() as res:
            dd.to_csv(res, encoding='utf-8', index=False)


class RunAllDrivers(luigi.WrapperTask):
    """Wrapper requiring the coverage and exception results of every format."""

    runs: int = luigi.IntParameter(description="Number of random runs to generate", positional=False)
    k_params: List[int] = luigi.ListParameter(description="The k values for the k-path algorithm", positional=False)
    task_namespace = "orchestration"

    def requires(self):
        yield [ConcatDrivers(runs=self.runs, k_params=self.k_params, format=fmt) for fmt in ExperimentConfig().drivers.keys()]
        yield [CombineExceptionResults(runs=self.runs, k_params=self.k_params, format=fmt) for fmt in ExperimentConfig().drivers.keys()]


class GenerateAndRunNotebook(luigi.Task):
    """Assemble the evaluation notebook, execute it and write the ``.ipynb``.

    The notebook is executed with the experiment directory as working
    directory, so the relative ``postprocessing/...`` paths in its cells
    resolve against ``root_dir()``.
    """

    runs: int = luigi.IntParameter(description="Number of random runs to generate", positional=False)
    suffix = luigi.Parameter(description="The suffix to append to the notebook name", positional=False)
    k_params: List[int] = luigi.ListParameter(description="The k values for the k-path algorithm", positional=False)
    task_namespace = "presentation"

    def requires(self):
        return RunAllDrivers(runs=self.runs, k_params=self.k_params)

    def output(self):
        suffix = f"-{self.suffix}" if self.suffix else ""
        return luigi.LocalTarget(str(root_dir() / f"report-{self.runs}-runs{suffix}.ipynb"))

    def run(self):
        nb = nbf.v4.new_notebook()
        cells = []
        cells.append(nbf.v4.new_markdown_cell("# Imports"))
        cells.append(nbf.v4.new_code_cell("""\
import pandas as pd
pd.set_option("display.max_colwidth", -1)

import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
import cufflinks
cufflinks.go_offline()

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set(font_scale=1.75)
color_map = sns.light_palette("green", as_cmap=True)"""))
        cells.append(nbf.v4.new_markdown_cell(f"""\
# Configuration
- experiment repetitions: `{self.runs}`
"""))
        cells.append(nbf.v4.new_markdown_cell("# Utility Functions"))
        cells.append(nbf.v4.new_code_cell("""\
def load_fmt_data(fmt: str, runs: int):
    return pd.read_csv(f"postprocessing/concatenated/{fmt}/concat-of-{runs}-{fmt}-runs.csv", index_col=False)"""))
        cells.append(nbf.v4.new_code_cell("""\
def show_exceptions(fmt: str, runs: int):
    df = pd.read_csv(f"postprocessing/exceptions/{fmt}/exceptions-over-{runs}-runs.csv", index_col=["subject", "exception", "location"]).sort_index()
    return df.style.background_gradient(cmap=color_map, low=0.0, high=1.0).format("{:,.0%}")"""))
        cells.append(nbf.v4.new_code_cell("""\
def mean_coverage_comparison(fmt: str, runs: int):
    d = load_fmt_data(fmt, runs)
    # select for each run the row with the greatest file number
    grp = d.groupby(["format", "subject", "tool", "run"]).agg({"filenum": "max"})
    grp.reset_index(inplace=True)
    d = pd.merge(d, grp)
    d = d[["branch", "tool", "subject"]]
    d = d.groupby(["subject", "tool"])
    d = d.mean()
    d = d.reset_index()
    d = d.pivot(index="subject", columns="tool", values="branch")
    d.iplot(kind="bar", title=f"Format {fmt}", xTitle="Subject", yTitle="Mean Branch Coverage")"""))
        cells.append(nbf.v4.new_code_cell("""\
def coverage_progress_comparison(fmt: str, subject: str, runs: int):
    df = load_fmt_data(fmt, runs)
    df = df[df.subject==subject]
    plt.figure(figsize=(20, 8))
    ax = sns.lineplot(data=df, x="filenum", y="branch", hue="tool", style="tool", dashes=False)#, units="run", estimator=None)
    ax.set_title(f"Subject {subject}")
    ax.set_xlabel("Number of Files")
    ax.set_ylabel("Branch Coverage")"""))
        cells.append(nbf.v4.new_code_cell("""\
def plot_coverage_dispersion(fmt: str, runs: int):
    df = load_fmt_data(fmt, runs)
    # select for each run the row with the greatest file number
    grp = df.groupby(["format", "subject", "tool", "run"]).agg({"filenum": "max"})
    grp.reset_index(inplace=True)
    df = pd.merge(df, grp)
    df = df[["subject", "tool", "branch", "run"]]
    for sub in df.subject.unique():
        df[df.subject==sub][["tool", "branch", "run"]]\\
            .pivot(index="run", columns="tool", values="branch")\\
            .iplot(kind="box", title=f"Subject {sub}", xTitle="Tool", yTitle="Branch Coverage")"""))
        cells.append(nbf.v4.new_markdown_cell("# Evaluation"))

        # one section per format: exceptions table, mean coverage, progress per driver, dispersion
        for fmt, subject in ExperimentConfig().drivers.items():
            cells.append(nbf.v4.new_markdown_cell(f"## Format {fmt} ({len(subject['drivers'])} subjects)"))
            cells.append(nbf.v4.new_code_cell(f"show_exceptions({repr(fmt)}, runs={self.runs})"))
            cells.append(nbf.v4.new_code_cell(f"mean_coverage_comparison({repr(fmt)}, runs={self.runs})"))
            cells.append(nbf.v4.new_code_cell("\n".join([f"coverage_progress_comparison({repr(fmt)}, {repr(driver)}, runs={self.runs})" for driver in subject["drivers"]])))
            cells.append(nbf.v4.new_code_cell(f"plot_coverage_dispersion({repr(fmt)}, runs={self.runs})"))
        nb['cells'] = cells

        ep = ExecutePreprocessor(kernel_name="python3", timeout=None, allow_errors=True, interrupt_on_timeout=True)
        ep.preprocess(nb, {'metadata': {'path': str(root_dir())}})

        with self.output().open('w') as f:
            nbf.write(nb, f)


class RenderNotebook(luigi.Task):
    """Export the executed notebook to HTML next to the ``.ipynb`` file."""

    runs = luigi.IntParameter(description="Number of random runs to generate", positional=False)
    k_params: List[int] = luigi.ListParameter(description="The k values for the k-path algorithm", positional=False)
    suffix = luigi.Parameter(description="The suffix to append to the notebook name", default="", positional=False)
    task_namespace = 'presentation'

    def requires(self):
        return GenerateAndRunNotebook(k_params=self.k_params, runs=self.runs, suffix=self.suffix)

    def output(self):
        return luigi.LocalTarget(str(Path(self.input().path).with_suffix(".html")))

    def run(self):
        html_exporter = HTMLExporter()
        html, _ = html_exporter.from_filename(self.input().path)
        with self.output().open('w') as f:
            f.write(html)


if __name__ == '__main__':
    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", datefmt="%d.%m.%Y %H:%M:%S", level=logging.INFO, stream=sys.stdout)
    luigi.run(main_task_cls=RenderNotebook, local_scheduler=False)

# --------------------------------------------------------------------------------
# /grammars/antlr/CSV.g4:
# --------------------------------------------------------------------------------
# 1 | /*
# 2 | [The "BSD licence"]
# 3 | Copyright (c) 2013 Terence Parr
# 4 | All rights reserved.
5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 3. The name of the author may not be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | grammar CSV; 28 | 29 | csvFile: hdr row+ ; 30 | hdr : row ; 31 | 32 | row : field (',' field)* '\r'? 
'\n' ; 33 | 34 | field 35 | : TEXT 36 | | STRING 37 | | 38 | ; 39 | 40 | TEXT : ~[,\n\r"]+ ; 41 | STRING : '"' ('""'|~'"')* '"' ; // quote-quote is an escaped quote 42 | -------------------------------------------------------------------------------- /grammars/antlr/JSON.g4: -------------------------------------------------------------------------------- 1 | /** Taken from "The Definitive ANTLR 4 Reference" by Terence Parr */ 2 | 3 | // Derived from http://json.org 4 | grammar JSON; 5 | 6 | json 7 | : value 8 | ; 9 | 10 | obj 11 | : '{' pair (',' pair)* '}' 12 | | '{' '}' 13 | ; 14 | 15 | pair 16 | : STRING ':' value 17 | ; 18 | 19 | array 20 | : '[' value (',' value)* ']' 21 | | '[' ']' 22 | ; 23 | 24 | value 25 | : STRING 26 | | NUMBER 27 | | obj 28 | | array 29 | | 'true' 30 | | 'false' 31 | | 'null' 32 | ; 33 | 34 | 35 | STRING 36 | : '"' (ESC | SAFECODEPOINT)* '"' 37 | ; 38 | 39 | 40 | fragment ESC 41 | : '\\' (["\\/bfnrt] | UNICODE) 42 | ; 43 | fragment UNICODE 44 | : 'u' HEX HEX HEX HEX 45 | ; 46 | fragment HEX 47 | : [0-9a-fA-F] 48 | ; 49 | fragment SAFECODEPOINT 50 | : ~ ["\\\u0000-\u001F] 51 | ; 52 | 53 | 54 | NUMBER 55 | : '-'? INT ('.' [0-9] +)? EXP? 56 | ; 57 | 58 | 59 | fragment INT 60 | : '0' | [1-9] [0-9]* 61 | ; 62 | 63 | // no leading zeros 64 | 65 | fragment EXP 66 | : [Ee] [+\-]? INT 67 | ; 68 | 69 | // \- since - means "range" inside [...] 70 | 71 | WS 72 | : [ \t\n\r] + -> skip 73 | ; 74 | -------------------------------------------------------------------------------- /grammars/antlr/Markdown.g4: -------------------------------------------------------------------------------- 1 | // Translated from https://github.com/jgm/peg-markdown/blob/master/markdown_parser.leg 2 | 3 | grammar Markdown; 4 | 5 | 6 | doc : bOM? 
block*; 7 | 8 | block : blankLine* 9 | (blockQuote 10 | | verbatim 11 | | note 12 | | reference 13 | | horizontalRule 14 | | heading 15 | | orderedList 16 | | bulletList 17 | | htmlBlock 18 | | styleBlock 19 | | para 20 | | plain) 21 | 22 | ; 23 | 24 | para : nonindentSpace inlines blankLine+ 25 | 26 | 27 | ; 28 | plain : inlines 29 | 30 | ; 31 | atxInline : inline 32 | ; 33 | 34 | atxHeading : ('######' | '#####' | '####' | '###' | '##' | '#') sp atxInline+ (sp '#'+ sp)? Newline 35 | 36 | ; 37 | setextHeading : setextHeading1 | setextHeading2 38 | 39 | ; 40 | SetextBottom1 : '='+ Newline 41 | 42 | ; 43 | SetextBottom2 : '-'+ Newline 44 | 45 | ; 46 | setextHeading1 : inline+ sp Newline SetextBottom1 47 | 48 | ; 49 | setextHeading2 : inline+ sp Newline SetextBottom2 50 | 51 | ; 52 | heading : setextHeading | atxHeading 53 | 54 | ; 55 | blockQuote : blockQuoteRaw 56 | 57 | 58 | ; 59 | blockQuoteRaw : ('>' ' '? line line* blankLine*)+ 60 | 61 | 62 | ; 63 | nonblankIndentedLine : indentedLine 64 | 65 | ; 66 | verbatimChunk : blankLine* nonblankIndentedLine+ 67 | 68 | 69 | ; 70 | verbatim : verbatimChunk+ 71 | 72 | 73 | ; 74 | horizontalRule : nonindentSpace 75 | ('*' sp '*' sp '*' (sp '*')* 76 | | '-' sp '-' sp '-' (sp '-')* 77 | | '_' sp '_' sp '_' (sp '_')*) 78 | sp Newline blankLine+ 79 | 80 | 81 | ; 82 | bullet : nonindentSpace ('+' | '*' | '-') spacechar+ 83 | 84 | ; 85 | bulletList : (listTight | listLoose) 86 | 87 | 88 | ; 89 | listTight : listItemTight+ blankLine* 90 | 91 | ; 92 | listLoose : (listItem blankLine*)+ 93 | 94 | 95 | ; 96 | listItem : (bullet | Enumerator) listBlock listContinuationBlock*; 97 | 98 | 99 | listItemTight : (bullet | Enumerator) listBlock listContinuationBlock* 100 | 101 | 102 | ; 103 | listBlock : line listBlockLine* 104 | 105 | 106 | ; 107 | listContinuationBlock : blankLine* (Indent listBlock)+ 108 | 109 | 110 | ; 111 | Enumerator : NonindentSpace [0-9]+ '.' 
Spacechar+ 112 | 113 | ; 114 | orderedList : listTight | listLoose 115 | 116 | 117 | ; 118 | listBlockLine : 119 | 120 | 121 | optionallyIndentedLine 122 | 123 | // parsers for different kinds of block-level hTML content. 124 | // this is repetitive due to constraints of pEG grammar. 125 | 126 | ; 127 | htmlBlockOpenAddress : '<' spnl ('address' | 'ADDRESS') spnl HtmlAttribute* '>' 128 | ; 129 | htmlBlockCloseAddress : '<' spnl '/' ('address' | 'ADDRESS') spnl '>' 130 | ; 131 | htmlBlockAddress : htmlBlockOpenAddress htmlBlockAddress* htmlBlockCloseAddress 132 | 133 | ; 134 | htmlBlockOpenBlockquote : '<' spnl ('blockquote' | 'BLOCKQUOTE') spnl HtmlAttribute* '>' 135 | ; 136 | htmlBlockCloseBlockquote : '<' spnl '/' ('blockquote' | 'BLOCKQUOTE') spnl '>' 137 | ; 138 | htmlBlockBlockquote : htmlBlockOpenBlockquote htmlBlockBlockquote* htmlBlockCloseBlockquote 139 | 140 | ; 141 | htmlBlockOpenCenter : '<' spnl ('center' | 'CENTER') spnl HtmlAttribute* '>' 142 | ; 143 | htmlBlockCloseCenter : '<' spnl '/' ('center' | 'CENTER') spnl '>' 144 | ; 145 | htmlBlockCenter : htmlBlockOpenCenter htmlBlockCenter* htmlBlockCloseCenter 146 | 147 | ; 148 | htmlBlockOpenDir : '<' spnl ('dir' | 'DIR') spnl HtmlAttribute* '>' 149 | ; 150 | htmlBlockCloseDir : '<' spnl '/' ('dir' | 'DIR') spnl '>' 151 | ; 152 | htmlBlockDir : htmlBlockOpenDir htmlBlockDir* htmlBlockCloseDir 153 | 154 | ; 155 | htmlBlockOpenDiv : '<' spnl ('div' | 'DIV') spnl HtmlAttribute* '>' 156 | ; 157 | htmlBlockCloseDiv : '<' spnl '/' ('div' | 'DIV') spnl '>' 158 | ; 159 | htmlBlockDiv : htmlBlockOpenDiv htmlBlockDiv* htmlBlockCloseDiv 160 | 161 | ; 162 | htmlBlockOpenDl : '<' spnl ('dl' | 'DL') spnl HtmlAttribute* '>' 163 | ; 164 | htmlBlockCloseDl : '<' spnl '/' ('dl' | 'DL') spnl '>' 165 | ; 166 | htmlBlockDl : htmlBlockOpenDl htmlBlockDl* htmlBlockCloseDl 167 | 168 | ; 169 | htmlBlockOpenFieldset : '<' spnl ('fieldset' | 'FIELDSET') spnl HtmlAttribute* '>' 170 | ; 171 | htmlBlockCloseFieldset : '<' spnl '/' 
('fieldset' | 'FIELDSET') spnl '>' 172 | ; 173 | htmlBlockFieldset : htmlBlockOpenFieldset htmlBlockFieldset* htmlBlockCloseFieldset 174 | 175 | ; 176 | htmlBlockOpenForm : '<' spnl ('form' | 'FORM') spnl HtmlAttribute* '>' 177 | ; 178 | htmlBlockCloseForm : '<' spnl '/' ('form' | 'FORM') spnl '>' 179 | ; 180 | htmlBlockForm : htmlBlockOpenForm htmlBlockForm* htmlBlockCloseForm 181 | 182 | ; 183 | htmlBlockOpenH1 : '<' spnl ('h1' | 'H1') spnl HtmlAttribute* '>' 184 | ; 185 | htmlBlockCloseH1 : '<' spnl '/' ('h1' | 'H1') spnl '>' 186 | ; 187 | htmlBlockH1 : htmlBlockOpenH1 htmlBlockH1* htmlBlockCloseH1 188 | 189 | ; 190 | htmlBlockOpenH2 : '<' spnl ('h2' | 'H2') spnl HtmlAttribute* '>' 191 | ; 192 | htmlBlockCloseH2 : '<' spnl '/' ('h2' | 'H2') spnl '>' 193 | ; 194 | htmlBlockH2 : htmlBlockOpenH2 htmlBlockH2* htmlBlockCloseH2 195 | 196 | ; 197 | htmlBlockOpenH3 : '<' spnl ('h3' | 'H3') spnl HtmlAttribute* '>' 198 | ; 199 | htmlBlockCloseH3 : '<' spnl '/' ('h3' | 'H3') spnl '>' 200 | ; 201 | htmlBlockH3 : htmlBlockOpenH3 htmlBlockH3* htmlBlockCloseH3 202 | 203 | ; 204 | htmlBlockOpenH4 : '<' spnl ('h4' | 'H4') spnl HtmlAttribute* '>' 205 | ; 206 | htmlBlockCloseH4 : '<' spnl '/' ('h4' | 'H4') spnl '>' 207 | ; 208 | htmlBlockH4 : htmlBlockOpenH4 htmlBlockH4* htmlBlockCloseH4 209 | 210 | ; 211 | htmlBlockOpenH5 : '<' spnl ('h5' | 'H5') spnl HtmlAttribute* '>' 212 | ; 213 | htmlBlockCloseH5 : '<' spnl '/' ('h5' | 'H5') spnl '>' 214 | ; 215 | htmlBlockH5 : htmlBlockOpenH5 htmlBlockH5* htmlBlockCloseH5 216 | 217 | ; 218 | htmlBlockOpenH6 : '<' spnl ('h6' | 'H6') spnl HtmlAttribute* '>' 219 | ; 220 | htmlBlockCloseH6 : '<' spnl '/' ('h6' | 'H6') spnl '>' 221 | ; 222 | htmlBlockH6 : htmlBlockOpenH6 htmlBlockH6* htmlBlockCloseH6 223 | 224 | ; 225 | htmlBlockOpenMenu : '<' spnl ('menu' | 'MENU') spnl HtmlAttribute* '>' 226 | ; 227 | htmlBlockCloseMenu : '<' spnl '/' ('menu' | 'MENU') spnl '>' 228 | ; 229 | htmlBlockMenu : htmlBlockOpenMenu htmlBlockMenu* htmlBlockCloseMenu 
230 | 231 | ; 232 | htmlBlockOpenNoframes : '<' spnl ('noframes' | 'NOFRAMES') spnl HtmlAttribute* '>' 233 | ; 234 | htmlBlockCloseNoframes : '<' spnl '/' ('noframes' | 'NOFRAMES') spnl '>' 235 | ; 236 | htmlBlockNoframes : htmlBlockOpenNoframes htmlBlockNoframes* htmlBlockCloseNoframes 237 | 238 | ; 239 | htmlBlockOpenNoscript : '<' spnl ('noscript' | 'NOSCRIPT') spnl HtmlAttribute* '>' 240 | ; 241 | htmlBlockCloseNoscript : '<' spnl '/' ('noscript' | 'NOSCRIPT') spnl '>' 242 | ; 243 | htmlBlockNoscript : htmlBlockOpenNoscript htmlBlockNoscript* htmlBlockCloseNoscript 244 | 245 | ; 246 | htmlBlockOpenOl : '<' spnl ('ol' | 'OL') spnl HtmlAttribute* '>' 247 | ; 248 | htmlBlockCloseOl : '<' spnl '/' ('ol' | 'OL') spnl '>' 249 | ; 250 | htmlBlockOl : htmlBlockOpenOl htmlBlockOl* htmlBlockCloseOl 251 | 252 | ; 253 | htmlBlockOpenP : '<' spnl ('p' | 'P') spnl HtmlAttribute* '>' 254 | ; 255 | htmlBlockCloseP : '<' spnl '/' ('p' | 'P') spnl '>' 256 | ; 257 | htmlBlockP : htmlBlockOpenP htmlBlockP* htmlBlockCloseP 258 | 259 | ; 260 | htmlBlockOpenPre : '<' spnl ('pre' | 'PRE') spnl HtmlAttribute* '>' 261 | ; 262 | htmlBlockClosePre : '<' spnl '/' ('pre' | 'PRE') spnl '>' 263 | ; 264 | htmlBlockPre : htmlBlockOpenPre htmlBlockPre* htmlBlockClosePre 265 | 266 | ; 267 | htmlBlockOpenTable : '<' spnl ('table' | 'TABLE') spnl HtmlAttribute* '>' 268 | ; 269 | htmlBlockCloseTable : '<' spnl '/' ('table' | 'TABLE') spnl '>' 270 | ; 271 | htmlBlockTable : htmlBlockOpenTable htmlBlockTable* htmlBlockCloseTable 272 | 273 | ; 274 | htmlBlockOpenUl : '<' spnl ('ul' | 'UL') spnl HtmlAttribute* '>' 275 | ; 276 | htmlBlockCloseUl : '<' spnl '/' ('ul' | 'UL') spnl '>' 277 | ; 278 | htmlBlockUl : htmlBlockOpenUl htmlBlockUl* htmlBlockCloseUl 279 | 280 | ; 281 | htmlBlockOpenDd : '<' spnl ('dd' | 'DD') spnl HtmlAttribute* '>' 282 | ; 283 | htmlBlockCloseDd : '<' spnl '/' ('dd' | 'DD') spnl '>' 284 | ; 285 | htmlBlockDd : htmlBlockOpenDd htmlBlockDd* htmlBlockCloseDd 286 | 287 | ; 288 | 
htmlBlockOpenDt : '<' spnl ('dt' | 'DT') spnl HtmlAttribute* '>' 289 | ; 290 | htmlBlockCloseDt : '<' spnl '/' ('dt' | 'DT') spnl '>' 291 | ; 292 | htmlBlockDt : htmlBlockOpenDt htmlBlockDt* htmlBlockCloseDt 293 | 294 | ; 295 | htmlBlockOpenFrameset : '<' spnl ('frameset' | 'FRAMESET') spnl HtmlAttribute* '>' 296 | ; 297 | htmlBlockCloseFrameset : '<' spnl '/' ('frameset' | 'FRAMESET') spnl '>' 298 | ; 299 | htmlBlockFrameset : htmlBlockOpenFrameset htmlBlockFrameset* htmlBlockCloseFrameset 300 | 301 | ; 302 | htmlBlockOpenLi : '<' spnl ('li' | 'LI') spnl HtmlAttribute* '>' 303 | ; 304 | htmlBlockCloseLi : '<' spnl '/' ('li' | 'LI') spnl '>' 305 | ; 306 | htmlBlockLi : htmlBlockOpenLi htmlBlockLi* htmlBlockCloseLi 307 | 308 | ; 309 | htmlBlockOpenTbody : '<' spnl ('tbody' | 'TBODY') spnl HtmlAttribute* '>' 310 | ; 311 | htmlBlockCloseTbody : '<' spnl '/' ('tbody' | 'TBODY') spnl '>' 312 | ; 313 | htmlBlockTbody : htmlBlockOpenTbody htmlBlockTbody* htmlBlockCloseTbody 314 | 315 | ; 316 | htmlBlockOpenTd : '<' spnl ('td' | 'TD') spnl HtmlAttribute* '>' 317 | ; 318 | htmlBlockCloseTd : '<' spnl '/' ('td' | 'TD') spnl '>' 319 | ; 320 | htmlBlockTd : htmlBlockOpenTd htmlBlockTd* htmlBlockCloseTd 321 | 322 | ; 323 | htmlBlockOpenTfoot : '<' spnl ('tfoot' | 'TFOOT') spnl HtmlAttribute* '>' 324 | ; 325 | htmlBlockCloseTfoot : '<' spnl '/' ('tfoot' | 'TFOOT') spnl '>' 326 | ; 327 | htmlBlockTfoot : htmlBlockOpenTfoot htmlBlockTfoot* htmlBlockCloseTfoot 328 | 329 | ; 330 | htmlBlockOpenTh : '<' spnl ('th' | 'TH') spnl HtmlAttribute* '>' 331 | ; 332 | htmlBlockCloseTh : '<' spnl '/' ('th' | 'TH') spnl '>' 333 | ; 334 | htmlBlockTh : htmlBlockOpenTh htmlBlockTh* htmlBlockCloseTh 335 | 336 | ; 337 | htmlBlockOpenThead : '<' spnl ('thead' | 'THEAD') spnl HtmlAttribute* '>' 338 | ; 339 | htmlBlockCloseThead : '<' spnl '/' ('thead' | 'THEAD') spnl '>' 340 | ; 341 | htmlBlockThead : htmlBlockOpenThead htmlBlockThead* htmlBlockCloseThead 342 | 343 | ; 344 | htmlBlockOpenTr : '<' 
spnl ('tr' | 'TR') spnl HtmlAttribute* '>' 345 | ; 346 | htmlBlockCloseTr : '<' spnl '/' ('tr' | 'TR') spnl '>' 347 | ; 348 | htmlBlockTr : htmlBlockOpenTr htmlBlockTr* htmlBlockCloseTr 349 | 350 | ; 351 | htmlBlockOpenScript : '<' spnl ('script' | 'SCRIPT') spnl HtmlAttribute* '>' 352 | ; 353 | htmlBlockCloseScript : '<' spnl '/' ('script' | 'SCRIPT') spnl '>' 354 | ; 355 | htmlBlockScript : htmlBlockOpenScript htmlBlockCloseScript 356 | 357 | ; 358 | htmlBlockOpenHead : '<' spnl ('head' | 'HEAD') spnl HtmlAttribute* '>' 359 | ; 360 | htmlBlockCloseHead : '<' spnl '/' ('head' | 'HEAD') spnl '>' 361 | ; 362 | htmlBlockHead : htmlBlockOpenHead htmlBlockCloseHead 363 | 364 | ; 365 | htmlBlockInTags : htmlBlockAddress 366 | | htmlBlockBlockquote 367 | | htmlBlockCenter 368 | | htmlBlockDir 369 | | htmlBlockDiv 370 | | htmlBlockDl 371 | | htmlBlockFieldset 372 | | htmlBlockForm 373 | | htmlBlockH1 374 | | htmlBlockH2 375 | | htmlBlockH3 376 | | htmlBlockH4 377 | | htmlBlockH5 378 | | htmlBlockH6 379 | | htmlBlockMenu 380 | | htmlBlockNoframes 381 | | htmlBlockNoscript 382 | | htmlBlockOl 383 | | htmlBlockP 384 | | htmlBlockPre 385 | | htmlBlockTable 386 | | htmlBlockUl 387 | | htmlBlockDd 388 | | htmlBlockDt 389 | | htmlBlockFrameset 390 | | htmlBlockLi 391 | | htmlBlockTbody 392 | | htmlBlockTd 393 | | htmlBlockTfoot 394 | | htmlBlockTh 395 | | htmlBlockThead 396 | | htmlBlockTr 397 | | htmlBlockScript 398 | | htmlBlockHead 399 | 400 | ; 401 | htmlBlock : (htmlBlockInTags | HtmlComment | htmlBlockSelfClosing) blankLine+ 402 | 403 | 404 | ; 405 | htmlBlockSelfClosing : '<' spnl htmlBlockType spnl HtmlAttribute* '/' spnl '>' 406 | 407 | ; 408 | htmlBlockType : 'address' | 'blockquote' | 'center' | 'dir' | 'div' | 'dl' | 'fieldset' | 'form' | 'h1' | 'h2' | 'h3' | 409 | 'h4' | 'h5' | 'h6' | 'hr' | 'isindex' | 'menu' | 'noframes' | 'noscript' | 'ol' | 'p' | 'pre' | 'table' | 410 | 'ul' | 'dd' | 'dt' | 'frameset' | 'li' | 'tbody' | 'td' | 'tfoot' | 'th' | 'thead' | 'tr' | 
'script' | 411 | 'ADDRESS' | 'BLOCKQUOTE' | 'CENTER' | 'DIR' | 'DIV' | 'DL' | 'FIELDSET' | 'FORM' | 'H1' | 'H2' | 'H3' | 412 | 'H4' | 'H5' | 'H6' | 'HR' | 'ISINDEX' | 'MENU' | 'NOFRAMES' | 'NOSCRIPT' | 'OL' | 'P' | 'PRE' | 'TABLE' | 413 | 'UL' | 'DD' | 'DT' | 'FRAMESET' | 'LI' | 'TBODY' | 'TD' | 'TFOOT' | 'TH' | 'THEAD' | 'TR' | 'SCRIPT' 414 | 415 | ; 416 | styleOpen : '<' spnl ('style' | 'STYLE') spnl HtmlAttribute* '>' 417 | ; 418 | styleClose : '<' spnl '/' ('style' | 'STYLE') spnl '>' 419 | ; 420 | inStyleTags : styleOpen styleClose 421 | ; 422 | styleBlock : inStyleTags blankLine* 423 | 424 | 425 | ; 426 | inlines : (inline 427 | | endline inline)+ endline? 428 | 429 | 430 | ; 431 | inline : str 432 | | endline 433 | | ulOrStarLine 434 | | SPace 435 | | strong 436 | | emph 437 | | strike 438 | | image 439 | | link 440 | | noteReference 441 | | inlineNote 442 | | Code 443 | | rawHtml 444 | | entity 445 | | EscapedChar 446 | | smart 447 | | symbol 448 | 449 | ; 450 | SPace : Spacechar+ 451 | 452 | 453 | ; 454 | str : NormalChar+ StrChunk* 455 | 456 | 457 | ; 458 | StrChunk : (NormalChar | '_'+ Alphanumeric)+ | AposChunk 459 | 460 | ; 461 | AposChunk : '\'' Alphanumeric 462 | 463 | 464 | ; 465 | EscapedChar : '\\' [\-\\`|*_{}[\]()# +.!><] 466 | 467 | 468 | ; 469 | entity : (HexEntity | DecEntity | CharEntity) 470 | 471 | 472 | ; 473 | endline : lineBreak | terminalEndline | normalEndline 474 | 475 | ; 476 | normalEndline : sp Newline 477 | 478 | 479 | ; 480 | terminalEndline : sp Newline EOF 481 | 482 | 483 | ; 484 | lineBreak : ' ' normalEndline 485 | 486 | 487 | ; 488 | symbol : SpecialChar 489 | 490 | 491 | // this keeps the parser from getting bogged down on long strings of * or _, 492 | // or strings of * or _ with SPace on each side: 493 | ; 494 | ulOrStarLine : (ulLine | starLine) 495 | ; 496 | starLine : '****' '*'* | spacechar '*'+ spacechar 497 | ; 498 | ulLine : '____' '_'* | spacechar '_'+ spacechar 499 | 500 | ; 501 | emph : emphStar | emphUl 502 | 
503 | ; 504 | // whiteSPace : SPacechar | newline; 505 | 506 | emphStar : '*' (inline | strongStar)+ '*'; 507 | 508 | emphUl : '_' (inline | strongUl)+ '_'; 509 | 510 | 511 | strong : strongStar | strongUl 512 | 513 | ; 514 | strongStar : '**' inline+ '**'; 515 | 516 | 517 | strongUl : '__' inline+ '__' 518 | 519 | ; 520 | strike : '~~' inline+ '~~' 521 | 522 | 523 | ; 524 | image : '!' (explicitLink | referenceLink) 525 | 526 | ; 527 | link : explicitLink | referenceLink | AutoLink 528 | 529 | ; 530 | referenceLink : referenceLinkDouble | referenceLinkSingle 531 | 532 | ; 533 | referenceLinkDouble : label spnl label 534 | 535 | ; 536 | referenceLinkSingle : label (spnl '[]')? 537 | 538 | 539 | ; 540 | explicitLink : label '(' sp ('<' sourceContents '>' | sourceContents) spnl (TitleSingle | TitleDouble )? sp ')' 541 | 542 | 543 | ; 544 | sourceContents : (NonSpaceChar+ | '(' sourceContents ')')* 545 | 546 | ; 547 | 548 | TitleSingle : '\'' [^\n\r ')]* '\'' 549 | 550 | ; 551 | TitleDouble : '"' [^\n\r ")]* '"' 552 | 553 | ; 554 | AutoLink : AutoLinkUrl | AutoLinkEmail 555 | 556 | ; 557 | AutoLinkUrl : '<' [A-Za-z]+ '://' [^>\n\r]+ '>' 558 | 559 | 560 | ; 561 | AutoLinkEmail : '<' 'mailto:'? [-A-Za-z0-9.\\+_/!%~$]+ '@' [^>\n\r]+ '>' 562 | 563 | 564 | ; 565 | reference : nonindentSpace label ':' spnl RefSrc refTitle blankLine+ 566 | 567 | 568 | ; 569 | label : '[' inline* ']' 570 | 571 | ; 572 | RefSrc : NonSpaceChar+ 573 | 574 | 575 | ; 576 | refTitle : (RefTitleSingle | RefTitleDouble | RefTitleParens)? 
577 | 578 | ; 579 | RefTitleSingle : Spnl '\'' [' \n\r]* '\'' 580 | 581 | ; 582 | RefTitleDouble : Spnl '"' [" \n\r]* '"' 583 | 584 | ; 585 | RefTitleParens : Spnl '(' [) \n\r]* ')' 586 | 587 | ; 588 | // references : (reference | skipBlock)*; 589 | 590 | 591 | fragment Ticks1 : '`' 592 | ; 593 | fragment Ticks2 : '``' 594 | ; 595 | fragment Ticks3 : '```' 596 | ; 597 | fragment Ticks4 : '````' 598 | ; 599 | fragment Ticks5 : '`````' 600 | 601 | ; 602 | Code : Ticks1 SP (NonSpaceChar+ | '`'+ | Spacechar | Newline)+ SP Ticks1 603 | | Ticks2 SP (NonSpaceChar+ | '`'+ | Spacechar | Newline)+ SP Ticks2 604 | | Ticks3 SP (NonSpaceChar+ | '`'+ | Spacechar | Newline)+ SP Ticks3 605 | | Ticks4 SP (NonSpaceChar+ | '`'+ | Spacechar | Newline)+ SP Ticks4 606 | | Ticks5 SP (NonSpaceChar+ | '`'+ | Spacechar | Newline)+ SP Ticks5 607 | ; 608 | rawHtml : HtmlComment | htmlBlockScript | HtmlTag 609 | 610 | ; 611 | blankLine : sp Newline 612 | 613 | ; 614 | Quoted : '"' [^"]* '"' | '\'' [^']* '\'' 615 | ; 616 | HtmlAttribute : (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | NonSpaceChar+))? Spnl 617 | ; 618 | HtmlComment : '' 619 | ; 620 | HtmlTag : '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>' 621 | ; 622 | spacechar: ' ' | '\t'; 623 | 624 | fragment Spacechar : ' ' | '\t' 625 | ; 626 | NonSpaceChar : [^ \t\r\n] 627 | ; 628 | Newline : '\n' | '\r' '\n'? 629 | ; 630 | sp: spacechar*; 631 | 632 | fragment SP : Spacechar* 633 | ; 634 | spnl : sp (Newline sp)?; 635 | 636 | fragment Spnl : SP (Newline SP)?; 637 | 638 | SpecialChar : '~' | '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar 639 | ; 640 | NormalChar : ~ ('~' | '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '"' | '.' 
| '-' | '\'' | '^' | ' ' | '\t' | '\r' | '\n' ) 641 | ; 642 | fragment Alphanumeric : [0-9A-Za-z] | '\u0200' | '\u0201' | '\u0202' | '\u0203' | '\u0204' | '\u0205' | '\u0206' | '\u0207' | '\u0210' | '\u0211' | '\u0212' | '\u0213' | '\u0214' 643 | | '\u0215' | '\u0216' | '\u0217' | '\u0220' | '\u0221' | '\u0222' | '\u0223' | '\u0224' | '\u0225' | '\u0226' | '\u0227' | '\u0230' | '\u0231' | '\u0232' | '\u0233' | '\u0234' | '\u0235' 644 | | '\u0236' | '\u0237' | '\u0240' | '\u0241' | '\u0242' | '\u0243' | '\u0244' | '\u0245' | '\u0246' | '\u0247' | '\u0250' | '\u0251' | '\u0252' | '\u0253' | '\u0254' | '\u0255' | '\u0256' 645 | | '\u0257' | '\u0260' | '\u0261' | '\u0262' | '\u0263' | '\u0264' | '\u0265' | '\u0266' | '\u0267' | '\u0270' | '\u0271' | '\u0272' | '\u0273' | '\u0274' | '\u0275' | '\u0276' | '\u0277' 646 | | '\u0300' | '\u0301' | '\u0302' | '\u0303' | '\u0304' | '\u0305' | '\u0306' | '\u0307' | '\u0310' | '\u0311' | '\u0312' | '\u0313' | '\u0314' | '\u0315' | '\u0316' | '\u0317' | '\u0320' 647 | | '\u0321' | '\u0322' | '\u0323' | '\u0324' | '\u0325' | '\u0326' | '\u0327' | '\u0330' | '\u0331' | '\u0332' | '\u0333' | '\u0334' | '\u0335' | '\u0336' | '\u0337' | '\u0340' | '\u0341' 648 | | '\u0342' | '\u0343' | '\u0344' | '\u0345' | '\u0346' | '\u0347' | '\u0350' | '\u0351' | '\u0352' | '\u0353' | '\u0354' | '\u0355' | '\u0356' | '\u0357' | '\u0360' | '\u0361' | '\u0362' 649 | | '\u0363' | '\u0364' | '\u0365' | '\u0366' | '\u0367' | '\u0370' | '\u0371' | '\u0372' | '\u0373' | '\u0374' | '\u0375' | '\u0376' | '\u0377' 650 | ; 651 | fragment AlphanumericAscii : [A-Za-z0-9] 652 | ; 653 | // Digit : [0-9]; 654 | 655 | bOM : '\u0357\u0273\u0277' 656 | 657 | ; 658 | HexEntity : '&' '#' [Xx] [0-9a-fA-F]+ ';'; 659 | DecEntity : '&' '#' [0-9]+ ';'; 660 | CharEntity : '&' [A-Za-z0-9]+ ';'; 661 | 662 | nonindentSpace : (' ' | ' ' | ' ')?; 663 | 664 | fragment NonindentSpace : (' ' | ' ' | ' ')? 
665 | ; 666 | Indent : '\t' | ' ' 667 | ; 668 | indentedLine : Indent line 669 | ; 670 | optionallyIndentedLine : Indent? line 671 | 672 | ; 673 | line : RawLine 674 | 675 | ; 676 | RawLine : [^\r\n]* Newline | .+? EOF 677 | 678 | ; 679 | // skipBlock : htmlBlock | rawLine+ blankLine* | blankLine+ | rawLine; 680 | 681 | // syntax extensions 682 | 683 | ExtendedSpecialChar : '.' | '-' | '\'' | '"' | '^' 684 | 685 | ; 686 | smart : Ellipsis | Dash | singleQuoted | doubleQuoted | '\'' 687 | 688 | ; 689 | Ellipsis : '...' | '. . .' 690 | 691 | 692 | ; 693 | Dash : EmDash | EnDash 694 | 695 | ; 696 | EnDash : '-' 697 | 698 | 699 | ; 700 | EmDash : '---' | '--' 701 | 702 | 703 | ; 704 | singleQuoted : '\'' inline+ '\'' 705 | 706 | ; 707 | doubleQuoted : '"' inline+ '"' 708 | 709 | 710 | ; 711 | noteReference : 712 | RawNoteReference 713 | 714 | ; 715 | RawNoteReference : '[^' [^\n\r\]]+ ']' 716 | 717 | 718 | ; 719 | note : nonindentSpace RawNoteReference ':' sp rawNoteBlock (Indent rawNoteBlock)* 720 | 721 | ; 722 | inlineNote : '^[' inline+ ']' 723 | 724 | 725 | ; 726 | // notes := (note | skipBlock)*, 727 | 728 | 729 | rawNoteBlock : optionallyIndentedLine+ blankLine* 730 | 731 | ; 732 | -------------------------------------------------------------------------------- /grammars/antlr/url.g4: -------------------------------------------------------------------------------- 1 | /* 2 | BSD License 3 | Copyright (c) 2016, Tom Everett 4 | All rights reserved. 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 3. 
Neither the name of Tom Everett nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | /** 30 | * scheme:[//[user:password@]host[:port]][/]path[?query][#fragment] 31 | */ 32 | grammar url; 33 | 34 | url 35 | : uri 36 | ; 37 | 38 | uri 39 | : scheme '://' login? host (':' port)? ('/' path)? query? frag? WS? 40 | ; 41 | 42 | scheme 43 | : string 44 | ; 45 | 46 | host 47 | : '/'? (hostname | hostnumber) 48 | ; 49 | 50 | hostname 51 | : string ('.' string)* 52 | ; 53 | 54 | hostnumber 55 | : DIGITS '.' DIGITS '.' DIGITS '.' DIGITS 56 | ; 57 | 58 | port 59 | : DIGITS 60 | ; 61 | 62 | path 63 | : string ('/' string)* 64 | ; 65 | 66 | user 67 | : string 68 | ; 69 | 70 | login 71 | : user ':' password '@' 72 | ; 73 | 74 | password 75 | : string 76 | ; 77 | 78 | frag 79 | : ('#' string) 80 | ; 81 | 82 | query 83 | : ('?' search) 84 | ; 85 | 86 | search 87 | : searchparameter ('&' searchparameter)* 88 | ; 89 | 90 | searchparameter 91 | : string ('=' (string | DIGITS | HEX))? 
92 | ; 93 | 94 | string 95 | : STRING 96 | ; 97 | 98 | 99 | DIGITS 100 | : [0-9] + 101 | ; 102 | 103 | 104 | HEX 105 | : ('%' [a-fA-F0-9] [a-fA-F0-9]) + 106 | ; 107 | 108 | 109 | STRING 110 | : ([a-zA-Z~0-9] | HEX) ([a-zA-Z0-9.-] | HEX)* 111 | ; 112 | 113 | 114 | WS 115 | : [\r\n] + 116 | ; 117 | -------------------------------------------------------------------------------- /grammars/tribble/csv.scala: -------------------------------------------------------------------------------- 1 | import saarland.cispa.se.tribble.dsl._ 2 | 3 | // translated from https://github.com/antlr/grammars-v4/blob/master/csv/CSV.g4 4 | // as of commit 323e4a99aa693d057a413dde99469e4c70aa8916 5 | 6 | Grammar( 7 | 'csvFile := 'hdr ~ 'row.rep(1), 8 | 9 | 'hdr := 'row, 10 | 11 | 'row := 'field ~ ("," ~ 'field).rep ~ "\r".? ~ "\n", 12 | 13 | 'field := 'TEXT | 'STRING | "", 14 | 15 | 'TEXT := "[^,\n\r\"]+".regex, 16 | 17 | 'STRING := "\"" ~ ("\"\"" | "~[\"]".regex).rep ~ "\"" // quote-quote is an escaped quote 18 | ) 19 | -------------------------------------------------------------------------------- /grammars/tribble/json.scala: -------------------------------------------------------------------------------- 1 | import saarland.cispa.se.tribble.dsl._ 2 | 3 | // translated from https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4 4 | // as of commit 16612bc74aea6e07afb9e0f54d9c7c5fac1475b1 5 | 6 | Grammar( 7 | 'json := 'value, 8 | 9 | 'obj := "{" ~ 'OWS ~ 'pair ~ 'OWS ~ ("," ~ 'OWS ~ 'pair).rep ~ 'OWS ~ "}" | "{" ~ 'OWS ~ "}", 10 | 11 | 'pair := 'STRING ~ 'OWS ~ ":" ~ 'OWS ~ 'value, 12 | 13 | 'array := "[" ~ 'OWS ~ 'value ~ 'OWS ~ ("," ~ 'OWS ~ 'value).rep ~ 'OWS ~ "]" | "[" ~ 'OWS ~ "]", 14 | 15 | 'value := 'STRING 16 | | 'NUMBER 17 | | 'obj 18 | | 'array 19 | | "true" 20 | | "false" 21 | | "null" 22 | , 23 | 24 | 'STRING := "\"" ~ ('ESC | 'SAFECODEPOINT).rep ~ "\"", 25 | 26 | 'ESC := "\\" ~ ("""[\"\\/bfnrt]""".regex | 'UNICODE), 27 | 28 | 'UNICODE := "u" ~ 'HEX ~ 'HEX ~ 'HEX ~ 
'HEX, 29 | 30 | 'HEX := "[0-9a-fA-F]".regex, 31 | 32 | 'SAFECODEPOINT := "[^\u0000-\u001F\\\"\\\\]".regex, 33 | 34 | 'NUMBER := "-".? ~ 'INT ~ ("." ~ "[0-9]+".regex).? ~ 'EXP.?, 35 | 36 | 'INT := "0" | "[1-9][0-9]*".regex, 37 | 38 | 'EXP := "[Ee][+-]?".regex ~ 'INT, 39 | 40 | 'OWS := 'WS.rep, 41 | 42 | 'WS := " " | "\t" | "\n" | "\r" 43 | ) 44 | -------------------------------------------------------------------------------- /grammars/tribble/markdown.scala: -------------------------------------------------------------------------------- 1 | import saarland.cispa.se.tribble.dsl._ 2 | 3 | // Translated from https://github.com/jgm/peg-markdown/blob/master/markdown_parser.leg 4 | 5 | Grammar( 6 | 'Doc := 'BOM.? ~ 'StartList ~ 'Block.rep, 7 | 8 | 'Block := 'BlankLine.rep ~ 9 | ('BlockQuote 10 | | 'Verbatim 11 | | 'Note 12 | | 'Reference 13 | | 'HorizontalRule 14 | | 'Heading 15 | | 'OrderedList 16 | | 'BulletList 17 | | 'HtmlBlock 18 | | 'StyleBlock 19 | | 'Para 20 | | 'Plain) 21 | 22 | , 23 | 24 | 'Para := 'NonindentSpace ~ 'Inlines ~ 'BlankLine.rep(1) 25 | 26 | 27 | , 28 | 'Plain := 'Inlines 29 | 30 | , 31 | 'AtxInline := 'Inline 32 | 33 | , 34 | 'AtxStart := "######" | "#####" | "####" | "###" | "##" | "#" 35 | 36 | 37 | , 38 | 'AtxHeading := 'AtxStart ~ 'Sp ~ 'StartList ~ 'AtxInline.rep(1) ~ ('Sp ~ "#".rep ~ 'Sp).? ~ 'Newline 39 | 40 | 41 | , 42 | 'SetextHeading := 'SetextHeading1 | 'SetextHeading2 43 | 44 | , 45 | 'SetextBottom1 := "=".rep(1) ~ 'Newline 46 | 47 | , 48 | 'SetextBottom2 := "-".rep(1) ~ 'Newline 49 | 50 | , 51 | 'SetextHeading1 := 'StartList ~ 'Inline.rep(1) ~ 'Sp ~ 'Newline ~ 'SetextBottom1 52 | 53 | , 54 | 'SetextHeading2 := 'StartList ~ 'Inline.rep(1) ~ 'Sp ~ 'Newline ~ 'SetextBottom2 55 | 56 | , 57 | 'Heading := 'SetextHeading | 'AtxHeading 58 | 59 | , 60 | 'BlockQuote := 'BlockQuoteRaw 61 | 62 | 63 | , 64 | 'BlockQuoteRaw := 'StartList ~ (">" ~ " ".? 
~ 'Line ~ 'Line.rep ~ 'BlankLine.rep).rep(1) 65 | 66 | 67 | , 68 | 'NonblankIndentedLine := 'IndentedLine 69 | 70 | , 71 | 'VerbatimChunk := 'StartList ~ 'BlankLine.rep ~ 'NonblankIndentedLine.rep(1) 72 | 73 | 74 | , 75 | 'Verbatim := 'StartList ~ 'VerbatimChunk.rep(1) 76 | 77 | 78 | , 79 | 'HorizontalRule := 'NonindentSpace ~ 80 | ("*" ~ 'Sp ~ "*" ~ 'Sp ~ "*" ~ ('Sp ~ "*").rep 81 | | "-" ~ 'Sp ~ "-" ~ 'Sp ~ "-" ~ ('Sp ~ "-").rep 82 | | "_" ~ 'Sp ~ "_" ~ 'Sp ~ "_" ~ ('Sp ~ "_").rep) ~ 83 | 'Sp ~ 'Newline ~ 'BlankLine.rep(1) 84 | 85 | 86 | , 87 | 'Bullet := 'NonindentSpace ~ ("+" | "*" | "-") ~ 'Spacechar.rep(1) 88 | 89 | , 90 | 'BulletList := ('ListTight | 'ListLoose) 91 | 92 | 93 | , 94 | 'ListTight := 'StartList ~ 'ListItemTight.rep(1) ~ 'BlankLine.rep 95 | 96 | , 97 | 'ListLoose := 'StartList ~ ('ListItem ~ 'BlankLine.rep).rep(1) 98 | 99 | 100 | , 101 | 'ListItem := ('Bullet | 'Enumerator) ~ 'StartList ~ 'ListBlock ~ 'ListContinuationBlock.rep, 102 | 103 | 104 | 'ListItemTight := ('Bullet | 'Enumerator) ~ 'StartList ~ 'ListBlock ~ 'ListContinuationBlock.rep 105 | 106 | 107 | , 108 | 'ListBlock := 'StartList ~ 'Line ~ 'ListBlockLine.rep 109 | 110 | 111 | , 112 | 'ListContinuationBlock := 'StartList ~ 'BlankLine.rep ~ ('Indent ~ 'ListBlock).rep(1) 113 | 114 | 115 | , 116 | 'Enumerator := 'NonindentSpace ~ "[0-9]".regex.rep(1) ~ "." ~ 'Spacechar.rep(1) 117 | 118 | , 119 | 'OrderedList := 'ListTight | 'ListLoose 120 | 121 | 122 | , 123 | 'ListBlockLine := 124 | 125 | 126 | 'OptionallyIndentedLine 127 | 128 | // 'Parsers 'for 'different 'kinds 'of 'block-level 'HTML 'content. 129 | // 'This 'is 'repetitive 'due 'to 'constraints 'of 'PEG 'grammar. 
130 | 131 | , 132 | 'HtmlBlockOpenAddress := "<" ~ 'Spnl ~ ("address" | "ADDRESS") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 133 | , 134 | 'HtmlBlockCloseAddress := "<" ~ 'Spnl ~ "/" ~ ("address" | "ADDRESS") ~ 'Spnl ~ ">" 135 | , 136 | 'HtmlBlockAddress := 'HtmlBlockOpenAddress ~ 'HtmlBlockAddress.rep ~ 'HtmlBlockCloseAddress 137 | 138 | , 139 | 'HtmlBlockOpenBlockquote := "<" ~ 'Spnl ~ ("blockquote" | "BLOCKQUOTE") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 140 | , 141 | 'HtmlBlockCloseBlockquote := "<" ~ 'Spnl ~ "/" ~ ("blockquote" | "BLOCKQUOTE") ~ 'Spnl ~ ">" 142 | , 143 | 'HtmlBlockBlockquote := 'HtmlBlockOpenBlockquote ~ 'HtmlBlockBlockquote.rep ~ 'HtmlBlockCloseBlockquote 144 | 145 | , 146 | 'HtmlBlockOpenCenter := "<" ~ 'Spnl ~ ("center" | "CENTER") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 147 | , 148 | 'HtmlBlockCloseCenter := "<" ~ 'Spnl ~ "/" ~ ("center" | "CENTER") ~ 'Spnl ~ ">" 149 | , 150 | 'HtmlBlockCenter := 'HtmlBlockOpenCenter ~ 'HtmlBlockCenter.rep ~ 'HtmlBlockCloseCenter 151 | 152 | , 153 | 'HtmlBlockOpenDir := "<" ~ 'Spnl ~ ("dir" | "DIR") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 154 | , 155 | 'HtmlBlockCloseDir := "<" ~ 'Spnl ~ "/" ~ ("dir" | "DIR") ~ 'Spnl ~ ">" 156 | , 157 | 'HtmlBlockDir := 'HtmlBlockOpenDir ~ 'HtmlBlockDir.rep ~ 'HtmlBlockCloseDir 158 | 159 | , 160 | 'HtmlBlockOpenDiv := "<" ~ 'Spnl ~ ("div" | "DIV") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 161 | , 162 | 'HtmlBlockCloseDiv := "<" ~ 'Spnl ~ "/" ~ ("div" | "DIV") ~ 'Spnl ~ ">" 163 | , 164 | 'HtmlBlockDiv := 'HtmlBlockOpenDiv ~ 'HtmlBlockDiv.rep ~ 'HtmlBlockCloseDiv 165 | 166 | , 167 | 'HtmlBlockOpenDl := "<" ~ 'Spnl ~ ("dl" | "DL") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 168 | , 169 | 'HtmlBlockCloseDl := "<" ~ 'Spnl ~ "/" ~ ("dl" | "DL") ~ 'Spnl ~ ">" 170 | , 171 | 'HtmlBlockDl := 'HtmlBlockOpenDl ~ 'HtmlBlockDl.rep ~ 'HtmlBlockCloseDl 172 | 173 | , 174 | 'HtmlBlockOpenFieldset := "<" ~ 'Spnl ~ ("fieldset" | "FIELDSET") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 175 | , 176 | 'HtmlBlockCloseFieldset := "<" ~ 'Spnl ~ 
"/" ~ ("fieldset" | "FIELDSET") ~ 'Spnl ~ ">" 177 | , 178 | 'HtmlBlockFieldset := 'HtmlBlockOpenFieldset ~ 'HtmlBlockFieldset.rep ~ 'HtmlBlockCloseFieldset 179 | 180 | , 181 | 'HtmlBlockOpenForm := "<" ~ 'Spnl ~ ("form" | "FORM") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 182 | , 183 | 'HtmlBlockCloseForm := "<" ~ 'Spnl ~ "/" ~ ("form" | "FORM") ~ 'Spnl ~ ">" 184 | , 185 | 'HtmlBlockForm := 'HtmlBlockOpenForm ~ 'HtmlBlockForm.rep ~ 'HtmlBlockCloseForm 186 | 187 | , 188 | 'HtmlBlockOpenH1 := "<" ~ 'Spnl ~ ("h1" | "H1") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 189 | , 190 | 'HtmlBlockCloseH1 := "<" ~ 'Spnl ~ "/" ~ ("h1" | "H1") ~ 'Spnl ~ ">" 191 | , 192 | 'HtmlBlockH1 := 'HtmlBlockOpenH1 ~ 'HtmlBlockH1.rep ~ 'HtmlBlockCloseH1 193 | 194 | , 195 | 'HtmlBlockOpenH2 := "<" ~ 'Spnl ~ ("h2" | "H2") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 196 | , 197 | 'HtmlBlockCloseH2 := "<" ~ 'Spnl ~ "/" ~ ("h2" | "H2") ~ 'Spnl ~ ">" 198 | , 199 | 'HtmlBlockH2 := 'HtmlBlockOpenH2 ~ 'HtmlBlockH2.rep ~ 'HtmlBlockCloseH2 200 | 201 | , 202 | 'HtmlBlockOpenH3 := "<" ~ 'Spnl ~ ("h3" | "H3") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 203 | , 204 | 'HtmlBlockCloseH3 := "<" ~ 'Spnl ~ "/" ~ ("h3" | "H3") ~ 'Spnl ~ ">" 205 | , 206 | 'HtmlBlockH3 := 'HtmlBlockOpenH3 ~ 'HtmlBlockH3.rep ~ 'HtmlBlockCloseH3 207 | 208 | , 209 | 'HtmlBlockOpenH4 := "<" ~ 'Spnl ~ ("h4" | "H4") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 210 | , 211 | 'HtmlBlockCloseH4 := "<" ~ 'Spnl ~ "/" ~ ("h4" | "H4") ~ 'Spnl ~ ">" 212 | , 213 | 'HtmlBlockH4 := 'HtmlBlockOpenH4 ~ 'HtmlBlockH4.rep ~ 'HtmlBlockCloseH4 214 | 215 | , 216 | 'HtmlBlockOpenH5 := "<" ~ 'Spnl ~ ("h5" | "H5") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 217 | , 218 | 'HtmlBlockCloseH5 := "<" ~ 'Spnl ~ "/" ~ ("h5" | "H5") ~ 'Spnl ~ ">" 219 | , 220 | 'HtmlBlockH5 := 'HtmlBlockOpenH5 ~ 'HtmlBlockH5.rep ~ 'HtmlBlockCloseH5 221 | 222 | , 223 | 'HtmlBlockOpenH6 := "<" ~ 'Spnl ~ ("h6" | "H6") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 224 | , 225 | 'HtmlBlockCloseH6 := "<" ~ 'Spnl ~ "/" ~ ("h6" | "H6") ~ 'Spnl ~ ">" 226 | 
, 227 | 'HtmlBlockH6 := 'HtmlBlockOpenH6 ~ 'HtmlBlockH6.rep ~ 'HtmlBlockCloseH6 228 | 229 | , 230 | 'HtmlBlockOpenMenu := "<" ~ 'Spnl ~ ("menu" | "MENU") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 231 | , 232 | 'HtmlBlockCloseMenu := "<" ~ 'Spnl ~ "/" ~ ("menu" | "MENU") ~ 'Spnl ~ ">" 233 | , 234 | 'HtmlBlockMenu := 'HtmlBlockOpenMenu ~ 'HtmlBlockMenu.rep ~ 'HtmlBlockCloseMenu 235 | 236 | , 237 | 'HtmlBlockOpenNoframes := "<" ~ 'Spnl ~ ("noframes" | "NOFRAMES") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 238 | , 239 | 'HtmlBlockCloseNoframes := "<" ~ 'Spnl ~ "/" ~ ("noframes" | "NOFRAMES") ~ 'Spnl ~ ">" 240 | , 241 | 'HtmlBlockNoframes := 'HtmlBlockOpenNoframes ~ 'HtmlBlockNoframes.rep ~ 'HtmlBlockCloseNoframes 242 | 243 | , 244 | 'HtmlBlockOpenNoscript := "<" ~ 'Spnl ~ ("noscript" | "NOSCRIPT") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 245 | , 246 | 'HtmlBlockCloseNoscript := "<" ~ 'Spnl ~ "/" ~ ("noscript" | "NOSCRIPT") ~ 'Spnl ~ ">" 247 | , 248 | 'HtmlBlockNoscript := 'HtmlBlockOpenNoscript ~ 'HtmlBlockNoscript.rep ~ 'HtmlBlockCloseNoscript 249 | 250 | , 251 | 'HtmlBlockOpenOl := "<" ~ 'Spnl ~ ("ol" | "OL") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 252 | , 253 | 'HtmlBlockCloseOl := "<" ~ 'Spnl ~ "/" ~ ("ol" | "OL") ~ 'Spnl ~ ">" 254 | , 255 | 'HtmlBlockOl := 'HtmlBlockOpenOl ~ 'HtmlBlockOl.rep ~ 'HtmlBlockCloseOl 256 | 257 | , 258 | 'HtmlBlockOpenP := "<" ~ 'Spnl ~ ("p" | "P") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 259 | , 260 | 'HtmlBlockCloseP := "<" ~ 'Spnl ~ "/" ~ ("p" | "P") ~ 'Spnl ~ ">" 261 | , 262 | 'HtmlBlockP := 'HtmlBlockOpenP ~ 'HtmlBlockP.rep ~ 'HtmlBlockCloseP 263 | 264 | , 265 | 'HtmlBlockOpenPre := "<" ~ 'Spnl ~ ("pre" | "PRE") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 266 | , 267 | 'HtmlBlockClosePre := "<" ~ 'Spnl ~ "/" ~ ("pre" | "PRE") ~ 'Spnl ~ ">" 268 | , 269 | 'HtmlBlockPre := 'HtmlBlockOpenPre ~ 'HtmlBlockPre.rep ~ 'HtmlBlockClosePre 270 | 271 | , 272 | 'HtmlBlockOpenTable := "<" ~ 'Spnl ~ ("table" | "TABLE") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 273 | , 274 | 'HtmlBlockCloseTable 
:= "<" ~ 'Spnl ~ "/" ~ ("table" | "TABLE") ~ 'Spnl ~ ">" 275 | , 276 | 'HtmlBlockTable := 'HtmlBlockOpenTable ~ 'HtmlBlockTable.rep ~ 'HtmlBlockCloseTable 277 | 278 | , 279 | 'HtmlBlockOpenUl := "<" ~ 'Spnl ~ ("ul" | "UL") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 280 | , 281 | 'HtmlBlockCloseUl := "<" ~ 'Spnl ~ "/" ~ ("ul" | "UL") ~ 'Spnl ~ ">" 282 | , 283 | 'HtmlBlockUl := 'HtmlBlockOpenUl ~ 'HtmlBlockUl.rep ~ 'HtmlBlockCloseUl 284 | 285 | , 286 | 'HtmlBlockOpenDd := "<" ~ 'Spnl ~ ("dd" | "DD") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 287 | , 288 | 'HtmlBlockCloseDd := "<" ~ 'Spnl ~ "/" ~ ("dd" | "DD") ~ 'Spnl ~ ">" 289 | , 290 | 'HtmlBlockDd := 'HtmlBlockOpenDd ~ 'HtmlBlockDd.rep ~ 'HtmlBlockCloseDd 291 | 292 | , 293 | 'HtmlBlockOpenDt := "<" ~ 'Spnl ~ ("dt" | "DT") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 294 | , 295 | 'HtmlBlockCloseDt := "<" ~ 'Spnl ~ "/" ~ ("dt" | "DT") ~ 'Spnl ~ ">" 296 | , 297 | 'HtmlBlockDt := 'HtmlBlockOpenDt ~ 'HtmlBlockDt.rep ~ 'HtmlBlockCloseDt 298 | 299 | , 300 | 'HtmlBlockOpenFrameset := "<" ~ 'Spnl ~ ("frameset" | "FRAMESET") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 301 | , 302 | 'HtmlBlockCloseFrameset := "<" ~ 'Spnl ~ "/" ~ ("frameset" | "FRAMESET") ~ 'Spnl ~ ">" 303 | , 304 | 'HtmlBlockFrameset := 'HtmlBlockOpenFrameset ~ 'HtmlBlockFrameset.rep ~ 'HtmlBlockCloseFrameset 305 | 306 | , 307 | 'HtmlBlockOpenLi := "<" ~ 'Spnl ~ ("li" | "LI") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 308 | , 309 | 'HtmlBlockCloseLi := "<" ~ 'Spnl ~ "/" ~ ("li" | "LI") ~ 'Spnl ~ ">" 310 | , 311 | 'HtmlBlockLi := 'HtmlBlockOpenLi ~ 'HtmlBlockLi.rep ~ 'HtmlBlockCloseLi 312 | 313 | , 314 | 'HtmlBlockOpenTbody := "<" ~ 'Spnl ~ ("tbody" | "TBODY") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 315 | , 316 | 'HtmlBlockCloseTbody := "<" ~ 'Spnl ~ "/" ~ ("tbody" | "TBODY") ~ 'Spnl ~ ">" 317 | , 318 | 'HtmlBlockTbody := 'HtmlBlockOpenTbody ~ 'HtmlBlockTbody.rep ~ 'HtmlBlockCloseTbody 319 | 320 | , 321 | 'HtmlBlockOpenTd := "<" ~ 'Spnl ~ ("td" | "TD") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 322 | , 323 | 
'HtmlBlockCloseTd := "<" ~ 'Spnl ~ "/" ~ ("td" | "TD") ~ 'Spnl ~ ">" 324 | , 325 | 'HtmlBlockTd := 'HtmlBlockOpenTd ~ 'HtmlBlockTd.rep ~ 'HtmlBlockCloseTd 326 | 327 | , 328 | 'HtmlBlockOpenTfoot := "<" ~ 'Spnl ~ ("tfoot" | "TFOOT") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 329 | , 330 | 'HtmlBlockCloseTfoot := "<" ~ 'Spnl ~ "/" ~ ("tfoot" | "TFOOT") ~ 'Spnl ~ ">" 331 | , 332 | 'HtmlBlockTfoot := 'HtmlBlockOpenTfoot ~ 'HtmlBlockTfoot.rep ~ 'HtmlBlockCloseTfoot 333 | 334 | , 335 | 'HtmlBlockOpenTh := "<" ~ 'Spnl ~ ("th" | "TH") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 336 | , 337 | 'HtmlBlockCloseTh := "<" ~ 'Spnl ~ "/" ~ ("th" | "TH") ~ 'Spnl ~ ">" 338 | , 339 | 'HtmlBlockTh := 'HtmlBlockOpenTh ~ 'HtmlBlockTh.rep ~ 'HtmlBlockCloseTh 340 | 341 | , 342 | 'HtmlBlockOpenThead := "<" ~ 'Spnl ~ ("thead" | "THEAD") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 343 | , 344 | 'HtmlBlockCloseThead := "<" ~ 'Spnl ~ "/" ~ ("thead" | "THEAD") ~ 'Spnl ~ ">" 345 | , 346 | 'HtmlBlockThead := 'HtmlBlockOpenThead ~ 'HtmlBlockThead.rep ~ 'HtmlBlockCloseThead 347 | 348 | , 349 | 'HtmlBlockOpenTr := "<" ~ 'Spnl ~ ("tr" | "TR") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 350 | , 351 | 'HtmlBlockCloseTr := "<" ~ 'Spnl ~ "/" ~ ("tr" | "TR") ~ 'Spnl ~ ">" 352 | , 353 | 'HtmlBlockTr := 'HtmlBlockOpenTr ~ 'HtmlBlockTr.rep ~ 'HtmlBlockCloseTr 354 | 355 | , 356 | 'HtmlBlockOpenScript := "<" ~ 'Spnl ~ ("script" | "SCRIPT") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 357 | , 358 | 'HtmlBlockCloseScript := "<" ~ 'Spnl ~ "/" ~ ("script" | "SCRIPT") ~ 'Spnl ~ ">" 359 | , 360 | 'HtmlBlockScript := 'HtmlBlockOpenScript ~ 'HtmlBlockCloseScript 361 | 362 | , 363 | 'HtmlBlockOpenHead := "<" ~ 'Spnl ~ ("head" | "HEAD") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 364 | , 365 | 'HtmlBlockCloseHead := "<" ~ 'Spnl ~ "/" ~ ("head" | "HEAD") ~ 'Spnl ~ ">" 366 | , 367 | 'HtmlBlockHead := 'HtmlBlockOpenHead ~ 'HtmlBlockCloseHead 368 | 369 | , 370 | 'HtmlBlockInTags := 'HtmlBlockAddress 371 | | 'HtmlBlockBlockquote 372 | | 'HtmlBlockCenter 373 | | 'HtmlBlockDir 
374 | | 'HtmlBlockDiv 375 | | 'HtmlBlockDl 376 | | 'HtmlBlockFieldset 377 | | 'HtmlBlockForm 378 | | 'HtmlBlockH1 379 | | 'HtmlBlockH2 380 | | 'HtmlBlockH3 381 | | 'HtmlBlockH4 382 | | 'HtmlBlockH5 383 | | 'HtmlBlockH6 384 | | 'HtmlBlockMenu 385 | | 'HtmlBlockNoframes 386 | | 'HtmlBlockNoscript 387 | | 'HtmlBlockOl 388 | | 'HtmlBlockP 389 | | 'HtmlBlockPre 390 | | 'HtmlBlockTable 391 | | 'HtmlBlockUl 392 | | 'HtmlBlockDd 393 | | 'HtmlBlockDt 394 | | 'HtmlBlockFrameset 395 | | 'HtmlBlockLi 396 | | 'HtmlBlockTbody 397 | | 'HtmlBlockTd 398 | | 'HtmlBlockTfoot 399 | | 'HtmlBlockTh 400 | | 'HtmlBlockThead 401 | | 'HtmlBlockTr 402 | | 'HtmlBlockScript 403 | | 'HtmlBlockHead 404 | 405 | , 406 | 'HtmlBlock := ('HtmlBlockInTags | 'HtmlComment | 'HtmlBlockSelfClosing) ~ 'BlankLine.rep(1) 407 | 408 | 409 | , 410 | 'HtmlBlockSelfClosing := "<" ~ 'Spnl ~ 'HtmlBlockType ~ 'Spnl ~ 'HtmlAttribute.rep ~ "/" ~ 'Spnl ~ ">" 411 | 412 | , 413 | 'HtmlBlockType := "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | 414 | "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" | 415 | "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" | 416 | "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | 417 | "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" | 418 | "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT" 419 | 420 | , 421 | 'StyleOpen := "<" ~ 'Spnl ~ ("style" | "STYLE") ~ 'Spnl ~ 'HtmlAttribute.rep ~ ">" 422 | , 423 | 'StyleClose := "<" ~ 'Spnl ~ "/" ~ ("style" | "STYLE") ~ 'Spnl ~ ">" 424 | , 425 | 'InStyleTags := 'StyleOpen ~ 'StyleClose 426 | , 427 | 'StyleBlock := 'InStyleTags ~ 'BlankLine.rep 428 | 429 | 430 | , 431 | 'Inlines := 'StartList ~ ('Inline 432 | | 'Endline ~ 
'Inline).rep(1) ~ 'Endline.? 433 | 434 | 435 | , 436 | 'Inline := 'Str 437 | | 'Endline 438 | | 'UlOrStarLine 439 | | 'Space 440 | | 'Strong 441 | | 'Emph 442 | | 'Strike 443 | | 'Image 444 | | 'Link 445 | | 'NoteReference 446 | | 'InlineNote 447 | | 'Code 448 | | 'RawHtml 449 | | 'Entity 450 | | 'EscapedChar 451 | | 'Smart 452 | | 'Symbol 453 | 454 | , 455 | 'Space := 'Spacechar.rep(1) 456 | 457 | 458 | , 459 | 'Str := 'StartList ~ 'NormalChar.rep(1) ~ 'StrChunk.rep 460 | 461 | 462 | , 463 | 'StrChunk := ('NormalChar | "_".rep(1) ~ 'Alphanumeric).rep(1) | 'AposChunk 464 | 465 | , 466 | 'AposChunk := "'" ~ 'Alphanumeric 467 | 468 | 469 | , 470 | 'EscapedChar := "\\" ~ """[\-\\`|*_{}[\]()# +.!><]""".regex 471 | 472 | 473 | , 474 | 'Entity := ('HexEntity | 'DecEntity | 'CharEntity) 475 | 476 | 477 | , 478 | 'Endline := 'LineBreak | 'TerminalEndline | 'NormalEndline 479 | 480 | , 481 | 'NormalEndline := 'Sp ~ 'Newline 482 | 483 | 484 | , 485 | 'TerminalEndline := 'Sp ~ 'Newline ~ 'Eof 486 | 487 | 488 | , 489 | 'LineBreak := " " ~ 'NormalEndline 490 | 491 | 492 | , 493 | 'Symbol := 'SpecialChar 494 | 495 | 496 | // 'This 'keeps 'the 'parser 'from 'getting 'bogged 'down 'on 'long 'strings 'of '*' 'or '_', 497 | // 'or 'strings 'of '*' 'or '_' 'with 'space 'on 'each 'side: 498 | , 499 | 'UlOrStarLine := ('UlLine | 'StarLine) 500 | , 501 | 'StarLine := "****" ~ "*".rep | 'Spacechar ~ "*".rep(1) ~ 'Spacechar 502 | , 503 | 'UlLine := "____" ~ "_".rep | 'Spacechar ~ "_".rep(1) ~ 'Spacechar 504 | 505 | , 506 | 'Emph := 'EmphStar | 'EmphUl 507 | 508 | , 509 | // 'Whitespace := 'Spacechar | 'Newline, 510 | 511 | 'EmphStar := "*" ~ 'StartList ~ ('Inline | 'StrongStar).rep(1) ~ "*", 512 | 513 | 'EmphUl := "_" ~ 'StartList ~ ('Inline | 'StrongUl).rep(1) ~ "_", 514 | 515 | 516 | 'Strong := 'StrongStar | 'StrongUl 517 | 518 | , 519 | 'StrongStar := "**" ~ 'StartList ~ 'Inline.rep(1) ~ "**", 520 | 521 | 522 | 'StrongUl := "__" ~ 'StartList ~ 'Inline.rep(1) ~ "__" 523 | 524 | , 525 | 
'Strike := "~~" ~ 'StartList ~ 'Inline.rep(1) ~ "~~" 526 | 527 | 528 | , 529 | 'Image := "!" ~ ('ExplicitLink | 'ReferenceLink) 530 | 531 | , 532 | 'Link := 'ExplicitLink | 'ReferenceLink | 'AutoLink 533 | 534 | , 535 | 'ReferenceLink := 'ReferenceLinkDouble | 'ReferenceLinkSingle 536 | 537 | , 538 | 'ReferenceLinkDouble := 'Label ~ 'Spnl ~ 'Label 539 | 540 | , 541 | 'ReferenceLinkSingle := 'Label ~ ('Spnl ~ "[]").? 542 | 543 | 544 | , 545 | 'ExplicitLink := 'Label ~ "(" ~ 'Sp ~ 'Source ~ 'Spnl ~ 'Title ~ 'Sp ~ ")" 546 | 547 | 548 | , 549 | 'Source := ("<" ~ 'SourceContents ~ ">" | 'SourceContents) 550 | 551 | 552 | , 553 | 'SourceContents := ('Nonspacechar.rep(1) | "(" ~ 'SourceContents ~ ")").rep 554 | 555 | , 556 | 'Title := ('TitleSingle | 'TitleDouble | "") 557 | 558 | 559 | , 560 | 'TitleSingle := "'" ~ "[^\n\r\\)']*".regex ~ "'" 561 | 562 | , 563 | 'TitleDouble := "\"" ~ "[^\n\r\\)\\\"]*".regex ~ "\"" 564 | 565 | , 566 | 'AutoLink := 'AutoLinkUrl | 'AutoLinkEmail 567 | 568 | , 569 | 'AutoLinkUrl := "<" ~ "[A-Za-z]".regex.rep(1) ~ "://" ~ "[^>\n\r]+".regex ~ ">" 570 | 571 | 572 | , 573 | 'AutoLinkEmail := "<" ~ "mailto:".? 
~ "[-A-Za-z0-9.\\+_./!%~$]+".regex ~ "@" ~ "[^>\n\r]+".regex ~ ">" 574 | 575 | 576 | , 577 | 'Reference := 'NonindentSpace ~ 'Label ~ ":" ~ 'Spnl ~ 'RefSrc ~ 'RefTitle ~ 'BlankLine.rep(1) 578 | 579 | 580 | , 581 | 'Label := "[" ~ 'StartList ~ 'Inline.rep ~ "]" 582 | 583 | , 584 | 'RefSrc := 'Nonspacechar.rep(1) 585 | 586 | 587 | , 588 | 'RefTitle := 'RefTitleSingle | 'RefTitleDouble | 'RefTitleParens | 'EmptyTitle 589 | 590 | 591 | , 592 | 'EmptyTitle := "" 593 | 594 | , 595 | 'RefTitleSingle := 'Spnl ~ "'" ~ "(' [\n\r]|[\n\r])*".regex ~ "'" 596 | 597 | , 598 | 'RefTitleDouble := 'Spnl ~ "\"" ~ "(\\\" [\n\r]|[\n\r])*".regex ~ "\"" 599 | 600 | , 601 | 'RefTitleParens := 'Spnl ~ "(" ~ "(\\) [\n\r]|[\n\r])*".regex ~ ")" 602 | 603 | , 604 | // 'References := 'StartList ~ ('Reference | 'SkipBlock).rep, 605 | 606 | 607 | 'Ticks1 := "`" 608 | , 609 | 'Ticks2 := "``" 610 | , 611 | 'Ticks3 := "```" 612 | , 613 | 'Ticks4 := "````" 614 | , 615 | 'Ticks5 := "`````" 616 | 617 | , 618 | 'Code := 'Ticks1 ~ 'Sp ~ ('Nonspacechar.rep(1) | "`".rep(1) | 'Spacechar | 'Newline).rep(1) ~ 'Sp ~ 'Ticks1 619 | | 'Ticks2 ~ 'Sp ~ ('Nonspacechar.rep(1) | "`".rep(1) | 'Spacechar | 'Newline).rep(1) ~ 'Sp ~ 'Ticks2 620 | | 'Ticks3 ~ 'Sp ~ ('Nonspacechar.rep(1) | "`".rep(1) | 'Spacechar | 'Newline).rep(1) ~ 'Sp ~ 'Ticks3 621 | | 'Ticks4 ~ 'Sp ~ ('Nonspacechar.rep(1) | "`".rep(1) | 'Spacechar | 'Newline).rep(1) ~ 'Sp ~ 'Ticks4 622 | | 'Ticks5 ~ 'Sp ~ ('Nonspacechar.rep(1) | "`".rep(1) | 'Spacechar | 'Newline).rep(1) ~ 'Sp ~ 'Ticks5 623 | , 624 | 'RawHtml := 'HtmlComment | 'HtmlBlockScript | 'HtmlTag 625 | 626 | , 627 | 'BlankLine := 'Sp ~ 'Newline 628 | 629 | , 630 | 'Quoted := "\"" ~ "[^\\\"]*".regex ~ "\"" | "'" ~ "[^']*".regex ~ "'" 631 | , 632 | 'HtmlAttribute := ('AlphanumericAscii | "-").rep(1) ~ 'Spnl ~ ("=" ~ 'Spnl ~ ('Quoted | 'Nonspacechar.rep(1))).? ~ 'Spnl 633 | , 634 | 'HtmlComment := ")".regex ~ "-->" 635 | , 636 | 'HtmlTag := "<" ~ 'Spnl ~ "/".? 
~ 'AlphanumericAscii.rep(1) ~ 'Spnl ~ 'HtmlAttribute.rep ~ "/".? ~ 'Spnl ~ ">" 637 | , 638 | 'Eof := "\n" 639 | , 640 | 'Spacechar := " " | "\t" 641 | , 642 | 'Nonspacechar := "[^ \t\r\n]".regex 643 | , 644 | 'Newline := "\n" | "\r" ~ "\n".? 645 | , 646 | 'Sp := 'Spacechar.rep 647 | , 648 | 'Spnl := 'Sp ~ ('Newline ~ 'Sp).? 649 | , 650 | 'SpecialChar := "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "'" | "\"" | 'ExtendedSpecialChar 651 | , 652 | 'NormalChar := "[^ \r\t\n\\~\\*_`&\\[\\]\\(\\)