├── .bazelrc ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── WORKSPACE ├── app ├── BUILD ├── external │ ├── README.md │ ├── material-icons.css │ ├── material-icons.woff2 │ ├── material.css │ ├── material.js │ └── preact.js ├── image │ └── appicon.ico ├── lib │ ├── docview.js │ ├── mdl.js │ └── util.js └── style │ └── docview.css ├── data ├── nlp │ └── schemas │ │ ├── catalog.sling │ │ ├── constituency.sling │ │ ├── document-schema.sling │ │ └── meta-schema.sling └── wiki │ ├── aliases.sling │ ├── calendar.sling │ ├── countries.sling │ ├── da │ └── templates.sling │ ├── de │ └── templates.sling │ ├── en │ ├── phrases.txt │ └── templates.sling │ ├── es │ └── templates.sling │ ├── fi │ └── templates.sling │ ├── fr │ └── templates.sling │ ├── it │ └── templates.sling │ ├── languages.sling │ ├── nl │ └── templates.sling │ ├── no │ └── templates.sling │ ├── pl │ └── templates.sling │ ├── pt │ └── templates.sling │ ├── sv │ └── templates.sling │ ├── units.sling │ ├── wikidata.sling │ └── wikipedia.sling ├── doc ├── guide │ ├── README.md │ ├── caspar.md │ ├── flowasm.txt │ ├── flowin.svg │ ├── flowout.svg │ ├── frames.md │ ├── install.md │ ├── kb-browser.png │ ├── myelin.md │ ├── parsing.md │ ├── pyapi.md │ ├── training.md │ ├── wikiflow.md │ └── wikiflow.svg └── report │ ├── acl2017.sty │ ├── acl_natbib.bst │ ├── dev-eval.pdf │ ├── network.pdf │ ├── network.svg │ ├── runtime.pdf │ ├── runtime.svg │ ├── sling.bib │ └── sling.tex ├── python ├── BUILD ├── __init__.py ├── flags.py ├── log.py ├── myelin │ ├── __init__.py │ ├── builder.py │ ├── flow.py │ ├── nn.py │ ├── simulator.py │ └── tf.py ├── nlp │ ├── __init__.py │ ├── document.py │ └── parser.py ├── pysling.so ├── run.py ├── task │ ├── __init__.py │ ├── corpora.py │ ├── download.py │ ├── embedding.py │ ├── silver.py │ ├── wiki.py │ └── workflow.py └── wikibot │ ├── en_wp_dates.py │ ├── extract_dates.py │ ├── wikibot.py │ └── wikimonitor.py ├── run.sh ├── setup.sh ├── sling ├── base │ ├── 
BUILD │ ├── bitcast.h │ ├── clock.cc │ ├── clock.h │ ├── flags.cc │ ├── flags.h │ ├── init.cc │ ├── init.h │ ├── libinit.cc │ ├── logging.cc │ ├── logging.h │ ├── macros.h │ ├── perf.cc │ ├── perf.h │ ├── port.h │ ├── registry.cc │ ├── registry.h │ ├── slice.h │ ├── status.cc │ ├── status.h │ └── types.h ├── file │ ├── BUILD │ ├── embed.cc │ ├── embed.h │ ├── file.cc │ ├── file.h │ ├── posix.cc │ ├── posix.h │ ├── recordio.cc │ ├── recordio.h │ ├── repository.cc │ ├── repository.h │ ├── textmap.cc │ └── textmap.h ├── frame │ ├── BUILD │ ├── decoder.cc │ ├── decoder.h │ ├── encoder.cc │ ├── encoder.h │ ├── json.cc │ ├── json.h │ ├── object.cc │ ├── object.h │ ├── printer.cc │ ├── printer.h │ ├── reader.cc │ ├── reader.h │ ├── scanner.cc │ ├── scanner.h │ ├── serialization.cc │ ├── serialization.h │ ├── snapshot.cc │ ├── snapshot.h │ ├── store.cc │ ├── store.h │ ├── tokenizer.cc │ ├── tokenizer.h │ ├── turtle.cc │ ├── turtle.h │ ├── wire.h │ ├── xml.cc │ └── xml.h ├── http │ ├── BUILD │ ├── http-server.cc │ ├── http-server.h │ ├── http-stream.cc │ ├── http-stream.h │ ├── http-utils.cc │ ├── http-utils.h │ ├── static-content.cc │ ├── static-content.h │ ├── web-service.cc │ └── web-service.h ├── myelin │ ├── BUILD │ ├── analyze.cc │ ├── aot-linker.cc │ ├── aot-linker.h │ ├── builder.cc │ ├── builder.h │ ├── compiler.cc │ ├── compiler.h │ ├── compute.cc │ ├── compute.h │ ├── cuda │ │ ├── BUILD │ │ ├── cuda-api.cc │ │ ├── cuda-api.h │ │ ├── cuda-kernel.cc │ │ ├── cuda-kernel.h │ │ ├── cuda-runtime.cc │ │ ├── cuda-runtime.h │ │ ├── cuda.cc │ │ └── cuda.h │ ├── elf-linker.cc │ ├── elf-linker.h │ ├── express.cc │ ├── express.h │ ├── flow.cc │ ├── flow.h │ ├── generator │ │ ├── BUILD │ │ ├── elementwise.cc │ │ ├── elementwise.h │ │ ├── expression.cc │ │ ├── expression.h │ │ ├── index.cc │ │ ├── index.h │ │ ├── scalar-flt-avx.cc │ │ ├── scalar-flt-sse.cc │ │ ├── scalar-int.cc │ │ ├── vector-flt-avx128.cc │ │ ├── vector-flt-avx256.cc │ │ ├── vector-flt-avx512.cc │ │ ├── 
vector-flt-sse.cc │ │ ├── vector-int-avx128.cc │ │ ├── vector-int-avx256.cc │ │ └── vector-int-sse.cc │ ├── gradient.cc │ ├── gradient.h │ ├── graph.cc │ ├── graph.h │ ├── kernel │ │ ├── BUILD │ │ ├── arithmetic.cc │ │ ├── arithmetic.h │ │ ├── array.cc │ │ ├── avx-math.cc │ │ ├── avx-matmul.cc │ │ ├── avx-operators.cc │ │ ├── avx.cc │ │ ├── avx.h │ │ ├── cublas-matmul.cc │ │ ├── cuda-arithmetic.cc │ │ ├── cuda-array.cc │ │ ├── cuda-matmul.cc │ │ ├── cuda.cc │ │ ├── cuda.h │ │ ├── dragnn.cc │ │ ├── dragnn.h │ │ ├── generic-math.cc │ │ ├── generic-matmul.cc │ │ ├── generic-operators.cc │ │ ├── generic.cc │ │ ├── generic.h │ │ ├── gradients.cc │ │ ├── gradients.h │ │ ├── mkl.cc │ │ ├── mkl.h │ │ ├── precompute.cc │ │ ├── precompute.h │ │ ├── simd-matmul.cc │ │ ├── sse-matmul.cc │ │ ├── sse.cc │ │ ├── sse.h │ │ ├── tensorflow.cc │ │ └── tensorflow.h │ ├── learning.cc │ ├── learning.h │ ├── macro-assembler.cc │ ├── macro-assembler.h │ ├── multi-process.cc │ ├── multi-process.h │ ├── nnc.cc │ ├── profile.cc │ ├── profile.h │ ├── rnn.cc │ ├── rnn.h │ ├── simd-assembler.cc │ ├── simd-assembler.h │ └── tests │ │ ├── gradcheck.py │ │ ├── opcheck.py │ │ └── runall.sh ├── nlp │ ├── document │ │ ├── BUILD │ │ ├── affix.cc │ │ ├── affix.h │ │ ├── analyzer.cc │ │ ├── annotator.cc │ │ ├── annotator.h │ │ ├── app │ │ │ ├── analyzer.css │ │ │ ├── analyzer.html │ │ │ ├── analyzer.js │ │ │ ├── corpus.css │ │ │ ├── corpus.html │ │ │ └── corpus.js │ │ ├── corpus-browser.cc │ │ ├── document-corpus.cc │ │ ├── document-corpus.h │ │ ├── document-service.cc │ │ ├── document-service.h │ │ ├── document-tokenizer.cc │ │ ├── document-tokenizer.h │ │ ├── document.cc │ │ ├── document.h │ │ ├── features.cc │ │ ├── features.h │ │ ├── fingerprinter.cc │ │ ├── fingerprinter.h │ │ ├── lex.cc │ │ ├── lex.h │ │ ├── lexical-encoder.cc │ │ ├── lexical-encoder.h │ │ ├── lexicon.cc │ │ ├── lexicon.h │ │ ├── phrase-tokenizer.cc │ │ ├── phrase-tokenizer.h │ │ ├── text-tokenizer.cc │ │ ├── text-tokenizer.h │ │ 
├── token-properties.h │ │ └── vocabulary-builder.cc │ ├── embedding │ │ ├── BUILD │ │ ├── embedding-model.cc │ │ ├── embedding-model.h │ │ ├── fact-embeddings.cc │ │ ├── fact-plausibility.cc │ │ ├── plausibility-model.cc │ │ ├── plausibility-model.h │ │ └── word-embeddings.cc │ ├── kb │ │ ├── BUILD │ │ ├── app │ │ │ ├── appicon.ico │ │ │ ├── index.html │ │ │ ├── kb.css │ │ │ └── kb.js │ │ ├── calendar.cc │ │ ├── calendar.h │ │ ├── fact-lexicon.cc │ │ ├── facts.cc │ │ ├── facts.h │ │ ├── knowledge-server.cc │ │ ├── knowledge-service.cc │ │ ├── knowledge-service.h │ │ ├── name-table-builder.cc │ │ ├── name-table.cc │ │ ├── name-table.h │ │ ├── phrase-table-builder.cc │ │ ├── phrase-table.cc │ │ ├── phrase-table.h │ │ ├── resolver.cc │ │ └── resolver.h │ ├── parser │ │ ├── BUILD │ │ ├── action-table.cc │ │ ├── action-table.h │ │ ├── caspar-trainer.cc │ │ ├── frame-evaluation.cc │ │ ├── frame-evaluation.h │ │ ├── multiclass-delegate.cc │ │ ├── ontonotes │ │ │ ├── annotations.py │ │ │ ├── head_finder.py │ │ │ ├── make_corpus.sh │ │ │ ├── ontonotesv5_to_sling.py │ │ │ ├── shuffle.py │ │ │ └── statistics.py │ │ ├── parser-action.cc │ │ ├── parser-action.h │ │ ├── parser-annotator.cc │ │ ├── parser-features.cc │ │ ├── parser-features.h │ │ ├── parser-state.cc │ │ ├── parser-state.h │ │ ├── parser-trainer.cc │ │ ├── parser-trainer.h │ │ ├── parser.cc │ │ ├── parser.h │ │ ├── roles.cc │ │ ├── roles.h │ │ ├── tools │ │ │ ├── BUILD │ │ │ ├── commons_from_corpora.py │ │ │ ├── parse.cc │ │ │ ├── parse.py │ │ │ ├── train.sh │ │ │ ├── train_caspar.py │ │ │ ├── train_pytorch.py │ │ │ ├── validate.py │ │ │ └── viewmodel.py │ │ ├── trainer │ │ │ ├── action.py │ │ │ ├── action_table.py │ │ │ ├── cascade.py │ │ │ ├── corpora.py │ │ │ ├── lexical_encoder.py │ │ │ ├── lexicon.py │ │ │ ├── parser_state.py │ │ │ ├── pytorch_modules.py │ │ │ ├── spec.py │ │ │ ├── trace.py │ │ │ ├── train_util.py │ │ │ ├── trainer.py │ │ │ └── transition_generator.py │ │ ├── transition-generator.cc │ │ └── 
transition-generator.h │ ├── silver │ │ ├── BUILD │ │ ├── anaphora.cc │ │ ├── chart.cc │ │ ├── chart.h │ │ ├── corpus-split.cc │ │ ├── idf.cc │ │ ├── idf.h │ │ ├── mentions.cc │ │ ├── mentions.h │ │ ├── nominal-pruning.cc │ │ ├── parse-chart.cc │ │ ├── phrases.cc │ │ ├── relations.cc │ │ └── types.cc │ ├── wiki │ │ ├── BUILD │ │ ├── aliases.cc │ │ ├── parse-wiki-text.cc │ │ ├── wiki-annotator.cc │ │ ├── wiki-annotator.h │ │ ├── wiki-extractor.cc │ │ ├── wiki-extractor.h │ │ ├── wiki-macros.cc │ │ ├── wiki-parser.cc │ │ ├── wiki-parser.h │ │ ├── wiki.cc │ │ ├── wiki.h │ │ ├── wikidata-converter.cc │ │ ├── wikidata-converter.h │ │ ├── wikidata-importer.cc │ │ ├── wikipedia-documents.cc │ │ ├── wikipedia-importer.cc │ │ ├── wikipedia-links.cc │ │ ├── wikipedia-map.cc │ │ └── wikipedia-map.h │ └── wikicat │ │ ├── app │ │ ├── appicon.ico │ │ ├── index.html │ │ ├── wikicat.css │ │ └── wikicat.js │ │ ├── fact_matcher.py │ │ ├── generator.py │ │ ├── prelim_ranker.py │ │ ├── server.py │ │ ├── util.py │ │ └── workflow.py ├── pyapi │ ├── BUILD │ ├── pyapi.cc │ ├── pyarray.cc │ ├── pyarray.h │ ├── pybase.cc │ ├── pybase.h │ ├── pydate.cc │ ├── pydate.h │ ├── pyframe.cc │ ├── pyframe.h │ ├── pymisc.cc │ ├── pymisc.h │ ├── pymyelin.cc │ ├── pymyelin.h │ ├── pyparser.cc │ ├── pyparser.h │ ├── pyphrase.cc │ ├── pyphrase.h │ ├── pyrecordio.cc │ ├── pyrecordio.h │ ├── pystore.cc │ ├── pystore.h │ ├── pytask.cc │ ├── pytask.h │ ├── pywiki.cc │ └── pywiki.h ├── stream │ ├── BUILD │ ├── bounded.cc │ ├── bounded.h │ ├── bzip2.cc │ ├── bzip2.h │ ├── file-input.cc │ ├── file-input.h │ ├── file.cc │ ├── file.h │ ├── gzip.cc │ ├── gzip.h │ ├── input.cc │ ├── input.h │ ├── memory.cc │ ├── memory.h │ ├── output.cc │ ├── output.h │ ├── stream.h │ ├── unix-file.cc │ ├── unix-file.h │ ├── zipfile.cc │ └── zipfile.h ├── string │ ├── BUILD │ ├── ctype.cc │ ├── ctype.h │ ├── numbers.cc │ ├── numbers.h │ ├── printf.cc │ ├── printf.h │ ├── strcat.cc │ ├── strcat.h │ ├── text.cc │ └── text.h ├── task 
│ ├── BUILD │ ├── accumulator.cc │ ├── accumulator.h │ ├── app │ │ ├── appicon.ico │ │ ├── dashboard.css │ │ ├── dashboard.js │ │ ├── digital-7.mono.ttf │ │ └── index.html │ ├── dashboard.cc │ ├── dashboard.h │ ├── documents.cc │ ├── documents.h │ ├── environment.h │ ├── frame-store-reader.cc │ ├── frame-store-writer.cc │ ├── frames.cc │ ├── frames.h │ ├── identity.cc │ ├── job.cc │ ├── job.h │ ├── learner.cc │ ├── learner.h │ ├── mapper.cc │ ├── mapper.h │ ├── message-printer.cc │ ├── message.cc │ ├── message.h │ ├── null-sink.cc │ ├── pipe-reader.cc │ ├── process.cc │ ├── process.h │ ├── record-file-reader.cc │ ├── record-file-writer.cc │ ├── reducer.cc │ ├── reducer.h │ ├── rekey.cc │ ├── sharder.cc │ ├── sorter.cc │ ├── task.cc │ ├── task.h │ ├── text-file-reader.cc │ ├── text-file-writer.cc │ ├── text-map-reader.cc │ ├── text-map-writer.cc │ └── workers.cc ├── util │ ├── BUILD │ ├── arena.h │ ├── asset.h │ ├── bloom.h │ ├── city.cc │ ├── city.h │ ├── elf-writer.cc │ ├── elf-writer.h │ ├── embeddings.cc │ ├── embeddings.h │ ├── fingerprint.cc │ ├── fingerprint.h │ ├── mutex.h │ ├── random.h │ ├── snappy.h │ ├── sortmap.h │ ├── thread.cc │ ├── thread.h │ ├── threadpool.cc │ ├── threadpool.h │ ├── top.h │ ├── unicode.cc │ ├── unicode.h │ ├── unicodetab.cc │ ├── varint.cc │ ├── varint.h │ ├── vocabulary.cc │ └── vocabulary.h └── web │ ├── BUILD │ ├── entity-ref.cc │ ├── entity-ref.h │ ├── xml-parser.cc │ └── xml-parser.h ├── third_party ├── bz2lib │ ├── BUILD │ ├── LICENSE │ ├── README │ ├── blocksort.c │ ├── bzlib.c │ ├── bzlib.h │ ├── bzlib_private.h │ ├── compress.c │ ├── crctable.c │ ├── decompress.c │ ├── huffman.c │ └── randtable.c ├── jit │ ├── BUILD │ ├── LICENSE │ ├── assembler.cc │ ├── assembler.h │ ├── avx512.inc │ ├── avx512ops.txt │ ├── code.cc │ ├── code.h │ ├── cpu.cc │ ├── cpu.h │ ├── instructions.h │ ├── memory.h │ ├── registers.h │ └── types.h ├── snappy │ ├── BUILD │ ├── LICENSE │ ├── bits.h │ ├── endian.h │ ├── snappy-sinksource.cc │ ├── 
snappy-sinksource.h │ ├── snappy.cc │ └── snappy.h └── zlib │ ├── BUILD │ ├── README │ ├── adler32.c │ ├── compress.c │ ├── crc32.c │ ├── crc32.h │ ├── deflate.c │ ├── deflate.h │ ├── gzclose.c │ ├── gzguts.h │ ├── gzlib.c │ ├── gzread.c │ ├── gzwrite.c │ ├── infback.c │ ├── inffast.c │ ├── inffast.h │ ├── inffixed.h │ ├── inflate.c │ ├── inflate.h │ ├── inftrees.c │ ├── inftrees.h │ ├── trees.c │ ├── trees.h │ ├── uncompr.c │ ├── zconf.h │ ├── zlib.h │ ├── zutil.c │ └── zutil.h └── tools ├── BUILD ├── build-wheel.py ├── buildall.sh ├── codex.cc ├── docv1to2.py ├── embed-data.cc ├── embed.bzl ├── index.cc ├── optohdr.py └── snaps.cc /.bazelrc: -------------------------------------------------------------------------------- 1 | build --color=yes 2 | build --cxxopt=-Wno-unknown-warning-option 3 | build --cxxopt=-Wno-unused-command-line-argument 4 | build --cxxopt=-Wno-deprecated 5 | build --cxxopt=-Wno-sign-compare 6 | build --cxxopt=-Wno-unused-local-typedefs 7 | build --cxxopt=-Wno-undefined-var-template 8 | build --cxxopt=-Wno-attributes 9 | build --spawn_strategy=standalone 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bazel-* 2 | examples 3 | local 4 | *.pyc 5 | *.aux 6 | *.bbl 7 | *.blg 8 | *.log 9 | *.out 10 | *.pdf_tex 11 | doc/report/sling.pdf 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: 3 | - cpp 4 | - python 5 | compiler: gcc 6 | python: "3.5" 7 | 8 | addons: 9 | apt: 10 | sources: 11 | - ubuntu-toolchain-r-test 12 | packages: 13 | - wget 14 | - pkg-config 15 | - g++-4.8 16 | - python3.5-dev 17 | 18 | before_install: 19 | - wget https://github.com/bazelbuild/bazel/releases/download/1.0.0/bazel_1.0.0-linux-x86_64.deb 20 | - sudo dpkg -i 
bazel_1.0.0-linux-x86_64.deb 21 | 22 | script: 23 | - tools/buildall.sh 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 
24 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/WORKSPACE -------------------------------------------------------------------------------- /app/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | load("//tools:embed.bzl", "embed_data") 4 | 5 | embed_data( 6 | name = "app", 7 | srcs = [ 8 | "external/material.js", 9 | "external/material.css", 10 | "external/material-icons.css", 11 | "external/material-icons.woff2", 12 | "external/preact.js", 13 | "image/appicon.ico", 14 | "lib/docview.js", 15 | "lib/mdl.js", 16 | "lib/util.js", 17 | "style/docview.css", 18 | ], 19 | ) 20 | -------------------------------------------------------------------------------- /app/external/README.md: -------------------------------------------------------------------------------- 1 | # External web components for SLING. 2 | 3 | ## `preact.js`
4 | Fast alternative to React. 5 | * Website: https://preactjs.com 6 | * Retrieved from: https://unpkg.com/preact 7 | * Date: 2018-09-24 8 | * License: [MIT](https://github.com/developit/preact/blob/master/LICENSE) 9 | 10 | ## `material.js` and `material.css` 11 | Material Design Light (MDL) components. 12 | * Website: https://getmdl.io 13 | * Retrieved from: https://code.getmdl.io/1.3.0/mdl.zip 14 | * Date: 2018-09-24 15 | * Version: 1.3.0 16 | * License: [Apache 2.0](https://github.com/google/material-design-lite/blob/mdl-1.x/LICENSE) 17 | 18 | ## `material-icons.css` and `material-icons.woff2` 19 | Material Design Icon Font. 20 | * Website: https://material.io/tools/icons 21 | * Retrieved from: https://fonts.googleapis.com/icon?family=Material+Icons 22 | * Date: 2018-09-25 23 | * License: [Apache 2.0](https://github.com/google/material-design-icons/blob/master/LICENSE) 24 | -------------------------------------------------------------------------------- /app/external/material-icons.css: -------------------------------------------------------------------------------- 1 | /* fallback */ 2 | @font-face { 3 | font-family: 'Material Icons'; 4 | font-style: normal; 5 | font-weight: 400; 6 | src: url(/common/external/material-icons.woff2) format('woff2'); 7 | } 8 | 9 | .material-icons { 10 | font-family: 'Material Icons'; 11 | font-weight: normal; 12 | font-style: normal; 13 | font-size: 24px; 14 | line-height: 1; 15 | letter-spacing: normal; 16 | text-transform: none; 17 | display: inline-block; 18 | white-space: nowrap; 19 | word-wrap: normal; 20 | direction: ltr; 21 | -webkit-font-feature-settings: 'liga'; 22 | -webkit-font-smoothing: antialiased; 23 | } 24 | -------------------------------------------------------------------------------- /app/external/material-icons.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/app/external/material-icons.woff2 -------------------------------------------------------------------------------- /app/image/appicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/app/image/appicon.ico -------------------------------------------------------------------------------- /app/lib/util.js: -------------------------------------------------------------------------------- 1 | // Web utility functions. 2 | 3 | export function stylesheet(url) { 4 | if (document.getElementById(url)) return; 5 | var head = document.getElementsByTagName('head')[0]; 6 | var link = document.createElement('link'); 7 | link.id = url; 8 | link.rel = 'stylesheet'; 9 | link.type = 'text/css'; 10 | link.href = url; 11 | head.appendChild(link); 12 | } 13 | 14 | -------------------------------------------------------------------------------- /data/nlp/schemas/catalog.sling: -------------------------------------------------------------------------------- 1 | ; Schema catalog. 
2 | 3 | {=global :catalog :named 4 | name: "Global catalog" 5 | catalog_schema_family: /schema/meta 6 | catalog_schema_family: /schema/document 7 | catalog_schema_family: /schema/lang 8 | catalog_schema_family: /schema/wikidata 9 | catalog_schema_family: /schema/wikipedia 10 | } 11 | -------------------------------------------------------------------------------- /data/nlp/schemas/constituency.sling: -------------------------------------------------------------------------------- 1 | ; Schema for constituency parse information 2 | 3 | {=/constituency/constituents name: "constituents"} 4 | {=/constituency/constituent name: "constituent"} 5 | {=/constituency/tag name: "constituency-tag"} 6 | {=/constituency/parent name: "constituency-parent"} 7 | {=/constituency/children name: "constituency-children"} 8 | {=/constituency/head name: "constituency-head"} 9 | -------------------------------------------------------------------------------- /data/wiki/de/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/de 2 | 3 | "'": {type: "text" text: "'"} 4 | 5 | "datum": {type: "date" year: 3 month: 2 day: 1 yearn: "jahr" monthn: "monat" dayn: "tag" format: /w/dates/de} 6 | "FormatDate": {type: "date" full: 1 format: /w/dates/de} 7 | 8 | } 9 | 10 | -------------------------------------------------------------------------------- /data/wiki/en/phrases.txt: -------------------------------------------------------------------------------- 1 | ; Custom phrase annotations for English. 
2 | 3 | ; New York (Q1384) 4 | [New York|Q1384] 5 | 6 | ; South Korea (Q884) 7 | [South Korea|Q884] 8 | 9 | ; United Kingdom (Q145) 10 | [United Kingdom|Q145] 11 | 12 | ; United States of America (Q30) 13 | [United States|Q30] 14 | 15 | ; Washington DC (Q61) 16 | [Washington, [D.C.|{=#1 +Q3551781}]|{+Q61 P131: #1}] 17 | [Washington [D.C.|{=#1 +Q3551781}]|{+Q61 P131: #1}] 18 | [Washington [DC|{=#1 +Q3551781}]|{+Q61 P131: #1}] 19 | 20 | -------------------------------------------------------------------------------- /data/wiki/es/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/es 2 | 3 | "!": {type: "text" text: "|"} 4 | "!!": {type: "text" text: "||"} 5 | "'": {type: "text" text: "'"} 6 | 7 | "!((": {type: "text" text: "[["} 8 | "))!": {type: "text" text: "]]"} 9 | "(": {type: "text" text: "{"} 10 | ")": {type: "text" text: "}"} 11 | "((": {type: "text" text: "{{"} 12 | "))": {type: "text" text: "}}"} 13 | 14 | "·": {type: "text" text: " ·"} 15 | "=": {type: "text" text: "="} 16 | 17 | "fecha": {type: "date" year: 1 month: 2 day: 3 reverse: 1 format: /w/dates/es} 18 | "fecha de muerte": {type: "date" year: 1 month: 2 day: 3 reverse: 1 format: /w/dates/es} 19 | "año": {type: "year" bc: 2 format: /w/dates/es} 20 | 21 | } 22 | 23 | -------------------------------------------------------------------------------- /data/wiki/fi/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/fi 2 | 3 | "!!": {type: "text" text: "||"} 4 | "'": {type: "text" text: "'"} 5 | "-\"": {type: "text" text: " \""} 6 | "'-": {type: "text" text: "' "} 7 | 8 | "((": {type: "text" text: "{{"} 9 | "))": {type: "text" text: "}}"} 10 | 11 | "·": {type: "text" text: " ·"} 12 | "•": {type: "text" text: " •"} 13 | "=": {type: "text" text: "="} 14 | 15 | "päiväys": {type: "date" full: 1 year: 1 month: 2 day:3 format: /w/dates/fi} 16 | 17 | } 18 | 19 | 
-------------------------------------------------------------------------------- /data/wiki/fr/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/fr 2 | 3 | "!": {type: "text" text: "|"} 4 | "!!": {type: "text" text: "||"} 5 | "'": {type: "text" text: "'"} 6 | 7 | ")!": {type: "text" text: "]"} 8 | "!((": {type: "text" text: "[["} 9 | "))!": {type: "text" text: "]]"} 10 | "(": {type: "text" text: "{"} 11 | ")": {type: "text" text: "}"} 12 | "((": {type: "text" text: "{{"} 13 | "))": {type: "text" text: "}}"} 14 | "(((": {type: "text" text: "{{{"} 15 | ")))": {type: "text" text: "}}}"} 16 | 17 | "·": {type: "text" text: " ·"} 18 | "•": {type: "text" text: " •"} 19 | "=": {type: "text" text: "="} 20 | 21 | "date": {type: "date" full: 1 year: 1 month: 2 day: 3 qual: 4 format: /w/dates/fr} 22 | "date-": {type: "date" full: 1 year: 1 month: 2 day: 3 qual: 4 format: /w/dates/fr} 23 | "date sport": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr} 24 | "Date France": {type: "date" year: 3 month: 2 day: 1 post: " en France" format: /w/dates/fr} 25 | "date de naissance": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr} 26 | "date de décès": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr} 27 | "date de décès-": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr} 28 | "existe depuis": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/fr} 29 | 30 | } 31 | 32 | -------------------------------------------------------------------------------- /data/wiki/it/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/it 2 | 3 | "!!": {type: "text" text: "||"} 4 | "'": {type: "text" text: "'"} 5 | 6 | ")!": {type: "text" text: "]"} 7 | "(": {type: "text" text: "{"} 8 | ")": {type: "text" text: "}"} 9 | 10 | "·": {type: "text" text: " ·"} 11 | "•": {type: "text" text: " •"} 12 | "=": 
{type: "text" text: "="} 13 | 14 | "data": {type: "date" year: 3 month: 2 day: 1 yearn: "A" monthn: "M" dayn: "G" format: /w/dates/it} 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /data/wiki/nl/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/nl 2 | 3 | "!!": {type: "text" text: "||"} 4 | ")!": {type: "text" text: "]"} 5 | "!((": {type: "text" text: "[["} 6 | "))!": {type: "text" text: "]]"} 7 | "=": {type: "text" text: "="} 8 | 9 | } 10 | 11 | -------------------------------------------------------------------------------- /data/wiki/no/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/no 2 | 3 | "!!": {type: "text" text: "||"} 4 | "'": {type: "text" text: "'"} 5 | 6 | ")!": {type: "text" text: "]"} 7 | "(": {type: "text" text: "{"} 8 | ")": {type: "text" text: "}"} 9 | "((": {type: "text" text: "{{"} 10 | "))": {type: "text" text: "}}"} 11 | "(((": {type: "text" text: "{{{"} 12 | ")))": {type: "text" text: "}}}"} 13 | 14 | "·": {type: "text" text: " ·"} 15 | "•": {type: "text" text: " •"} 16 | "\\": {type: "text" text: " /"} 17 | "=": {type: "text" text: "="} 18 | 19 | "startdato": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no} 20 | "startdato og alder": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no} 21 | "fødselsdato": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no} 22 | "fødselsdato og alder": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no} 23 | "fødselsdato og alder2": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no} 24 | 25 | "dødsdato": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no} 26 | "dødsdato og alder": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: 
"dag" format: /w/dates/no} 27 | 28 | } 29 | 30 | -------------------------------------------------------------------------------- /data/wiki/pl/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/pl 2 | 3 | "!": {type: "text" text: "|"} 4 | "!!": {type: "text" text: "||"} 5 | ")!": {type: "text" text: "]"} 6 | "!((": {type: "text" text: "[["} 7 | "))!": {type: "text" text: "]]"} 8 | "(": {type: "text" text: "{"} 9 | ")": {type: "text" text: "}"} 10 | "((": {type: "text" text: "{{"} 11 | "))": {type: "text" text: "}}"} 12 | "(((": {type: "text" text: "{{{"} 13 | ")))": {type: "text" text: "}}}"} 14 | "·": {type: "text" text: " ·"} 15 | "•": {type: "text" text: " •"} 16 | "=": {type: "text" text: "="} 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /data/wiki/pt/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/pt 2 | 3 | "!!": {type: "text" text: "||"} 4 | "'": {type: "text" text: "'"} 5 | "\" '": {type: "text" text: "\" '"} 6 | 7 | ")!": {type: "text" text: "]"} 8 | "!((": {type: "text" text: "[["} 9 | "))!": {type: "text" text: "]]"} 10 | "(": {type: "text" text: "{"} 11 | ")": {type: "text" text: "}"} 12 | "((": {type: "text" text: "{{"} 13 | "))": {type: "text" text: "}}"} 14 | "·": {type: "text" text: " ·"} 15 | "•": {type: "text" text: " •"} 16 | "\\": {type: "text" text: " /"} 17 | "=": {type: "text" text: "="} 18 | 19 | "data": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt} 20 | "DataExt": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt} 21 | "dtlink": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt} 22 | "dtext": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt} 23 | "dni": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/pt} 24 | "morte": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/pt} 25 | "ani": {type: "date" 
year: 1 month: 2 day: 3 yearn: "ano" monthn: "mês" dayn: "dia" format: /w/dates/pt} 26 | 27 | } 28 | 29 | -------------------------------------------------------------------------------- /data/wiki/sv/templates.sling: -------------------------------------------------------------------------------- 1 | {=/wp/templates/sv 2 | 3 | "!": {type: "text" text: "|"} 4 | "!!": {type: "text" text: "||"} 5 | "'": {type: "text" text: "'"} 6 | "'s": {type: "text" text: "'s"} 7 | "'-": {type: "text" text: "' "} 8 | 9 | ")!": {type: "text" text: "]"} 10 | "!((": {type: "text" text: "[["} 11 | "))!": {type: "text" text: "]]"} 12 | "(": {type: "text" text: "{"} 13 | ")": {type: "text" text: "}"} 14 | "((": {type: "text" text: "{{"} 15 | "))": {type: "text" text: "}}"} 16 | "(((": {type: "text" text: "{{{"} 17 | ")))": {type: "text" text: "}}}"} 18 | 19 | "·": {type: "text" text: " ·"} 20 | "•": {type: "text" text: " •"} 21 | "\\": {type: "text" text: " /"} 22 | "=": {type: "text" text: "="} 23 | 24 | ;"datum": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/se} 25 | "startdatum": {type: "date" year: 1 month: 1 day: 3 format: /w/dates/se} 26 | "slutdatum": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/se} 27 | "startdatum och ålder": {type: "date" year: 1 month: 1 day: 3 format: /w/dates/se} 28 | ;"slutdatum och ålder": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/se} 29 | 30 | "död datum och ålder": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/se} 31 | "död år och ålder": {type: "year" format: /w/dates/se} 32 | 33 | } 34 | 35 | -------------------------------------------------------------------------------- /data/wiki/wikipedia.sling: -------------------------------------------------------------------------------- 1 | ; Wikipedia schemas. 
2 | 3 | {=/wp/page +/s/document :schema 4 | name: "Wikipedia page" 5 | family: /schema/wikipedia 6 | 7 | role: {=/wp/page/pageid :slot 8 | name: "page id" 9 | description: "Wikipedia page id" 10 | source: /wp/page 11 | target: int 12 | } 13 | 14 | role: {=/wp/page/title :slot 15 | name: "page title" 16 | description: "Wikipedia page title" 17 | source: /wp/page 18 | target: string 19 | } 20 | 21 | role: {=/wp/page/text :slot 22 | name: "wiki text" 23 | description: "Wikipedia page text in wiki markup format" 24 | source: /wp/page 25 | target: string 26 | } 27 | 28 | role: {=/wp/page/item :slot 29 | name: "Wikidata item" 30 | description: "Wikidata item" 31 | source: /wp/page 32 | target: /w/item 33 | } 34 | 35 | role: {=/wp/page/category :slot 36 | name: "category" 37 | description: "Wikipedia category" 38 | source: /wp/page 39 | target: /wp/category 40 | multi: 1 41 | } 42 | } 43 | 44 | {=/wp/redirect :schema 45 | name: "Wikipedia redirect" 46 | family: /schema/wikipedia 47 | 48 | role: {=/wp/redirect/pageid :slot 49 | name: "page id" 50 | description: "Wikipedia page id" 51 | source: /wp/redirect 52 | target: int 53 | } 54 | 55 | role: {=/wp/redirect/title :slot 56 | name: "redirect title" 57 | description: "Wikipedia redirect page title" 58 | source: /wp/redirect 59 | target: string 60 | } 61 | 62 | role: {=/wp/redirect/link :slot 63 | name: "redirect link" 64 | description: "Wikipedia redirect link" 65 | source: /wp/redirect 66 | target: /wp/page 67 | } 68 | } 69 | 70 | {=/wp/link :schema 71 | name: "Wikipedia link" 72 | family: /schema/wikipedia 73 | } 74 | 75 | {=/wp/category :schema 76 | name: "Wikipedia category" 77 | family: /schema/wikipedia 78 | } 79 | 80 | {=/schema/wikipedia :schema_family 81 | name: "Wikipedia schemas" 82 | 83 | member_schema: /wp/page 84 | member_schema: /wp/redirect 85 | member_schema: /wp/link 86 | member_schema: /wp/category 87 | } 88 | 89 | -------------------------------------------------------------------------------- 
/doc/guide/README.md: -------------------------------------------------------------------------------- 1 | # SLING Guides 2 | 3 | * [SLING installation and building](install.md) 4 | * [CASPAR frame semantics parser](caspar.md) 5 | * [SLING frames](frames.md) 6 | * [SLING Python API](pyapi.md) 7 | * [Myelin neural network JIT compiler](myelin.md) 8 | * [Wikipedia and Wikidata processing](wikiflow.md) 9 | 10 | ## Out-dated guides 11 | 12 | * [Training a SLING parser](training.md) 13 | * [Parsing with SLING](parsing.md) 14 | 15 | -------------------------------------------------------------------------------- /doc/guide/kb-browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/guide/kb-browser.png -------------------------------------------------------------------------------- /doc/report/dev-eval.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/dev-eval.pdf -------------------------------------------------------------------------------- /doc/report/network.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/network.pdf -------------------------------------------------------------------------------- /doc/report/runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/runtime.pdf -------------------------------------------------------------------------------- /python/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | py_library( 4 | name = 
"flow", 5 | srcs = ["flow.py"], 6 | ) 7 | 8 | py_library( 9 | name = "mnist", 10 | srcs = ["mnist.py"], 11 | deps = [":flow"], 12 | ) 13 | 14 | -------------------------------------------------------------------------------- /python/__init__.py: -------------------------------------------------------------------------------- 1 | import sling.pysling as api 2 | 3 | from sling.log import * 4 | from sling.nlp.document import * 5 | from sling.nlp.parser import * 6 | 7 | Store=api.Store 8 | Frame=api.Frame 9 | Array=api.Array 10 | 11 | RecordReader=api.RecordReader 12 | RecordDatabase=api.RecordDatabase 13 | RecordWriter=api.RecordWriter 14 | PhraseTable=api.PhraseTable 15 | Calendar=api.Calendar 16 | Date=api.Date 17 | WikiConverter=api.WikiConverter 18 | FactExtractor=api.FactExtractor 19 | PlausibilityModel=api.PlausibilityModel 20 | 21 | MILLENNIUM=api.MILLENNIUM 22 | CENTURY=api.CENTURY 23 | DECADE=api.DECADE 24 | YEAR=api.YEAR 25 | MONTH=api.MONTH 26 | DAY=api.DAY 27 | 28 | CASE_INVALID=api.CASE_INVALID 29 | CASE_NONE=api.CASE_NONE 30 | CASE_UPPER=api.CASE_UPPER 31 | CASE_LOWER=api.CASE_LOWER 32 | CASE_TITLE=api.CASE_TITLE 33 | 34 | -------------------------------------------------------------------------------- /python/log.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
"""Simple logging."""

import inspect
import os
import sling.pysling as api

# Severity codes handed to api.log_message().
INFO = 0
WARNING = 1
ERROR = 2
FATAL = 3

def _log_message(msg, severity):
  """Forward msg to api.log_message(), tagged with the caller's location.

  Must only be called through one of the public wrappers below: stack entry 0
  is this function and entry 1 is the wrapper, so entry 2 is the code that
  requested the log line. Its file base name and line number are reported.
  """
  origin = inspect.stack()[2]
  api.log_message(severity, os.path.basename(origin[1]), origin[2], msg)

# The wrappers are deliberately plain functions; each one contributes exactly
# one stack frame, which _log_message() relies on to find the real caller.

def info(msg):
  """Log msg with INFO severity."""
  _log_message(msg, INFO)

def warning(msg):
  """Log msg with WARNING severity."""
  _log_message(msg, WARNING)

def error(msg):
  """Log msg with ERROR severity."""
  _log_message(msg, ERROR)

def fatal(msg):
  """Log msg with FATAL severity."""
  _log_message(msg, FATAL)
#!/bin/bash
#
# Sets up a SLING development environment on an apt-based system:
#   1. installs the build dependencies with apt-get,
#   2. installs Bazel if no bazel binary is found on the PATH,
#   3. builds SLING with tools/buildall.sh,
#   4. links the python/ directory into the system Python 3 dist-packages.
#
# Set UPGRADE_BAZEL=1 to remove the currently installed Bazel first, which
# forces step 2 to reinstall it.

# Exit on errors.
set -e

echo "========================================================================="
echo "Set up SLING development environment"
echo "========================================================================="

# Install packages.
echo
echo "=== Install SLING dependencies"
PYVER=3.5
PYPKGS="python${PYVER} python${PYVER}-dev python3-pip"
PKGS="pkg-config zip g++ zlib1g-dev unzip lbzip2 ${PYPKGS}"
# PKGS is left unquoted on purpose so it splits into one argument per package.
sudo apt-get install ${PKGS}

# Install bazel.
BAZELVER=1.0.0
BAZELSH=bazel-${BAZELVER}-installer-linux-x86_64.sh
BAZELREPO=https://github.com/bazelbuild/bazel
BAZELURL=${BAZELREPO}/releases/download/${BAZELVER}/${BAZELSH}

if [[ $UPGRADE_BAZEL = "1" ]]; then
  echo "=== Forcing reinstall of Bazel"
  # Only remove Bazel when one is actually installed. Without this guard a
  # missing bazel turns the command into "sudo rm" with no operand, which
  # fails and (because of set -e) aborts the whole setup.
  if BAZELBIN=$(which bazel 2> /dev/null); then
    sudo rm "${BAZELBIN}"
  fi
fi

if ! which bazel > /dev/null; then
  echo
  echo "=== Install Bazel build system"
  wget -P /tmp ${BAZELURL}
  chmod +x /tmp/${BAZELSH}
  sudo /tmp/${BAZELSH}
  rm /tmp/${BAZELSH}
fi

# Build SLING.
echo
echo "=== Build SLING"
tools/buildall.sh

# Install SLING Python API.
echo
echo "=== Set up SLING Python API"
SLINGPKG=/usr/lib/python3/dist-packages/sling

PIP="sudo -H pip3 --disable-pip-version-check"

# Remove symlinks left behind by the old Python 2.7 based setup.
if [[ -L "/usr/lib/python2.7/dist-packages/sling" ]]; then
  echo "Removing deprecated SLING Python 2.7 package"
  sudo rm /usr/lib/python2.7/dist-packages/sling
fi
if [[ -L "/usr/local/lib/python2.7/dist-packages/sling" ]]; then
  echo "Removing deprecated SLING Python 2.7 local package"
  sudo rm /usr/local/lib/python2.7/dist-packages/sling
fi

# A pip-installed sling package would shadow the symlinked development copy.
if [[ $(${PIP} freeze | grep "sling==") ]]; then
  echo "Removing existing SLING pip package"
  ${PIP} uninstall sling
fi

if [[ -x "${SLINGPKG}" ]]; then
  echo "SLING Python package already installed"
else
  echo "Adding link for SLING Python package"
  # Quote the resolved path so a checkout directory containing spaces works.
  sudo ln -s "$(realpath python)" "${SLINGPKG}"
fi

# Done.
echo
echo "=== SLING is now set up."
// bit_cast<Dest>(source) returns a Dest whose bytes are a copy of the bytes of
// |source|. It is the well-defined equivalent of
// "*reinterpret_cast<Dest *>(&source)": the bytes are copied with memcpy, so
// no object is ever accessed through a pointer of the wrong type.
//
// Dest and Source must have the same size; a mismatch is rejected at compile
// time. Dest must be default-constructible because the result is built in a
// local variable before being returned.
template <class Dest, class Source>
inline Dest bit_cast(const Source &source) {
  static_assert(sizeof(Dest) == sizeof(Source),
                "bit_cast requires Dest and Source to have the same size");

  Dest dest;
  memcpy(&dest, &source, sizeof(dest));
  return dest;
}
// Cycle-counting clock for performance measurements.
//
// Timestamps are raw readings of the x86 time stamp counter, so intervals are
// measured in cycles. A newly constructed clock has both timestamps set to
// zero, which makes cycles() return 0 until start() and stop() have been
// called.
class Clock {
 public:
  // TSC timestamp.
  typedef int64_t Timestamp;

  // Return timestamp from cycle counter. RDTSC delivers the counter as two
  // 32-bit halves (low in EAX, high in EDX) which are recombined here.
  static inline Timestamp now() {
    uint64_t low, high;
    __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
    return (high << 32) | low;
  }

  // Return clock speed in Hz.
  static double hz();

  // Return clock speed in MHz.
  static double mhz();

  // Start clock.
  void start() { start_ = now(); }

  // Stop clock.
  void stop() { end_ = now(); }

  // Return clock cycles elapsed since start. The clock does not need to be
  // stopped for this; the current counter value is used as the end point.
  Timestamp elapsed() const { return now() - start_; }

  // Return clock cycles between start and stop.
  Timestamp cycles() const { return end_ - start_; }

  // Return time in seconds.
  double secs() const;

  // Return time in milliseconds.
  double ms() const;

  // Return time in microseconds.
  double us() const;

  // Return time in nanoseconds.
  double ns() const;

 private:
  // Both timestamps start at zero so that reading a clock that has not been
  // started or stopped is well defined instead of using indeterminate values.
  Timestamp start_ = 0;  // start timestamp
  Timestamp end_ = 0;    // end timestamp
};
26 | ModuleInitializer *ModuleInitializer::first = nullptr; 27 | ModuleInitializer *ModuleInitializer::last = nullptr; 28 | 29 | ModuleInitializer::ModuleInitializer(const char *n, Handler h) 30 | : name(n), handler(h) { 31 | if (first == nullptr) first = this; 32 | if (last != nullptr) last->next = this; 33 | last = this; 34 | } 35 | 36 | static void RunModuleInitializers() { 37 | ModuleInitializer *initializer = ModuleInitializer::first; 38 | while (initializer != nullptr) { 39 | VLOG(2) << "Initializing " << initializer->name << " module"; 40 | initializer->handler(); 41 | initializer = initializer->next; 42 | } 43 | } 44 | 45 | void InitProgram(int *argc, char ***argv) { 46 | // Initialize command line flags. 47 | if (*argc > 0) { 48 | string usage; 49 | usage.append((*argv)[0]); 50 | usage.append(" [OPTIONS]\n"); 51 | Flag::SetUsageMessage(usage); 52 | if (Flag::ParseCommandLineFlags(argc, *argv) != 0) exit(1); 53 | } 54 | 55 | // Run module initializers. 56 | RunModuleInitializers(); 57 | } 58 | 59 | void InitSharedLibrary() { 60 | // Run module initializers. 61 | RunModuleInitializers(); 62 | } 63 | 64 | } // namespace sling 65 | 66 | -------------------------------------------------------------------------------- /sling/base/init.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
#ifndef SLING_BASE_INIT_H_
#define SLING_BASE_INIT_H_

namespace sling {

// Module initializers are called in order of registration at startup.
//
// Every instance is a node in a global singly-linked list delimited by the
// static |first| and |last| pointers. The constructor (defined in init.cc)
// appends the new node to that list; InitProgram() and InitSharedLibrary()
// later walk the list and call each handler.
struct ModuleInitializer {
  // Signature of an initialization handler: no arguments, no result.
  typedef void (*Handler)(void);

  // Add module initializer.
  ModuleInitializer(const char *n, Handler h);

  // Module name.
  const char *name;

  // Handler for initializing module.
  Handler handler;

  // Next initializer. This link is filled in by the constructor of the
  // initializer that is registered after this one.
  ModuleInitializer *next;

  // Linked list of module initializers.
  static ModuleInitializer *first;
  static ModuleInitializer *last;
};

// Registers |body| as the initialization code for module |name|.
//
// Expands to a function init_module_<name> that executes |body|, and to a
// ModuleInitializer object initializer_module_<name> that registers this
// function under the stringified module name. Both live in an anonymous
// namespace. |name| is pasted into identifiers, so it must be a valid
// identifier suffix. init_priority is a GCC-specific attribute that controls
// the construction order of namespace-scope objects; the value 1000 used here
// is lower than the one used for the library initializer in libinit.cc.
#define REGISTER_INITIALIZER(name, body) \
  namespace { \
    static void init_module_##name () { body; } \
    __attribute__((init_priority(1000))) \
    sling::ModuleInitializer initializer_module_##name \
      (#name, init_module_##name); \
  }

// Run module initializers for program.
// The command line flags are parsed first (when *argc is greater than zero);
// argc and argv are passed by pointer so the flag parser can access them.
void InitProgram(int *argc, char **argv[]);

// Run module initializers for shared library.
void InitSharedLibrary();

}  // namespace sling

#endif  // SLING_BASE_INIT_H_
#include "sling/base/init.h"
#include "sling/base/logging.h"

namespace sling {

// Class for initializing program modules.
// Constructing an instance calls InitSharedLibrary(), which runs the
// registered module initializers. The class exists only so that this call can
// be triggered by the construction of the static object below.
class LibraryInitializer {
 public:
  LibraryInitializer() {
    InitSharedLibrary();
  };
};

// The initialization priority should be set higher than the priority of the
// module initializers in init.h.
// "Higher" means numerically higher: with GCC's init_priority attribute,
// objects with lower numbers are constructed first, so 2000 makes this object
// be constructed after the module initializers registered with priority 1000.
static LibraryInitializer init __attribute__((init_priority(2000)));

}  // namespace sling
// Global list of all component registries.
// This is the head of a singly-linked list; Register() adds new registries at
// the front.
RegistryMetadata *RegistryMetadata::global_registry_list = nullptr;

// Replaces the contents of |components| with all components of this registry.
// The components are collected by following link() from the head of the
// registry's component list until a null link is reached.
// NOTE(review): the element type of the vector is missing in this copy of the
// source (template arguments appear to have been lost) -- restore it from the
// declaration in registry.h.
void RegistryMetadata::GetComponents(
    std::vector *components) const {
  components->clear();
  ComponentMetadata *meta = *components_;
  while (meta != nullptr) {
    components->push_back(meta);
    meta = meta->link();
  }
}

// Returns the component of this registry whose name equals |name|, or null if
// there is no such component. The component list is searched linearly.
const ComponentMetadata *RegistryMetadata::GetComponent(
    const string &name) const {
  ComponentMetadata *meta = *components_;
  while (meta != nullptr) {
    if (name == meta->name()) return meta;
    meta = meta->link();
  }
  return nullptr;
}

// Adds |registry| to the front of the global registry list.
void RegistryMetadata::Register(RegistryMetadata *registry) {
  // The link is written with set_link() but the traversals below read it with
  // next(); presumably next() returns the registry stored here -- verify in
  // registry.h.
  registry->set_link(global_registry_list);
  global_registry_list = registry;
}

// Replaces the contents of |registries| with all registries in the global
// list, starting from the head of the list.
// NOTE(review): the element type of the vector is missing in this copy of the
// source as well.
void RegistryMetadata::GetRegistries(
    std::vector *registries) {
  registries->clear();
  RegistryMetadata *meta = global_registry_list;
  while (meta != nullptr) {
    registries->push_back(meta);
    meta = meta->next();
  }
}

// Returns the registry whose name equals |name|, or null if no registry with
// that name is in the global list. The list is searched linearly.
const RegistryMetadata *RegistryMetadata::GetRegistry(const string &name) {
  RegistryMetadata *meta = global_registry_list;
  while (meta != nullptr) {
    if (name == meta->name()) return meta;
    meta = meta->next();
  }
  return nullptr;
}
4 | 5 | cc_library( 6 | name = "file", 7 | srcs = ["file.cc"], 8 | hdrs = ["file.h"], 9 | deps = [ 10 | "//sling/base", 11 | "//sling/base:registry", 12 | ], 13 | copts = [ 14 | "-pthread", 15 | ], 16 | ) 17 | 18 | cc_library( 19 | name = "posix", 20 | srcs = ["posix.cc"], 21 | hdrs = ["posix.h"], 22 | deps = [ 23 | ":file", 24 | "//sling/base", 25 | ], 26 | alwayslink = 1, 27 | ) 28 | 29 | cc_library( 30 | name = "embed", 31 | srcs = ["embed.cc"], 32 | hdrs = ["embed.h"], 33 | deps = [ 34 | ":file", 35 | "//sling/base", 36 | ], 37 | alwayslink = 1, 38 | ) 39 | 40 | # File utility libraries. 41 | 42 | cc_library( 43 | name = "recordio", 44 | srcs = ["recordio.cc"], 45 | hdrs = ["recordio.h"], 46 | deps = [ 47 | ":file", 48 | "//sling/base", 49 | "//sling/util:fingerprint", 50 | "//sling/util:snappy", 51 | "//sling/util:varint", 52 | ], 53 | ) 54 | 55 | cc_library( 56 | name = "textmap", 57 | srcs = ["textmap.cc"], 58 | hdrs = ["textmap.h"], 59 | deps = [ 60 | ":file", 61 | "//sling/base", 62 | "//sling/string:text", 63 | ], 64 | ) 65 | 66 | cc_library( 67 | name = "repository", 68 | srcs = ["repository.cc"], 69 | hdrs = ["repository.h"], 70 | deps = [ 71 | ":file", 72 | "//sling/base", 73 | ], 74 | ) 75 | 76 | -------------------------------------------------------------------------------- /sling/file/embed.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_FILE_EMBED_H_ 16 | #define SLING_FILE_EMBED_H_ 17 | 18 | #include 19 | #include 20 | 21 | #include "sling/base/types.h" 22 | 23 | namespace sling { 24 | 25 | // File information for embedded files created with the embed-data tool. 26 | struct EmbeddedFile { 27 | const char *name; // file name 28 | uint64_t size; // file size 29 | const char *data; // file content 30 | uint64_t mtime; // file modification time 31 | }; 32 | 33 | // Find embedded file. 34 | const EmbeddedFile *GetEmbeddedFile(const string &name); 35 | 36 | // Return contents of embedded file. 37 | const char *GetEmbeddedFileContent(const string &name); 38 | 39 | } // namespace sling 40 | 41 | #endif // SLING_FILE_EMBED_H_ 42 | 43 | -------------------------------------------------------------------------------- /sling/file/posix.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_FILE_POSIX_H_ 16 | #define SLING_FILE_POSIX_H_ 17 | 18 | #include 19 | 20 | #include "sling/base/types.h" 21 | #include "sling/file/file.h" 22 | 23 | namespace sling { 24 | 25 | // Create file from POSIX file descriptor. The returned file takes ownership 26 | // of the file descriptor. 
// Global frame stores can be snapshot and saved to .snap files. These can then
// be loaded into a new empty global store. For large stores, this is faster
// than reading the frame store in encoded format.
//
// The class only has static members; it is used as a namespace for the
// snapshot operations and the file format constants.
class Snapshot {
 public:
  // Filename for snapshot.
  // NOTE(review): presumably maps the file name of a store to the name of its
  // .snap file -- confirm in snapshot.cc.
  static string Filename(const string &filename);

  // Check if there is a valid snapshot file for the store.
  static bool Valid(const string &filename);

  // Read snapshot into empty global store.
  // Success or failure is reported through the returned Status.
  static Status Read(Store *store, const string &filename);

  // Write store to snapshot file.
  // Success or failure is reported through the returned Status.
  static Status Write(Store *store, const string &filename);

 private:
  // Current magic and version for snapshots.
  // The bytes of MAGIC, from least to most significant, are the ASCII
  // characters 'S', 'N', 'A', 'P'.
  static const int MAGIC = 0x50414e53;
  static const int VERSION = 2;

  // Snapshot file header.
  struct Header {
    int magic;     // magic number for identifying snapshot file
    int version;   // snapshot file format version
    int heaps;     // number of heaps in snapshot
    int handles;   // size of handle table
    Word symtab;   // symbol table handle
    int symbols;   // number of symbols in symbol table
    int buckets;   // number of hash buckets in the symbol table
    int symheap;   // heap for symbol table (-1 means no separate heap)
  };
};
// Wire types for the binary encoding. The values 0 to 7 use all eight
// combinations of a three-bit field; the meaning of the argument that
// accompanies each type is given in the per-value comments.
enum WireType {
  WIRE_REF     = 0,  // reference to previous object (argument is refnum)
  WIRE_FRAME   = 1,  // frame (argument is the number of slots)
  WIRE_STRING  = 2,  // string (argument is the string length in bytes)
  WIRE_SYMBOL  = 3,  // unbound symbol (argument is the symbol name length)
  WIRE_LINK    = 4,  // bound symbol (argument is the symbol name length)
  WIRE_INTEGER = 5,  // integer (argument is the integer value)
  WIRE_FLOAT   = 6,  // floating-point number (argument is the float value)
  WIRE_SPECIAL = 7,  // special values
};

// Special values, i.e. the arguments that can accompany the WIRE_SPECIAL wire
// type. Numbering starts at 1; the value 0 is not assigned.
enum WireSpecial {
  WIRE_NIL     = 1,  // "nil" value
  WIRE_ID      = 2,  // "id" value
  WIRE_ISA     = 3,  // "isa" value
  WIRE_IS      = 4,  // "is" value
  WIRE_ARRAY   = 5,  // array, followed by array size and the arguments
  WIRE_INDEX   = 6,  // index value, followed by varint32 encoded integer
  WIRE_RESOLVE = 7,  // resolve link, followed by slots and replacement index
};

// The binary marker (i.e. a nul character) is used for prefixing serialized
// SLING objects to indicate that they are binary encoded. The textual encoding
// will never contain a nul character. In binary encoding, a nul character is
// decoded as REF(0). This will never be the first tag in a binary encoding
// since initially there are no references to refer to.
enum EncodingMarker {
  WIRE_BINARY_MARKER = 0,
};
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_FRAME_XML_H_ 16 | #define SLING_FRAME_XML_H_ 17 | 18 | #include 19 | 20 | #include "sling/frame/object.h" 21 | #include "sling/frame/store.h" 22 | #include "sling/stream/input.h" 23 | #include "sling/web/xml-parser.h" 24 | 25 | namespace sling { 26 | 27 | // The XML reader parses XML input and converts it into frame format. Each XML 28 | // element is converted into a frame slot where the name is the XML tag name and 29 | // the value is the content of the XML element. XML attributes and child 30 | // elements are converted into slots in a sub-frame. If the XML element only 31 | // contains text, the value is just a string with the text. 32 | class XMLReader : public XMLParser { 33 | public: 34 | // Initializes XML reader with store and input. 35 | XMLReader(Store *store, Input *input) 36 | : store_(store), input_(input), slots_(store) {} 37 | 38 | // Parse XML input and return frame with content or nil on errors. 39 | Frame Read(); 40 | 41 | private: 42 | // Callbacks from XML parser. 43 | bool StartElement(const XMLElement &element) override; 44 | bool EndElement(const char *name) override; 45 | bool Text(const char *str) override; 46 | 47 | // Object store. 48 | Store *store_; 49 | 50 | // Input with XML. 51 | Input *input_; 52 | 53 | // Stack with slots for the elements currently being parsed. 54 | Slots slots_; 55 | 56 | // Stack which marks the first slot for the elements being parsed. 
57 | std::vector marks_; 58 | }; 59 | 60 | } // namespace sling 61 | 62 | #endif // SLING_FRAME_XML_H_ 63 | 64 | -------------------------------------------------------------------------------- /sling/http/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "http-utils", 5 | srcs = ["http-utils.cc"], 6 | hdrs = ["http-utils.h"], 7 | deps = [ 8 | "//sling/base", 9 | "//sling/string:ctype", 10 | ], 11 | ) 12 | 13 | cc_library( 14 | name = "http-server", 15 | srcs = ["http-server.cc"], 16 | hdrs = ["http-server.h"], 17 | deps = [ 18 | ":http-utils", 19 | "//sling/base", 20 | "//sling/file", 21 | "//sling/string:numbers", 22 | "//sling/util:mutex", 23 | "//sling/util:thread", 24 | ], 25 | ) 26 | 27 | cc_library( 28 | name = "http-stream", 29 | srcs = ["http-stream.cc"], 30 | hdrs = ["http-stream.h"], 31 | deps = [ 32 | ":http-server", 33 | "//sling/stream", 34 | ], 35 | ) 36 | 37 | cc_library( 38 | name = "static-content", 39 | srcs = ["static-content.cc"], 40 | hdrs = ["static-content.h"], 41 | deps = [ 42 | ":http-server", 43 | "//sling/base", 44 | "//sling/file", 45 | ], 46 | ) 47 | 48 | cc_library( 49 | name = "web-service", 50 | srcs = ["web-service.cc"], 51 | hdrs = ["web-service.h"], 52 | deps = [ 53 | ":http-server", 54 | ":http-stream", 55 | "//sling/base", 56 | "//sling/frame:decoder", 57 | "//sling/frame:encoder", 58 | "//sling/frame:json", 59 | "//sling/frame:printer", 60 | "//sling/frame:object", 61 | "//sling/frame:reader", 62 | "//sling/frame:store", 63 | "//sling/string:text", 64 | ], 65 | ) 66 | 67 | -------------------------------------------------------------------------------- /sling/http/http-stream.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/http/http-stream.h" 16 | 17 | #include "sling/http/http-server.h" 18 | #include "sling/stream/stream.h" 19 | 20 | namespace sling { 21 | 22 | HTTPInputStream::HTTPInputStream(HTTPBuffer *buffer) : buffer_(buffer) {} 23 | 24 | bool HTTPInputStream::Next(const void **data, int *size) { 25 | int n = buffer_->size(); 26 | if (n > 0) { 27 | *data = buffer_->start; 28 | *size = n; 29 | buffer_->start = buffer_->end; 30 | return true; 31 | } else { 32 | return false; 33 | } 34 | } 35 | 36 | void HTTPInputStream::BackUp(int count) { 37 | buffer_->start -= count; 38 | } 39 | 40 | bool HTTPInputStream::Skip(int count) { 41 | int left = buffer_->size(); 42 | if (count > left) { 43 | buffer_->start = buffer_->end; 44 | return false; 45 | } else { 46 | buffer_->start += count; 47 | return true; 48 | } 49 | } 50 | 51 | int64 HTTPInputStream::ByteCount() const { 52 | return buffer_->start - buffer_->floor; 53 | } 54 | 55 | HTTPOutputStream::HTTPOutputStream(HTTPBuffer *buffer, int block_size) 56 | : buffer_(buffer), block_size_(block_size) {} 57 | 58 | bool HTTPOutputStream::Next(void **data, int *size) { 59 | if (buffer_->full()) buffer_->ensure(block_size_); 60 | 61 | int n = buffer_->remaining(); 62 | if (n > block_size_) n = block_size_; 63 | *data = buffer_->end; 64 | *size = n; 65 | buffer_->end += n; 66 | return true; 67 | } 68 | 69 | void 
HTTPOutputStream::BackUp(int count) { 70 | buffer_->end -= count; 71 | } 72 | 73 | int64 HTTPOutputStream::ByteCount() const { 74 | return buffer_->size(); 75 | } 76 | 77 | } // namespace sling 78 | 79 | -------------------------------------------------------------------------------- /sling/http/http-stream.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_HTTP_HTTP_STREAM_H_ 16 | #define SLING_HTTP_HTTP_STREAM_H_ 17 | 18 | #include "sling/http/http-server.h" 19 | #include "sling/stream/stream.h" 20 | 21 | namespace sling { 22 | 23 | // An InputStream for reading from a HTTP buffer. 24 | class HTTPInputStream : public InputStream { 25 | public: 26 | HTTPInputStream(HTTPBuffer *buffer); 27 | 28 | // InputStream interface. 29 | bool Next(const void **data, int *size) override; 30 | void BackUp(int count) override; 31 | bool Skip(int count) override; 32 | int64 ByteCount() const override; 33 | 34 | private: 35 | HTTPBuffer *buffer_; 36 | }; 37 | 38 | // An OutputStream backed by a HTTP buffer. 39 | class HTTPOutputStream : public OutputStream { 40 | public: 41 | HTTPOutputStream(HTTPBuffer *buffer, int block_size = 8192); 42 | 43 | // OutputStream interface. 
44 | bool Next(void **data, int *size) override; 45 | void BackUp(int count) override; 46 | int64 ByteCount() const override; 47 | 48 | private: 49 | HTTPBuffer *buffer_; 50 | int block_size_; 51 | }; 52 | 53 | } // namespace sling 54 | 55 | #endif // SLING_HTTP_HTTP_STREAM_H_ 56 | 57 | -------------------------------------------------------------------------------- /sling/http/static-content.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_HTTP_STATIC_CONTENT_H_ 16 | #define SLING_HTTP_STATIC_CONTENT_H_ 17 | 18 | #include 19 | 20 | #include "sling/base/types.h" 21 | #include "sling/http/http-server.h" 22 | 23 | namespace sling { 24 | 25 | // HTTP handler for serving static web content. 26 | class StaticContent { 27 | public: 28 | // Initialize handler for serving files from a directory. 29 | StaticContent(const string &url, const string &path); 30 | 31 | // Register handler with HTTP server. 32 | void Register(HTTPServer *http); 33 | 34 | // Serve static web content from directory. 35 | void HandleFile(HTTPRequest *request, HTTPResponse *response); 36 | 37 | private: 38 | // URL path for static content. 39 | string url_; 40 | 41 | // Directory with static web content to be served. 
42 | string dir_; 43 | }; 44 | 45 | } // namespace sling 46 | 47 | #endif // SLING_HTTP_STATIC_CONTENT_H_ 48 | 49 | -------------------------------------------------------------------------------- /sling/myelin/compiler.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_COMPILER_H_ 16 | #define SLING_MYELIN_COMPILER_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | #include "sling/myelin/flow.h" 20 | 21 | namespace sling { 22 | namespace myelin { 23 | 24 | // Myelin neural network JIT compiler for compiling a flow to a network. 25 | class Compiler { 26 | public: 27 | // Initialize compiler. 28 | Compiler(); 29 | ~Compiler(); 30 | 31 | // Compile flow to network. 32 | void Compile(Flow *flow, Network *net); 33 | 34 | // Library with transformations and kernels for compilation. 35 | Library *library() { return library_; } 36 | 37 | // Custom runtime. 38 | Runtime *runtime() const { return runtime_; } 39 | void set_runtime(Runtime *runtime) { runtime_ = runtime; } 40 | 41 | // Perf module FLOP counter support. 42 | bool perf_flopctr() const { return perf_flopctr_; } 43 | void set_perf_flopctr(bool enabled) { perf_flopctr_ = enabled; } 44 | 45 | private: 46 | // Write graph to output as DOT and/or SVG file. 
47 | static void WriteGraph(const Flow &flow, 48 | const string &dot, 49 | const string &svg); 50 | 51 | // Compiler library with kernels, transformations, etc. 52 | Library *library_ = nullptr; 53 | 54 | // Custom runtime for generated network. 55 | Runtime *runtime_ = nullptr; 56 | 57 | // Enable perf FLOP counter. 58 | bool perf_flopctr_ = true; 59 | }; 60 | 61 | // Enable/disable CPU features for compiler. 62 | void SetCPUFeatures(const string &features); 63 | 64 | } // namespace myelin 65 | } // namespace sling 66 | 67 | #endif // SLING_MYELIN_COMPILER_H_ 68 | 69 | -------------------------------------------------------------------------------- /sling/myelin/cuda/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "cuda-api", 5 | srcs = ["cuda-api.cc"], 6 | hdrs = ["cuda-api.h"], 7 | deps = [ 8 | "//sling/base", 9 | ], 10 | linkopts = [ 11 | "-ldl", 12 | ], 13 | ) 14 | 15 | cc_library( 16 | name = "cuda", 17 | srcs = ["cuda.cc"], 18 | hdrs = ["cuda.h"], 19 | deps = [ 20 | ":cuda-api", 21 | "//sling/base", 22 | ], 23 | ) 24 | 25 | cc_library( 26 | name = "cuda-runtime", 27 | srcs = ["cuda-runtime.cc"], 28 | hdrs = ["cuda-runtime.h"], 29 | deps = [ 30 | ":cuda", 31 | "//sling/base", 32 | "//sling/myelin:compute", 33 | ], 34 | ) 35 | 36 | cc_library( 37 | name = "cuda-kernel", 38 | srcs = ["cuda-kernel.cc"], 39 | hdrs = ["cuda-kernel.h"], 40 | deps = [ 41 | ":cuda", 42 | ":cuda-runtime", 43 | "//sling/base", 44 | "//sling/myelin:compute", 45 | ], 46 | ) 47 | 48 | -------------------------------------------------------------------------------- /sling/myelin/generator/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "index", 5 | srcs = ["index.cc"], 6 | hdrs = ["index.h"], 7 | deps = [ 8 | "//sling/base", 9 | 
"//sling/myelin:compute", 10 | "//sling/myelin:express", 11 | ], 12 | ) 13 | 14 | cc_library( 15 | name = "elementwise", 16 | srcs = ["elementwise.cc"], 17 | hdrs = ["elementwise.h"], 18 | deps = [ 19 | ":index", 20 | ], 21 | ) 22 | 23 | cc_library( 24 | name = "expression", 25 | srcs = [ 26 | "expression.cc", 27 | "scalar-flt-sse.cc", 28 | "scalar-flt-avx.cc", 29 | "vector-flt-sse.cc", 30 | "vector-flt-avx128.cc", 31 | "vector-flt-avx256.cc", 32 | "vector-flt-avx512.cc", 33 | "scalar-int.cc", 34 | "vector-int-sse.cc", 35 | "vector-int-avx128.cc", 36 | "vector-int-avx256.cc", 37 | ], 38 | hdrs = ["expression.h"], 39 | deps = [ 40 | ":index", 41 | "//sling/base", 42 | "//sling/myelin:compute", 43 | "//sling/myelin:express", 44 | ], 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /sling/myelin/kernel/arithmetic.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_ARITHMETIC_H_ 16 | #define SLING_MYELIN_KERNEL_ARITHMETIC_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | #include "sling/myelin/express.h" 20 | 21 | namespace sling { 22 | namespace myelin { 23 | 24 | // Register arithmetic library. 25 | void RegisterArithmeticLibrary(Library *library); 26 | 27 | // Register arithmetic transforms. 
28 | void RegisterArithmeticTransforms(Library *library); 29 | 30 | // Initialize expression for step. 31 | void InitExpression(const Step *step, Express *expr); 32 | 33 | } // namespace myelin 34 | } // namespace sling 35 | 36 | #endif // SLING_MYELIN_KERNEL_ARITHMETIC_H_ 37 | 38 | -------------------------------------------------------------------------------- /sling/myelin/kernel/avx.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/myelin/kernel/avx.h" 16 | 17 | #include "sling/myelin/compute.h" 18 | 19 | namespace sling { 20 | namespace myelin { 21 | 22 | // avx-math.cc 23 | void RegisterAVXMath(Library *library); 24 | 25 | // avx-matmul.cc 26 | void RegisterAVXMatMul(Library *library); 27 | 28 | // avx-operators.cc 29 | void RegisterAVXOperators(Library *library); 30 | 31 | // simd-matmul.cc 32 | void RegisterSIMDMatMulLibrary(Library *library); 33 | 34 | // Register AVX library. 
35 | void RegisterAVXLibrary(Library *library) { 36 | RegisterAVXMath(library); 37 | RegisterSIMDMatMulLibrary(library); 38 | RegisterAVXMatMul(library); 39 | RegisterAVXOperators(library); 40 | } 41 | 42 | } // namespace myelin 43 | } // namespace sling 44 | 45 | -------------------------------------------------------------------------------- /sling/myelin/kernel/avx.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_AVX_H_ 16 | #define SLING_MYELIN_KERNEL_AVX_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // AVX vectors. 24 | typedef float FloatVec8[8] __attribute__ ((aligned (32))); 25 | #define CONST8(x) {x, x, x, x, x, x, x, x} 26 | 27 | // Register AVX library. 28 | void RegisterAVXLibrary(Library *library); 29 | 30 | } // namespace myelin 31 | } // namespace sling 32 | 33 | #endif // SLING_MYELIN_KERNEL_AVX_H_ 34 | 35 | -------------------------------------------------------------------------------- /sling/myelin/kernel/cuda.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/myelin/compute.h" 16 | 17 | namespace sling { 18 | namespace myelin { 19 | 20 | // cuda-matmul.cc 21 | void RegisterCUDAMatMulLibrary(Library *library); 22 | 23 | // cublas-matmul.cc 24 | void RegisterCUBLASMatMulLibrary(Library *library); 25 | 26 | // cuda-arithmetic.cc 27 | void RegisterCUDAArithmeticLibrary(Library *library); 28 | 29 | // cuda-array.cc 30 | void RegisterCUDAArrayLibrary(Library *library); 31 | 32 | // Register CUDA kernels. 33 | void RegisterCUDALibrary(Library *library) { 34 | RegisterCUDAMatMulLibrary(library); 35 | RegisterCUBLASMatMulLibrary(library); 36 | RegisterCUDAArithmeticLibrary(library); 37 | RegisterCUDAArrayLibrary(library); 38 | } 39 | 40 | } // namespace myelin 41 | } // namespace sling 42 | 43 | -------------------------------------------------------------------------------- /sling/myelin/kernel/cuda.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_CUDA_H_ 16 | #define SLING_MYELIN_KERNEL_CUDA_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register CUDA kernels. 24 | void RegisterCUDALibrary(Library *library); 25 | 26 | } // namespace myelin 27 | } // namespace sling 28 | 29 | #endif // SLING_MYELIN_KERNEL_CUDA_H_ 30 | 31 | -------------------------------------------------------------------------------- /sling/myelin/kernel/dragnn.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_DRAGNN_H_ 16 | #define SLING_MYELIN_KERNEL_DRAGNN_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register Dragnn library. 24 | void RegisterDragnnLibrary(Library *library); 25 | 26 | } // namespace myelin 27 | } // namespace sling 28 | 29 | #endif // SLING_MYELIN_KERNEL_DRAGNN_H_ 30 | 31 | -------------------------------------------------------------------------------- /sling/myelin/kernel/generic.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_GENERIC_H_ 16 | #define SLING_MYELIN_KERNEL_GENERIC_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register generic transforms. 24 | void RegisterGenericTransforms(Library *library); 25 | 26 | // Register generic library. 27 | void RegisterGenericLibrary(Library *library); 28 | 29 | } // namespace myelin 30 | } // namespace sling 31 | 32 | #endif // SLING_MYELIN_KERNEL_GENERIC_H_ 33 | 34 | -------------------------------------------------------------------------------- /sling/myelin/kernel/gradients.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef SLING_MYELIN_KERNEL_GRADIENTS_H_ 16 | #define SLING_MYELIN_KERNEL_GRADIENTS_H_ 17 | 18 | #include "sling/myelin/gradient.h" 19 | #include "sling/myelin/flow.h" 20 | 21 | namespace sling { 22 | namespace myelin { 23 | 24 | // Register gradient functions for standard ops. 25 | void RegisterStandardGradients(); 26 | 27 | } // namespace myelin 28 | } // namespace sling 29 | 30 | #endif // SLING_MYELIN_KERNEL_GRADIENTS_H_ 31 | 32 | -------------------------------------------------------------------------------- /sling/myelin/kernel/mkl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_MKL_H_ 16 | #define SLING_MYELIN_KERNEL_MKL_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register MKL library. 24 | void RegisterMKLLibrary(Library *library); 25 | 26 | } // namespace myelin 27 | } // namespace sling 28 | 29 | #endif // SLING_MYELIN_KERNEL_MKL_H_ 30 | 31 | -------------------------------------------------------------------------------- /sling/myelin/kernel/precompute.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_PRECOMPUTE_H_ 16 | #define SLING_MYELIN_KERNEL_PRECOMPUTE_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register precompute library. 24 | void RegisterPrecomputeLibrary(Library *library); 25 | 26 | } // namespace myelin 27 | } // namespace sling 28 | 29 | #endif // SLING_MYELIN_KERNEL_PRECOMPUTE_H_ 30 | 31 | -------------------------------------------------------------------------------- /sling/myelin/kernel/sse.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "sling/myelin/kernel/sse.h" 16 | 17 | #include "sling/myelin/compute.h" 18 | 19 | namespace sling { 20 | namespace myelin { 21 | 22 | // sse-matmul.cc 23 | void RegisterSSEMatMul(Library *library); 24 | 25 | // Register SSE library. 26 | void RegisterSSELibrary(Library *library) { 27 | RegisterSSEMatMul(library); 28 | } 29 | 30 | } // namespace myelin 31 | } // namespace sling 32 | 33 | -------------------------------------------------------------------------------- /sling/myelin/kernel/sse.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_SSE_H_ 16 | #define SLING_MYELIN_KERNEL_SSE_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // SSE vectors: four floats with 16-byte alignment; CONST4 repeats one value in all four elements. 24 | typedef float FloatVec4[4] __attribute__ ((aligned (16))); 25 | #define CONST4(x) {x, x, x, x} 26 | 27 | // Register SSE library. 28 | void RegisterSSELibrary(Library *library); 29 | 30 | } // namespace myelin 31 | } // namespace sling 32 | 33 | #endif // SLING_MYELIN_KERNEL_SSE_H_ 34 | 35 | -------------------------------------------------------------------------------- /sling/myelin/kernel/tensorflow.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/myelin/kernel/tensorflow.h" 16 | 17 | #include 18 | 19 | #include "sling/myelin/compute.h" 20 | #include "sling/myelin/kernel/arithmetic.h" 21 | #include "sling/myelin/kernel/avx.h" 22 | #include "sling/myelin/kernel/generic.h" 23 | #include "sling/myelin/kernel/gradients.h" 24 | #include "sling/myelin/kernel/sse.h" 25 | #include "sling/myelin/kernel/precompute.h" 26 | 27 | namespace sling { 28 | namespace myelin { 29 | 30 | static std::once_flag gradients_initialized; 31 | 32 | // Register Tensorflow ops. 33 | void RegisterTensorflowLibrary(Library *library) { 34 | RegisterArithmeticTransforms(library); 35 | RegisterGenericLibrary(library); 36 | RegisterSSELibrary(library); 37 | RegisterAVXLibrary(library); 38 | RegisterArithmeticLibrary(library); 39 | RegisterPrecomputeLibrary(library); 40 | RegisterGenericTransforms(library); 41 | 42 | std::call_once(gradients_initialized, RegisterStandardGradients); 43 | } 44 | 45 | } // namespace myelin 46 | } // namespace sling 47 | 48 | -------------------------------------------------------------------------------- /sling/myelin/kernel/tensorflow.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_MYELIN_KERNEL_TENSORFLOW_H_ 16 | #define SLING_MYELIN_KERNEL_TENSORFLOW_H_ 17 | 18 | #include "sling/myelin/compute.h" 19 | 20 | namespace sling { 21 | namespace myelin { 22 | 23 | // Register Tensorflow library. 24 | void RegisterTensorflowLibrary(Library *library); 25 | 26 | } // namespace myelin 27 | } // namespace sling 28 | 29 | #endif // SLING_MYELIN_KERNEL_TENSORFLOW_H_ 30 | 31 | -------------------------------------------------------------------------------- /sling/myelin/multi-process.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Worker type used by the runtime's worker pool; defined elsewhere.
class Worker;

// Myelin runtime for multi-processor execution.
//
// Overrides the Runtime hooks for instance and channel memory management and
// reports asynchronous execution support (SupportsAsync() returns true).
class MultiProcessorRuntime : public Runtime {
 public:
  ~MultiProcessorRuntime();

  // Human-readable name of this runtime.
  string Description() override { return "Multi-processor"; }

  // Instance data allocation.
  void AllocateInstance(Instance *instance) override;
  void FreeInstance(Instance *instance) override;
  void ClearInstance(Instance *instance) override;

  // Channel allocation.
  // NOTE(review): AllocateChannel presumably grows or reallocates the buffer
  // at `data` from `old_size` to `new_size` bytes with the given alignment
  // and returns the (possibly moved) buffer -- confirm in the implementation.
  char *AllocateChannel(char *data,
                        size_t old_size,
                        size_t new_size,
                        size_t alignment,
                        Placement placement) override;
  // NOTE(review): presumably clears `size` bytes starting at offset `pos` in
  // the channel buffer -- confirm in the implementation.
  void ClearChannel(char *data, size_t pos,
                    size_t size,
                    Placement placement) override;
  void FreeChannel(char *data, Placement placement) override;

  // Multi-processor runtime support.
  bool SupportsAsync() override { return true; }
  // Functions for starting a task and for waiting on it.
  // NOTE(review): presumably invoked from generated code -- confirm.
  TaskFunc StartTaskFunc() override;
  TaskFunc WaitTaskFunc() override;

 private:
  // Mutex for synchronizing access to worker pool.
  std::mutex mu_;

  // Worker pool.
  // NOTE(review): the element type appears to have been lost when this text
  // was extracted (presumably pointers to Worker) -- restore it from the
  // repository before compiling.
  std::vector workers_;
};
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Run all Myelin tests.
#
# Usage: runall.sh [extra opcheck.py arguments...]
# Every argument is forwarded verbatim to each test invocation.

# Test command; intentionally expanded unquoted so it splits into words.
TESTPGM="python3 sling/myelin/tests/opcheck.py"

# Keep the extra arguments in an array so that arguments containing spaces
# or glob characters reach the test program unchanged.
EXTRA=("$@")

# Determine CPU feature support. These probes must run before 'set -e' below,
# because grep exits with a non-zero status when a feature is absent.
AVX512=$(grep avx512 /proc/cpuinfo)
FMA=$(grep fma /proc/cpuinfo)
AVX2=$(grep avx2 /proc/cpuinfo)
AVX=$(grep avx /proc/cpuinfo)

# Run all CPU tests for data type.
# $1: data type name passed to --dt (e.g. float32).
# The test is run once with all CPU features and then once for each
# progressively smaller feature set that the machine supports.
testcpu() {
  local DT=$1
  echo "Test data type $DT"
  $TESTPGM --dt "$DT" "${EXTRA[@]}"

  if [[ $AVX512 ]]; then
    echo "Test data type $DT without AVX512"
    $TESTPGM --dt "$DT" --cpu=-avx512 "${EXTRA[@]}"
  fi
  if [[ $FMA ]]; then
    echo "Test data type $DT without FMA3"
    $TESTPGM --dt "$DT" --cpu=-avx512-fma3 "${EXTRA[@]}"
  fi
  if [[ $AVX2 ]]; then
    echo "Test data type $DT without AVX2"
    $TESTPGM --dt "$DT" --cpu=-avx512-avx2 "${EXTRA[@]}"
    if [[ $FMA ]]; then
      echo "Test data type $DT without AVX2 and FMA3"
      $TESTPGM --dt "$DT" --cpu=-avx512-fma3-avx2 "${EXTRA[@]}"
    fi
  fi
  if [[ $AVX ]]; then
    echo "Test data type $DT without AVX"
    $TESTPGM --dt "$DT" --cpu=-avx512-fma3-avx2-avx "${EXTRA[@]}"
  fi
}

# Run all GPU tests for data type.
# $1: data type name passed to --dt.
testgpu() {
  local DT=$1
  echo "Test data type $DT on GPU"
  $TESTPGM --gpu --dt "$DT" "${EXTRA[@]}"
}

# Stop on errors.
set -e

# Test float types on CPU.
testcpu float32
testcpu float64

# Test integer types on CPU.
testcpu int8
testcpu int16
testcpu int32
testcpu int64

# Test on GPU if CUDA is installed.
77 | if [ -f /usr/lib/x86_64-linux-gnu/libcuda.so.1 ]; then 78 | # Test float types on GPU. 79 | testgpu float32 80 | testgpu float64 81 | 82 | # Test integer types on GPU. 83 | testgpu int16 84 | testgpu int32 85 | testgpu int64 86 | fi 87 | 88 | echo "==== ALL TESTS PASSED =====" 89 | 90 | -------------------------------------------------------------------------------- /sling/nlp/document/app/analyzer.css: -------------------------------------------------------------------------------- 1 | #app { 2 | width: 100%; 3 | height: 100%; 4 | } 5 | 6 | #main { 7 | background-color: #eeeeee; 8 | width: 100%; 9 | height: 100%; 10 | overflow: hidden; 11 | padding: 10px; 12 | box-sizing: border-box; 13 | } 14 | 15 | #docid { 16 | background-color: #FFF; 17 | color: #000; 18 | padding: 10px; 19 | } 20 | 21 | .editor { 22 | width: 100%; 23 | height: 100%; 24 | box-sizing: border-box; 25 | padding: 10px; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /sling/nlp/document/app/analyzer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | SLING document analyzer 7 | 8 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /sling/nlp/document/app/corpus.css: -------------------------------------------------------------------------------- 1 | #app { 2 | width: 100%; 3 | height: 100%; 4 | } 5 | 6 | #main { 7 | background-color: #eeeeee; 8 | width: 100%; 9 | height: 100%; 10 | overflow: hidden; 11 | padding: 10px; 12 | box-sizing: border-box; 13 | } 14 | 15 | #docid { 16 | background-color: #FFF; 17 | color: #000; 18 | padding: 10px; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /sling/nlp/document/app/corpus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Corpus browser 7 | 8 | 10 | 11 | 12 | 13 | 14 | 
import {Component, h, render} from "/common/external/preact.js";
import {Layout, TextField, Button, Icon} from "/common/lib/mdl.js";
import {Document, DocumentViewer} from "/common/lib/docview.js";
import {stylesheet} from "/common/lib/util.js";

stylesheet("/doc/corpus.css");

// Corpus browser application. Shows one document at a time and lets the user
// jump to a document by id or step backward/forward through the corpus.
class App extends Component {
  constructor(props) {
    super(props);
    this.state = { document: null };
  }

  // Fetch a document in JSON format from `url` and display it. If the request
  // fails, the error is logged and the currently shown document is kept
  // instead of replacing it with a document built from a null response.
  update(url) {
    fetch(url)
      .then(response => {
        if (response.ok) return response.json();
        // A fetch Response carries statusText; it has no `message` property.
        console.log("fetch error", response.status, response.statusText);
        return null;
      })
      .then(data => {
        if (data !== null) {
          this.setState({document: new Document(data)});
        }
      })
      .catch(error => {
        // Network failure or malformed JSON.
        console.log("fetch error", error);
      });
  }

  // Look up the document whose id was entered in the search field.
  search(e) {
    var docid = e.target.value;
    if (docid) {
      // The id is user input, so escape it before putting it in the query.
      this.update("/fetch?docid=" + encodeURIComponent(docid) + "&fmt=cjson");
    }
  }

  // Move to the next document.
  forward(e) {
    this.update("/forward?fmt=cjson");
  }

  // Move to the previous document.
  back(e) {
    this.update("/back?fmt=cjson");
  }

  render(props, state) {
    // NOTE(review): the Material Icons ligature for a back arrow is normally
    // "arrow_back" -- confirm that "arrow_backward" renders as intended.
    return (
      h("div", {id: "app"},
        h(Layout, null,
          h(Layout.Header, null,
            h(Layout.HeaderRow, null,
              h(Layout.Title, null, "Corpus Browser"),
              h(Layout.Spacer),
              h(TextField, {
                id: "docid",
                placeholder: "Document ID",
                type: "search",
                value: state.document ? state.document.key : "",
                onsearch: e => this.search(e),
              }),
              h(Button, {icon: true, onclick: e => this.back(e)},
                h(Icon, {icon: "arrow_backward"})
              ),
              h(Button, {icon: true, onclick: e => this.forward(e)},
                h(Icon, {icon: "arrow_forward"})
              ),
            ),
          ),
          h(Layout.Drawer, null, h(Layout.Title, null, "Menu")),
          h(Layout.DrawerButton),

          h(Layout.Content, {id: "main"},
            h(DocumentViewer, {document: state.document})
          )
        )
      )
    );
  }
}

render(h(App), document.body);
nullptr : new DocumentNames(commons); 31 | } 32 | 33 | DocumentCorpus::~DocumentCorpus() { 34 | if (docnames_ != nullptr) docnames_->Release(); 35 | } 36 | 37 | Document *DocumentCorpus::Next(Store *store) { 38 | // Return null if there are no more document. 39 | if (corpus_.Done()) return nullptr; 40 | 41 | // Read next record. 42 | Record record; 43 | CHECK(corpus_.Next(&record)); 44 | 45 | // Decode document frame. 46 | ArrayInputStream stream(record.value.data(), record.value.size()); 47 | InputParser parser(store, &stream); 48 | Frame frame = parser.Read().AsFrame(); 49 | CHECK(frame.valid()); 50 | 51 | // Return new document. 52 | return new Document(frame, docnames_); 53 | } 54 | 55 | void DocumentCorpus::Rewind() { 56 | CHECK(corpus_.Rewind()); 57 | } 58 | 59 | } // namespace nlp 60 | } // namespace sling 61 | 62 | -------------------------------------------------------------------------------- /sling/nlp/document/document-corpus.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// A document corpus is a set of record files with SLING-encoded documents.
// Documents are read sequentially with Next() until it returns null.
class DocumentCorpus {
 public:
  // Initialize document corpus from a file pattern or from an explicit list
  // of file names. `commons` is the commons store for the document schema.
  // NOTE(review): the element type of `filenames` appears to have been lost
  // when this text was extracted (presumably strings) -- restore it from the
  // repository before compiling.
  DocumentCorpus(Store *commons, const string &filepattern);
  DocumentCorpus(Store *commons, const std::vector &filenames);
  ~DocumentCorpus();

  // Read next document into store and return it, or null if there are no
  // more documents. The returned document is owned by the caller.
  Document *Next(Store *store);

  // Rewind to the start of the corpus.
  void Rewind();

 private:
  // Record files with documents.
  RecordDatabase corpus_;

  // Document schema. May be null; released in the destructor when set.
  DocumentNames *docnames_;
};
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/nlp/document/document-tokenizer.h" 16 | 17 | #include "sling/base/types.h" 18 | #include "sling/nlp/document/document.h" 19 | #include "sling/nlp/document/text-tokenizer.h" 20 | #include "sling/string/text.h" 21 | 22 | namespace sling { 23 | namespace nlp { 24 | 25 | DocumentTokenizer::DocumentTokenizer() { 26 | // Initialize tokenizer. 27 | tokenizer_.InitLDC(); 28 | } 29 | 30 | void DocumentTokenizer::Tokenize(Document *document, Text text) const { 31 | document->SetText(text); 32 | Tokenize(document); 33 | } 34 | 35 | void DocumentTokenizer::Tokenize(Document *document) const { 36 | const string &text = document->text(); 37 | tokenizer_.Tokenize(text, 38 | [document](const Tokenizer::Token &t) { 39 | document->AddToken(t.text, t.begin, t.end, t.brk, t.style); 40 | } 41 | ); 42 | } 43 | 44 | } // namespace nlp 45 | } // namespace sling 46 | 47 | -------------------------------------------------------------------------------- /sling/nlp/document/document-tokenizer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Tokenizer that splits the text of a document into tokens and adds them to
// the document.
class DocumentTokenizer {
 public:
  // Initialize the underlying text tokenizer.
  DocumentTokenizer();

  // Add tokenized text to document. Sets the document text to `text` and
  // then tokenizes it.
  void Tokenize(Document *document, Text text) const;

  // Tokenize text in document. Tokens are added to the document.
  void Tokenize(Document *document) const;

 private:
  // Text tokenizer.
  Tokenizer tokenizer_;
};
14 | 15 | #include "sling/nlp/document/features.h" 16 | 17 | #include "sling/base/types.h" 18 | #include "sling/nlp/document/document.h" 19 | #include "sling/util/unicode.h" 20 | 21 | namespace sling { 22 | namespace nlp { 23 | 24 | void DocumentFeatures::Extract(const Document &document, int begin, int end) { 25 | if (end == -1) end = document.num_tokens(); 26 | int length = end - begin; 27 | features_.resize(length); 28 | bool in_quote = false; 29 | for (int i = 0; i < length; ++i) { 30 | const string &word = document.token(begin + i).word(); 31 | TokenFeatures &f = features_[i]; 32 | 33 | // Look up token word in lexicon and get word features. 34 | f.word = lexicon_->Lookup(word, &f.prefix, &f.suffix, &f.shape); 35 | 36 | // Re-compute context-sensitive features. 37 | if (i == 0 || document.token(i).brk() >= SENTENCE_BREAK) { 38 | if (f.shape.capitalization == WordShape::CAPITALIZED) { 39 | f.shape.capitalization = WordShape::INITIAL; 40 | } 41 | } 42 | if (f.shape.quote == WordShape::UNKNOWN_QUOTE) { 43 | f.shape.quote = in_quote ? WordShape::CLOSE_QUOTE : WordShape::OPEN_QUOTE; 44 | in_quote = !in_quote; 45 | } 46 | } 47 | } 48 | 49 | } // namespace nlp 50 | } // namespace sling 51 | 52 | -------------------------------------------------------------------------------- /sling/nlp/document/fingerprinter.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/nlp/document/fingerprinter.h" 16 | 17 | #include 18 | 19 | #include "sling/util/unicode.h" 20 | 21 | namespace sling { 22 | namespace nlp { 23 | 24 | uint64 Fingerprinter::Fingerprint(Text word, Normalization normalization) { 25 | // Normalize string. 26 | string normalized; 27 | UTF8::Normalize(word.data(), word.size(), normalization, &normalized); 28 | 29 | // Ignore degenerate words. 30 | if (normalized.empty()) return 1; 31 | 32 | // Return fingerprint for normalized word. 33 | return Hash(normalized); 34 | } 35 | 36 | uint64 Fingerprinter::Fingerprint(Text word, uint64 seed, 37 | Normalization normalization) { 38 | uint64 fp = Fingerprint(word, normalization); 39 | return fp == 1 ? seed : Mix(fp, seed); 40 | } 41 | 42 | uint64 Fingerprinter::Fingerprint(const std::vector &words, 43 | Normalization normalization) { 44 | uint64 fp = 1; 45 | for (const Text &word : words) { 46 | uint64 word_fp = Fingerprint(word, normalization); 47 | if (word_fp == 1) continue; 48 | fp = Mix(word_fp, fp); 49 | } 50 | return fp; 51 | } 52 | 53 | } // namespace nlp 54 | } // namespace sling 55 | 56 | -------------------------------------------------------------------------------- /sling/nlp/document/lex.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// Lexer for converting LEX-encoded text into documents with annotations.
class DocumentLexer {
 public:
  // Initialize document lexer. The tokenizer pointer is stored as-is, so the
  // tokenizer must remain valid for as long as the lexer is used.
  DocumentLexer(const DocumentTokenizer *tokenizer) : tokenizer_(tokenizer) {}

  // Parse text in LEX format and add text and annotations to document.
  // NOTE(review): the boolean result presumably signals whether the LEX text
  // was well-formed -- confirm against the implementation.
  bool Lex(Document *document, Text lex) const;

 private:
  // Markable span in LEX-encoded text.
  struct Markable {
    // Start a span at byte position `pos`; the end and the annotation object
    // are unset (-1) until they are filled in.
    Markable(int pos) : begin(pos) {}
    // Range of bytes in plain text covering the span.
    int begin;
    int end = -1;

    // Annotation object number.
    int object = -1;
  };

  // Document tokenizer.
  const DocumentTokenizer *tokenizer_;
};

// Convert document to LEX format.
string ToLex(const Document &document);
// Tokenizer for phrases, with support for computing fingerprints of the
// phrase tokens and of the phrase as a whole.
class PhraseTokenizer {
 public:
  PhraseTokenizer();

  // Tokenize phrase into tokens.
  // NOTE(review): the element type of `tokens` appears to have been lost when
  // this text was extracted -- restore it from the repository.
  void Tokenize(Text text, std::vector *tokens) const;

  // Tokenize phrase and return token fingerprints for each token.
  // NOTE(review): the return value is presumably the fingerprint of the whole
  // phrase, and the element type of `tokens` was lost in extraction --
  // confirm both against the repository.
  uint64 TokenFingerprints(Text text, std::vector *tokens) const;

  // Compute fingerprint for phrase.
  uint64 Fingerprint(Text text) const;

  // Compute fingerprint and case form for phrase. Both results are returned
  // through the output pointers.
  void FingerprintAndForm(Text text, uint64 *fingerprint, CaseForm *form) const;

  // Set/get phrase normalization flags.
  Normalization normalization() const { return normalization_; }
  void set_normalization(Normalization normalization) {
    normalization_ = normalization;
  }

 private:
  // Phrase text normalization.
  Normalization normalization_ = NORMALIZE_DEFAULT;

  // Text tokenizer.
  Tokenizer tokenizer_;
};
// Command-line flags: HTTP port, knowledge base store and name table.
DEFINE_int32(port, 8080, "HTTP server port");
DEFINE_string(kb, "local/data/e/wiki/kb.sling", "Knowledge base");
DEFINE_string(names, "local/data/e/wiki/en/name-table.repo", "Name table");

using namespace sling;
using namespace sling::nlp;

// Knowledge base server. Loads the knowledge base into a commons store,
// registers the knowledge service with an HTTP server, and serves requests
// until the server is done.
int main(int argc, char *argv[]) {
  InitProgram(&argc, &argv);

  // Load the knowledge base into the commons store.
  LOG(INFO) << "Loading knowledge base from " << FLAGS_kb;
  Store commons;
  LoadStore(FLAGS_kb, &commons);

  // Create the HTTP server; it does not accept requests until Start().
  LOG(INFO) << "Start HTTP server on port " << FLAGS_port;
  HTTPServerOptions options;
  HTTPServer http(options, FLAGS_port);

  // The knowledge service is loaded before the store is frozen.
  // NOTE(review): presumably Load() needs to modify the commons store --
  // confirm before moving the Freeze() call.
  KnowledgeService kb;
  kb.Load(&commons, FLAGS_names);
  commons.Freeze();

  // Register the service handlers and redirect the root URL to the service.
  kb.Register(&http);
  http.Register("/", [](HTTPRequest *req, HTTPResponse *rsp) {
    rsp->TempRedirectTo("/kb");
  });

  // Abort if the server cannot be started.
  CHECK(http.Start());

  LOG(INFO) << "HTTP server running";
  http.Wait();

  LOG(INFO) << "HTTP server done";
  return 0;
}
// The action table is a set of parser actions indexed by id.
// NOTE(review): the template arguments of the containers below appear to have
// been lost when this text was extracted -- restore them from the repository
// before compiling.
class ActionTable {
 public:
  // Add action to the table.
  void Add(const ParserAction &action);

  // Return the index of action, or -1 if the action is not in the table.
  int Index(const ParserAction &action) const {
    const auto &it = mapping_.find(action);
    return it == mapping_.end() ? -1 : it->second;
  }

  // Return the number of parser actions.
  int size() const { return actions_.size(); }

  // Return the ith parser action. The index is not range-checked here.
  const ParserAction &Action(int index) const { return actions_[index]; }

  // Return list of actions.
  const std::vector &list() const { return actions_; }

  // Read action table from frame.
  void Read(const Frame &frame);

  // Write action table to frame.
  void Write(Builder *frame) const;

 private:
  // List of actions.
  std::vector actions_;

  // Mapping from parser action to index.
  std::unordered_map mapping_;
};
using namespace myelin;

// Delegate for fixed action classification. The delegate runs a network cell
// on the activations and maps the cell output to an entry in its action
// table.
class MultiClassDelegate : public Delegate {
 public:
  // Look up the cell named by the "cell" slot of the spec together with its
  // "<cell name>/input" and "<cell name>/output" parameters, and read the
  // action table from the spec.
  void Initialize(const Network &network, const Frame &spec) override {
    cell_ = network.GetCell(spec.GetString("cell"));
    input_ = cell_->GetParameter(cell_->name() + "/input");
    output_ = cell_->GetParameter(cell_->name() + "/output");
    actions_.Read(spec);
  }

  // Create a new instance for making predictions with this delegate. The
  // instance is allocated with new and returned to the caller.
  DelegateInstance *CreateInstance() override {
    return new MultiClassDelegateInstance(this);
  }

  // Multi-class delegate instance.
  class MultiClassDelegateInstance : public DelegateInstance {
   public:
    // Create an instance of the delegate cell. The delegate pointer is kept,
    // so the delegate must outlive the instance.
    MultiClassDelegateInstance(MultiClassDelegate *delegate)
        : delegate_(delegate),
          data_(delegate->cell_) {}

    void Predict(float *activation, ParserAction *action) override {
      // Predict action from activations.
      data_.SetReference(delegate_->input_, activation);
      data_.Compute();
      // The cell output is used as an index into the action table.
      // NOTE(review): a template argument on Get() appears to have been lost
      // when this text was extracted -- restore it from the repository.
      int argmax = *data_.Get(delegate_->output_);
      *action = delegate_->actions_.Action(argmax);
    }

   private:
    MultiClassDelegate *delegate_;  // delegate that created this instance
    Instance data_;                 // instance data for the delegate cell
  };

 private:
  ActionTable actions_;  // action table for multi-class classification

  Cell *cell_ = nullptr;  // cell for computation
  Tensor *input_ = nullptr;  // input for activations
  Tensor *output_ = nullptr;  // output prediction
};

REGISTER_DELEGATE("multiclass", MultiClassDelegate);
35 | reader = sling.RecordReader(flags.arg.input) 36 | records = [(key, value) for key, value in reader] 37 | reader.close() 38 | 39 | # Shufle documents. 40 | r = random.Random(flags.arg.seed) 41 | r.shuffle(records) 42 | 43 | # Write shuffled documents to output. 44 | writer = sling.RecordWriter(flags.arg.output) 45 | for key, value in records: 46 | writer.write(key, value) 47 | writer.close() 48 | 49 | -------------------------------------------------------------------------------- /sling/nlp/parser/parser-annotator.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/nlp/parser/parser.h" 16 | #include "sling/nlp/document/annotator.h" 17 | 18 | namespace sling { 19 | namespace nlp { 20 | 21 | // Document annotator for adding semantic parse annotations to document. 22 | class ParserAnnotator : public Annotator { 23 | public: 24 | void Init(task::Task *task, Store *commons) override { 25 | // Load parser model. 26 | string model = task->GetInputFile("parser"); 27 | LOG(INFO) << "Loading parser model from " << model; 28 | parser_.Load(commons, model); 29 | } 30 | 31 | void Annotate(Document *document) override { 32 | // Parse document. 33 | parser_.Parse(document); 34 | } 35 | 36 | private: 37 | // Parser model. 
38 | Parser parser_; 39 | }; 40 | 41 | REGISTER_ANNOTATOR("parser", ParserAnnotator); 42 | 43 | // Document annotator for adding names to frame based on first mention. 44 | class MentionNameAnnotator : public Annotator { 45 | public: 46 | void Init(task::Task *task, Store *commons) override { 47 | names_.Bind(commons); 48 | } 49 | 50 | void Annotate(Document *document) override { 51 | Handles evoked(document->store()); 52 | for (Span *span : document->spans()) { 53 | span->AllEvoked(&evoked); 54 | for (Handle h : evoked) { 55 | Frame f(document->store(), h); 56 | if (!f.Has(n_name_)) { 57 | f.Add(n_name_, span->GetText()); 58 | } 59 | } 60 | } 61 | } 62 | 63 | private: 64 | Names names_; 65 | Name n_name_{names_, "name"}; 66 | }; 67 | 68 | REGISTER_ANNOTATOR("mention-name", MentionNameAnnotator); 69 | 70 | 71 | } // namespace nlp 72 | } // namespace sling 73 | -------------------------------------------------------------------------------- /sling/nlp/parser/roles.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "sling/nlp/parser/roles.h" 16 | 17 | namespace sling { 18 | namespace nlp { 19 | 20 | void RoleSet::Add(Handle role) { 21 | if (!role.IsNil() && roles_.find(role) == roles_.end()) { 22 | int index = roles_.size(); 23 | roles_[role] = index; 24 | } 25 | } 26 | 27 | void RoleSet::Add(const std::vector &actions) { 28 | for (const ParserAction &action : actions) Add(action.role); 29 | } 30 | 31 | void RoleSet::GetList(std::vector *list) const { 32 | list->resize(roles_.size()); 33 | for (auto &it : roles_) { 34 | (*list)[it.second] = it.first; 35 | } 36 | } 37 | 38 | void RoleGraph::Compute(const ParserState &state, 39 | int limit, 40 | const RoleSet &roles) { 41 | limit_ = limit; 42 | num_roles_ = roles.size(); 43 | int k = limit_; 44 | edges_.clear(); 45 | if (k > state.AttentionSize()) k = state.AttentionSize(); 46 | for (int source = 0; source < k; ++source) { 47 | Handle handle = state.Attention(source).frame; 48 | const FrameDatum *frame = state.store()->GetFrame(handle); 49 | for (const Slot *slot = frame->begin(); slot < frame->end(); ++slot) { 50 | int target = -1; 51 | if (slot->value.IsLocalRef()) { 52 | target = state.AttentionIndex(slot->value, k); 53 | if (target == -1) continue; 54 | } 55 | 56 | int role = roles.Lookup(slot->name); 57 | if (role == -1) continue; 58 | 59 | edges_.emplace_back(source, role, target); 60 | } 61 | } 62 | } 63 | 64 | } // namespace nlp 65 | } // namespace sling 66 | 67 | -------------------------------------------------------------------------------- /sling/nlp/parser/tools/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_binary( 4 | name = "parse", 5 | srcs = ["parse.cc"], 6 | deps = [ 7 | "//sling/base", 8 | "//sling/base:clock", 9 | "//sling/file:posix", 10 | "//sling/frame:object", 11 | "//sling/frame:serialization", 12 | "//sling/myelin:profile", 13 | "//sling/nlp/document", 14 | 
import sling
import sling.flags as flags
import sling.task.workflow as workflow

flags.define("--accurate", default=False,action='store_true')

flags.parse()

# Model file and RNN size for the two supported configurations. The accurate
# configuration uses a deeper and wider RNN than the default one.
configurations = {
  True: ("local/data/e/caspar/caspar-accurate.flow", 3, 192),
  False: ("local/data/e/caspar/caspar.flow", 1, 128),
}
modelfn, rnn_layers, rnn_dim = configurations[bool(flags.arg.accurate)]

# The workflow system must be started before any workflow is created.
workflow.startup()

wf = workflow.Workflow("parser-training")

# Input corpora, word embeddings, and output model for the trainer.
training_corpus = wf.resource("local/data/corpora/caspar/train_shuffled.rec",
                              format="record/document")
evaluation_corpus = wf.resource("local/data/corpora/caspar/dev.rec",
                                format="record/document")
word_embeddings = wf.resource(
  "local/data/corpora/caspar/word2vec-32-embeddings.bin",
  format="embeddings")
parser_model = wf.resource(modelfn, format="flow")

# Set up parser trainer task with model and training hyperparameters.
trainer = wf.task("caspar-trainer")

model_params = {
  "rnn_type": 1,
  "rnn_dim": rnn_dim,
  "rnn_highways": True,
  "rnn_layers": rnn_layers,
  "dropout": 0.2,
  "ff_l2reg": 0.0001,
}

training_params = {
  "learning_rate": 1.0,
  "learning_rate_decay": 0.8,
  "clipping": 1,
  "optimizer": "sgd",
  "batch_size": 32,
  "rampup": 120,
  "report_interval": 1000,
  "learning_rate_cliff": 40000,
  "epochs": 50000,
}

params = dict(model_params)
params.update(training_params)
trainer.add_params(params)

# Connect the resources to the trainer.
for name, resource in [
  ("training_corpus", training_corpus),
  ("evaluation_corpus", evaluation_corpus),
  ("word_embeddings", word_embeddings),
]:
  trainer.attach_input(name, resource)
trainer.attach_output("model", parser_model)

# Train the parser and shut down the workflow system when done.
workflow.run(wf)
workflow.shutdown()
14 | 15 | #ifndef SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ 16 | #define SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ 17 | 18 | #include 19 | 20 | #include "sling/nlp/document/document.h" 21 | #include "sling/nlp/parser/parser-action.h" 22 | 23 | namespace sling { 24 | namespace nlp { 25 | 26 | // Generates transition sequences for [begin, end) token range in 'document', 27 | // calling 'callback' for every transition. 28 | void Generate(const Document &document, 29 | int begin, int end, 30 | std::function callback); 31 | 32 | // Generates transition sequences for all tokens in 'document', calling 33 | // 'callback' for every transition. 34 | void Generate(const Document &document, 35 | std::function callback); 36 | 37 | } // namespace nlp 38 | } // namespace sling 39 | 40 | #endif // SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ 41 | -------------------------------------------------------------------------------- /sling/nlp/silver/corpus-split.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/nlp/document/document.h" 16 | #include "sling/task/documents.h" 17 | #include "sling/util/fingerprint.h" 18 | 19 | namespace sling { 20 | namespace nlp { 21 | 22 | using namespace task; 23 | 24 | // Split document corpus into training and evaluation data sets. 
The training 25 | // data is shuffled based on the contents of the document. 26 | class CorpusSplitter : public DocumentProcessor { 27 | public: 28 | void Startup(Task *task) override { 29 | // Get output chanels. 30 | train_ = task->GetSink("train"); 31 | eval_ = task->GetSink("eval"); 32 | CHECK(train_ != nullptr) << "train channel missing"; 33 | CHECK(eval_ != nullptr) << "eval channel missing"; 34 | 35 | // Get parameters. 36 | task->Fetch("split_ratio", &split_ratio_); 37 | } 38 | 39 | void Process(Slice key, const Document &document) override { 40 | uint64 fp = Fingerprint(document.text().data(), document.text().size()); 41 | if (fp % split_ratio_ == (split_ratio_ - 1)) { 42 | // Output evaluation document. 43 | eval_->Send(CreateMessage(key, document.top())); 44 | } else { 45 | // Output training document. 46 | train_->Send(CreateMessage(std::to_string(fp), document.top())); 47 | } 48 | } 49 | 50 | private: 51 | // Channels for training and evaluation documents. 52 | Channel *train_ = nullptr; 53 | Channel *eval_ = nullptr; 54 | 55 | // Corpus split ratio, i.e. a corpus split ratio of 10 means that one in ten 56 | // documents is added to the evaluation set (90% train, 10% eval). 57 | int split_ratio_ = 10; 58 | }; 59 | 60 | REGISTER_TASK_PROCESSOR("corpus-split", CorpusSplitter); 61 | 62 | } // namespace nlp 63 | } // namespace sling 64 | 65 | -------------------------------------------------------------------------------- /sling/nlp/silver/idf.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_NLP_NER_IDF_H_ 16 | #define SLING_NLP_NER_IDF_H_ 17 | 18 | #include "sling/base/port.h" 19 | #include "sling/file/repository.h" 20 | #include "sling/util/unicode.h" 21 | 22 | namespace sling { 23 | namespace nlp { 24 | 25 | // Word vocabulary table for inverse document frequency (IDF). 26 | class IDFTable { 27 | public: 28 | // Load IDF repository from file. 29 | void Load(const string &filename); 30 | 31 | // Look up word fingerprint and return IDF for word. 32 | float GetIDF(uint64 fingerprint) const; 33 | 34 | // Get text normalization flags for IDF table. 35 | Normalization normalization() const { return normalization_; } 36 | 37 | // IDF repository header information. 38 | static const int VERSION = 1; 39 | struct Header { 40 | int version; 41 | float num_docs; 42 | char normalization[16]; 43 | }; 44 | 45 | private: 46 | // Word entry. 47 | struct Word { 48 | uint64 fingerprint; 49 | float idf; 50 | } ABSL_ATTRIBUTE_PACKED; 51 | 52 | // Word index in repository. 53 | class WordIndex : public RepositoryMap { 54 | public: 55 | // Initialize word index. 56 | void Initialize(const Repository &repository) { Init(repository, "IDF"); } 57 | 58 | // Return first element in bucket. 59 | const Word *GetBucket(int bucket) const { return GetObject(bucket); } 60 | }; 61 | 62 | // Find word in word index. 63 | const Word *Find(uint64 fp) const; 64 | 65 | // Repository with name table. 66 | Repository repository_; 67 | 68 | // IDF header information. 
69 | const Header *header_ = nullptr; 70 | 71 | // Word index. 72 | WordIndex index_; 73 | 74 | // IDF for out-of-vocabulary words. 75 | float oov_idf_ = 0.0; 76 | 77 | // Text normalization for fingerprints. 78 | Normalization normalization_ = NORMALIZE_DEFAULT; 79 | }; 80 | 81 | } // namespace nlp 82 | } // namespace sling 83 | 84 | #endif // SLING_NLP_NER_IDF_H_ 85 | -------------------------------------------------------------------------------- /sling/nlp/wikicat/app/appicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/nlp/wikicat/app/appicon.ico -------------------------------------------------------------------------------- /sling/pyapi/pybase.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/pyapi/pybase.h" 16 | 17 | namespace sling { 18 | 19 | PyMethodTable::PyMethodTable() { 20 | // Add terminator element. 21 | table_.resize(1); 22 | table_[0].ml_name = nullptr; 23 | } 24 | 25 | void PyMethodTable::Add(const char *name, PyCFunction method, int flags) { 26 | // Set last element to new method. 
27 | PyMethodDef &def = table_.back(); 28 | def.ml_name = name; 29 | def.ml_meth = method; 30 | def.ml_flags = flags; 31 | def.ml_doc = ""; 32 | 33 | // Add new terminator element. 34 | table_.resize(table_.size() + 1); 35 | table_[table_.size() - 1].ml_name = nullptr; 36 | } 37 | 38 | void PyBase::InitType(PyTypeObject *type, 39 | const char *name, 40 | size_t size, 41 | bool instantiable) { 42 | type->tp_name = name; 43 | type->tp_basicsize = size; 44 | if (instantiable) type->tp_new = PyType_GenericNew; 45 | type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; 46 | } 47 | 48 | void PyBase::RegisterType(PyTypeObject *type) { 49 | PyType_Ready(type); 50 | Py_INCREF(type); 51 | } 52 | 53 | void PyBase::RegisterType(PyTypeObject *type, 54 | PyObject *module, 55 | const char *name) { 56 | PyType_Ready(type); 57 | Py_INCREF(type); 58 | PyModule_AddObject(module, name, reinterpret_cast(type)); 59 | } 60 | 61 | void PyBase::RegisterEnum(PyObject *module, 62 | const char *name, 63 | int value) { 64 | PyModule_AddIntConstant(module, name, value); 65 | } 66 | 67 | } // namespace sling 68 | 69 | -------------------------------------------------------------------------------- /sling/pyapi/pydate.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
namespace sling {

// Python wrapper for date. The wrapper holds the date object by value.
struct PyDate : public PyBase {
  // Initialize date wrapper from the Python constructor arguments.
  // NOTE(review): the return value presumably follows the tp_init convention
  // (0 on success, -1 on error); confirm in the implementation.
  int Init(PyObject *args, PyObject *kwds);

  // Deallocate date wrapper.
  void Dealloc();

  // Convert date to string (YYYY-MM-DD).
  PyObject *Str();

  // Date in ISO 8601 format.
  PyObject *ISO();

  // Convert date to string or integer value.
  PyObject *Value();

  // Date object.
  nlp::Date date;

  // Registration. Define() registers the date type in the module.
  static PyTypeObject type;
  static PyMemberDef members[];
  static PyMethodTable methods;
  static void Define(PyObject *module);
};

// Python wrapper for calendar. The wrapper refers to the store wrapper and the
// calendar through pointers.
struct PyCalendar : public PyBase {
  // Initialize calendar wrapper from the Python constructor arguments.
  // NOTE(review): the return value presumably follows the tp_init convention
  // (0 on success, -1 on error); confirm in the implementation.
  int Init(PyObject *args, PyObject *kwds);

  // Deallocate calendar wrapper.
  void Dealloc();

  // Convert date to human-readable string.
  PyObject *Str(PyObject *obj);

  // Return frames for date parts.
  PyObject *Day(PyObject *obj);
  PyObject *Month(PyObject *obj);
  PyObject *Year(PyObject *obj);
  PyObject *Decade(PyObject *obj);
  PyObject *Century(PyObject *obj);
  PyObject *Millennium(PyObject *obj);

  // Get date object.
  // NOTE(review): presumably converts or checks that obj is a date wrapper;
  // confirm ownership of the returned pointer in the implementation.
  PyDate *GetDate(PyObject *obj);

  // Store for calendar frames.
  PyStore *pystore;

  // Calendar.
  nlp::Calendar *calendar;

  // Registration. Define() registers the calendar type in the module.
  static PyTypeObject type;
  static PyMethodTable methods;
  static void Define(PyObject *module);
};

}  // namespace sling
29 | PyObject *PyLogMessage(PyObject *self, PyObject *args); 30 | 31 | } // namespace sling 32 | 33 | #endif // SLING_PYAPI_PYMISC_H_ 34 | 35 | -------------------------------------------------------------------------------- /sling/stream/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "stream", 5 | hdrs = ["stream.h"], 6 | deps = [ 7 | "//sling/base", 8 | ], 9 | ) 10 | 11 | cc_library( 12 | name = "file", 13 | srcs = ["file.cc"], 14 | hdrs = ["file.h"], 15 | deps = [ 16 | ":stream", 17 | "//sling/base", 18 | "//sling/file", 19 | ], 20 | ) 21 | 22 | cc_library( 23 | name = "memory", 24 | srcs = ["memory.cc"], 25 | hdrs = ["memory.h"], 26 | deps = [ 27 | ":stream", 28 | "//sling/base", 29 | ], 30 | ) 31 | 32 | cc_library( 33 | name = "input", 34 | srcs = ["input.cc"], 35 | hdrs = ["input.h"], 36 | deps = [ 37 | ":stream", 38 | "//sling/base", 39 | "//sling/util:varint", 40 | ], 41 | ) 42 | 43 | cc_library( 44 | name = "output", 45 | srcs = ["output.cc"], 46 | hdrs = ["output.h"], 47 | deps = [ 48 | ":stream", 49 | "//sling/base", 50 | "//sling/string:text", 51 | "//sling/util:varint", 52 | ], 53 | ) 54 | 55 | cc_library( 56 | name = "file-input", 57 | srcs = ["file-input.cc"], 58 | hdrs = ["file-input.h"], 59 | deps = [ 60 | ":bzip2", 61 | ":file", 62 | ":gzip", 63 | ":input", 64 | ], 65 | ) 66 | 67 | cc_library( 68 | name = "bounded", 69 | srcs = ["bounded.cc"], 70 | hdrs = ["bounded.h"], 71 | deps = [ 72 | ":stream", 73 | ], 74 | ) 75 | 76 | cc_library( 77 | name = "bzip2", 78 | srcs = ["bzip2.cc"], 79 | hdrs = ["bzip2.h"], 80 | deps = [ 81 | ":stream", 82 | "//sling/base", 83 | "//third_party/bz2lib", 84 | ], 85 | ) 86 | 87 | cc_library( 88 | name = "gzip", 89 | srcs = ["gzip.cc"], 90 | hdrs = ["gzip.h"], 91 | deps = [ 92 | ":stream", 93 | "//sling/base", 94 | "//third_party/zlib", 95 | ], 96 | ) 97 | 98 | cc_library( 99 | name = 
"zipfile", 100 | srcs = ["zipfile.cc"], 101 | hdrs = ["zipfile.h"], 102 | deps = [ 103 | ":bounded", 104 | ":file", 105 | ":file-input", 106 | ":gzip", 107 | "//sling/base", 108 | "//sling/file", 109 | ], 110 | ) 111 | 112 | cc_library( 113 | name = "unix-file", 114 | srcs = ["unix-file.cc"], 115 | hdrs = ["unix-file.h"], 116 | deps = [ 117 | ":stream", 118 | "//sling/base", 119 | ], 120 | ) 121 | 122 | -------------------------------------------------------------------------------- /sling/stream/bounded.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/stream/bounded.h" 16 | 17 | namespace sling { 18 | 19 | BoundedInputStream::BoundedInputStream(InputStream *input, int64 limit) 20 | : input_(input), left_(limit) { 21 | start_ = input_->ByteCount(); 22 | } 23 | 24 | BoundedInputStream::~BoundedInputStream() { 25 | // Back up if we overshot the size of the stream. 26 | if (left_ < 0) input_->BackUp(-left_); 27 | } 28 | 29 | bool BoundedInputStream::Next(const void **data, int *size) { 30 | // Check if we have reached the limit of the stream. 31 | if (left_ <= 0) return false; 32 | 33 | // Read next chunk from the underlying stream. 34 | if (!input_->Next(data, size)) return false; 35 | 36 | // Adjust size of we overshot the limit. 
37 | left_ -= *size; 38 | if (left_ < 0) *size += left_; 39 | 40 | return true; 41 | } 42 | 43 | void BoundedInputStream::BackUp(int count) { 44 | if (left_ < 0) { 45 | // Include the overshoot when backing up in the underlying stream. 46 | input_->BackUp(count - left_); 47 | left_ = count; 48 | } else { 49 | // Back up in the underlying stream. 50 | input_->BackUp(count); 51 | left_ += count; 52 | } 53 | } 54 | 55 | bool BoundedInputStream::Skip(int count) { 56 | if (count > left_) { 57 | // Skip to end. 58 | if (left_ < 0) return false; 59 | input_->Skip(left_); 60 | left_ = 0; 61 | return false; 62 | } else { 63 | // Skip within limit. 64 | if (!input_->Skip(count)) return false; 65 | left_ -= count; 66 | return true; 67 | } 68 | } 69 | 70 | int64 BoundedInputStream::ByteCount() const { 71 | if (left_ < 0) { 72 | return input_->ByteCount() + left_ - start_; 73 | } else { 74 | return input_->ByteCount() - start_; 75 | } 76 | } 77 | 78 | } // namespace sling 79 | 80 | -------------------------------------------------------------------------------- /sling/stream/bounded.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef SLING_STREAM_BOUNDED_H_ 16 | #define SLING_STREAM_BOUNDED_H_ 17 | 18 | #include "sling/base/macros.h" 19 | #include "sling/base/types.h" 20 | #include "sling/stream/stream.h" 21 | 22 | namespace sling { 23 | 24 | // A bounded input stream that limits the size of the input to a particular 25 | // size. 26 | class BoundedInputStream : public InputStream { 27 | public: 28 | BoundedInputStream(InputStream *input, int64 limit); 29 | ~BoundedInputStream(); 30 | 31 | // InputStream interface. 32 | bool Next(const void **data, int *size); 33 | void BackUp(int count); 34 | bool Skip(int count); 35 | int64 ByteCount() const; 36 | 37 | private: 38 | // Underlying input stream. 39 | InputStream *input_; 40 | 41 | // Number of bytes left to read. This can be negative if have overshoot the 42 | // limit of the stream. 43 | int64 left_; 44 | 45 | // Initial position of the underlying stream. 46 | int64 start_; 47 | 48 | DISALLOW_IMPLICIT_CONSTRUCTORS(BoundedInputStream); 49 | }; 50 | 51 | } // namespace sling 52 | 53 | #endif // SLING_STREAM_BOUNDED_H_ 54 | 55 | -------------------------------------------------------------------------------- /sling/stream/bzip2.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef SLING_STREAM_BZIP2_H_ 16 | #define SLING_STREAM_BZIP2_H_ 17 | 18 | #include "sling/base/types.h" 19 | #include "sling/stream/stream.h" 20 | #include "third_party/bz2lib/bzlib.h" 21 | 22 | namespace sling { 23 | 24 | // BZIP2 stream compression. 25 | class BZip2Compressor : public OutputStream { 26 | public: 27 | // Initialize compressor. 28 | BZip2Compressor(OutputStream *sink, 29 | int block_size = 1 << 20, 30 | int compression_level = 9); 31 | ~BZip2Compressor() override; 32 | 33 | // Implementation of OutputStream interface. 34 | bool Next(void **data, int *size) override; 35 | void BackUp(int count) override; 36 | int64 ByteCount() const override; 37 | 38 | private: 39 | // Compressor. 40 | bz_stream stream_; 41 | }; 42 | 43 | // BZIP2 stream decompression. 44 | class BZip2Decompressor : public InputStream { 45 | public: 46 | // Initialize decompressor. 47 | BZip2Decompressor(InputStream *source, 48 | int block_size = 1 << 20); 49 | ~BZip2Decompressor() override; 50 | 51 | // Implementation of InputStream interface. 52 | bool Next(const void **data, int *size) override; 53 | void BackUp(int count) override; 54 | bool Skip(int count) override; 55 | int64 ByteCount() const override; 56 | 57 | private: 58 | // Decompress next chunk. 59 | bool NextChunk(); 60 | 61 | // Source for compressed input. 62 | InputStream *source_; 63 | 64 | // Compression buffer. 65 | char *buffer_; 66 | int block_size_; 67 | 68 | // Decompressor. 69 | bz_stream stream_; 70 | 71 | // Number of bytes uncompressed. 72 | uint64 total_bytes_; 73 | 74 | // Reset decompressor on next chunk (for multi stream bzip2 files). 75 | bool reset_; 76 | 77 | // Number of bytes to back up. 
78 | int backup_; 79 | }; 80 | 81 | } // namespace sling 82 | 83 | #endif // SLING_STREAM_BZIP2_H_ 84 | 85 | -------------------------------------------------------------------------------- /sling/stream/file-input.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_STREAM_FILE_INPUT_H_ 16 | #define SLING_STREAM_FILE_INPUT_H_ 17 | 18 | #include <string> 19 | #include <vector> 20 | 21 | #include "sling/base/macros.h" 22 | #include "sling/base/types.h" 23 | #include "sling/stream/input.h" 24 | 25 | namespace sling { 26 | 27 | // Input stream that runs a pipeline of input streams. 28 | class InputPipeline : public InputStream { 29 | public: 30 | InputPipeline(); 31 | ~InputPipeline() override; 32 | 33 | // Return the last stream in the pipeline. 34 | InputStream *last() const { return last_; } 35 | 36 | // Add input stream to pipeline. Takes ownership of the stream. 37 | void Add(InputStream *stream); 38 | 39 | // Implementation of InputStream interface. 40 | bool Next(const void **data, int *size) override; 41 | void BackUp(int count) override; 42 | bool Skip(int count) override; 43 | int64 ByteCount() const override; 44 | 45 | private: 46 | // Final input stream. 47 | InputStream *last_ = nullptr; 48 | 49 | // Input stream pipeline. 
50 | std::vector<InputStream *> streams_; 51 | }; 52 | 53 | // File input class that supports decompression of the input stream based on 54 | // the file extension. 55 | class FileInput : public Input { 56 | public: 57 | // Open file. 58 | explicit FileInput(const string &filename, int block_size = 1 << 20) 59 | : Input(Open(filename, block_size)) {} 60 | 61 | ~FileInput() { delete stream(); } 62 | 63 | // Open input file and add decompression for compressed input files. 64 | static InputStream *Open(const string &filename, int block_size = 1 << 20); 65 | 66 | private: 67 | DISALLOW_IMPLICIT_CONSTRUCTORS(FileInput); 68 | }; 69 | 70 | } // namespace sling 71 | 72 | #endif // SLING_STREAM_FILE_INPUT_H_ 73 | 74 | -------------------------------------------------------------------------------- /sling/stream/gzip.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_STREAM_GZIP_H_ 16 | #define SLING_STREAM_GZIP_H_ 17 | 18 | #include "sling/base/types.h" 19 | #include "sling/stream/stream.h" 20 | #include "third_party/zlib/zlib.h" 21 | 22 | namespace sling { 23 | 24 | // GZIP stream compression. 25 | class GZipCompressor : public OutputStream { 26 | public: 27 | // Initialize compressor. 
28 | GZipCompressor(OutputStream *sink, 29 | int block_size = 1 << 20, 30 | int compression_level = 9); 31 | ~GZipCompressor() override; 32 | 33 | // Implementation of OutputStream interface. 34 | bool Next(void **data, int *size) override; 35 | void BackUp(int count) override; 36 | int64 ByteCount() const override; 37 | 38 | private: 39 | // Compressor. 40 | z_stream stream_; 41 | }; 42 | 43 | // GZIP stream decompression. 44 | class GZipDecompressor : public InputStream { 45 | public: 46 | // Initialize decompressor. 47 | GZipDecompressor(InputStream *source, 48 | int block_size = 1 << 20, 49 | int window_bits = 15 + 16); 50 | ~GZipDecompressor() override; 51 | 52 | // Implementation of InputStream interface. 53 | bool Next(const void **data, int *size) override; 54 | void BackUp(int count) override; 55 | bool Skip(int count) override; 56 | int64 ByteCount() const override; 57 | 58 | private: 59 | // Decompress next chunk. 60 | bool NextChunk(); 61 | 62 | // Source for compressed input. 63 | InputStream *source_; 64 | 65 | // Decompression buffer. 66 | char *buffer_; 67 | int block_size_; 68 | 69 | // Decompressor. 70 | z_stream stream_; 71 | 72 | // Number of bytes uncompressed. 73 | uint64 total_bytes_; 74 | 75 | // Reset decompressor on next chunk (for multi stream gzip files). 76 | bool reset_; 77 | 78 | // Number of bytes to back up. 
79 | int backup_; 80 | }; 81 | 82 | } // namespace sling 83 | 84 | #endif // SLING_STREAM_GZIP_H_ 85 | 86 | -------------------------------------------------------------------------------- /sling/string/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "string", 5 | deps = [ 6 | ":ctype", 7 | ":numbers", 8 | ":printf", 9 | ":strcat", 10 | ":text", 11 | ], 12 | ) 13 | 14 | cc_library( 15 | name = "ctype", 16 | hdrs = ["ctype.h"], 17 | srcs = ["ctype.cc"], 18 | copts = [ 19 | "-Wno-narrowing", 20 | ] 21 | ) 22 | 23 | cc_library( 24 | name = "numbers", 25 | hdrs = ["numbers.h"], 26 | srcs = ["numbers.cc"], 27 | deps = [ 28 | ":ctype", 29 | "//sling/base", 30 | ], 31 | ) 32 | 33 | cc_library( 34 | name = "printf", 35 | hdrs = ["printf.h"], 36 | srcs = ["printf.cc"], 37 | deps = [ 38 | "//sling/base", 39 | ], 40 | ) 41 | 42 | cc_library( 43 | name = "strcat", 44 | hdrs = ["strcat.h"], 45 | srcs = ["strcat.cc"], 46 | deps = [ 47 | ":ctype", 48 | ":numbers", 49 | ":text", 50 | "//sling/base", 51 | ], 52 | ) 53 | 54 | cc_library( 55 | name = "text", 56 | hdrs = ["text.h"], 57 | srcs = ["text.cc"], 58 | deps = [ 59 | "//sling/base", 60 | "//sling/util:city", 61 | ], 62 | copts = [ 63 | "-Wno-deprecated", 64 | "-Wno-sign-compare", 65 | ], 66 | ) 67 | 68 | -------------------------------------------------------------------------------- /sling/string/printf.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Printf variants that place their output in a C++ string. 16 | // 17 | // Usage: 18 | // string result = StringPrintf("%d %s\n", 10, "hello"); 19 | // SStringPrintf(&result, "%d %s\n", 10, "hello"); 20 | // StringAppendF(&result, "%d %s\n", 20, "there"); 21 | 22 | #ifndef SLING_STRING_PRINTF_H_ 23 | #define SLING_STRING_PRINTF_H_ 24 | 25 | #include 26 | #include 27 | 28 | #include "sling/base/port.h" 29 | #include "sling/base/types.h" 30 | 31 | namespace sling { 32 | 33 | // Return a C++ string. 34 | extern string StringPrintf(const char *format, ...) ABSL_PRINTF_ATTRIBUTE(1, 2); 35 | 36 | // Store result into a supplied string and return it. 37 | extern const string &SStringPrintf(string *dst, const char *format, ...) 38 | ABSL_PRINTF_ATTRIBUTE(2, 3); 39 | 40 | // Append result to a supplied string 41 | extern void StringAppendF(string *dst, const char *format, ...) 42 | ABSL_PRINTF_ATTRIBUTE(2, 3); 43 | 44 | // Lower-level routine that takes a va_list and appends to a specified string. 
45 | extern void StringAppendV(string *dst, const char *format, va_list ap); 46 | 47 | } // namespace sling 48 | 49 | #endif // SLING_STRING_PRINTF_H_ 50 | 51 | -------------------------------------------------------------------------------- /sling/task/app/appicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/task/app/appicon.ico -------------------------------------------------------------------------------- /sling/task/app/dashboard.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: lcd; 3 | src: url(digital-7.mono.ttf); 4 | } 5 | 6 | .job-table { 7 | margin-top: 8px; 8 | margin-right: 8px; 9 | margin-bottom: 8px; 10 | } 11 | 12 | .channel-table { 13 | width: calc(100% - 8px); 14 | margin-top: 8px; 15 | margin-bottom: 8px; 16 | } 17 | 18 | .counter-table { 19 | margin-top: 8px; 20 | margin-bottom: 8px; 21 | } 22 | 23 | .res-table { 24 | padding: 4px; 25 | width: 160px; 26 | margin-left: 8px; 27 | margin-bottom: 8px; 28 | margin-top: 8px; 29 | border-radius: 4px; 30 | box-shadow: inset 0px 0px 24px 2px rgba(0,0,0,0.2); 31 | color: #303060; 32 | background: #BAC2B6; 33 | font-family: arial; 34 | font-weight: normal; 35 | font-size: 12pt; 36 | text-shadow: 1px 1px 4px rgba(150, 150, 150, 1); 37 | } 38 | 39 | .res-table td { 40 | vertical-align: baseline; 41 | } 42 | 43 | .lcd { 44 | font-family: lcd; 45 | font-size: 20pt; 46 | font-weight: normal; 47 | text-align: right; 48 | width: 100%; 49 | } 50 | 51 | -------------------------------------------------------------------------------- /sling/task/app/digital-7.mono.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/task/app/digital-7.mono.ttf 
-------------------------------------------------------------------------------- /sling/task/documents.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/documents.h" 16 | 17 | namespace sling { 18 | namespace task { 19 | 20 | REGISTER_TASK_PROCESSOR("document-processor", DocumentProcessor); 21 | 22 | void DocumentProcessor::InitCommons(Task *task) { 23 | // Initialize document annotation pipeline. 24 | pipeline_.Init(task, commons_); 25 | 26 | // Bind document names. 27 | docnames_ = new nlp::DocumentNames(commons_); 28 | } 29 | 30 | void DocumentProcessor::Start(Task *task) { 31 | // Initialize frame processor. 32 | FrameProcessor::Start(task); 33 | 34 | // Statistics. 35 | num_documents_ = task->GetCounter("documents"); 36 | num_tokens_ = task->GetCounter("tokens"); 37 | num_spans_ = task->GetCounter("spans"); 38 | } 39 | 40 | void DocumentProcessor::Process(Slice key, const Frame &frame) { 41 | // Create document from frame. 42 | nlp::Document document(frame, docnames_); 43 | 44 | // Run preprocessing pipeline on document. 45 | if (!pipeline_.empty()) { 46 | pipeline_.Annotate(&document); 47 | document.Update(); 48 | } 49 | 50 | // Process document. 51 | Process(key, document); 52 | 53 | // Update statistics. 
54 | num_documents_->Increment(); 55 | num_tokens_->Increment(document.num_tokens()); 56 | num_spans_->Increment(document.num_spans()); 57 | } 58 | 59 | void DocumentProcessor::Process(Slice key, const nlp::Document &document) { 60 | Output(key, document); 61 | } 62 | 63 | void DocumentProcessor::Output(Text key, const nlp::Document &document) { 64 | FrameProcessor::Output(key, document.top()); 65 | } 66 | 67 | void DocumentProcessor::Output(const nlp::Document &document) { 68 | FrameProcessor::Output(document.top()); 69 | } 70 | 71 | } // namespace task 72 | } // namespace sling 73 | 74 | -------------------------------------------------------------------------------- /sling/task/documents.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_TASK_DOCUMENTS_H_ 16 | #define SLING_TASK_DOCUMENTS_H_ 17 | 18 | #include "sling/nlp/document/annotator.h" 19 | #include "sling/nlp/document/document.h" 20 | #include "sling/task/frames.h" 21 | 22 | namespace sling { 23 | namespace task { 24 | 25 | // Task processor for receiving and sending documents. 
26 | class DocumentProcessor : public FrameProcessor { 27 | public: 28 | ~DocumentProcessor() { if (docnames_) docnames_->Release(); } 29 | 30 | void Process(Slice key, const Frame &frame) override; 31 | 32 | // Initialize commons store with document symbols. 33 | void InitCommons(Task *task) override; 34 | 35 | // Initialize document processor. 36 | void Start(Task *task) override; 37 | 38 | // Called for each document received on input. 39 | virtual void Process(Slice key, const nlp::Document &document); 40 | 41 | // Output document to output. 42 | void Output(Text key, const nlp::Document &document); 43 | 44 | // Output document to output using document id as key. 45 | void Output(const nlp::Document &document); 46 | 47 | // Document schema. 48 | const nlp::DocumentNames *docnames() const { return docnames_; } 49 | 50 | private: 51 | // Document symbol names. 52 | const nlp::DocumentNames *docnames_ = nullptr; 53 | 54 | // Document annotator pipeline for preprocessing incoming documents. 55 | nlp::Pipeline pipeline_; 56 | 57 | // Statistics counters; bound in Start(). 58 | Counter *num_documents_ = nullptr; 59 | Counter *num_tokens_ = nullptr; 60 | Counter *num_spans_ = nullptr; 61 | }; 62 | 63 | } // namespace task 64 | } // namespace sling 65 | 66 | #endif // SLING_TASK_DOCUMENTS_H_ 67 | 68 | -------------------------------------------------------------------------------- /sling/task/environment.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_TASK_ENVIRONMENT_H_ 16 | #define SLING_TASK_ENVIRONMENT_H_ 17 | 18 | #include <atomic> 19 | #include <string> 20 | 21 | #include "sling/base/types.h" 22 | 23 | namespace sling { 24 | namespace task { 25 | 26 | class Channel; 27 | class Task; 28 | 29 | // Lock-free counter for statistics. 30 | class Counter { 31 | public: 32 | // Increment counter. 33 | void Increment() { ++value_; } 34 | void Increment(int64 delta) { value_ += delta; } 35 | 36 | // Reset counter. 37 | void Reset() { value_ = 0; } 38 | 39 | // Set counter value. 40 | void Set(int64 value) { value_ = value; } 41 | 42 | // Return counter value. 43 | int64 value() const { return value_; } 44 | 45 | private: 46 | std::atomic<int64> value_{0}; 47 | }; 48 | 49 | // Container environment interface. 50 | class Environment { 51 | public: 52 | virtual ~Environment() = default; 53 | 54 | // Return statistics counter. 55 | virtual Counter *GetCounter(const string &name) = 0; 56 | 57 | // Notify that channel has completed. 58 | virtual void ChannelCompleted(Channel *channel) = 0; 59 | 60 | // Notify that task has completed. 61 | virtual void TaskCompleted(Task *task) = 0; 62 | }; 63 | 64 | } // namespace task 65 | } // namespace sling 66 | 67 | #endif // SLING_TASK_ENVIRONMENT_H_ 68 | 69 | -------------------------------------------------------------------------------- /sling/task/identity.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include <vector> 16 | 17 | #include "sling/base/logging.h" 18 | #include "sling/task/task.h" 19 | 20 | namespace sling { 21 | namespace task { 22 | 23 | // Mapper that relays all input messages to the output channel. 24 | class IdentityMapper : public Processor { 25 | public: 26 | void Start(Task *task) override { 27 | output_ = task->GetSink("output"); 28 | } 29 | 30 | void Receive(Channel *channel, Message *message) override { 31 | if (output_ != nullptr) { 32 | output_->Send(message); 33 | } else { 34 | delete message; 35 | } 36 | } 37 | 38 | private: 39 | Channel *output_ = nullptr; 40 | }; 41 | 42 | REGISTER_TASK_PROCESSOR("identity-mapper", IdentityMapper); 43 | 44 | // Reducer that relays all input messages to the corresponding output channel. 45 | class IdentityReducer : public Processor { 46 | public: 47 | void Start(Task *task) override { 48 | outputs_ = task->GetSinks("output"); 49 | } 50 | 51 | void Receive(Channel *channel, Message *message) override { 52 | int shard = channel->consumer().shard().part(); 53 | CHECK_LT(shard, outputs_.size()); 54 | outputs_[shard]->Send(message); 55 | } 56 | 57 | private: 58 | std::vector<Channel *> outputs_; 59 | }; 60 | 61 | REGISTER_TASK_PROCESSOR("identity-reducer", IdentityReducer); 62 | 63 | } // namespace task 64 | } // namespace sling 65 | 66 | -------------------------------------------------------------------------------- /sling/task/mapper.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/mapper.h" 16 | 17 | #include "sling/base/logging.h" 18 | 19 | namespace sling { 20 | namespace task { 21 | 22 | void Mapper::Start(Task *task) { 23 | // Get output channel. 24 | output_ = task->GetSink("output"); 25 | if (output_ == nullptr) { 26 | LOG(ERROR) << "No output channel"; 27 | return; 28 | } 29 | } 30 | 31 | void Mapper::Receive(Channel *channel, Message *message) { 32 | // Call Map() method on each input message. 33 | MapInput input(message->key(), message->value()); 34 | Map(input); 35 | 36 | // Delete input message. 37 | delete message; 38 | } 39 | 40 | void Mapper::Done(Task *task) { 41 | // Close output channel. 42 | if (output_ != nullptr) output_->Close(); 43 | } 44 | 45 | void Mapper::Output(Slice key, Slice value) { 46 | // Ignore if there is no output. 47 | if (output_ == nullptr) return; 48 | 49 | // Create new message and send it on the output channel. 50 | Message *message = new Message(key, value); 51 | output_->Send(message); 52 | } 53 | 54 | } // namespace task 55 | } // namespace sling 56 | 57 | -------------------------------------------------------------------------------- /sling/task/mapper.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_TASK_MAPPER_H_ 16 | #define SLING_TASK_MAPPER_H_ 17 | 18 | #include 19 | 20 | #include "sling/base/slice.h" 21 | #include "sling/task/message.h" 22 | #include "sling/task/task.h" 23 | 24 | namespace sling { 25 | namespace task { 26 | 27 | // Input to mapper with a key and a value. 28 | class MapInput { 29 | public: 30 | MapInput(Slice key, Slice value) 31 | : key_(key), value_(value) {} 32 | 33 | // Key for message. 34 | Slice key() const { return key_; } 35 | 36 | // Value for message. 37 | Slice value() const { return value_; } 38 | 39 | private: 40 | Slice key_; 41 | Slice value_; 42 | }; 43 | 44 | // A mapper processes all the input message in the Map() method and can output 45 | // new key/value pairs to the output. 46 | class Mapper : public Processor { 47 | public: 48 | void Start(Task *task) override; 49 | void Receive(Channel *channel, Message *message) override; 50 | void Done(Task *task) override; 51 | 52 | // The Map() method is called for each message in the input and can call the 53 | // Output() method to produce key/value pairs. 54 | virtual void Map(const MapInput &input) = 0; 55 | 56 | // Output key/value pair to output. 57 | void Output(Slice key, Slice value); 58 | 59 | // Return output channel. 60 | Channel *output() const { return output_; } 61 | 62 | private: 63 | // Output channel. 
64 | Channel *output_ = nullptr; 65 | }; 66 | 67 | } // namespace task 68 | } // namespace sling 69 | 70 | #endif // SLING_TASK_MAPPER_H_ 71 | 72 | -------------------------------------------------------------------------------- /sling/task/message-printer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | 17 | #include "sling/base/logging.h" 18 | #include "sling/task/task.h" 19 | 20 | namespace sling { 21 | namespace task { 22 | 23 | // Print incoming messages. 24 | class MessagePrinter : public Processor { 25 | public: 26 | void Receive(Channel *channel, Message *message) override { 27 | LOG(INFO) << "Message on channel " << channel->id() 28 | << " from " << channel->producer().task()->ToString() 29 | << " key: " << message->key() 30 | << " value: " << message->value(); 31 | delete message; 32 | } 33 | }; 34 | 35 | REGISTER_TASK_PROCESSOR("printer", MessagePrinter); 36 | 37 | } // namespace task 38 | } // namespace sling 39 | 40 | -------------------------------------------------------------------------------- /sling/task/message.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/message.h" 16 | 17 | #include 18 | 19 | namespace sling { 20 | namespace task { 21 | 22 | Buffer::Buffer(Slice source) { 23 | if (source.empty()) { 24 | data_ = nullptr; 25 | size_ = 0; 26 | } else { 27 | size_ = source.size(); 28 | data_ = new char[size_]; 29 | memcpy(data_, source.data(), size_); 30 | } 31 | } 32 | 33 | } // namespace task 34 | } // namespace sling 35 | 36 | -------------------------------------------------------------------------------- /sling/task/null-sink.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/task.h" 16 | 17 | namespace sling { 18 | namespace task { 19 | 20 | // Silently discard all incoming messages. 
21 | class NullSink : public Processor { 22 | public: 23 | void Receive(Channel *channel, Message *message) override { 24 | delete message; 25 | } 26 | }; 27 | 28 | REGISTER_TASK_PROCESSOR("null", NullSink); 29 | 30 | } // namespace task 31 | } // namespace sling 32 | 33 | -------------------------------------------------------------------------------- /sling/task/pipe-reader.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | 18 | #include "sling/base/logging.h" 19 | #include "sling/base/types.h" 20 | #include "sling/stream/input.h" 21 | #include "sling/stream/unix-file.h" 22 | #include "sling/task/process.h" 23 | #include "sling/task/task.h" 24 | 25 | namespace sling { 26 | namespace task { 27 | 28 | // Run command and output lines to channel. 29 | class PipeReader : public Process { 30 | public: 31 | // Process input file. 32 | void Run(Task *task) override { 33 | // Get command. 34 | string command = task->Get("command", ""); 35 | 36 | // Get output channel. 37 | Channel *output = task->GetSink("output"); 38 | if (output == nullptr) { 39 | LOG(ERROR) << "No output channel"; 40 | return; 41 | } 42 | 43 | // Run command. 
44 | int buffer_size = task->Get("buffer_size", 1 << 16); 45 | FILE *pipe = popen(command.c_str(), "r"); 46 | if (pipe == nullptr) { 47 | LOG(ERROR) << "Error running command: " << command; 48 | return; 49 | } 50 | StdFileInputStream stream(pipe, false, buffer_size); 51 | Input input(&stream); 52 | 53 | // Read lines from output of program and output to output channel. 54 | string line; 55 | while (input.ReadLine(&line)) { 56 | // Send message with line to output channel. 57 | output->Send(new Message(Slice(), Slice(line))); 58 | } 59 | 60 | // Close pipe and output channel. 61 | int status = pclose(pipe); 62 | CHECK(WIFEXITED(status)) << status; 63 | CHECK_EQ(WEXITSTATUS(status), 0); 64 | output->Close(); 65 | } 66 | }; 67 | 68 | REGISTER_TASK_PROCESSOR("pipe-reader", PipeReader); 69 | 70 | } // namespace task 71 | } // namespace sling 72 | 73 | -------------------------------------------------------------------------------- /sling/task/record-file-writer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/base/logging.h" 16 | #include "sling/file/recordio.h" 17 | #include "sling/task/task.h" 18 | #include "sling/util/mutex.h" 19 | 20 | namespace sling { 21 | namespace task { 22 | 23 | // Write incoming messages to record file. 
24 | class RecordFileWriter : public Processor { 25 | public: 26 | ~RecordFileWriter() override { delete writer_; } 27 | 28 | void Init(Task *task) override { 29 | // Get output file. 30 | Binding *output = task->GetOutput("output"); 31 | if (output == nullptr) { 32 | LOG(ERROR) << "Output missing"; 33 | return; 34 | } 35 | 36 | // Open record file writer. 37 | RecordFileOptions options; 38 | if (task->Get("indexed", false)) options.indexed = true; 39 | writer_ = new RecordWriter(output->resource()->name(), options); 40 | } 41 | 42 | void Receive(Channel *channel, Message *message) override { 43 | MutexLock lock(&mu_); 44 | 45 | // Write message to record file. 46 | CHECK(writer_->Write(message->key(), message->value())); 47 | delete message; 48 | } 49 | 50 | void Done(Task *task) override { 51 | MutexLock lock(&mu_); 52 | 53 | // Close writer. 54 | if (writer_ != nullptr) { 55 | CHECK(writer_->Close()); 56 | delete writer_; 57 | writer_ = nullptr; 58 | } 59 | } 60 | 61 | private: 62 | // Record writer for writing to output. 63 | RecordWriter *writer_ = nullptr; 64 | 65 | // Mutex for record writer. 66 | Mutex mu_; 67 | }; 68 | 69 | REGISTER_TASK_PROCESSOR("record-file-writer", RecordFileWriter); 70 | 71 | } // namespace task 72 | } // namespace sling 73 | 74 | -------------------------------------------------------------------------------- /sling/task/reducer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/reducer.h" 16 | 17 | namespace sling { 18 | namespace task { 19 | 20 | Reducer::~Reducer() { 21 | for (auto *s : shards_) delete s; 22 | } 23 | 24 | void Reducer::Start(Task *task) { 25 | int num_shards = task->GetSources("input").size(); 26 | shards_.reserve(num_shards); 27 | for (int i = 0; i < num_shards; ++i) { 28 | shards_.push_back(new Shard()); 29 | } 30 | outputs_ = task->GetSinks("output"); 31 | } 32 | 33 | void Reducer::Receive(Channel *channel, Message *message) { 34 | int shard = channel->consumer().shard().part(); 35 | DCHECK_GE(shard, 0); 36 | DCHECK_LT(shard, shards_.size()); 37 | Shard *s = shards_[shard]; 38 | 39 | MutexLock lock(&s->mu); 40 | if (s->messages.empty()) { 41 | s->key = message->key(); 42 | } else if (message->key() != s->key) { 43 | ReduceShard(shard); 44 | s->key = message->key(); 45 | } 46 | s->messages.push_back(message); 47 | } 48 | 49 | void Reducer::ReduceShard(int shard) { 50 | Shard *s = shards_[shard]; 51 | if (s->messages.empty()) return; 52 | 53 | ReduceInput input(shard, s->key, s->messages); 54 | Reduce(input); 55 | s->clear(); 56 | } 57 | 58 | void Reducer::Done(Task *task) { 59 | for (int shard = 0; shard < shards_.size(); ++shard) { 60 | ReduceShard(shard); 61 | delete shards_[shard]; 62 | } 63 | shards_.clear(); 64 | } 65 | 66 | void Reducer::Output(int shard, Message *message) { 67 | DCHECK_GE(shard, 0); 68 | DCHECK_LT(shard, outputs_.size()); 69 | outputs_[shard % outputs_.size()]->Send(message); 70 | } 71 | 72 | } // namespace task 73 | } // namespace sling 74 | 75 | -------------------------------------------------------------------------------- /sling/task/rekey.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | 17 | #include "sling/base/types.h" 18 | #include "sling/frame/store.h" 19 | #include "sling/task/task.h" 20 | #include "sling/task/frames.h" 21 | 22 | namespace sling { 23 | namespace task { 24 | 25 | // Output messages with new key from frame role. 26 | class RekeyTask : public Processor { 27 | public: 28 | void Start(Task *task) override { 29 | // Get output channel. 30 | output_ = task->GetSink("output"); 31 | CHECK(output_ != nullptr) << "Output channel missing"; 32 | 33 | // Initialize commons. 34 | role_ = commons_.Lookup(task->Get("key", "id")); 35 | commons_.Freeze(); 36 | 37 | // Statistics. 38 | num_not_rekeyed_ = task->GetCounter("records_not_rekeyed"); 39 | } 40 | 41 | void Receive(Channel *channel, Message *message) override { 42 | // Decode frame. 43 | Store store(&commons_); 44 | Frame f = DecodeMessage(&store, message); 45 | CHECK(f.valid()); 46 | 47 | // Get key from role. 48 | Handle key = f.GetHandle(role_); 49 | if (!key.IsNil()) { 50 | // Update key in message. 51 | string keystr = store.DebugString(key); 52 | message->set_key(keystr); 53 | } else { 54 | num_not_rekeyed_->Increment(); 55 | } 56 | 57 | // Output message on output channel. 58 | output_->Send(message); 59 | } 60 | 61 | private: 62 | // Output channel. 63 | Channel *output_ = nullptr; 64 | 65 | // Commons store. 
66 | Store commons_; 67 | 68 | // Role for re-keying. 69 | Handle role_; 70 | 71 | // Statistics. 72 | Counter *num_not_rekeyed_ = nullptr; 73 | }; 74 | 75 | REGISTER_TASK_PROCESSOR("rekey", RekeyTask); 76 | 77 | } // namespace task 78 | } // namespace sling 79 | 80 | -------------------------------------------------------------------------------- /sling/task/sharder.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | 17 | #include "sling/base/types.h" 18 | #include "sling/task/task.h" 19 | #include "sling/util/fingerprint.h" 20 | 21 | namespace sling { 22 | namespace task { 23 | 24 | // Shard input messages according to key fingerprint. 25 | class SharderTask : public Processor { 26 | public: 27 | void Start(Task *task) override { 28 | // Get output shard channels. 29 | shards_ = task->GetSinks("output"); 30 | } 31 | 32 | void Receive(Channel *channel, Message *message) override { 33 | // Compute key fingerprint. 34 | uint64 fp = Fingerprint(message->key().data(), message->key().size()); 35 | int shard = fp % shards_.size(); 36 | 37 | // Output message on output shard channel. 38 | shards_[shard]->Send(message); 39 | } 40 | 41 | private: 42 | // Output shard channels. 
43 | std::vector shards_; 44 | }; 45 | 46 | REGISTER_TASK_PROCESSOR("sharder", SharderTask); 47 | 48 | } // namespace task 49 | } // namespace sling 50 | 51 | -------------------------------------------------------------------------------- /sling/task/text-file-reader.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | 17 | #include "sling/base/logging.h" 18 | #include "sling/base/types.h" 19 | #include "sling/stream/file-input.h" 20 | #include "sling/task/process.h" 21 | #include "sling/task/task.h" 22 | 23 | namespace sling { 24 | namespace task { 25 | 26 | // Read text file and output lines to channel. 27 | class TextFileReader : public Process { 28 | public: 29 | // Process input file. 30 | void Run(Task *task) override { 31 | // Get input file. 32 | Binding *input = task->GetInput("input"); 33 | if (input == nullptr) { 34 | LOG(ERROR) << "No input resource"; 35 | return; 36 | } 37 | 38 | // Get output channel. 39 | Channel *output = task->GetSink("output"); 40 | if (output == nullptr) { 41 | LOG(ERROR) << "No output channel"; 42 | return; 43 | } 44 | 45 | // Open input file. 46 | int buffer_size = task->Get("buffer_size", 1 << 16); 47 | FileInput file(input->resource()->name(), buffer_size); 48 | 49 | // Statistics counters. 
50 | Counter *lines_read = task->GetCounter("text_lines_read"); 51 | Counter *bytes_read = task->GetCounter("text_bytes_read"); 52 | 53 | // Read lines from file and output to output channel. 54 | int64 max_lines = task->Get("max_lines", 0); 55 | int64 num_lines = 0; 56 | string line; 57 | while (file.ReadLine(&line)) { 58 | // Update stats. 59 | lines_read->Increment(); 60 | bytes_read->Increment(line.size()); 61 | 62 | // Send message with line to output channel. 63 | output->Send(new Message(Slice(), Slice(line))); 64 | 65 | // Stop when max lines reached. 66 | if (max_lines > 0 && ++num_lines == max_lines) break; 67 | } 68 | 69 | // Close output channel. 70 | output->Close(); 71 | } 72 | }; 73 | 74 | REGISTER_TASK_PROCESSOR("text-file-reader", TextFileReader); 75 | 76 | } // namespace task 77 | } // namespace sling 78 | 79 | -------------------------------------------------------------------------------- /sling/task/workers.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/task/task.h" 16 | #include "sling/util/threadpool.h" 17 | 18 | namespace sling { 19 | namespace task { 20 | 21 | // Create a pool of worker threads and distribute the incoming messages to 22 | // the output channel using the worker threads. 
This adds parallelism to the 23 | // processing of the message stream. 24 | class Workers : public Processor { 25 | public: 26 | ~Workers() override { delete pool_; } 27 | 28 | void Start(Task *task) override { 29 | // Get output port. 30 | output_ = task->GetSink("output"); 31 | 32 | // Get worker pool parameters. 33 | int num_workers = task->Get("worker_threads", 5); 34 | int queue_size = task->Get("queue_size", num_workers * 2); 35 | 36 | // Start worker pool. 37 | pool_ = new ThreadPool(num_workers, queue_size); 38 | pool_->StartWorkers(); 39 | } 40 | 41 | void Receive(Channel *channel, Message *message) override { 42 | if (output_ == nullptr) { 43 | // No receiver. 44 | delete message; 45 | } else { 46 | // Send message to output in one of the worker threads. 47 | pool_->Schedule([this, message]() { 48 | output_->Send(message); 49 | }); 50 | } 51 | } 52 | 53 | void Done(Task *task) override { 54 | // Stop all worker threads. 55 | delete pool_; 56 | pool_ = nullptr; 57 | } 58 | 59 | private: 60 | // Thread pool for dispatching messages. 61 | ThreadPool *pool_ = nullptr; 62 | 63 | // Output channel. 64 | Channel *output_; 65 | }; 66 | 67 | REGISTER_TASK_PROCESSOR("workers", Workers); 68 | 69 | } // namespace task 70 | } // namespace sling 71 | 72 | -------------------------------------------------------------------------------- /sling/util/asset.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// Base class for shared assets. The virtual destructor allows assets to be
// deleted through a pointer to the base class.
class Asset {
 public:
  virtual ~Asset() = default;
};

// Identifier for a type. The value is the address of a function-local static
// that exists once per instantiation of TypeId<T>().
typedef size_t TypeID;

template <typename T>
inline TypeID TypeId() {
  static char marker;
  char *address = &marker;
  return reinterpret_cast<TypeID>(address);
}
76 | std::unordered_map assets_; 77 | }; 78 | 79 | } // namespace sling 80 | 81 | #endif // SLING_UTIL_ASSET_H_ 82 | 83 | -------------------------------------------------------------------------------- /sling/util/fingerprint.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010-2014 Google 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // This code comes from: 15 | // https://code.google.com/p/or-tools/source/browse/trunk/src/base/fingerprint2011.h 16 | // and was adapted to the needs of this project. 17 | 18 | #include "sling/util/fingerprint.h" 19 | 20 | #include "sling/base/types.h" 21 | 22 | namespace sling { 23 | 24 | uint64 FingerprintCat(uint64 fp1, uint64 fp2) { 25 | // Two big prime numbers. 26 | const uint64 mul1 = 0xC6A4A7935BD1E995u; 27 | const uint64 mul2 = 0x228876A7198B743u; 28 | 29 | const uint64 a = fp1 * mul1 + fp2 * mul2; 30 | 31 | // Note: The following line also makes sure we never return 0 or 1, because we 32 | // will only add something to 'a' if there are any MSBs (the remaining bits 33 | // after the shift) being 0, in which case wrapping around would not happen. 34 | return a + (~a >> 47); 35 | } 36 | 37 | // This should be better (collision-wise) than the default hash, 38 | // without being much slower. It never returns 0 or 1. 39 | uint64 Fingerprint(const char *bytes, size_t len) { 40 | // Some big prime number. 
41 | uint64 fp = 0xA5B85C5E198ED849u; 42 | const char *end = bytes + len; 43 | while (bytes + sizeof(uint64) <= end) { 44 | fp = FingerprintCat(fp, *(reinterpret_cast(bytes))); 45 | bytes += sizeof(uint64); 46 | } 47 | uint64 residual = 0; 48 | while (bytes < end) { 49 | residual = residual << 8 | *reinterpret_cast(bytes); 50 | bytes++; 51 | } 52 | 53 | return FingerprintCat(fp, residual); 54 | } 55 | 56 | uint32 Fingerprint32(const char *bytes, size_t len) { 57 | uint64 fp = Fingerprint(bytes, len); 58 | return fp ^(fp >> 32); 59 | } 60 | 61 | } // namespace sling 62 | 63 | -------------------------------------------------------------------------------- /sling/util/fingerprint.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010-2014 Google 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | #include "sling/base/types.h" 15 | 16 | #ifndef SLING_UTIL_FINGERPRINT_H_ 17 | #define SLING_UTIL_FINGERPRINT_H_ 18 | 19 | namespace sling { 20 | 21 | // Concatenate two fingerprints. 22 | uint64 FingerprintCat(uint64 fp1, uint64 fp2); 23 | 24 | // Compute 64-bit fingerprint for data. This should be better (collision-wise) 25 | // than the default hash, without being much slower. It never returns 26 | // 0 or 1. 27 | uint64 Fingerprint(const char *bytes, size_t len); 28 | 29 | // Compute 32-bit fingerprint by folding 64-bit fingerprint. 
// Thin wrapper that gives std::mutex capitalized method names.
class Mutex : public std::mutex {
 public:
  // Block until the mutex has been acquired.
  void Lock() { std::mutex::lock(); }

  // Release the mutex.
  void Unlock() { std::mutex::unlock(); }

  // Acquire the mutex without blocking; returns true on success.
  bool TryLock() { return std::mutex::try_lock(); }
};
// Pseudo-random number generator built on a 64-bit Mersenne Twister.
class Random {
 public:
  // Set up the unit-interval distribution; the generator starts from its
  // default seed.
  Random() : dist_(0.0f, 1.0f) {}

  // Re-seed the generator.
  void seed(int seed) { prng_.seed(seed); }

  // Draw a number from the unit-interval distribution.
  float UniformProb() {
    const float p = dist_(prng_);
    return p;
  }

  // Draw p from the unit-interval distribution and return p*scale+bias.
  float UniformFloat(float scale, float bias) {
    return dist_(prng_) * scale + bias;
  }

  // Return the next raw generator output reduced modulo n.
  int UniformInt(int n) {
    const auto raw = prng_();
    return raw % n;
  }

 private:
  // Mersenne Twister pseudo-random generator of 64-bit numbers.
  std::mt19937_64 prng_;

  // Distribution over the unit interval.
  std::uniform_real_distribution<float> dist_;
};
// Hash map with a secondary array that lists its entries ordered by value.
// The array only stores pointers to the nodes owned by the hash map, so it
// has to be rebuilt with sort() after the map has been modified.
template<typename K, typename V, typename H = std::hash<K>>
struct SortableMap {
 public:
  typedef std::unordered_map<K, V, H> Map;
  typedef typename Map::value_type Node;
  typedef std::vector<Node *> Array;

  // Return the value for the key, inserting a default value if it is missing.
  V &operator[](const K &key) { return map[key]; }

  // Rebuild the array with one pointer per map entry, in ascending order of
  // value.
  void sort() {
    array.clear();
    array.reserve(map.size());
    for (auto it = map.begin(); it != map.end(); ++it) {
      array.push_back(&*it);
    }

    auto by_value = [](const Node *lhs, const Node *rhs) {
      return lhs->second < rhs->second;
    };
    std::sort(array.begin(), array.end(), by_value);
  }

  Map map;
  Array array;
};
14 | 15 | #include "sling/util/thread.h" 16 | 17 | #include "sling/base/logging.h" 18 | 19 | namespace sling { 20 | 21 | void *Thread::ThreadMain(void *arg) { 22 | Thread *thread = static_cast(arg); 23 | thread->Run(); 24 | return nullptr; 25 | } 26 | 27 | Thread::Thread() : running_(false) {} 28 | Thread::~Thread() {} 29 | 30 | void Thread::Start() { 31 | CHECK(!running_); 32 | pthread_create(&thread_, nullptr, &ThreadMain, this); 33 | running_ = true; 34 | 35 | // Detach the thread if it is not joinable. 36 | if (!joinable_) { 37 | pthread_detach(thread_); 38 | } 39 | } 40 | 41 | void Thread::Join() { 42 | if (!running_) return; 43 | CHECK(joinable_); 44 | 45 | void *unused; 46 | pthread_join(thread_, &unused); 47 | running_ = false; 48 | } 49 | 50 | void Thread::SetJoinable(bool joinable) { 51 | CHECK(!running_) << "Can't SetJoinable() on a running thread"; 52 | joinable_ = true; 53 | } 54 | 55 | bool Thread::IsSelf() const { 56 | return pthread_equal(thread_, pthread_self()); 57 | } 58 | 59 | void ClosureThread::Run() { 60 | // Run closure. 61 | closure_(); 62 | } 63 | 64 | void WorkerPool::Start(int num_workers, const Worker &worker) { 65 | // Create worker threads. 66 | int first = workers_.size(); 67 | for (int i = 0; i < num_workers; ++i) { 68 | workers_.emplace_back([worker, i]() { worker(i); }); 69 | } 70 | 71 | // Start worker threads. 72 | for (int i = first; i < workers_.size(); ++i) { 73 | workers_[i].SetJoinable(true); 74 | workers_[i].Start(); 75 | } 76 | } 77 | 78 | void WorkerPool::Join() { 79 | for (auto &t : workers_) t.Join(); 80 | } 81 | 82 | } // namespace sling 83 | 84 | -------------------------------------------------------------------------------- /sling/util/threadpool.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "sling/base/logging.h" 16 | #include "sling/util/threadpool.h" 17 | 18 | namespace sling { 19 | 20 | ThreadPool::ThreadPool(int num_workers, int queue_size) 21 | : num_workers_(num_workers), queue_size_(queue_size) {} 22 | 23 | ThreadPool::~ThreadPool() { 24 | // Wait until all tasks have been completed. 25 | Shutdown(); 26 | 27 | // Wait until all workers have terminated. 28 | for (auto &t : workers_) t.Join(); 29 | } 30 | 31 | void ThreadPool::StartWorkers() { 32 | // Create worker threads. 33 | CHECK(workers_.empty()); 34 | for (int i = 0; i < num_workers_; ++i) { 35 | workers_.emplace_back([this]() { 36 | // Keep processing tasks until done. 37 | Task task; 38 | while (FetchTask(&task)) task(); 39 | }); 40 | } 41 | 42 | // Start worker threads. 43 | for (auto &t : workers_) { 44 | t.SetJoinable(true); 45 | t.Start(); 46 | } 47 | } 48 | 49 | void ThreadPool::Schedule(Task &&task) { 50 | std::unique_lock lock(mu_); 51 | while (tasks_.size() >= queue_size_) { 52 | nonfull_.wait(lock); 53 | } 54 | tasks_.push(std::move(task)); 55 | nonempty_.notify_one(); 56 | } 57 | 58 | bool ThreadPool::FetchTask(Task *task) { 59 | std::unique_lock lock(mu_); 60 | while (tasks_.empty()) { 61 | if (done_) return false; 62 | nonempty_.wait(lock); 63 | } 64 | *task = tasks_.front(); 65 | tasks_.pop(); 66 | nonfull_.notify_one(); 67 | return true; 68 | } 69 | 70 | void ThreadPool::Shutdown() { 71 | // Notify all threads that we are done. 
72 | std::lock_guard lock(mu_); 73 | done_ = true; 74 | nonempty_.notify_all(); 75 | } 76 | 77 | } // namespace sling 78 | 79 | -------------------------------------------------------------------------------- /sling/util/threadpool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_UTIL_THREADPOOL_H_ 16 | #define SLING_UTIL_THREADPOOL_H_ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "sling/util/thread.h" 25 | 26 | namespace sling { 27 | 28 | // Thread pool for executing tasks using a pool of worker threads. 29 | class ThreadPool { 30 | public: 31 | // Task that can be scheduled for execution. 32 | typedef std::function Task; 33 | 34 | // Initialize thread pool. 35 | ThreadPool(int num_workers, int queue_size); 36 | 37 | // Wait for all workers to complete. 38 | ~ThreadPool(); 39 | 40 | // Start worker threads. 41 | void StartWorkers(); 42 | 43 | // Schedule task to be executed by worker. 44 | void Schedule(Task &&task); 45 | 46 | private: 47 | // Fetch next task. Returns false when all tasks have been completed. 48 | bool FetchTask(Task *task); 49 | 50 | // Shut down workers. This waits until all tasks have been completed. 51 | void Shutdown(); 52 | 53 | // Worker threads. 
54 | int num_workers_; 55 | std::vector workers_; 56 | 57 | // Task queue. 58 | int queue_size_; 59 | std::queue tasks_; 60 | 61 | // Are we done with adding new tasks. 62 | bool done_ = false; 63 | 64 | // Mutex for serializing access to task queue. 65 | std::mutex mu_; 66 | 67 | // Signal to notify about new tasks in queue. 68 | std::condition_variable nonempty_; 69 | 70 | // Signal to notify about available space in queue. 71 | std::condition_variable nonfull_; 72 | }; 73 | 74 | } // namespace sling 75 | 76 | #endif // SLING_UTIL_THREADPOOL_H_ 77 | 78 | -------------------------------------------------------------------------------- /sling/web/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_library( 4 | name = "xml-parser", 5 | srcs = ["xml-parser.cc"], 6 | hdrs = ["xml-parser.h"], 7 | deps = [ 8 | ":entity-ref", 9 | "//sling/base", 10 | "//sling/stream:input", 11 | "//sling/string:ctype", 12 | "//sling/util:unicode", 13 | ], 14 | ) 15 | 16 | cc_library( 17 | name = "entity-ref", 18 | srcs = ["entity-ref.cc"], 19 | hdrs = ["entity-ref.h"], 20 | deps = [ 21 | "//sling/base", 22 | ], 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /sling/web/entity-ref.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SLING_WEB_ENTITY_REF_H_ 16 | #define SLING_WEB_ENTITY_REF_H_ 17 | 18 | #include 19 | 20 | #include "sling/base/types.h" 21 | 22 | namespace sling { 23 | 24 | // Parse entity reference. Return -1 on errors. 25 | int ParseEntityRef(const char *str, int len, int *consumed); 26 | int ParseEntityRef(const string &str); 27 | 28 | } // namespace sling 29 | 30 | #endif // SLING_WEB_ENTITY_REF_H_ 31 | 32 | -------------------------------------------------------------------------------- /third_party/bz2lib/BUILD: -------------------------------------------------------------------------------- 1 | # Imported from http://bzip.org/downloads.html 2 | 3 | licenses(["notice"]) 4 | 5 | cc_library( 6 | name = "bz2lib", 7 | visibility = ["//visibility:public"], 8 | srcs = [ 9 | "blocksort.c", 10 | "bzlib.c", 11 | "compress.c", 12 | "crctable.c", 13 | "decompress.c", 14 | "huffman.c", 15 | "randtable.c", 16 | ], 17 | hdrs = [ 18 | "bzlib.h", 19 | "bzlib_private.h", 20 | ], 21 | copts = [ 22 | "-Wno-unknown-warning-option", 23 | "-Wno-unused-const-variable", 24 | "-Wno-unused-but-set-variable", 25 | "-Wno-unused-private-field", 26 | "-DBZ_NO_STDIO", 27 | ] 28 | ) 29 | 30 | -------------------------------------------------------------------------------- /third_party/bz2lib/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------- 3 | 4 | This program, "bzip2", the associated library "libbzip2", and all 5 | documentation, are copyright (C) 1996-2010 Julian R Seward. All 6 | rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions 10 | are met: 11 | 12 | 1. 
Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | 2. The origin of this software must not be misrepresented; you must 16 | not claim that you wrote the original software. If you use this 17 | software in a product, an acknowledgment in the product 18 | documentation would be appreciated but is not required. 19 | 20 | 3. Altered source versions must be plainly marked as such, and must 21 | not be misrepresented as being the original software. 22 | 23 | 4. The name of the author may not be used to endorse or promote 24 | products derived from this software without specific prior written 25 | permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 28 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 31 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 33 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 35 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | 39 | Julian Seward, jseward@bzip.org 40 | bzip2/libbzip2 version 1.0.6 of 6 September 2010 41 | 42 | -------------------------------------------------------------------------- 43 | -------------------------------------------------------------------------------- /third_party/jit/BUILD: -------------------------------------------------------------------------------- 1 | # x64 jit assembler. 
2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) # BSD 6 | 7 | exports_files(["LICENSE"]) 8 | 9 | cc_library( 10 | name = "types", 11 | hdrs = ["types.h"], 12 | deps = [ 13 | "//sling/base", 14 | ], 15 | ) 16 | 17 | cc_library( 18 | name = "memory", 19 | hdrs = ["memory.h"], 20 | deps = [ 21 | ":types", 22 | "//sling/base", 23 | ], 24 | ) 25 | 26 | cc_library( 27 | name = "code", 28 | srcs = ["code.cc"], 29 | hdrs = ["code.h"], 30 | deps = [ 31 | ":memory", 32 | ":types", 33 | "//sling/base", 34 | ], 35 | ) 36 | 37 | cc_library( 38 | name = "cpu", 39 | srcs = ["cpu.cc"], 40 | hdrs = ["cpu.h"], 41 | deps = [ 42 | "//sling/base", 43 | ], 44 | ) 45 | 46 | cc_library( 47 | name = "assembler", 48 | srcs = ["assembler.cc"], 49 | hdrs = [ 50 | "assembler.h", 51 | "instructions.h", 52 | "registers.h", 53 | "avx512.inc", 54 | ], 55 | deps = [ 56 | ":code", 57 | ":cpu", 58 | ":memory", 59 | ":types", 60 | "//sling/base", 61 | ], 62 | ) 63 | 64 | -------------------------------------------------------------------------------- /third_party/jit/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 1994-2006 Sun Microsystems Inc. 2 | All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | - Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | - Redistribution in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | - Neither the name of Sun Microsystems or the names of contributors may 16 | be used to endorse or promote products derived from this software without 17 | specific prior written permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 20 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | The original source code covered by the above license above has been 32 | modified significantly by Google Inc. 33 | Copyright 2012 the V8 project authors. All rights reserved. 34 | Copyright 2017 Google Inc. All rights reserved. 
35 | 36 | -------------------------------------------------------------------------------- /third_party/snappy/BUILD: -------------------------------------------------------------------------------- 1 | # Snappy compression library, see https://github.com/google/snappy 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) # BSD 6 | 7 | exports_files(["LICENSE"]) 8 | 9 | cc_library( 10 | name = "bits", 11 | hdrs = ["bits.h"], 12 | deps = [ 13 | "//sling/base", 14 | ], 15 | ) 16 | 17 | cc_library( 18 | name = "endian", 19 | hdrs = ["endian.h"], 20 | deps = [ 21 | "//sling/base", 22 | ], 23 | ) 24 | 25 | cc_library( 26 | name = "snappy", 27 | srcs = [ 28 | "snappy.cc", 29 | "snappy-sinksource.cc", 30 | ], 31 | hdrs = [ 32 | "snappy.h", 33 | "snappy-sinksource.h", 34 | ], 35 | deps = [ 36 | ":bits", 37 | ":endian", 38 | "//sling/base", 39 | "//sling/util:varint", 40 | ], 41 | copts = [ 42 | "-Wno-sign-compare", 43 | ], 44 | ) 45 | 46 | -------------------------------------------------------------------------------- /third_party/snappy/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /third_party/zlib/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # BSD/MIT-like license (for zlib) 4 | 5 | cc_library( 6 | name = "zlib", 7 | srcs = [ 8 | "adler32.c", 9 | "compress.c", 10 | "crc32.c", 11 | "deflate.c", 12 | "gzclose.c", 13 | "gzlib.c", 14 | "gzread.c", 15 | "gzwrite.c", 16 | "infback.c", 17 | "inffast.c", 18 | "inflate.c", 19 | "inftrees.c", 20 | "trees.c", 21 | "uncompr.c", 22 | "zutil.c", 23 | ], 24 | hdrs = [ 25 | "crc32.h", 26 | "deflate.h", 27 | "gzguts.h", 28 | "inffast.h", 29 | "inffixed.h", 30 | "inflate.h", 31 | "inftrees.h", 32 | "trees.h", 33 | "zconf.h", 34 | "zlib.h", 35 | "zutil.h", 36 | ], 37 | copts = [ 38 | "-O3", 39 | "-Wno-unused-variable", 40 | "-Wno-unused-private-field", 41 | "-Wno-implicit-function-declaration", 42 | ], 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /third_party/zlib/gzclose.c: 
-------------------------------------------------------------------------------- 1 | /* gzclose.c -- zlib gzclose() function 2 | * Copyright (C) 2004, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | #include "gzguts.h" 7 | 8 | /* gzclose() is in a separate file so that it is linked in only if it is used. 9 | That way the other gzclose functions can be used instead to avoid linking in 10 | unneeded compression or decompression routines. */ 11 | int ZEXPORT gzclose(file) 12 | gzFile file; 13 | { 14 | #ifndef NO_GZCOMPRESS 15 | gz_statep state; 16 | /* a null handle has no state to inspect, so report Z_STREAM_ERROR */ 17 | if (file == NULL) 18 | return Z_STREAM_ERROR; 19 | state = (gz_statep)file; 20 | /* dispatch on the mode stored in the state: gzclose_r() for GZ_READ, gzclose_w() otherwise */ 21 | return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); 22 | #else 23 | return gzclose_r(file); 24 | #endif 25 | } 26 | -------------------------------------------------------------------------------- /third_party/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h.
9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /tools/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | cc_binary( 4 | name = "embed-data", 5 | srcs = ["embed-data.cc"], 6 | deps = [ 7 | "//sling/util:elf-writer", 8 | ], 9 | ) 10 | 11 | cc_binary( 12 | name = "codex", 13 | srcs = ["codex.cc"], 14 | deps = [ 15 | "//sling/base", 16 | "//sling/file", 17 | "//sling/file:recordio", 18 | "//sling/file:posix", 19 | "//sling/frame", 20 | "//sling/stream:memory", 21 | "//sling/string:printf", 22 | "//sling/util:fingerprint", 23 | ], 24 | ) 25 | 26 | cc_binary( 27 | name = "index", 28 | srcs = ["index.cc"], 29 | deps = [ 30 | "//sling/base", 31 | "//sling/file", 32 | "//sling/file:recordio", 33 | "//sling/file:posix", 34 | ], 35 | ) 36 | 37 | cc_binary( 38 | name = "snaps", 39 | srcs = ["snaps.cc"], 40 | deps = [ 41 | "//sling/base", 42 | "//sling/file", 43 | "//sling/file:posix", 44 | "//sling/frame:serialization", 45 | "//sling/frame:snapshot", 46 | "//sling/frame:store", 47 | ], 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /tools/buildall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Build the SLING packages listed below in optimized mode; extra command-line arguments are forwarded to bazel unchanged ("$@" keeps each argument intact even if it contains spaces). 3 | bazel build -c opt "$@" \ 4 | sling/base:* \ 5 | sling/file:* \ 6 | sling/frame:* \ 7 | sling/http:* \ 8 | sling/myelin:* \ 9 | sling/myelin/kernel:* \ 10 | sling/myelin/generator:* \ 11 | sling/myelin/cuda:* \ 12 | sling/nlp/document:* \ 13 | sling/nlp/embedding:* \ 14 | sling/nlp/kb:* \ 15 | sling/nlp/silver:* \ 16 | sling/nlp/parser:* \ 17 | sling/nlp/parser/tools:* \ 18 | sling/nlp/wiki:* \ 19 | sling/pyapi:* \ 20 | sling/stream:* \ 21 | sling/string:* \ 22 | sling/task:* \ 23 | sling/util:* \ 24 | sling/web:* \ 25 | tools:* 26 | 27 |
-------------------------------------------------------------------------------- /tools/docv1to2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Convert SLING documents from version 1 to version 2. 17 | 18 | import sling 19 | import sys 20 | 21 | # Check arguments. NOTE(review): the two empty strings in the usage message look like lost placeholders for the input and output record files (sys.argv[1] and sys.argv[2]) - confirm. 22 | if len(sys.argv) != 3: 23 | print("usage:", sys.argv[0], "", "") 24 | sys.exit(1) 25 | 26 | # Initialize commons store. NOTE(review): each frame below carries two ids (a short name and a /s/... name), presumably so that both spellings resolve to the same symbol during conversion - confirm which set is the version 1 schema. 27 | commons = sling.Store() 28 | commons.parse(""" 29 | {=document =/s/document} 30 | {=url =/s/document/url} 31 | {=title =/s/document/title} 32 | {=text =/s/document/text} 33 | {=tokens =/s/document/tokens} 34 | {=mention =/s/document/mention} 35 | {=theme =/s/document/theme} 36 | {=token =/s/token} 37 | {=index =/s/token/index} 38 | {=start =/s/token/start} 39 | {=size =/s/token/length} 40 | {=break =/s/token/break} 41 | {=word =/s/token/text} 42 | {=phrase =/s/phrase} 43 | {=begin =/s/phrase/begin} 44 | {=length =/s/phrase/length} 45 | {=evokes =/s/phrase/evokes} 46 | """) 47 | commons.freeze() 48 | 49 | # Convert documents.
50 | num_docs = 0 51 | fin = sling.RecordReader(sys.argv[1]) 52 | fout = sling.RecordWriter(sys.argv[2]) 53 | for key, value in fin: 54 | store = sling.Store(commons) 55 | f = store.parse(value) 56 | fout.write(key, f.data(binary=True)) 57 | num_docs += 1 58 | 59 | fin.close() 60 | fout.close() 61 | print(num_docs, "documents converted") 62 | 63 | -------------------------------------------------------------------------------- /tools/embed.bzl: -------------------------------------------------------------------------------- 1 | # Compile embedded data files into ELF object files. 2 | 3 | def _genembed_impl(ctx): 4 | # Generate arguments to the embedded data compiler. 5 | args = [] 6 | for i in ctx.attr.srcs: 7 | args += [f.path for f in i.files.to_list()] 8 | 9 | # Run embedded data compiler. 10 | ctx.actions.run( 11 | inputs = ctx.files.srcs, 12 | outputs = [ctx.outputs.out], 13 | arguments = ["-o", ctx.outputs.out.path] + args, 14 | progress_message = "Embedding %s" % ctx.label.name, 15 | executable = ctx.executable._embed_data_compiler 16 | ) 17 | # Rule that runs //tools:embed-data over srcs and declares a single output named <name>.o. 18 | genembed = rule( 19 | implementation = _genembed_impl, 20 | attrs = { 21 | "srcs": attr.label_list( 22 | allow_files = True 23 | ), 24 | "_embed_data_compiler": attr.label( 25 | default = Label("//tools:embed-data"), 26 | cfg = "host", 27 | executable = True, 28 | ), 29 | }, 30 | outputs = { 31 | "out": "%{name}.o" 32 | }, 33 | ) 34 | # Macro: instantiates genembed as <name>_genembed for srcs and wraps its output in a cc_library called <name> (alwayslink, linkstatic). 35 | def embed_data(name, srcs): 36 | genembed( 37 | name = name + "_genembed", 38 | srcs = srcs, 39 | ) 40 | native.cc_library( 41 | name = name, 42 | srcs = [name + "_genembed"], 43 | alwayslink = True, 44 | linkstatic = True, 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /tools/snaps.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google Inc.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Create SLING store snapshot files. 16 | 17 | #include <iostream> 18 | #include <vector> 19 | 20 | #include "sling/base/init.h" 21 | #include "sling/base/flags.h" 22 | #include "sling/base/logging.h" 23 | #include "sling/base/types.h" 24 | #include "sling/file/file.h" 25 | #include "sling/frame/serialization.h" 26 | #include "sling/frame/snapshot.h" 27 | #include "sling/frame/store.h" 28 | 29 | DEFINE_bool(check, false, "Check for valid snapshot"); 30 | DEFINE_bool(verify, false, "Check snapshot by reading it into memory"); 31 | 32 | using namespace sling; 33 | // Expands each command-line argument with File::Match and processes every matching file: --check reports snapshot validity, --verify reads the snapshot into a store, otherwise the store file is loaded, frozen and written out as a snapshot. 34 | int main(int argc, char *argv[]) { 35 | InitProgram(&argc, &argv); 36 | 37 | // Get files to snapshot. 38 | std::vector<string> files; 39 | for (int i = 1; i < argc; ++i) { 40 | File::Match(argv[i], &files); 41 | } 42 | 43 | for (const string &file : files) { 44 | if (FLAGS_check) { // Only report whether the snapshot is valid. 45 | bool valid = Snapshot::Valid(file); 46 | std::cout << file << ": " << (valid ?
"valid" : "INVALID") << "\n"; 47 | } else if (FLAGS_verify) { // Read the snapshot into a fresh store. 48 | std::cout << file << ": " << std::flush; 49 | std::cout << "load " << std::flush; 50 | Store store; 51 | CHECK(Snapshot::Read(&store, file)); 52 | std::cout << "done\n" << std::flush; 53 | } else { // Default: rebuild the snapshot from the store file. 54 | std::cout << file << ": " << std::flush; 55 | File::Delete(Snapshot::Filename(file)); // Remove any existing snapshot first; presumably so LoadStore does not pick up a stale one (confirm). 56 | std::cout << "load " << std::flush; 57 | Store store; 58 | LoadStore(file, &store); 59 | std::cout << "freeze " << std::flush; 60 | store.AllocateSymbolHeap(); 61 | store.Freeze(); 62 | std::cout << "snapshot " << std::flush; 63 | CHECK(Snapshot::Write(&store, file)); 64 | std::cout << "done\n" << std::flush; 65 | } 66 | } 67 | 68 | return 0; 69 | } 70 | --------------------------------------------------------------------------------