├── .bazelrc
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── WORKSPACE
├── app
├── BUILD
├── external
│ ├── README.md
│ ├── material-icons.css
│ ├── material-icons.woff2
│ ├── material.css
│ ├── material.js
│ └── preact.js
├── image
│ └── appicon.ico
├── lib
│ ├── docview.js
│ ├── mdl.js
│ └── util.js
└── style
│ └── docview.css
├── data
├── nlp
│ └── schemas
│ │ ├── catalog.sling
│ │ ├── constituency.sling
│ │ ├── document-schema.sling
│ │ └── meta-schema.sling
└── wiki
│ ├── aliases.sling
│ ├── calendar.sling
│ ├── countries.sling
│ ├── da
│ └── templates.sling
│ ├── de
│ └── templates.sling
│ ├── en
│ ├── phrases.txt
│ └── templates.sling
│ ├── es
│ └── templates.sling
│ ├── fi
│ └── templates.sling
│ ├── fr
│ └── templates.sling
│ ├── it
│ └── templates.sling
│ ├── languages.sling
│ ├── nl
│ └── templates.sling
│ ├── no
│ └── templates.sling
│ ├── pl
│ └── templates.sling
│ ├── pt
│ └── templates.sling
│ ├── sv
│ └── templates.sling
│ ├── units.sling
│ ├── wikidata.sling
│ └── wikipedia.sling
├── doc
├── guide
│ ├── README.md
│ ├── caspar.md
│ ├── flowasm.txt
│ ├── flowin.svg
│ ├── flowout.svg
│ ├── frames.md
│ ├── install.md
│ ├── kb-browser.png
│ ├── myelin.md
│ ├── parsing.md
│ ├── pyapi.md
│ ├── training.md
│ ├── wikiflow.md
│ └── wikiflow.svg
└── report
│ ├── acl2017.sty
│ ├── acl_natbib.bst
│ ├── dev-eval.pdf
│ ├── network.pdf
│ ├── network.svg
│ ├── runtime.pdf
│ ├── runtime.svg
│ ├── sling.bib
│ └── sling.tex
├── python
├── BUILD
├── __init__.py
├── flags.py
├── log.py
├── myelin
│ ├── __init__.py
│ ├── builder.py
│ ├── flow.py
│ ├── nn.py
│ ├── simulator.py
│ └── tf.py
├── nlp
│ ├── __init__.py
│ ├── document.py
│ └── parser.py
├── pysling.so
├── run.py
├── task
│ ├── __init__.py
│ ├── corpora.py
│ ├── download.py
│ ├── embedding.py
│ ├── silver.py
│ ├── wiki.py
│ └── workflow.py
└── wikibot
│ ├── en_wp_dates.py
│ ├── extract_dates.py
│ ├── wikibot.py
│ └── wikimonitor.py
├── run.sh
├── setup.sh
├── sling
├── base
│ ├── BUILD
│ ├── bitcast.h
│ ├── clock.cc
│ ├── clock.h
│ ├── flags.cc
│ ├── flags.h
│ ├── init.cc
│ ├── init.h
│ ├── libinit.cc
│ ├── logging.cc
│ ├── logging.h
│ ├── macros.h
│ ├── perf.cc
│ ├── perf.h
│ ├── port.h
│ ├── registry.cc
│ ├── registry.h
│ ├── slice.h
│ ├── status.cc
│ ├── status.h
│ └── types.h
├── file
│ ├── BUILD
│ ├── embed.cc
│ ├── embed.h
│ ├── file.cc
│ ├── file.h
│ ├── posix.cc
│ ├── posix.h
│ ├── recordio.cc
│ ├── recordio.h
│ ├── repository.cc
│ ├── repository.h
│ ├── textmap.cc
│ └── textmap.h
├── frame
│ ├── BUILD
│ ├── decoder.cc
│ ├── decoder.h
│ ├── encoder.cc
│ ├── encoder.h
│ ├── json.cc
│ ├── json.h
│ ├── object.cc
│ ├── object.h
│ ├── printer.cc
│ ├── printer.h
│ ├── reader.cc
│ ├── reader.h
│ ├── scanner.cc
│ ├── scanner.h
│ ├── serialization.cc
│ ├── serialization.h
│ ├── snapshot.cc
│ ├── snapshot.h
│ ├── store.cc
│ ├── store.h
│ ├── tokenizer.cc
│ ├── tokenizer.h
│ ├── turtle.cc
│ ├── turtle.h
│ ├── wire.h
│ ├── xml.cc
│ └── xml.h
├── http
│ ├── BUILD
│ ├── http-server.cc
│ ├── http-server.h
│ ├── http-stream.cc
│ ├── http-stream.h
│ ├── http-utils.cc
│ ├── http-utils.h
│ ├── static-content.cc
│ ├── static-content.h
│ ├── web-service.cc
│ └── web-service.h
├── myelin
│ ├── BUILD
│ ├── analyze.cc
│ ├── aot-linker.cc
│ ├── aot-linker.h
│ ├── builder.cc
│ ├── builder.h
│ ├── compiler.cc
│ ├── compiler.h
│ ├── compute.cc
│ ├── compute.h
│ ├── cuda
│ │ ├── BUILD
│ │ ├── cuda-api.cc
│ │ ├── cuda-api.h
│ │ ├── cuda-kernel.cc
│ │ ├── cuda-kernel.h
│ │ ├── cuda-runtime.cc
│ │ ├── cuda-runtime.h
│ │ ├── cuda.cc
│ │ └── cuda.h
│ ├── elf-linker.cc
│ ├── elf-linker.h
│ ├── express.cc
│ ├── express.h
│ ├── flow.cc
│ ├── flow.h
│ ├── generator
│ │ ├── BUILD
│ │ ├── elementwise.cc
│ │ ├── elementwise.h
│ │ ├── expression.cc
│ │ ├── expression.h
│ │ ├── index.cc
│ │ ├── index.h
│ │ ├── scalar-flt-avx.cc
│ │ ├── scalar-flt-sse.cc
│ │ ├── scalar-int.cc
│ │ ├── vector-flt-avx128.cc
│ │ ├── vector-flt-avx256.cc
│ │ ├── vector-flt-avx512.cc
│ │ ├── vector-flt-sse.cc
│ │ ├── vector-int-avx128.cc
│ │ ├── vector-int-avx256.cc
│ │ └── vector-int-sse.cc
│ ├── gradient.cc
│ ├── gradient.h
│ ├── graph.cc
│ ├── graph.h
│ ├── kernel
│ │ ├── BUILD
│ │ ├── arithmetic.cc
│ │ ├── arithmetic.h
│ │ ├── array.cc
│ │ ├── avx-math.cc
│ │ ├── avx-matmul.cc
│ │ ├── avx-operators.cc
│ │ ├── avx.cc
│ │ ├── avx.h
│ │ ├── cublas-matmul.cc
│ │ ├── cuda-arithmetic.cc
│ │ ├── cuda-array.cc
│ │ ├── cuda-matmul.cc
│ │ ├── cuda.cc
│ │ ├── cuda.h
│ │ ├── dragnn.cc
│ │ ├── dragnn.h
│ │ ├── generic-math.cc
│ │ ├── generic-matmul.cc
│ │ ├── generic-operators.cc
│ │ ├── generic.cc
│ │ ├── generic.h
│ │ ├── gradients.cc
│ │ ├── gradients.h
│ │ ├── mkl.cc
│ │ ├── mkl.h
│ │ ├── precompute.cc
│ │ ├── precompute.h
│ │ ├── simd-matmul.cc
│ │ ├── sse-matmul.cc
│ │ ├── sse.cc
│ │ ├── sse.h
│ │ ├── tensorflow.cc
│ │ └── tensorflow.h
│ ├── learning.cc
│ ├── learning.h
│ ├── macro-assembler.cc
│ ├── macro-assembler.h
│ ├── multi-process.cc
│ ├── multi-process.h
│ ├── nnc.cc
│ ├── profile.cc
│ ├── profile.h
│ ├── rnn.cc
│ ├── rnn.h
│ ├── simd-assembler.cc
│ ├── simd-assembler.h
│ └── tests
│ │ ├── gradcheck.py
│ │ ├── opcheck.py
│ │ └── runall.sh
├── nlp
│ ├── document
│ │ ├── BUILD
│ │ ├── affix.cc
│ │ ├── affix.h
│ │ ├── analyzer.cc
│ │ ├── annotator.cc
│ │ ├── annotator.h
│ │ ├── app
│ │ │ ├── analyzer.css
│ │ │ ├── analyzer.html
│ │ │ ├── analyzer.js
│ │ │ ├── corpus.css
│ │ │ ├── corpus.html
│ │ │ └── corpus.js
│ │ ├── corpus-browser.cc
│ │ ├── document-corpus.cc
│ │ ├── document-corpus.h
│ │ ├── document-service.cc
│ │ ├── document-service.h
│ │ ├── document-tokenizer.cc
│ │ ├── document-tokenizer.h
│ │ ├── document.cc
│ │ ├── document.h
│ │ ├── features.cc
│ │ ├── features.h
│ │ ├── fingerprinter.cc
│ │ ├── fingerprinter.h
│ │ ├── lex.cc
│ │ ├── lex.h
│ │ ├── lexical-encoder.cc
│ │ ├── lexical-encoder.h
│ │ ├── lexicon.cc
│ │ ├── lexicon.h
│ │ ├── phrase-tokenizer.cc
│ │ ├── phrase-tokenizer.h
│ │ ├── text-tokenizer.cc
│ │ ├── text-tokenizer.h
│ │ ├── token-properties.h
│ │ └── vocabulary-builder.cc
│ ├── embedding
│ │ ├── BUILD
│ │ ├── embedding-model.cc
│ │ ├── embedding-model.h
│ │ ├── fact-embeddings.cc
│ │ ├── fact-plausibility.cc
│ │ ├── plausibility-model.cc
│ │ ├── plausibility-model.h
│ │ └── word-embeddings.cc
│ ├── kb
│ │ ├── BUILD
│ │ ├── app
│ │ │ ├── appicon.ico
│ │ │ ├── index.html
│ │ │ ├── kb.css
│ │ │ └── kb.js
│ │ ├── calendar.cc
│ │ ├── calendar.h
│ │ ├── fact-lexicon.cc
│ │ ├── facts.cc
│ │ ├── facts.h
│ │ ├── knowledge-server.cc
│ │ ├── knowledge-service.cc
│ │ ├── knowledge-service.h
│ │ ├── name-table-builder.cc
│ │ ├── name-table.cc
│ │ ├── name-table.h
│ │ ├── phrase-table-builder.cc
│ │ ├── phrase-table.cc
│ │ ├── phrase-table.h
│ │ ├── resolver.cc
│ │ └── resolver.h
│ ├── parser
│ │ ├── BUILD
│ │ ├── action-table.cc
│ │ ├── action-table.h
│ │ ├── caspar-trainer.cc
│ │ ├── frame-evaluation.cc
│ │ ├── frame-evaluation.h
│ │ ├── multiclass-delegate.cc
│ │ ├── ontonotes
│ │ │ ├── annotations.py
│ │ │ ├── head_finder.py
│ │ │ ├── make_corpus.sh
│ │ │ ├── ontonotesv5_to_sling.py
│ │ │ ├── shuffle.py
│ │ │ └── statistics.py
│ │ ├── parser-action.cc
│ │ ├── parser-action.h
│ │ ├── parser-annotator.cc
│ │ ├── parser-features.cc
│ │ ├── parser-features.h
│ │ ├── parser-state.cc
│ │ ├── parser-state.h
│ │ ├── parser-trainer.cc
│ │ ├── parser-trainer.h
│ │ ├── parser.cc
│ │ ├── parser.h
│ │ ├── roles.cc
│ │ ├── roles.h
│ │ ├── tools
│ │ │ ├── BUILD
│ │ │ ├── commons_from_corpora.py
│ │ │ ├── parse.cc
│ │ │ ├── parse.py
│ │ │ ├── train.sh
│ │ │ ├── train_caspar.py
│ │ │ ├── train_pytorch.py
│ │ │ ├── validate.py
│ │ │ └── viewmodel.py
│ │ ├── trainer
│ │ │ ├── action.py
│ │ │ ├── action_table.py
│ │ │ ├── cascade.py
│ │ │ ├── corpora.py
│ │ │ ├── lexical_encoder.py
│ │ │ ├── lexicon.py
│ │ │ ├── parser_state.py
│ │ │ ├── pytorch_modules.py
│ │ │ ├── spec.py
│ │ │ ├── trace.py
│ │ │ ├── train_util.py
│ │ │ ├── trainer.py
│ │ │ └── transition_generator.py
│ │ ├── transition-generator.cc
│ │ └── transition-generator.h
│ ├── silver
│ │ ├── BUILD
│ │ ├── anaphora.cc
│ │ ├── chart.cc
│ │ ├── chart.h
│ │ ├── corpus-split.cc
│ │ ├── idf.cc
│ │ ├── idf.h
│ │ ├── mentions.cc
│ │ ├── mentions.h
│ │ ├── nominal-pruning.cc
│ │ ├── parse-chart.cc
│ │ ├── phrases.cc
│ │ ├── relations.cc
│ │ └── types.cc
│ ├── wiki
│ │ ├── BUILD
│ │ ├── aliases.cc
│ │ ├── parse-wiki-text.cc
│ │ ├── wiki-annotator.cc
│ │ ├── wiki-annotator.h
│ │ ├── wiki-extractor.cc
│ │ ├── wiki-extractor.h
│ │ ├── wiki-macros.cc
│ │ ├── wiki-parser.cc
│ │ ├── wiki-parser.h
│ │ ├── wiki.cc
│ │ ├── wiki.h
│ │ ├── wikidata-converter.cc
│ │ ├── wikidata-converter.h
│ │ ├── wikidata-importer.cc
│ │ ├── wikipedia-documents.cc
│ │ ├── wikipedia-importer.cc
│ │ ├── wikipedia-links.cc
│ │ ├── wikipedia-map.cc
│ │ └── wikipedia-map.h
│ └── wikicat
│ │ ├── app
│ │ ├── appicon.ico
│ │ ├── index.html
│ │ ├── wikicat.css
│ │ └── wikicat.js
│ │ ├── fact_matcher.py
│ │ ├── generator.py
│ │ ├── prelim_ranker.py
│ │ ├── server.py
│ │ ├── util.py
│ │ └── workflow.py
├── pyapi
│ ├── BUILD
│ ├── pyapi.cc
│ ├── pyarray.cc
│ ├── pyarray.h
│ ├── pybase.cc
│ ├── pybase.h
│ ├── pydate.cc
│ ├── pydate.h
│ ├── pyframe.cc
│ ├── pyframe.h
│ ├── pymisc.cc
│ ├── pymisc.h
│ ├── pymyelin.cc
│ ├── pymyelin.h
│ ├── pyparser.cc
│ ├── pyparser.h
│ ├── pyphrase.cc
│ ├── pyphrase.h
│ ├── pyrecordio.cc
│ ├── pyrecordio.h
│ ├── pystore.cc
│ ├── pystore.h
│ ├── pytask.cc
│ ├── pytask.h
│ ├── pywiki.cc
│ └── pywiki.h
├── stream
│ ├── BUILD
│ ├── bounded.cc
│ ├── bounded.h
│ ├── bzip2.cc
│ ├── bzip2.h
│ ├── file-input.cc
│ ├── file-input.h
│ ├── file.cc
│ ├── file.h
│ ├── gzip.cc
│ ├── gzip.h
│ ├── input.cc
│ ├── input.h
│ ├── memory.cc
│ ├── memory.h
│ ├── output.cc
│ ├── output.h
│ ├── stream.h
│ ├── unix-file.cc
│ ├── unix-file.h
│ ├── zipfile.cc
│ └── zipfile.h
├── string
│ ├── BUILD
│ ├── ctype.cc
│ ├── ctype.h
│ ├── numbers.cc
│ ├── numbers.h
│ ├── printf.cc
│ ├── printf.h
│ ├── strcat.cc
│ ├── strcat.h
│ ├── text.cc
│ └── text.h
├── task
│ ├── BUILD
│ ├── accumulator.cc
│ ├── accumulator.h
│ ├── app
│ │ ├── appicon.ico
│ │ ├── dashboard.css
│ │ ├── dashboard.js
│ │ ├── digital-7.mono.ttf
│ │ └── index.html
│ ├── dashboard.cc
│ ├── dashboard.h
│ ├── documents.cc
│ ├── documents.h
│ ├── environment.h
│ ├── frame-store-reader.cc
│ ├── frame-store-writer.cc
│ ├── frames.cc
│ ├── frames.h
│ ├── identity.cc
│ ├── job.cc
│ ├── job.h
│ ├── learner.cc
│ ├── learner.h
│ ├── mapper.cc
│ ├── mapper.h
│ ├── message-printer.cc
│ ├── message.cc
│ ├── message.h
│ ├── null-sink.cc
│ ├── pipe-reader.cc
│ ├── process.cc
│ ├── process.h
│ ├── record-file-reader.cc
│ ├── record-file-writer.cc
│ ├── reducer.cc
│ ├── reducer.h
│ ├── rekey.cc
│ ├── sharder.cc
│ ├── sorter.cc
│ ├── task.cc
│ ├── task.h
│ ├── text-file-reader.cc
│ ├── text-file-writer.cc
│ ├── text-map-reader.cc
│ ├── text-map-writer.cc
│ └── workers.cc
├── util
│ ├── BUILD
│ ├── arena.h
│ ├── asset.h
│ ├── bloom.h
│ ├── city.cc
│ ├── city.h
│ ├── elf-writer.cc
│ ├── elf-writer.h
│ ├── embeddings.cc
│ ├── embeddings.h
│ ├── fingerprint.cc
│ ├── fingerprint.h
│ ├── mutex.h
│ ├── random.h
│ ├── snappy.h
│ ├── sortmap.h
│ ├── thread.cc
│ ├── thread.h
│ ├── threadpool.cc
│ ├── threadpool.h
│ ├── top.h
│ ├── unicode.cc
│ ├── unicode.h
│ ├── unicodetab.cc
│ ├── varint.cc
│ ├── varint.h
│ ├── vocabulary.cc
│ └── vocabulary.h
└── web
│ ├── BUILD
│ ├── entity-ref.cc
│ ├── entity-ref.h
│ ├── xml-parser.cc
│ └── xml-parser.h
├── third_party
├── bz2lib
│ ├── BUILD
│ ├── LICENSE
│ ├── README
│ ├── blocksort.c
│ ├── bzlib.c
│ ├── bzlib.h
│ ├── bzlib_private.h
│ ├── compress.c
│ ├── crctable.c
│ ├── decompress.c
│ ├── huffman.c
│ └── randtable.c
├── jit
│ ├── BUILD
│ ├── LICENSE
│ ├── assembler.cc
│ ├── assembler.h
│ ├── avx512.inc
│ ├── avx512ops.txt
│ ├── code.cc
│ ├── code.h
│ ├── cpu.cc
│ ├── cpu.h
│ ├── instructions.h
│ ├── memory.h
│ ├── registers.h
│ └── types.h
├── snappy
│ ├── BUILD
│ ├── LICENSE
│ ├── bits.h
│ ├── endian.h
│ ├── snappy-sinksource.cc
│ ├── snappy-sinksource.h
│ ├── snappy.cc
│ └── snappy.h
└── zlib
│ ├── BUILD
│ ├── README
│ ├── adler32.c
│ ├── compress.c
│ ├── crc32.c
│ ├── crc32.h
│ ├── deflate.c
│ ├── deflate.h
│ ├── gzclose.c
│ ├── gzguts.h
│ ├── gzlib.c
│ ├── gzread.c
│ ├── gzwrite.c
│ ├── infback.c
│ ├── inffast.c
│ ├── inffast.h
│ ├── inffixed.h
│ ├── inflate.c
│ ├── inflate.h
│ ├── inftrees.c
│ ├── inftrees.h
│ ├── trees.c
│ ├── trees.h
│ ├── uncompr.c
│ ├── zconf.h
│ ├── zlib.h
│ ├── zutil.c
│ └── zutil.h
└── tools
├── BUILD
├── build-wheel.py
├── buildall.sh
├── codex.cc
├── docv1to2.py
├── embed-data.cc
├── embed.bzl
├── index.cc
├── optohdr.py
└── snaps.cc
/.bazelrc:
--------------------------------------------------------------------------------
1 | build --color=yes
2 | build --cxxopt=-Wno-unknown-warning-option
3 | build --cxxopt=-Wno-unused-command-line-argument
4 | build --cxxopt=-Wno-deprecated
5 | build --cxxopt=-Wno-sign-compare
6 | build --cxxopt=-Wno-unused-local-typedefs
7 | build --cxxopt=-Wno-undefined-var-template
8 | build --cxxopt=-Wno-attributes
9 | build --spawn_strategy=standalone
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bazel-*
2 | examples
3 | local
4 | *.pyc
5 | *.aux
6 | *.bbl
7 | *.blg
8 | *.log
9 | *.out
10 | *.pdf_tex
11 | doc/report/sling.pdf
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | language:
3 | - cpp
4 | - python
5 | compiler: gcc
6 | python: "3.5"
7 |
8 | addons:
9 | apt:
10 | sources:
11 | - ubuntu-toolchain-r-test
12 | packages:
13 | - wget
14 | - pkg-config
15 | - g++-4.8
16 | - python3.5-dev
17 |
18 | before_install:
19 | - wget https://github.com/bazelbuild/bazel/releases/download/1.0.0/bazel_1.0.0-linux-x86_64.deb
20 | - sudo dpkg -i bazel_1.0.0-linux-x86_64.deb
21 |
22 | script:
23 | - tools/buildall.sh
24 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/WORKSPACE
--------------------------------------------------------------------------------
/app/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | load("//tools:embed.bzl", "embed_data")
4 |
5 | embed_data(
6 | name = "app",
7 | srcs = [
8 | "external/material.js",
9 | "external/material.css",
10 | "external/material-icons.css",
11 | "external/material-icons.woff2",
12 | "external/preact.js",
13 | "image/appicon.ico",
14 | "lib/docview.js",
15 | "lib/mdl.js",
16 | "lib/util.js",
17 | "style/docview.css",
18 | ],
19 | )
20 |
--------------------------------------------------------------------------------
/app/external/README.md:
--------------------------------------------------------------------------------
1 | # External web components for SLING.
2 |
3 | ## `preact.js`
4 | Fast alternative to React.
5 | * Website: https://preactjs.com
6 | * Retrieved from: https://unpkg.com/preact
7 | * Date: 2018-09-24
8 | * License: [MIT](https://github.com/developit/preact/blob/master/LICENSE)
9 |
10 | ## `material.js` and `material.css`
11 | Material Design Lite (MDL) components.
12 | * Website: https://getmdl.io
13 | * Retrieved from: https://code.getmdl.io/1.3.0/mdl.zip
14 | * Date: 2018-09-24
15 | * Version: 1.3.0
16 | * License: [Apache 2.0](https://github.com/google/material-design-lite/blob/mdl-1.x/LICENSE)
17 |
18 | ## `material-icons.css` and `material-icons.woff2`
19 | Material Design Icon Font.
20 | * Website: https://material.io/tools/icons
21 | * Retrieved from: https://fonts.googleapis.com/icon?family=Material+Icons
22 | * Date: 2018-09-25
23 | * License: [Apache 2.0](https://github.com/google/material-design-icons/blob/master/LICENSE)
24 |
--------------------------------------------------------------------------------
/app/external/material-icons.css:
--------------------------------------------------------------------------------
1 | /* fallback */
2 | @font-face {
3 | font-family: 'Material Icons';
4 | font-style: normal;
5 | font-weight: 400;
6 | src: url(/common/external/material-icons.woff2) format('woff2');
7 | }
8 |
9 | .material-icons {
10 | font-family: 'Material Icons';
11 | font-weight: normal;
12 | font-style: normal;
13 | font-size: 24px;
14 | line-height: 1;
15 | letter-spacing: normal;
16 | text-transform: none;
17 | display: inline-block;
18 | white-space: nowrap;
19 | word-wrap: normal;
20 | direction: ltr;
21 | -webkit-font-feature-settings: 'liga';
22 | -webkit-font-smoothing: antialiased;
23 | }
24 |
--------------------------------------------------------------------------------
/app/external/material-icons.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/app/external/material-icons.woff2
--------------------------------------------------------------------------------
/app/image/appicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/app/image/appicon.ico
--------------------------------------------------------------------------------
/app/lib/util.js:
--------------------------------------------------------------------------------
1 | // Web utility functions.
2 |
3 | export function stylesheet(url) {  // Add a <link> element for the stylesheet at url to the document head.
4 | if (document.getElementById(url)) return;  // An element with this id already exists, so do nothing.
5 | var head = document.getElementsByTagName('head')[0];
6 | var link = document.createElement('link');
7 | link.id = url;  // The URL doubles as the element id, which is what the check above looks up.
8 | link.rel = 'stylesheet';
9 | link.type = 'text/css';
10 | link.href = url;
11 | head.appendChild(link);
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/data/nlp/schemas/catalog.sling:
--------------------------------------------------------------------------------
1 | ; Schema catalog.
2 |
3 | {=global :catalog :named
4 | name: "Global catalog"
5 | catalog_schema_family: /schema/meta
6 | catalog_schema_family: /schema/document
7 | catalog_schema_family: /schema/lang
8 | catalog_schema_family: /schema/wikidata
9 | catalog_schema_family: /schema/wikipedia
10 | }
11 |
--------------------------------------------------------------------------------
/data/nlp/schemas/constituency.sling:
--------------------------------------------------------------------------------
1 | ; Schema for constituency parse information
2 |
3 | {=/constituency/constituents name: "constituents"}
4 | {=/constituency/constituent name: "constituent"}
5 | {=/constituency/tag name: "constituency-tag"}
6 | {=/constituency/parent name: "constituency-parent"}
7 | {=/constituency/children name: "constituency-children"}
8 | {=/constituency/head name: "constituency-head"}
9 |
--------------------------------------------------------------------------------
/data/wiki/de/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/de
2 |
3 | "'": {type: "text" text: "'"}
4 |
5 | "datum": {type: "date" year: 3 month: 2 day: 1 yearn: "jahr" monthn: "monat" dayn: "tag" format: /w/dates/de}
6 | "FormatDate": {type: "date" full: 1 format: /w/dates/de}
7 |
8 | }
9 |
10 |
--------------------------------------------------------------------------------
/data/wiki/en/phrases.txt:
--------------------------------------------------------------------------------
1 | ; Custom phrase annotations for English.
2 |
3 | ; New York (Q1384)
4 | [New York|Q1384]
5 |
6 | ; South Korea (Q884)
7 | [South Korea|Q884]
8 |
9 | ; United Kingdom (Q145)
10 | [United Kingdom|Q145]
11 |
12 | ; United States of America (Q30)
13 | [United States|Q30]
14 |
15 | ; Washington DC (Q61)
16 | [Washington, [D.C.|{=#1 +Q3551781}]|{+Q61 P131: #1}]
17 | [Washington [D.C.|{=#1 +Q3551781}]|{+Q61 P131: #1}]
18 | [Washington [DC|{=#1 +Q3551781}]|{+Q61 P131: #1}]
19 |
20 |
--------------------------------------------------------------------------------
/data/wiki/es/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/es
2 |
3 | "!": {type: "text" text: "|"}
4 | "!!": {type: "text" text: "||"}
5 | "'": {type: "text" text: "'"}
6 |
7 | "!((": {type: "text" text: "[["}
8 | "))!": {type: "text" text: "]]"}
9 | "(": {type: "text" text: "{"}
10 | ")": {type: "text" text: "}"}
11 | "((": {type: "text" text: "{{"}
12 | "))": {type: "text" text: "}}"}
13 |
14 | "·": {type: "text" text: " ·"}
15 | "=": {type: "text" text: "="}
16 |
17 | "fecha": {type: "date" year: 1 month: 2 day: 3 reverse: 1 format: /w/dates/es}
18 | "fecha de muerte": {type: "date" year: 1 month: 2 day: 3 reverse: 1 format: /w/dates/es}
19 | "año": {type: "year" bc: 2 format: /w/dates/es}
20 |
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/data/wiki/fi/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/fi
2 |
3 | "!!": {type: "text" text: "||"}
4 | "'": {type: "text" text: "'"}
5 | "-\"": {type: "text" text: " \""}
6 | "'-": {type: "text" text: "' "}
7 |
8 | "((": {type: "text" text: "{{"}
9 | "))": {type: "text" text: "}}"}
10 |
11 | "·": {type: "text" text: " ·"}
12 | "•": {type: "text" text: " •"}
13 | "=": {type: "text" text: "="}
14 |
15 | "päiväys": {type: "date" full: 1 year: 1 month: 2 day:3 format: /w/dates/fi}
16 |
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/data/wiki/fr/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/fr
2 |
3 | "!": {type: "text" text: "|"}
4 | "!!": {type: "text" text: "||"}
5 | "'": {type: "text" text: "'"}
6 |
7 | ")!": {type: "text" text: "]"}
8 | "!((": {type: "text" text: "[["}
9 | "))!": {type: "text" text: "]]"}
10 | "(": {type: "text" text: "{"}
11 | ")": {type: "text" text: "}"}
12 | "((": {type: "text" text: "{{"}
13 | "))": {type: "text" text: "}}"}
14 | "(((": {type: "text" text: "{{{"}
15 | ")))": {type: "text" text: "}}}"}
16 |
17 | "·": {type: "text" text: " ·"}
18 | "•": {type: "text" text: " •"}
19 | "=": {type: "text" text: "="}
20 |
21 | "date": {type: "date" full: 1 year: 1 month: 2 day: 3 qual: 4 format: /w/dates/fr}
22 | "date-": {type: "date" full: 1 year: 1 month: 2 day: 3 qual: 4 format: /w/dates/fr}
23 | "date sport": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr}
24 | "Date France": {type: "date" year: 3 month: 2 day: 1 post: " en France" format: /w/dates/fr}
25 | "date de naissance": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr}
26 | "date de décès": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr}
27 | "date de décès-": {type: "date" year: 3 month: 2 day: 1 qual: 4 format: /w/dates/fr}
28 | "existe depuis": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/fr}
29 |
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/data/wiki/it/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/it
2 |
3 | "!!": {type: "text" text: "||"}
4 | "'": {type: "text" text: "'"}
5 |
6 | ")!": {type: "text" text: "]"}
7 | "(": {type: "text" text: "{"}
8 | ")": {type: "text" text: "}"}
9 |
10 | "·": {type: "text" text: " ·"}
11 | "•": {type: "text" text: " •"}
12 | "=": {type: "text" text: "="}
13 |
14 | "data": {type: "date" year: 3 month: 2 day: 1 yearn: "A" monthn: "M" dayn: "G" format: /w/dates/it}
15 |
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/data/wiki/nl/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/nl
2 |
3 | "!!": {type: "text" text: "||"}
4 | ")!": {type: "text" text: "]"}
5 | "!((": {type: "text" text: "[["}
6 | "))!": {type: "text" text: "]]"}
7 | "=": {type: "text" text: "="}
8 |
9 | }
10 |
11 |
--------------------------------------------------------------------------------
/data/wiki/no/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/no
2 |
3 | "!!": {type: "text" text: "||"}
4 | "'": {type: "text" text: "'"}
5 |
6 | ")!": {type: "text" text: "]"}
7 | "(": {type: "text" text: "{"}
8 | ")": {type: "text" text: "}"}
9 | "((": {type: "text" text: "{{"}
10 | "))": {type: "text" text: "}}"}
11 | "(((": {type: "text" text: "{{{"}
12 | ")))": {type: "text" text: "}}}"}
13 |
14 | "·": {type: "text" text: " ·"}
15 | "•": {type: "text" text: " •"}
16 | "\\": {type: "text" text: " /"}
17 | "=": {type: "text" text: "="}
18 |
19 | "startdato": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no}
20 | "startdato og alder": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no}
21 | "fødselsdato": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no}
22 | "fødselsdato og alder": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no}
23 | "fødselsdato og alder2": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/no}
24 |
25 | "dødsdato": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no}
26 | "dødsdato og alder": {type: "date" year: 1 month: 2 day: 3 yearn: "år" monthn: "måned" dayn: "dag" format: /w/dates/no}
27 |
28 | }
29 |
30 |
--------------------------------------------------------------------------------
/data/wiki/pl/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/pl
2 |
3 | "!": {type: "text" text: "|"}
4 | "!!": {type: "text" text: "||"}
5 | ")!": {type: "text" text: "]"}
6 | "!((": {type: "text" text: "[["}
7 | "))!": {type: "text" text: "]]"}
8 | "(": {type: "text" text: "{"}
9 | ")": {type: "text" text: "}"}
10 | "((": {type: "text" text: "{{"}
11 | "))": {type: "text" text: "}}"}
12 | "(((": {type: "text" text: "{{{"}
13 | ")))": {type: "text" text: "}}}"}
14 | "·": {type: "text" text: " ·"}
15 | "•": {type: "text" text: " •"}
16 | "=": {type: "text" text: "="}
17 |
18 | }
19 |
20 |
--------------------------------------------------------------------------------
/data/wiki/pt/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/pt
2 |
3 | "!!": {type: "text" text: "||"}
4 | "'": {type: "text" text: "'"}
5 | "\" '": {type: "text" text: "\" '"}
6 |
7 | ")!": {type: "text" text: "]"}
8 | "!((": {type: "text" text: "[["}
9 | "))!": {type: "text" text: "]]"}
10 | "(": {type: "text" text: "{"}
11 | ")": {type: "text" text: "}"}
12 | "((": {type: "text" text: "{{"}
13 | "))": {type: "text" text: "}}"}
14 | "·": {type: "text" text: " ·"}
15 | "•": {type: "text" text: " •"}
16 | "\\": {type: "text" text: " /"}
17 | "=": {type: "text" text: "="}
18 |
19 | "data": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt}
20 | "DataExt": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt}
21 | "dtlink": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt}
22 | "dtext": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/pt}
23 | "dni": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/pt}
24 | "morte": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/pt}
25 | "ani": {type: "date" year: 1 month: 2 day: 3 yearn: "ano" monthn: "mês" dayn: "dia" format: /w/dates/pt}
26 |
27 | }
28 |
29 |
--------------------------------------------------------------------------------
/data/wiki/sv/templates.sling:
--------------------------------------------------------------------------------
1 | {=/wp/templates/sv
2 |
3 | "!": {type: "text" text: "|"}
4 | "!!": {type: "text" text: "||"}
5 | "'": {type: "text" text: "'"}
6 | "'s": {type: "text" text: "'s"}
7 | "'-": {type: "text" text: "' "}
8 |
9 | ")!": {type: "text" text: "]"}
10 | "!((": {type: "text" text: "[["}
11 | "))!": {type: "text" text: "]]"}
12 | "(": {type: "text" text: "{"}
13 | ")": {type: "text" text: "}"}
14 | "((": {type: "text" text: "{{"}
15 | "))": {type: "text" text: "}}"}
16 | "(((": {type: "text" text: "{{{"}
17 | ")))": {type: "text" text: "}}}"}
18 |
19 | "·": {type: "text" text: " ·"}
20 | "•": {type: "text" text: " •"}
21 | "\\": {type: "text" text: " /"}
22 | "=": {type: "text" text: "="}
23 |
24 | ;"datum": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/se}
25 | "startdatum": {type: "date" year: 1 month: 1 day: 3 format: /w/dates/se}
26 | "slutdatum": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/se}
27 | "startdatum och ålder": {type: "date" year: 1 month: 1 day: 3 format: /w/dates/se}
28 | ;"slutdatum och ålder": {type: "date" year: 1 month: 2 day: 3 format: /w/dates/se}
29 |
30 | "död datum och ålder": {type: "date" year: 3 month: 2 day: 1 format: /w/dates/se}
31 | "död år och ålder": {type: "year" format: /w/dates/se}
32 |
33 | }
34 |
35 |
--------------------------------------------------------------------------------
/data/wiki/wikipedia.sling:
--------------------------------------------------------------------------------
1 | ; Wikipedia schemas.
2 |
3 | {=/wp/page +/s/document :schema
4 | name: "Wikipedia page"
5 | family: /schema/wikipedia
6 |
7 | role: {=/wp/page/pageid :slot
8 | name: "page id"
9 | description: "Wikipedia page id"
10 | source: /wp/page
11 | target: int
12 | }
13 |
14 | role: {=/wp/page/title :slot
15 | name: "page title"
16 | description: "Wikipedia page title"
17 | source: /wp/page
18 | target: string
19 | }
20 |
21 | role: {=/wp/page/text :slot
22 | name: "wiki text"
23 | description: "Wikipedia page text in wiki markup format"
24 | source: /wp/page
25 | target: string
26 | }
27 |
28 | role: {=/wp/page/item :slot
29 | name: "Wikidata item"
30 | description: "Wikidata item"
31 | source: /wp/page
32 | target: /w/item
33 | }
34 |
35 | role: {=/wp/page/category :slot
36 | name: "category"
37 | description: "Wikipedia category"
38 | source: /wp/page
39 | target: /wp/category
40 | multi: 1
41 | }
42 | }
43 |
44 | {=/wp/redirect :schema
45 | name: "Wikipedia redirect"
46 | family: /schema/wikipedia
47 |
48 | role: {=/wp/redirect/pageid :slot
49 | name: "page id"
50 | description: "Wikipedia page id"
51 | source: /wp/redirect
52 | target: int
53 | }
54 |
55 | role: {=/wp/redirect/title :slot
56 | name: "redirect title"
57 | description: "Wikipedia redirect page title"
58 | source: /wp/redirect
59 | target: string
60 | }
61 |
62 | role: {=/wp/redirect/link :slot
63 | name: "redirect link"
64 | description: "Wikipedia redirect link"
65 | source: /wp/redirect
66 | target: /wp/page
67 | }
68 | }
69 |
70 | {=/wp/link :schema
71 | name: "Wikipedia link"
72 | family: /schema/wikipedia
73 | }
74 |
75 | {=/wp/category :schema
76 | name: "Wikipedia category"
77 | family: /schema/wikipedia
78 | }
79 |
80 | {=/schema/wikipedia :schema_family
81 | name: "Wikipedia schemas"
82 |
83 | member_schema: /wp/page
84 | member_schema: /wp/redirect
85 | member_schema: /wp/link
86 | member_schema: /wp/category
87 | }
88 |
89 |
--------------------------------------------------------------------------------
/doc/guide/README.md:
--------------------------------------------------------------------------------
1 | # SLING Guides
2 |
3 | * [SLING installation and building](install.md)
4 | * [CASPAR frame semantics parser](caspar.md)
5 | * [SLING frames](frames.md)
6 | * [SLING Python API](pyapi.md)
7 | * [Myelin neural network JIT compiler](myelin.md)
8 | * [Wikipedia and Wikidata processing](wikiflow.md)
9 |
10 | ## Outdated guides
11 |
12 | * [Training a SLING parser](training.md)
13 | * [Parsing with SLING](parsing.md)
14 |
15 |
--------------------------------------------------------------------------------
/doc/guide/kb-browser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/guide/kb-browser.png
--------------------------------------------------------------------------------
/doc/report/dev-eval.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/dev-eval.pdf
--------------------------------------------------------------------------------
/doc/report/network.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/network.pdf
--------------------------------------------------------------------------------
/doc/report/runtime.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/doc/report/runtime.pdf
--------------------------------------------------------------------------------
/python/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | py_library(
4 | name = "flow",
5 | srcs = ["flow.py"],
6 | )
7 |
8 | py_library(
9 | name = "mnist",
10 | srcs = ["mnist.py"],
11 | deps = [":flow"],
12 | )
13 |
14 |
--------------------------------------------------------------------------------
/python/__init__.py:
--------------------------------------------------------------------------------
1 | import sling.pysling as api
2 |
3 | from sling.log import *
4 | from sling.nlp.document import *
5 | from sling.nlp.parser import *
6 |
7 | Store=api.Store
8 | Frame=api.Frame
9 | Array=api.Array
10 |
11 | RecordReader=api.RecordReader
12 | RecordDatabase=api.RecordDatabase
13 | RecordWriter=api.RecordWriter
14 | PhraseTable=api.PhraseTable
15 | Calendar=api.Calendar
16 | Date=api.Date
17 | WikiConverter=api.WikiConverter
18 | FactExtractor=api.FactExtractor
19 | PlausibilityModel=api.PlausibilityModel
20 |
21 | MILLENNIUM=api.MILLENNIUM
22 | CENTURY=api.CENTURY
23 | DECADE=api.DECADE
24 | YEAR=api.YEAR
25 | MONTH=api.MONTH
26 | DAY=api.DAY
27 |
28 | CASE_INVALID=api.CASE_INVALID
29 | CASE_NONE=api.CASE_NONE
30 | CASE_UPPER=api.CASE_UPPER
31 | CASE_LOWER=api.CASE_LOWER
32 | CASE_TITLE=api.CASE_TITLE
33 |
34 |
--------------------------------------------------------------------------------
/python/log.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Simple logging."""
16 |
17 | import inspect
18 | import os
19 | import sling.pysling as api
20 |
21 | INFO = 0
22 | WARNING = 1
23 | ERROR = 2
24 | FATAL = 3
25 |
26 | def _log_message(msg, severity):  # forwards msg to api.log_message, attributed to the code that called the info()/warning()/error()/fatal() wrapper
27 | caller = inspect.stack()[2]  # [0] is this function, [1] the wrapper that called it, [2] the wrapper's caller
28 | fn = os.path.basename(caller[1])  # element 1 of a stack entry is the file name; directories are dropped
29 | line = caller[2]  # element 2 of a stack entry is the line number
30 | api.log_message(severity, fn, line, msg)
31 |
32 | def info(msg):
33 | _log_message(msg, INFO)
34 |
35 | def warning(msg):
36 | _log_message(msg, WARNING)
37 |
38 | def error(msg):
39 | _log_message(msg, ERROR)
40 |
41 | def fatal(msg):
42 | _log_message(msg, FATAL)
43 |
44 |
--------------------------------------------------------------------------------
/python/myelin/__init__.py:
--------------------------------------------------------------------------------
1 | import sling.pysling as api
2 |
3 | from .builder import *
4 | from .flow import *
5 |
6 | Compiler=api.Compiler
7 |
8 |
--------------------------------------------------------------------------------
/python/nlp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/python/nlp/__init__.py
--------------------------------------------------------------------------------
/python/pysling.so:
--------------------------------------------------------------------------------
1 | ../bazel-bin/sling/pyapi/pysling.so
--------------------------------------------------------------------------------
/python/task/__init__.py:
--------------------------------------------------------------------------------
1 | from sling.task.workflow import *
2 |
3 |
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Run python/run.py, forwarding every command-line argument unchanged.
3 | python3 python/run.py "$@"
4 |
5 |
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Exit on errors.
4 | set -e
5 |
6 | echo "========================================================================="
7 | echo "Set up SLING development environment"
8 | echo "========================================================================="
9 |
10 | # Install packages.
11 | echo
12 | echo "=== Install SLING dependencies"
13 | PYVER=3.5
14 | PYPKGS="python${PYVER} python${PYVER}-dev python3-pip"
15 | PKGS="pkg-config zip g++ zlib1g-dev unzip lbzip2 ${PYPKGS}"
16 | sudo apt-get install ${PKGS}
17 |
18 | # Install bazel.
19 | BAZELVER=1.0.0
20 | BAZELSH=bazel-${BAZELVER}-installer-linux-x86_64.sh
21 | BAZELREPO=https://github.com/bazelbuild/bazel
22 | BAZELURL=${BAZELREPO}/releases/download/${BAZELVER}/${BAZELSH}
23 |
24 | if [[ $UPGRADE_BAZEL = "1" ]] && which bazel > /dev/null; then
25 |   echo "=== Forcing reinstall of Bazel"
26 |   sudo rm "$(which bazel)"  # only reached when bazel is on PATH, so rm never gets an empty operand
27 | fi
28 |
29 | if ! which bazel > /dev/null; then
30 | echo
31 | echo "=== Install Bazel build system"
32 | wget -P /tmp ${BAZELURL}
33 | chmod +x /tmp/${BAZELSH}
34 | sudo /tmp/${BAZELSH}
35 | rm /tmp/${BAZELSH}
36 | fi
37 |
38 | # Build SLING.
39 | echo
40 | echo "=== Build SLING"
41 | tools/buildall.sh
42 |
43 | # Install SLING Python API.
44 | echo
45 | echo "=== Set up SLING Python API"
46 | SLINGPKG=/usr/lib/python3/dist-packages/sling
47 |
48 | PIP="sudo -H pip3 --disable-pip-version-check"
49 |
50 | if [[ -L "/usr/lib/python2.7/dist-packages/sling" ]]; then
51 | echo "Removing deprecated SLING Python 2.7 package"
52 | sudo rm /usr/lib/python2.7/dist-packages/sling
53 | fi
54 | if [[ -L "/usr/local/lib/python2.7/dist-packages/sling" ]]; then
55 | echo "Removing deprecated SLING Python 2.7 local package"
56 | sudo rm /usr/local/lib/python2.7/dist-packages/sling
57 | fi
58 |
59 | if [[ $(${PIP} freeze | grep "sling==") ]]; then
60 | echo "Removing existing SLING pip package"
61 | ${PIP} uninstall sling
62 | fi
63 |
64 | if [[ -x "${SLINGPKG}" ]]; then
65 |   echo "SLING Python package already installed"
66 | else
67 |   echo "Adding link for SLING Python package"
68 |   sudo ln -s "$(realpath python)" "${SLINGPKG}"  # quoted so a checkout path with spaces stays one argument
69 | fi
70 |
71 | # Done.
72 | echo
73 | echo "=== SLING is now set up."
74 |
75 |
--------------------------------------------------------------------------------
/sling/base/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | cc_library(
4 | name = "base",
5 | srcs = [
6 | "flags.cc",
7 | "init.cc",
8 | "logging.cc",
9 | "status.cc",
10 | ],
11 | hdrs = [
12 | "bitcast.h",
13 | "flags.h",
14 | "init.h",
15 | "logging.h",
16 | "macros.h",
17 | "port.h",
18 | "slice.h",
19 | "status.h",
20 | "types.h",
21 | ],
22 | linkopts = [
23 | "-lpthread",
24 | ],
25 | )
26 |
27 | cc_library(
28 | name = "libinit",
29 | srcs = ["libinit.cc"],
30 | deps = [
31 | ":base",
32 | ],
33 | alwayslink = 1,
34 | )
35 |
36 | cc_library(
37 | name = "registry",
38 | srcs = ["registry.cc"],
39 | hdrs = ["registry.h"],
40 | deps = [
41 | ":base",
42 | ],
43 | )
44 |
45 | cc_library(
46 | name = "clock",
47 | srcs = ["clock.cc"],
48 | hdrs = ["clock.h"],
49 | deps = [
50 | ":base",
51 | ],
52 | )
53 |
54 | cc_library(
55 | name = "perf",
56 | srcs = ["perf.cc"],
57 | hdrs = ["perf.h"],
58 | deps = [
59 | ":base",
60 | ],
61 | )
62 |
63 |
--------------------------------------------------------------------------------
/sling/base/bitcast.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_BASE_BITCAST_H_
16 | #define SLING_BASE_BITCAST_H_
17 |
18 | #include <string.h>
19 |
20 | namespace sling {
21 |
22 | // bit_cast<Dest>(source) is a template function that implements the equivalent
23 | // of "*reinterpret_cast<Dest *>(&source)" by copying the bytes with memcpy.
24 | template <class Dest, class Source>
25 | inline Dest bit_cast(const Source &source) {
26 |   // The copy below reads sizeof(Dest) bytes from source, so both types must
27 |   // have exactly the same size; a mismatch is rejected at compile time.
28 |   static_assert(sizeof(Dest) == sizeof(Source), "bit_cast: size mismatch");
29 |
30 |   Dest dest;
31 |   memcpy(&dest, &source, sizeof(dest));
32 |   return dest;
33 | }
34 |
35 | } // namespace sling
36 |
37 | #endif // SLING_BASE_BITCAST_H_
38 |
39 |
--------------------------------------------------------------------------------
/sling/base/clock.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_BASE_CLOCK_H_
16 | #define SLING_BASE_CLOCK_H_
17 |
18 | #include "sling/base/types.h"
19 |
20 | namespace sling {
21 |
22 | // Cycle-counting clock for performance measurements.
23 | class Clock {
24 |  public:
25 |   // TSC timestamp.
26 |   typedef int64_t Timestamp;
27 |
28 |   // Return timestamp from cycle counter.
29 |   static inline Timestamp now() {
30 |     uint64_t low, high;
31 |     __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
32 |     return (high << 32) | low;  // combine the two halves into one 64-bit count
33 |   }
34 |
35 |   // Return clock speed in Hz.
36 |   static double hz();
37 |
38 |   // Return clock speed in MHz.
39 |   static double mhz();
40 |
41 |   // Start clock.
42 |   void start() { start_ = now(); }
43 |
44 |   // Stop clock.
45 |   void stop() { end_ = now(); }
46 |
47 |   // Return clock cycles elapsed since start.
48 |   Timestamp elapsed() const { return now() - start_; }
49 |
50 |   // Return clock cycles between start and stop.
51 |   Timestamp cycles() const { return end_ - start_; }
52 |
53 |   // Return time in seconds.
54 |   double secs() const;
55 |
56 |   // Return time in milliseconds.
57 |   double ms() const;
58 |
59 |   // Return time in microseconds.
60 |   double us() const;
61 |
62 |   // Return time in nanoseconds.
63 |   double ns() const;
64 |
65 |  private:
66 |   Timestamp start_ = 0;  // start timestamp; zero until start() is called
67 |   Timestamp end_ = 0;    // end timestamp; zero until stop() is called
68 | };
69 |
70 | } // namespace sling
71 |
72 | #endif // SLING_BASE_CLOCK_H_
73 |
74 |
--------------------------------------------------------------------------------
/sling/base/init.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/init.h"
16 |
17 | #include
18 |
19 | #include "sling/base/flags.h"
20 | #include "sling/base/logging.h"
21 | #include "sling/base/types.h"
22 |
23 | namespace sling {
24 |
25 | // Linked list of module initializers.
26 | ModuleInitializer *ModuleInitializer::first = nullptr;
27 | ModuleInitializer *ModuleInitializer::last = nullptr;
28 |
29 | ModuleInitializer::ModuleInitializer(const char *n, Handler h)
30 |     : name(n), handler(h), next(nullptr) {  // null next terminates the list walk
31 |   if (first == nullptr) first = this;      // first registration becomes the head
32 |   if (last != nullptr) last->next = this;  // link after the current tail, if any
33 |   last = this;
34 | }
35 |
36 | static void RunModuleInitializers() {
37 | ModuleInitializer *initializer = ModuleInitializer::first;
38 | while (initializer != nullptr) {
39 | VLOG(2) << "Initializing " << initializer->name << " module";
40 | initializer->handler();
41 | initializer = initializer->next;
42 | }
43 | }
44 |
45 | void InitProgram(int *argc, char ***argv) {
46 | // Initialize command line flags.
47 | if (*argc > 0) {
48 | string usage;
49 | usage.append((*argv)[0]);
50 | usage.append(" [OPTIONS]\n");
51 | Flag::SetUsageMessage(usage);
52 | if (Flag::ParseCommandLineFlags(argc, *argv) != 0) exit(1);
53 | }
54 |
55 | // Run module initializers.
56 | RunModuleInitializers();
57 | }
58 |
59 | void InitSharedLibrary() {
60 | // Run module initializers.
61 | RunModuleInitializers();
62 | }
63 |
64 | } // namespace sling
65 |
66 |
--------------------------------------------------------------------------------
/sling/base/init.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_BASE_INIT_H_
16 | #define SLING_BASE_INIT_H_
17 |
18 | namespace sling {
19 |
20 | // Module initializers are called in order of registration at startup.
21 | struct ModuleInitializer {
22 | typedef void (*Handler)(void);
23 |
24 | // Add module initializer.
25 | ModuleInitializer(const char *n, Handler h);
26 |
27 | // Module name.
28 | const char *name;
29 |
30 | // Handler for initializing module.
31 | Handler handler;
32 |
33 | // Next initializer.
34 | ModuleInitializer *next;
35 |
36 | // Linked list of module initializers.
37 | static ModuleInitializer *first;
38 | static ModuleInitializer *last;
39 | };
40 |
41 | #define REGISTER_INITIALIZER(name, body) \
42 | namespace { \
43 | static void init_module_##name () { body; } \
44 | __attribute__((init_priority(1000))) \
45 | sling::ModuleInitializer initializer_module_##name \
46 | (#name, init_module_##name); \
47 | }
48 |
49 | // Run module initializers for program.
50 | void InitProgram(int *argc, char **argv[]);
51 |
52 | // Run module initializers for shared library.
53 | void InitSharedLibrary();
54 |
55 | } // namespace sling
56 |
57 | #endif // SLING_BASE_INIT_H_
58 |
59 |
--------------------------------------------------------------------------------
/sling/base/libinit.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/init.h"
16 | #include "sling/base/logging.h"
17 |
18 | namespace sling {
19 |
20 | // Runs the registered module initializers when an instance is constructed.
21 | class LibraryInitializer {
22 |  public:
23 |   LibraryInitializer() {
24 |     InitSharedLibrary();
25 |   }
26 | };
27 |
28 | // The initialization priority should be set higher than the priority of the
29 | // module initializers in init.h.
30 | static LibraryInitializer init __attribute__((init_priority(2000)));
31 |
32 | } // namespace sling
33 |
34 |
--------------------------------------------------------------------------------
/sling/base/registry.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/registry.h"
16 |
17 | namespace sling {
18 |
19 | // Global list of all component registries.
20 | RegistryMetadata *RegistryMetadata::global_registry_list = nullptr;
21 |
22 | void RegistryMetadata::GetComponents(
23 | std::vector *components) const {
24 | components->clear();
25 | ComponentMetadata *meta = *components_;
26 | while (meta != nullptr) {
27 | components->push_back(meta);
28 | meta = meta->link();
29 | }
30 | }
31 |
32 | const ComponentMetadata *RegistryMetadata::GetComponent(
33 | const string &name) const {
34 | ComponentMetadata *meta = *components_;
35 | while (meta != nullptr) {
36 | if (name == meta->name()) return meta;
37 | meta = meta->link();
38 | }
39 | return nullptr;
40 | }
41 |
42 | void RegistryMetadata::Register(RegistryMetadata *registry) {
43 | registry->set_link(global_registry_list);
44 | global_registry_list = registry;
45 | }
46 |
47 | void RegistryMetadata::GetRegistries(
48 | std::vector *registries) {
49 | registries->clear();
50 | RegistryMetadata *meta = global_registry_list;
51 | while (meta != nullptr) {
52 | registries->push_back(meta);
53 | meta = meta->next();
54 | }
55 | }
56 |
57 | const RegistryMetadata *RegistryMetadata::GetRegistry(const string &name) {
58 | RegistryMetadata *meta = global_registry_list;
59 | while (meta != nullptr) {
60 | if (name == meta->name()) return meta;
61 | meta = meta->next();
62 | }
63 | return nullptr;
64 | }
65 |
66 | } // namespace sling
67 |
68 |
--------------------------------------------------------------------------------
/sling/file/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | # File system interface.
4 |
5 | cc_library(
6 | name = "file",
7 | srcs = ["file.cc"],
8 | hdrs = ["file.h"],
9 | deps = [
10 | "//sling/base",
11 | "//sling/base:registry",
12 | ],
13 | copts = [
14 | "-pthread",
15 | ],
16 | )
17 |
18 | cc_library(
19 | name = "posix",
20 | srcs = ["posix.cc"],
21 | hdrs = ["posix.h"],
22 | deps = [
23 | ":file",
24 | "//sling/base",
25 | ],
26 | alwayslink = 1,
27 | )
28 |
29 | cc_library(
30 | name = "embed",
31 | srcs = ["embed.cc"],
32 | hdrs = ["embed.h"],
33 | deps = [
34 | ":file",
35 | "//sling/base",
36 | ],
37 | alwayslink = 1,
38 | )
39 |
40 | # File utility libraries.
41 |
42 | cc_library(
43 | name = "recordio",
44 | srcs = ["recordio.cc"],
45 | hdrs = ["recordio.h"],
46 | deps = [
47 | ":file",
48 | "//sling/base",
49 | "//sling/util:fingerprint",
50 | "//sling/util:snappy",
51 | "//sling/util:varint",
52 | ],
53 | )
54 |
55 | cc_library(
56 | name = "textmap",
57 | srcs = ["textmap.cc"],
58 | hdrs = ["textmap.h"],
59 | deps = [
60 | ":file",
61 | "//sling/base",
62 | "//sling/string:text",
63 | ],
64 | )
65 |
66 | cc_library(
67 | name = "repository",
68 | srcs = ["repository.cc"],
69 | hdrs = ["repository.h"],
70 | deps = [
71 | ":file",
72 | "//sling/base",
73 | ],
74 | )
75 |
76 |
--------------------------------------------------------------------------------
/sling/file/embed.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_FILE_EMBED_H_
16 | #define SLING_FILE_EMBED_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "sling/base/types.h"
22 |
23 | namespace sling {
24 |
25 | // File information for embedded files created with the embed-data tool.
26 | struct EmbeddedFile {
27 | const char *name; // file name
28 | uint64_t size; // file size
29 | const char *data; // file content
30 | uint64_t mtime; // file modification time
31 | };
32 |
33 | // Find embedded file.
34 | const EmbeddedFile *GetEmbeddedFile(const string &name);
35 |
36 | // Return contents of embedded file.
37 | const char *GetEmbeddedFileContent(const string &name);
38 |
39 | } // namespace sling
40 |
41 | #endif // SLING_FILE_EMBED_H_
42 |
43 |
--------------------------------------------------------------------------------
/sling/file/posix.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_FILE_POSIX_H_
16 | #define SLING_FILE_POSIX_H_
17 |
18 | #include
19 |
20 | #include "sling/base/types.h"
21 | #include "sling/file/file.h"
22 |
23 | namespace sling {
24 |
25 | // Create file from POSIX file descriptor. The returned file takes ownership
26 | // of the file descriptor.
27 | File *NewFileFromDescriptor(const string &name, int fd);
28 |
29 | // Create file for standard output.
30 | File *NewStdoutFile();
31 |
32 | } // namespace sling
33 |
34 | #endif // SLING_FILE_POSIX_H_
35 |
36 |
--------------------------------------------------------------------------------
/sling/frame/snapshot.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_FRAME_SNAPSHOT_H_
16 | #define SLING_FRAME_SNAPSHOT_H_
17 |
18 | #include
19 |
20 | #include "sling/base/status.h"
21 | #include "sling/base/types.h"
22 | #include "sling/frame/store.h"
23 |
24 | namespace sling {
25 |
26 | // Global frame stores can be snapshot and saved to .snap files. These can then
27 | // be loaded into a new empty global store. For large stores, this is faster
28 | // than reading the frame store in encoded format.
29 | class Snapshot {
30 | public:
31 | // Filename for snapshot.
32 | static string Filename(const string &filename);
33 |
34 | // Check if there is a valid snapshot file for the store.
35 | static bool Valid(const string &filename);
36 |
37 | // Read snapshot into empty global store.
38 | static Status Read(Store *store, const string &filename);
39 |
40 | // Write store to snapshot file.
41 | static Status Write(Store *store, const string &filename);
42 |
43 | private:
44 | // Current magic and version for snapshots.
45 | static const int MAGIC = 0x50414e53;
46 | static const int VERSION = 2;
47 |
48 | // Snapshot file header.
49 | struct Header {
50 | int magic; // magic number for identifying snapshot file
51 | int version; // snapshot file format version
52 | int heaps; // number of heaps in snapshot
53 | int handles; // size of handle table
54 | Word symtab; // symbol table handle
55 | int symbols; // number of symbols in symbol table
56 | int buckets; // number of hash buckets in the symbol table
57 | int symheap; // heap for symbol table (-1 means no separate heap)
58 | };
59 | };
60 |
61 | } // namespace sling
62 |
63 | #endif // SLING_FRAME_SNAPSHOT_H_
64 |
65 |
--------------------------------------------------------------------------------
/sling/frame/wire.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_FRAME_WIRE_H_
16 | #define SLING_FRAME_WIRE_H_
17 |
18 | namespace sling {
19 |
20 | // Tag numbers for encoding objects in binary wire format. The wire type is
21 | // three bits, but the special tags can be up to 64-3=61 bits.
22 | enum WireType {
23 | WIRE_REF = 0, // reference to previous object (argument is refnum)
24 | WIRE_FRAME = 1, // frame (argument is the number of slots)
25 | WIRE_STRING = 2, // string (argument is the string length in bytes)
26 | WIRE_SYMBOL = 3, // unbound symbol (argument is the symbol name length)
27 | WIRE_LINK = 4, // bound symbol (argument is the symbol name length)
28 | WIRE_INTEGER = 5, // integer (argument is the integer value)
29 | WIRE_FLOAT = 6, // floating-point number (argument is the float value)
30 | WIRE_SPECIAL = 7, // special values
31 | };
32 |
33 | enum WireSpecial {
34 | WIRE_NIL = 1, // "nil" value
35 | WIRE_ID = 2, // "id" value
36 | WIRE_ISA = 3, // "isa" value
37 | WIRE_IS = 4, // "is" value
38 | WIRE_ARRAY = 5, // array, followed by array size and the arguments
39 | WIRE_INDEX = 6, // index value, followed by varint32 encoded integer
40 | WIRE_RESOLVE = 7, // resolve link, followed by slots and replacement index
41 | };
42 |
43 | // The binary marker (i.e. a nul character) is used for prefixing serialized
44 | // SLING objects to indicate that they are binary encoded. The textual encoding
45 | // will never contain a nul character. In binary encoding, a nul character is
46 | // decoded as REF(0). This will never be the first tag in a binary encoding
47 | // since initially there are no references to refer to.
48 | enum EncodingMarker {
49 | WIRE_BINARY_MARKER = 0,
50 | };
51 |
52 | } // namespace sling
53 |
54 | #endif // SLING_FRAME_WIRE_H_
55 |
56 |
--------------------------------------------------------------------------------
/sling/frame/xml.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_FRAME_XML_H_
16 | #define SLING_FRAME_XML_H_
17 |
18 | #include <vector>
19 |
20 | #include "sling/frame/object.h"
21 | #include "sling/frame/store.h"
22 | #include "sling/stream/input.h"
23 | #include "sling/web/xml-parser.h"
24 |
25 | namespace sling {
26 |
27 | // The XML reader parses XML input and converts it into frame format. Each XML
28 | // element is converted into a frame slot where the name is the XML tag name and
29 | // the value is the content of the XML element. XML attributes and child
30 | // elements are converted into slots in a sub-frame. If the XML element only
31 | // contains text, the value is just a string with the text.
32 | class XMLReader : public XMLParser {
33 | public:
34 | // Initializes XML reader with store and input.
35 | XMLReader(Store *store, Input *input)
36 | : store_(store), input_(input), slots_(store) {}
37 |
38 | // Parse XML input and return frame with content or nil on errors.
39 | Frame Read();
40 |
41 | private:
42 | // Callbacks from XML parser.
43 | bool StartElement(const XMLElement &element) override;
44 | bool EndElement(const char *name) override;
45 | bool Text(const char *str) override;
46 |
47 | // Object store.
48 | Store *store_;
49 |
50 | // Input with XML.
51 | Input *input_;
52 |
53 | // Stack with slots for the elements currently being parsed.
54 | Slots slots_;
55 |
56 | // Stack which marks the first slot for the elements being parsed.
57 | std::vector marks_;
58 | };
59 |
60 | } // namespace sling
61 |
62 | #endif // SLING_FRAME_XML_H_
63 |
64 |
--------------------------------------------------------------------------------
/sling/http/BUILD:
--------------------------------------------------------------------------------
# Build targets for the HTTP package. All targets are publicly visible.
package(default_visibility = ["//visibility:public"])

# HTTP utilities (http-utils.cc/.h).
cc_library(
    name = "http-utils",
    srcs = ["http-utils.cc"],
    hdrs = ["http-utils.h"],
    deps = [
        "//sling/base",
        "//sling/string:ctype",
    ],
)

# HTTP server (http-server.cc/.h); uses the HTTP utilities as well as the
# file, string number, mutex, and thread libraries.
cc_library(
    name = "http-server",
    srcs = ["http-server.cc"],
    hdrs = ["http-server.h"],
    deps = [
        ":http-utils",
        "//sling/base",
        "//sling/file",
        "//sling/string:numbers",
        "//sling/util:mutex",
        "//sling/util:thread",
    ],
)

# Input and output streams backed by HTTP buffers (http-stream.cc/.h).
cc_library(
    name = "http-stream",
    srcs = ["http-stream.cc"],
    hdrs = ["http-stream.h"],
    deps = [
        ":http-server",
        "//sling/stream",
    ],
)

# HTTP handler for serving static web content (static-content.cc/.h).
cc_library(
    name = "static-content",
    srcs = ["static-content.cc"],
    hdrs = ["static-content.h"],
    deps = [
        ":http-server",
        "//sling/base",
        "//sling/file",
    ],
)

# Web service support (web-service.cc/.h); depends on the HTTP server and
# stream libraries and on the frame decoder, encoder, JSON, printer, object,
# reader, and store libraries.
cc_library(
    name = "web-service",
    srcs = ["web-service.cc"],
    hdrs = ["web-service.h"],
    deps = [
        ":http-server",
        ":http-stream",
        "//sling/base",
        "//sling/frame:decoder",
        "//sling/frame:encoder",
        "//sling/frame:json",
        "//sling/frame:printer",
        "//sling/frame:object",
        "//sling/frame:reader",
        "//sling/frame:store",
        "//sling/string:text",
    ],
)
66 |
67 |
--------------------------------------------------------------------------------
/sling/http/http-stream.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/http/http-stream.h"
16 |
17 | #include "sling/http/http-server.h"
18 | #include "sling/stream/stream.h"
19 |
20 | namespace sling {
21 |
// Initializes input stream for reading from an HTTP buffer. Only the pointer
// to the buffer is stored.
HTTPInputStream::HTTPInputStream(HTTPBuffer *buffer) : buffer_(buffer) {}
23 |
24 | bool HTTPInputStream::Next(const void **data, int *size) {
25 | int n = buffer_->size();
26 | if (n > 0) {
27 | *data = buffer_->start;
28 | *size = n;
29 | buffer_->start = buffer_->end;
30 | return true;
31 | } else {
32 | return false;
33 | }
34 | }
35 |
// Moves the buffer start pointer back by count bytes. No bounds check is
// performed here.
void HTTPInputStream::BackUp(int count) {
  buffer_->start -= count;
}
39 |
40 | bool HTTPInputStream::Skip(int count) {
41 | int left = buffer_->size();
42 | if (count > left) {
43 | buffer_->start = buffer_->end;
44 | return false;
45 | } else {
46 | buffer_->start += count;
47 | return true;
48 | }
49 | }
50 |
// Returns the distance from the buffer floor to the current start pointer.
int64 HTTPInputStream::ByteCount() const {
  return buffer_->start - buffer_->floor;
}
54 |
// Initializes output stream for writing to an HTTP buffer. The block size is
// used by Next() both when expanding a full buffer and as the upper limit for
// the size of each chunk handed out.
HTTPOutputStream::HTTPOutputStream(HTTPBuffer *buffer, int block_size)
    : buffer_(buffer), block_size_(block_size) {}
57 |
58 | bool HTTPOutputStream::Next(void **data, int *size) {
59 | if (buffer_->full()) buffer_->ensure(block_size_);
60 |
61 | int n = buffer_->remaining();
62 | if (n > block_size_) n = block_size_;
63 | *data = buffer_->end;
64 | *size = n;
65 | buffer_->end += n;
66 | return true;
67 | }
68 |
// Moves the buffer end pointer back by count bytes. No bounds check is
// performed here.
void HTTPOutputStream::BackUp(int count) {
  buffer_->end -= count;
}
72 |
// Returns the current size of the HTTP buffer.
int64 HTTPOutputStream::ByteCount() const {
  return buffer_->size();
}
76 |
77 | } // namespace sling
78 |
79 |
--------------------------------------------------------------------------------
/sling/http/http-stream.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_HTTP_HTTP_STREAM_H_
16 | #define SLING_HTTP_HTTP_STREAM_H_
17 |
18 | #include "sling/http/http-server.h"
19 | #include "sling/stream/stream.h"
20 |
21 | namespace sling {
22 |
// An InputStream for reading from a HTTP buffer.
class HTTPInputStream : public InputStream {
 public:
  // Initializes input stream for reading from buffer.
  HTTPInputStream(HTTPBuffer *buffer);

  // InputStream interface.
  bool Next(const void **data, int *size) override;
  void BackUp(int count) override;
  bool Skip(int count) override;
  int64 ByteCount() const override;

 private:
  // HTTP buffer that the stream reads from.
  HTTPBuffer *buffer_;
};
37 |
// An OutputStream backed by a HTTP buffer.
class HTTPOutputStream : public OutputStream {
 public:
  // Initializes output stream for writing to buffer. The block size defaults
  // to 8192.
  HTTPOutputStream(HTTPBuffer *buffer, int block_size = 8192);

  // OutputStream interface.
  bool Next(void **data, int *size) override;
  void BackUp(int count) override;
  int64 ByteCount() const override;

 private:
  // HTTP buffer that the stream writes to.
  HTTPBuffer *buffer_;

  // Block size passed to the constructor.
  int block_size_;
};
52 |
53 | } // namespace sling
54 |
55 | #endif // SLING_HTTP_HTTP_STREAM_H_
56 |
57 |
--------------------------------------------------------------------------------
/sling/http/static-content.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_HTTP_STATIC_CONTENT_H_
16 | #define SLING_HTTP_STATIC_CONTENT_H_
17 |
18 | #include <string>
19 |
20 | #include "sling/base/types.h"
21 | #include "sling/http/http-server.h"
22 |
23 | namespace sling {
24 |
// HTTP handler for serving static web content.
class StaticContent {
 public:
  // Initialize handler for serving files from a directory.
  // NOTE(review): presumably url is the URL path under which the content is
  // served and path is the directory with the files; confirm in
  // static-content.cc.
  StaticContent(const string &url, const string &path);

  // Register handler with HTTP server.
  void Register(HTTPServer *http);

  // Serve static web content from directory.
  void HandleFile(HTTPRequest *request, HTTPResponse *response);

 private:
  // URL path for static content.
  string url_;

  // Directory with static web content to be served.
  string dir_;
};
44 |
45 | } // namespace sling
46 |
47 | #endif // SLING_HTTP_STATIC_CONTENT_H_
48 |
49 |
--------------------------------------------------------------------------------
/sling/myelin/compiler.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_COMPILER_H_
16 | #define SLING_MYELIN_COMPILER_H_
17 |
18 | #include "sling/myelin/compute.h"
19 | #include "sling/myelin/flow.h"
20 |
21 | namespace sling {
22 | namespace myelin {
23 |
// Myelin neural network JIT compiler for compiling a flow to a network.
class Compiler {
 public:
  // Initialize compiler.
  Compiler();
  ~Compiler();

  // Compile flow to network.
  void Compile(Flow *flow, Network *net);

  // Library with transformations and kernels for compilation.
  Library *library() { return library_; }

  // Custom runtime. This is null unless set_runtime() has been called.
  Runtime *runtime() const { return runtime_; }
  void set_runtime(Runtime *runtime) { runtime_ = runtime; }

  // Perf module FLOP counter support. This is enabled by default.
  bool perf_flopctr() const { return perf_flopctr_; }
  void set_perf_flopctr(bool enabled) { perf_flopctr_ = enabled; }

 private:
  // Write graph to output as DOT and/or SVG file.
  static void WriteGraph(const Flow &flow,
                         const string &dot,
                         const string &svg);

  // Compiler library with kernels, transformations, etc.
  Library *library_ = nullptr;

  // Custom runtime for generated network.
  Runtime *runtime_ = nullptr;

  // Enable perf FLOP counter.
  bool perf_flopctr_ = true;
};
60 |
61 | // Enable/disable CPU features for compiler.
62 | void SetCPUFeatures(const string &features);
63 |
64 | } // namespace myelin
65 | } // namespace sling
66 |
67 | #endif // SLING_MYELIN_COMPILER_H_
68 |
69 |
--------------------------------------------------------------------------------
/sling/myelin/cuda/BUILD:
--------------------------------------------------------------------------------
# Build targets for Myelin CUDA support. All targets are publicly visible.
package(default_visibility = ["//visibility:public"])

# CUDA API (cuda-api.cc/.h). Linked with -ldl.
# NOTE(review): presumably -ldl is needed for loading the CUDA library
# dynamically; confirm in cuda-api.cc.
cc_library(
    name = "cuda-api",
    srcs = ["cuda-api.cc"],
    hdrs = ["cuda-api.h"],
    deps = [
        "//sling/base",
    ],
    linkopts = [
        "-ldl",
    ],
)

# CUDA library (cuda.cc/.h) built on top of the CUDA API target.
cc_library(
    name = "cuda",
    srcs = ["cuda.cc"],
    hdrs = ["cuda.h"],
    deps = [
        ":cuda-api",
        "//sling/base",
    ],
)

# CUDA runtime (cuda-runtime.cc/.h); depends on the Myelin compute library.
cc_library(
    name = "cuda-runtime",
    srcs = ["cuda-runtime.cc"],
    hdrs = ["cuda-runtime.h"],
    deps = [
        ":cuda",
        "//sling/base",
        "//sling/myelin:compute",
    ],
)

# CUDA kernel support (cuda-kernel.cc/.h); depends on the CUDA runtime.
cc_library(
    name = "cuda-kernel",
    srcs = ["cuda-kernel.cc"],
    hdrs = ["cuda-kernel.h"],
    deps = [
        ":cuda",
        ":cuda-runtime",
        "//sling/base",
        "//sling/myelin:compute",
    ],
)
47 |
48 |
--------------------------------------------------------------------------------
/sling/myelin/generator/BUILD:
--------------------------------------------------------------------------------
# Build targets for the Myelin code generators. All targets are publicly
# visible.
package(default_visibility = ["//visibility:public"])

# Index generator (index.cc/.h).
cc_library(
    name = "index",
    srcs = ["index.cc"],
    hdrs = ["index.h"],
    deps = [
        "//sling/base",
        "//sling/myelin:compute",
        "//sling/myelin:express",
    ],
)

# Element-wise generator (elementwise.cc/.h) built on the index generator.
cc_library(
    name = "elementwise",
    srcs = ["elementwise.cc"],
    hdrs = ["elementwise.h"],
    deps = [
        ":index",
    ],
)

# Expression generator. Besides expression.cc, the sources comprise scalar and
# vector float generators for SSE and AVX (128, 256, and 512) and scalar and
# vector integer generators for SSE and AVX (128 and 256).
cc_library(
    name = "expression",
    srcs = [
        "expression.cc",
        "scalar-flt-sse.cc",
        "scalar-flt-avx.cc",
        "vector-flt-sse.cc",
        "vector-flt-avx128.cc",
        "vector-flt-avx256.cc",
        "vector-flt-avx512.cc",
        "scalar-int.cc",
        "vector-int-sse.cc",
        "vector-int-avx128.cc",
        "vector-int-avx256.cc",
    ],
    hdrs = ["expression.h"],
    deps = [
        ":index",
        "//sling/base",
        "//sling/myelin:compute",
        "//sling/myelin:express",
    ],
)
46 |
47 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/arithmetic.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_ARITHMETIC_H_
16 | #define SLING_MYELIN_KERNEL_ARITHMETIC_H_
17 |
18 | #include "sling/myelin/compute.h"
19 | #include "sling/myelin/express.h"
20 |
21 | namespace sling {
22 | namespace myelin {
23 |
24 | // Register arithmetic library.
25 | void RegisterArithmeticLibrary(Library *library);
26 |
27 | // Register arithmetic transforms.
28 | void RegisterArithmeticTransforms(Library *library);
29 |
30 | // Initialize expression for step.
31 | void InitExpression(const Step *step, Express *expr);
32 |
33 | } // namespace myelin
34 | } // namespace sling
35 |
36 | #endif // SLING_MYELIN_KERNEL_ARITHMETIC_H_
37 |
38 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/avx.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/myelin/kernel/avx.h"
16 |
17 | #include "sling/myelin/compute.h"
18 |
19 | namespace sling {
20 | namespace myelin {
21 |
22 | // avx-math.cc
23 | void RegisterAVXMath(Library *library);
24 |
25 | // avx-matmul.cc
26 | void RegisterAVXMatMul(Library *library);
27 |
28 | // avx-operators.cc
29 | void RegisterAVXOperators(Library *library);
30 |
31 | // simd-matmul.cc
32 | void RegisterSIMDMatMulLibrary(Library *library);
33 |
34 | // Register AVX library.
35 | void RegisterAVXLibrary(Library *library) {
36 | RegisterAVXMath(library);
37 | RegisterSIMDMatMulLibrary(library);
38 | RegisterAVXMatMul(library);
39 | RegisterAVXOperators(library);
40 | }
41 |
42 | } // namespace myelin
43 | } // namespace sling
44 |
45 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/avx.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_AVX_H_
16 | #define SLING_MYELIN_KERNEL_AVX_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
// AVX vectors. A FloatVec8 is an array of eight floats aligned on a 32-byte
// boundary, and CONST8(x) expands to an initializer that repeats x in all
// eight elements.
typedef float FloatVec8[8] __attribute__ ((aligned (32)));
#define CONST8(x) {x, x, x, x, x, x, x, x}
26 |
27 | // Register AVX library.
28 | void RegisterAVXLibrary(Library *library);
29 |
30 | } // namespace myelin
31 | } // namespace sling
32 |
33 | #endif // SLING_MYELIN_KERNEL_AVX_H_
34 |
35 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/cuda.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/myelin/compute.h"
16 |
17 | namespace sling {
18 | namespace myelin {
19 |
20 | // cuda-matmul.cc
21 | void RegisterCUDAMatMulLibrary(Library *library);
22 |
23 | // cublas-matmul.cc
24 | void RegisterCUBLASMatMulLibrary(Library *library);
25 |
26 | // cuda-arithmetic.cc
27 | void RegisterCUDAArithmeticLibrary(Library *library);
28 |
29 | // cuda-array.cc
30 | void RegisterCUDAArrayLibrary(Library *library);
31 |
32 | // Register CUDA kernels.
33 | void RegisterCUDALibrary(Library *library) {
34 | RegisterCUDAMatMulLibrary(library);
35 | RegisterCUBLASMatMulLibrary(library);
36 | RegisterCUDAArithmeticLibrary(library);
37 | RegisterCUDAArrayLibrary(library);
38 | }
39 |
40 | } // namespace myelin
41 | } // namespace sling
42 |
43 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/cuda.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_CUDA_H_
16 | #define SLING_MYELIN_KERNEL_CUDA_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
23 | // Register CUDA kernels.
24 | void RegisterCUDALibrary(Library *library);
25 |
26 | } // namespace myelin
27 | } // namespace sling
28 |
29 | #endif // SLING_MYELIN_KERNEL_CUDA_H_
30 |
31 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/dragnn.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_DRAGNN_H_
16 | #define SLING_MYELIN_KERNEL_DRAGNN_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
23 | // Register Dragnn library.
24 | void RegisterDragnnLibrary(Library *library);
25 |
26 | } // namespace myelin
27 | } // namespace sling
28 |
29 | #endif // SLING_MYELIN_KERNEL_DRAGNN_H_
30 |
31 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/generic.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_GENERIC_H_
16 | #define SLING_MYELIN_KERNEL_GENERIC_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
23 | // Register generic transforms.
24 | void RegisterGenericTransforms(Library *library);
25 |
26 | // Register generic library.
27 | void RegisterGenericLibrary(Library *library);
28 |
29 | } // namespace myelin
30 | } // namespace sling
31 |
32 | #endif // SLING_MYELIN_KERNEL_GENERIC_H_
33 |
34 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/gradients.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_GRADIENTS_H_
16 | #define SLING_MYELIN_KERNEL_GRADIENTS_H_
17 |
18 | #include "sling/myelin/gradient.h"
19 | #include "sling/myelin/flow.h"
20 |
21 | namespace sling {
22 | namespace myelin {
23 |
24 | // Register gradient functions for standard ops.
25 | void RegisterStandardGradients();
26 |
27 | } // namespace myelin
28 | } // namespace sling
29 |
30 | #endif // SLING_MYELIN_KERNEL_GRADIENTS_H_
31 |
32 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/mkl.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_MKL_H_
16 | #define SLING_MYELIN_KERNEL_MKL_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
23 | // Register MKL library.
24 | void RegisterMKLLibrary(Library *library);
25 |
26 | } // namespace myelin
27 | } // namespace sling
28 |
29 | #endif // SLING_MYELIN_KERNEL_MKL_H_
30 |
31 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/precompute.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_PRECOMPUTE_H_
16 | #define SLING_MYELIN_KERNEL_PRECOMPUTE_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
23 | // Register precompute library.
24 | void RegisterPrecomputeLibrary(Library *library);
25 |
26 | } // namespace myelin
27 | } // namespace sling
28 |
29 | #endif // SLING_MYELIN_KERNEL_PRECOMPUTE_H_
30 |
31 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/sse.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/myelin/kernel/sse.h"
16 |
17 | #include "sling/myelin/compute.h"
18 |
19 | namespace sling {
20 | namespace myelin {
21 |
22 | // sse-matmul.cc
23 | void RegisterSSEMatMul(Library *library);
24 |
// Register SSE library. The SSE matmul registration is the only component
// registered here.
void RegisterSSELibrary(Library *library) {
  RegisterSSEMatMul(library);
}
29 |
30 | } // namespace myelin
31 | } // namespace sling
32 |
33 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/sse.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_SSE_H_
16 | #define SLING_MYELIN_KERNEL_SSE_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
// SSE vectors. A FloatVec4 is an array of four floats (16 bytes) aligned on a
// 16-byte boundary, and CONST4(x) expands to an initializer that repeats x in
// all four elements.
typedef float FloatVec4[4] __attribute__ ((aligned (16)));
#define CONST4(x) {x, x, x, x}
26 |
27 | // Register SSE library.
28 | void RegisterSSELibrary(Library *library);
29 |
30 | } // namespace myelin
31 | } // namespace sling
32 |
33 | #endif // SLING_MYELIN_KERNEL_SSE_H_
34 |
35 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/tensorflow.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/myelin/kernel/tensorflow.h"
16 |
17 | #include <mutex>
18 |
19 | #include "sling/myelin/compute.h"
20 | #include "sling/myelin/kernel/arithmetic.h"
21 | #include "sling/myelin/kernel/avx.h"
22 | #include "sling/myelin/kernel/generic.h"
23 | #include "sling/myelin/kernel/gradients.h"
24 | #include "sling/myelin/kernel/sse.h"
25 | #include "sling/myelin/kernel/precompute.h"
26 |
27 | namespace sling {
28 | namespace myelin {
29 |
30 | static std::once_flag gradients_initialized;
31 |
32 | // Register Tensorflow ops.
33 | void RegisterTensorflowLibrary(Library *library) {
34 | RegisterArithmeticTransforms(library);
35 | RegisterGenericLibrary(library);
36 | RegisterSSELibrary(library);
37 | RegisterAVXLibrary(library);
38 | RegisterArithmeticLibrary(library);
39 | RegisterPrecomputeLibrary(library);
40 | RegisterGenericTransforms(library);
41 |
42 | std::call_once(gradients_initialized, RegisterStandardGradients);
43 | }
44 |
45 | } // namespace myelin
46 | } // namespace sling
47 |
48 |
--------------------------------------------------------------------------------
/sling/myelin/kernel/tensorflow.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_KERNEL_TENSORFLOW_H_
16 | #define SLING_MYELIN_KERNEL_TENSORFLOW_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | namespace sling {
21 | namespace myelin {
22 |
// Register the Tensorflow op library, i.e. add the transformations and kernels
// for Tensorflow ops to the library.
void RegisterTensorflowLibrary(Library *library);
25 |
26 | } // namespace myelin
27 | } // namespace sling
28 |
29 | #endif // SLING_MYELIN_KERNEL_TENSORFLOW_H_
30 |
31 |
--------------------------------------------------------------------------------
/sling/myelin/multi-process.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_MYELIN_MULTI_PROCESS_H_
16 | #define SLING_MYELIN_MULTI_PROCESS_H_
17 |
18 | #include "sling/myelin/compute.h"
19 |
20 | #include
21 | #include
22 | #include
23 |
24 | namespace sling {
25 | namespace myelin {
26 |
27 | class Worker;
28 |
29 | // Myelin runtime for multi-processor execution.
30 | class MultiProcessorRuntime : public Runtime {
31 | public:
32 | ~MultiProcessorRuntime();
33 | string Description() override { return "Multi-processor"; }
34 |
35 | // Instance data allocation.
36 | void AllocateInstance(Instance *instance) override;
37 | void FreeInstance(Instance *instance) override;
38 | void ClearInstance(Instance *instance) override;
39 |
40 | // Channel allocation.
41 | char *AllocateChannel(char *data,
42 | size_t old_size,
43 | size_t new_size,
44 | size_t alignment,
45 | Placement placement) override;
46 | void ClearChannel(char *data, size_t pos,
47 | size_t size,
48 | Placement placement) override;
49 | void FreeChannel(char *data, Placement placement) override;
50 |
51 | // Multi-processor runtime support.
52 | bool SupportsAsync() override { return true; }
53 | TaskFunc StartTaskFunc() override;
54 | TaskFunc WaitTaskFunc() override;
55 |
56 | private:
57 | // Mutex for synchronizing access to worker pool.
58 | std::mutex mu_;
59 |
60 | // Worker pool.
61 | std::vector workers_;
62 | };
63 |
64 | } // namespace myelin
65 | } // namespace sling
66 |
67 | #endif // SLING_MYELIN_MULTI_PROCESS_H_
68 |
69 |
--------------------------------------------------------------------------------
/sling/myelin/tests/runall.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2018 Google Inc.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Run all Myelin tests.
18 |
# Test program. It is kept in an array so that the command and its arguments
# are expanded without depending on word splitting.
TESTPGM=(python3 sling/myelin/tests/opcheck.py)

# Extra command-line arguments are forwarded to every test run. They are kept
# in an array so that arguments containing whitespace or glob characters are
# passed on unchanged.
EXTRA=("$@")

# Determine CPU feature support. Each variable is non-empty if /proc/cpuinfo
# has a line matching the feature name. These probes run before "set -e" so a
# missing feature does not abort the script.
AVX512=$(grep avx512 /proc/cpuinfo)
FMA=$(grep fma /proc/cpuinfo)
AVX2=$(grep avx2 /proc/cpuinfo)
AVX=$(grep avx /proc/cpuinfo)

# Run the test program with the given options followed by the extra arguments.
runtest() {
  "${TESTPGM[@]}" "$@" "${EXTRA[@]}"
}

# Run all CPU tests for data type. The tests are first run with all CPU
# features and then re-run with the features supported by this machine
# successively disabled.
testcpu() {
  DT=$1
  echo "Test data type $DT"
  runtest --dt "$DT"

  if [[ -n $AVX512 ]]; then
    echo "Test data type $DT without AVX512"
    runtest --dt "$DT" --cpu=-avx512
  fi
  if [[ -n $FMA ]]; then
    echo "Test data type $DT without FMA3"
    runtest --dt "$DT" --cpu=-avx512-fma3
  fi
  if [[ -n $AVX2 ]]; then
    echo "Test data type $DT without AVX2"
    runtest --dt "$DT" --cpu=-avx512-avx2
    if [[ -n $FMA ]]; then
      echo "Test data type $DT without AVX2 and FMA3"
      runtest --dt "$DT" --cpu=-avx512-fma3-avx2
    fi
  fi
  if [[ -n $AVX ]]; then
    echo "Test data type $DT without AVX"
    runtest --dt "$DT" --cpu=-avx512-fma3-avx2-avx
  fi
}

# Run all GPU tests for data type.
testgpu() {
  DT=$1
  echo "Test data type $DT on GPU"
  runtest --gpu --dt "$DT"
}

# Stop on errors.
set -e

# Test float types on CPU.
testcpu float32
testcpu float64

# Test integer types on CPU.
testcpu int8
testcpu int16
testcpu int32
testcpu int64

# Test on GPU if CUDA is installed.
if [ -f /usr/lib/x86_64-linux-gnu/libcuda.so.1 ]; then
  # Test float types on GPU.
  testgpu float32
  testgpu float64

  # Test integer types on GPU.
  testgpu int16
  testgpu int32
  testgpu int64
fi

echo "==== ALL TESTS PASSED ====="
89 |
90 |
--------------------------------------------------------------------------------
/sling/nlp/document/app/analyzer.css:
--------------------------------------------------------------------------------
/* Application root fills its container. */
#app {
  width: 100%;
  height: 100%;
}

/* Main content area: light grey background, fills its container and clips
   overflowing content. With border-box sizing the padding is included in the
   100% width and height. */
#main {
  background-color: #eeeeee;
  width: 100%;
  height: 100%;
  overflow: hidden;
  padding: 10px;
  box-sizing: border-box;
}

/* Element with id "docid": black text on a white background. */
#docid {
  background-color: #FFF;
  color: #000;
  padding: 10px;
}

/* Editor elements fill their container; padding is included in the size. */
.editor {
  width: 100%;
  height: 100%;
  box-sizing: border-box;
  padding: 10px;
}
27 |
28 |
--------------------------------------------------------------------------------
/sling/nlp/document/app/analyzer.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | SLING document analyzer
7 |
8 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/sling/nlp/document/app/corpus.css:
--------------------------------------------------------------------------------
/* Application root fills its container. */
#app {
  width: 100%;
  height: 100%;
}

/* Main content area: light grey background, fills its container and clips
   overflowing content. With border-box sizing the padding is included in the
   100% width and height. */
#main {
  background-color: #eeeeee;
  width: 100%;
  height: 100%;
  overflow: hidden;
  padding: 10px;
  box-sizing: border-box;
}

/* Element with id "docid": black text on a white background. */
#docid {
  background-color: #FFF;
  color: #000;
  padding: 10px;
}
20 |
21 |
--------------------------------------------------------------------------------
/sling/nlp/document/app/corpus.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Corpus browser
7 |
8 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/sling/nlp/document/app/corpus.js:
--------------------------------------------------------------------------------
1 | import {Component, h, render} from "/common/external/preact.js";
2 | import {Layout, TextField, Button, Icon} from "/common/lib/mdl.js";
3 | import {Document, DocumentViewer} from "/common/lib/docview.js";
4 | import {stylesheet} from "/common/lib/util.js";
5 |
6 | stylesheet("/doc/corpus.css");
7 |
// Corpus browser application. It shows one document at a time and lets the
// user look up a document by id or step backward and forward.
class App extends Component {
  constructor(props) {
    super(props);
    // Document currently shown; null until the first fetch has completed.
    this.state = { document: null };
  }

  // Fetch a document in JSON format from url and display it. If the request
  // fails, the error is logged and the current document is left unchanged.
  update(url) {
    fetch(url)
      .then(response => {
        if (!response.ok) {
          // A Response has no "message" property; report the status text.
          console.log("fetch error", response.status, response.statusText);
          return null;
        }
        return response.json();
      })
      .then(data => {
        // Do not build a document from a failed request.
        if (data !== null) {
          this.setState({document: new Document(data)});
        }
      })
      .catch(error => {
        // Network failures and malformed JSON end up here.
        console.log("fetch error", error);
      });
  }

  // Look up the document whose id was entered in the search field.
  search(e) {
    const docid = e.target.value;
    if (docid) {
      // Escape the id so characters like '&' or '#' do not break the query.
      this.update("/fetch?docid=" + encodeURIComponent(docid) + "&fmt=cjson");
    }
  }

  // Request the document returned by the /forward endpoint.
  forward(e) {
    this.update("/forward?fmt=cjson");
  }

  // Request the document returned by the /back endpoint.
  back(e) {
    this.update("/back?fmt=cjson");
  }

  // Render header with title, document id search field and navigation
  // buttons, a drawer, and the document viewer for the current document.
  render(props, state) {
    return (
      h("div", {id: "app"},
        h(Layout, null,
          h(Layout.Header, null,
            h(Layout.HeaderRow, null,
              h(Layout.Title, null, "Corpus Browser"),
              h(Layout.Spacer),
              h(TextField, {
                id: "docid",
                placeholder: "Document ID",
                type: "search",
                value: state.document ? state.document.key : "",
                onsearch: e => this.search(e),
              }),
              h(Button, {icon: true, onclick: e => this.back(e)},
                // The Material icon is named "arrow_back"; there is no
                // "arrow_backward" icon.
                h(Icon, {icon: "arrow_back"})
              ),
              h(Button, {icon: true, onclick: e => this.forward(e)},
                h(Icon, {icon: "arrow_forward"})
              )
            )
          ),
          h(Layout.Drawer, null, h(Layout.Title, null, "Menu")),
          h(Layout.DrawerButton),

          h(Layout.Content, {id: "main"},
            h(DocumentViewer, {document: state.document})
          )
        )
      )
    );
  }
}
79 |
80 | render(h(App), document.body);
81 |
--------------------------------------------------------------------------------
/sling/nlp/document/document-corpus.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/document/document-corpus.h"
16 |
17 | #include "sling/frame/serialization.h"
18 |
19 | namespace sling {
20 | namespace nlp {
21 |
22 | DocumentCorpus::DocumentCorpus(Store *commons, const string &filepattern)
23 | : corpus_(filepattern, RecordFileOptions()) {
24 | docnames_ = commons->frozen() ? nullptr : new DocumentNames(commons);
25 | }
26 |
27 | DocumentCorpus::DocumentCorpus(Store *commons,
28 | const std::vector &filenames)
29 | : corpus_(filenames, RecordFileOptions()) {
30 | docnames_ = commons->frozen() ? nullptr : new DocumentNames(commons);
31 | }
32 |
33 | DocumentCorpus::~DocumentCorpus() {
34 | if (docnames_ != nullptr) docnames_->Release();
35 | }
36 |
37 | Document *DocumentCorpus::Next(Store *store) {
38 | // Return null if there are no more document.
39 | if (corpus_.Done()) return nullptr;
40 |
41 | // Read next record.
42 | Record record;
43 | CHECK(corpus_.Next(&record));
44 |
45 | // Decode document frame.
46 | ArrayInputStream stream(record.value.data(), record.value.size());
47 | InputParser parser(store, &stream);
48 | Frame frame = parser.Read().AsFrame();
49 | CHECK(frame.valid());
50 |
51 | // Return new document.
52 | return new Document(frame, docnames_);
53 | }
54 |
// Rewinds the underlying record database so the next call to Next() starts
// over. Fails a CHECK if the rewind is unsuccessful.
void DocumentCorpus::Rewind() {
  CHECK(corpus_.Rewind());
}
58 |
59 | } // namespace nlp
60 | } // namespace sling
61 |
62 |
--------------------------------------------------------------------------------
/sling/nlp/document/document-corpus.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_DOCUMENT_DOCUMENT_CORPUS_H_
16 | #define SLING_NLP_DOCUMENT_DOCUMENT_CORPUS_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "sling/file/recordio.h"
22 | #include "sling/frame/store.h"
23 | #include "sling/nlp/document/document.h"
24 |
25 | namespace sling {
26 | namespace nlp {
27 |
28 | // A document corpus is a set of record files with SLING-encoded documents.
29 | class DocumentCorpus {
30 | public:
31 | // Initialize document corpus.
32 | DocumentCorpus(Store *commons, const string &filepattern);
33 | DocumentCorpus(Store *commons, const std::vector &filenames);
34 | ~DocumentCorpus();
35 |
36 | // Read next document into store and return it or null of there are no
37 | // more document. The returned document is owned by the caller.
38 | Document *Next(Store *store);
39 |
40 | // Rewind to the start of the corpus.
41 | void Rewind();
42 |
43 | private:
44 | // Record files with documents.
45 | RecordDatabase corpus_;
46 |
47 | // Document schema.
48 | DocumentNames *docnames_;
49 | };
50 |
51 | } // namespace nlp
52 | } // namespace sling
53 |
54 | #endif // SLING_NLP_DOCUMENT_DOCUMENT_CORPUS_H_
55 |
--------------------------------------------------------------------------------
/sling/nlp/document/document-tokenizer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/document/document-tokenizer.h"
16 |
17 | #include "sling/base/types.h"
18 | #include "sling/nlp/document/document.h"
19 | #include "sling/nlp/document/text-tokenizer.h"
20 | #include "sling/string/text.h"
21 |
22 | namespace sling {
23 | namespace nlp {
24 |
// Sets up the underlying text tokenizer by calling its InitLDC() method.
DocumentTokenizer::DocumentTokenizer() {
  // Initialize tokenizer.
  tokenizer_.InitLDC();
}
29 |
// Sets the text of the document and then tokenizes the document.
void DocumentTokenizer::Tokenize(Document *document, Text text) const {
  document->SetText(text);
  Tokenize(document);
}
34 |
35 | void DocumentTokenizer::Tokenize(Document *document) const {
36 | const string &text = document->text();
37 | tokenizer_.Tokenize(text,
38 | [document](const Tokenizer::Token &t) {
39 | document->AddToken(t.text, t.begin, t.end, t.brk, t.style);
40 | }
41 | );
42 | }
43 |
44 | } // namespace nlp
45 | } // namespace sling
46 |
47 |
--------------------------------------------------------------------------------
/sling/nlp/document/document-tokenizer.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_DOCUMENT_DOCUMENT_TOKENIZER_H_
16 | #define SLING_NLP_DOCUMENT_DOCUMENT_TOKENIZER_H_
17 |
18 | #include "sling/nlp/document/document.h"
19 | #include "sling/nlp/document/text-tokenizer.h"
20 | #include "sling/string/text.h"
21 |
22 | namespace sling {
23 | namespace nlp {
24 |
// Tokenizer that adds tokens to documents using a text tokenizer.
class DocumentTokenizer {
 public:
  // Initialize the underlying text tokenizer.
  DocumentTokenizer();

  // Set the document text and add tokens for the text to the document.
  void Tokenize(Document *document, Text text) const;

  // Tokenize the text that is already in the document.
  void Tokenize(Document *document) const;

 private:
  // Text tokenizer.
  Tokenizer tokenizer_;
};
39 |
40 | } // namespace nlp
41 | } // namespace sling
42 |
43 | #endif // SLING_NLP_DOCUMENT_DOCUMENT_TOKENIZER_H_
44 |
45 |
--------------------------------------------------------------------------------
/sling/nlp/document/features.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/document/features.h"
16 |
17 | #include "sling/base/types.h"
18 | #include "sling/nlp/document/document.h"
19 | #include "sling/util/unicode.h"
20 |
21 | namespace sling {
22 | namespace nlp {
23 |
24 | void DocumentFeatures::Extract(const Document &document, int begin, int end) {
25 | if (end == -1) end = document.num_tokens();
26 | int length = end - begin;
27 | features_.resize(length);
28 | bool in_quote = false;
29 | for (int i = 0; i < length; ++i) {
30 | const string &word = document.token(begin + i).word();
31 | TokenFeatures &f = features_[i];
32 |
33 | // Look up token word in lexicon and get word features.
34 | f.word = lexicon_->Lookup(word, &f.prefix, &f.suffix, &f.shape);
35 |
36 | // Re-compute context-sensitive features.
37 | if (i == 0 || document.token(i).brk() >= SENTENCE_BREAK) {
38 | if (f.shape.capitalization == WordShape::CAPITALIZED) {
39 | f.shape.capitalization = WordShape::INITIAL;
40 | }
41 | }
42 | if (f.shape.quote == WordShape::UNKNOWN_QUOTE) {
43 | f.shape.quote = in_quote ? WordShape::CLOSE_QUOTE : WordShape::OPEN_QUOTE;
44 | in_quote = !in_quote;
45 | }
46 | }
47 | }
48 |
49 | } // namespace nlp
50 | } // namespace sling
51 |
52 |
--------------------------------------------------------------------------------
/sling/nlp/document/fingerprinter.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/document/fingerprinter.h"
16 |
17 | #include
18 |
19 | #include "sling/util/unicode.h"
20 |
21 | namespace sling {
22 | namespace nlp {
23 |
24 | uint64 Fingerprinter::Fingerprint(Text word, Normalization normalization) {
25 | // Normalize string.
26 | string normalized;
27 | UTF8::Normalize(word.data(), word.size(), normalization, &normalized);
28 |
29 | // Ignore degenerate words.
30 | if (normalized.empty()) return 1;
31 |
32 | // Return fingerprint for normalized word.
33 | return Hash(normalized);
34 | }
35 |
36 | uint64 Fingerprinter::Fingerprint(Text word, uint64 seed,
37 | Normalization normalization) {
38 | uint64 fp = Fingerprint(word, normalization);
39 | return fp == 1 ? seed : Mix(fp, seed);
40 | }
41 |
42 | uint64 Fingerprinter::Fingerprint(const std::vector &words,
43 | Normalization normalization) {
44 | uint64 fp = 1;
45 | for (const Text &word : words) {
46 | uint64 word_fp = Fingerprint(word, normalization);
47 | if (word_fp == 1) continue;
48 | fp = Mix(word_fp, fp);
49 | }
50 | return fp;
51 | }
52 |
53 | } // namespace nlp
54 | } // namespace sling
55 |
56 |
--------------------------------------------------------------------------------
/sling/nlp/document/lex.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // LEX is a light-weight frame annotation format for text.
16 |
17 | #ifndef SLING_NLP_DOCUMENT_LEX_H_
18 | #define SLING_NLP_DOCUMENT_LEX_H_
19 |
20 | #include "sling/base/types.h"
21 | #include "sling/nlp/document/document.h"
22 | #include "sling/nlp/document/document-tokenizer.h"
23 |
24 | namespace sling {
25 | namespace nlp {
26 |
// Lexer for adding LEX-encoded text and annotations to documents.
class DocumentLexer {
 public:
  // Initialize document lexer. The tokenizer is stored as a pointer;
  // presumably it must outlive the lexer.
  DocumentLexer(const DocumentTokenizer *tokenizer) : tokenizer_(tokenizer) {}

  // Parse text in LEX format and add text and annotations to document.
  // NOTE(review): the boolean result presumably signals success; confirm
  // against the implementation.
  bool Lex(Document *document, Text lex) const;

 private:
  // Markable span in LEX-encoded text.
  struct Markable {
    // Create markable starting at position pos. The end position and the
    // object number are initially -1.
    Markable(int pos) : begin(pos) {}

    // Range of bytes in plain text covering the span.
    int begin;
    int end = -1;

    // Annotation object number.
    int object = -1;
  };

  // Document tokenizer.
  const DocumentTokenizer *tokenizer_;
};
50 |
// Convert document to LEX format and return it as a string.
string ToLex(const Document &document);
53 |
54 | } // namespace nlp
55 | } // namespace sling
56 |
57 | #endif // SLING_NLP_DOCUMENT_LEX_H_
58 |
59 |
--------------------------------------------------------------------------------
/sling/nlp/document/phrase-tokenizer.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_DOCUMENT_PHRASE_TOKENIZER_H_
16 | #define SLING_NLP_DOCUMENT_PHRASE_TOKENIZER_H_
17 |
18 | #include "sling/base/types.h"
19 | #include "sling/nlp/document/text-tokenizer.h"
20 | #include "sling/nlp/document/fingerprinter.h"
21 | #include "sling/string/text.h"
22 | #include "sling/util/unicode.h"
23 |
24 | namespace sling {
25 | namespace nlp {
26 |
27 | class PhraseTokenizer {
28 | public:
29 | PhraseTokenizer();
30 |
31 | // Tokenize phrase into tokens.
32 | void Tokenize(Text text, std::vector *tokens) const;
33 |
34 | // Tokenize phrase and return token fingerprints for each token.
35 | uint64 TokenFingerprints(Text text, std::vector *tokens) const;
36 |
37 | // Compute fingerprint for phrase.
38 | uint64 Fingerprint(Text text) const;
39 |
40 | // Compute fingerprint and case form for phrase.
41 | void FingerprintAndForm(Text text, uint64 *fingerprint, CaseForm *form) const;
42 |
43 | // Set/get phrase normalization flags.
44 | Normalization normalization() const { return normalization_; }
45 | void set_normalization(Normalization normalization) {
46 | normalization_ = normalization;
47 | }
48 |
49 | private:
50 | // Phrase text normalization.
51 | Normalization normalization_ = NORMALIZE_DEFAULT;
52 |
53 | // Text tokenizer.
54 | Tokenizer tokenizer_;
55 | };
56 |
57 | } // namespace nlp
58 | } // namespace sling
59 |
60 | #endif // SLING_NLP_DOCUMENT_PHRASE_TOKENIZER_H_
61 |
62 |
--------------------------------------------------------------------------------
/sling/nlp/kb/app/appicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/nlp/kb/app/appicon.ico
--------------------------------------------------------------------------------
/sling/nlp/kb/knowledge-server.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/flags.h"
16 | #include "sling/base/init.h"
17 | #include "sling/base/logging.h"
18 | #include "sling/frame/serialization.h"
19 | #include "sling/http/http-server.h"
20 | #include "sling/nlp/kb/knowledge-service.h"
21 |
22 | DEFINE_int32(port, 8080, "HTTP server port");
23 | DEFINE_string(kb, "local/data/e/wiki/kb.sling", "Knowledge base");
24 | DEFINE_string(names, "local/data/e/wiki/en/name-table.repo", "Name table");
25 |
26 | using namespace sling;
27 | using namespace sling::nlp;
28 |
29 | int main(int argc, char *argv[]) {
30 | InitProgram(&argc, &argv);
31 |
32 | LOG(INFO) << "Loading knowledge base from " << FLAGS_kb;
33 | Store commons;
34 | LoadStore(FLAGS_kb, &commons);
35 |
36 | LOG(INFO) << "Start HTTP server on port " << FLAGS_port;
37 | HTTPServerOptions options;
38 | HTTPServer http(options, FLAGS_port);
39 |
40 | KnowledgeService kb;
41 | kb.Load(&commons, FLAGS_names);
42 | commons.Freeze();
43 |
44 | kb.Register(&http);
45 | http.Register("/", [](HTTPRequest *req, HTTPResponse *rsp) {
46 | rsp->TempRedirectTo("/kb");
47 | });
48 |
49 | CHECK(http.Start());
50 |
51 | LOG(INFO) << "HTTP server running";
52 | http.Wait();
53 |
54 | LOG(INFO) << "HTTP server done";
55 | return 0;
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/sling/nlp/parser/action-table.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_PARSER_ACTION_TABLE_H_
16 | #define SLING_NLP_PARSER_ACTION_TABLE_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "sling/base/types.h"
22 | #include "sling/frame/object.h"
23 | #include "sling/frame/store.h"
24 | #include "sling/nlp/parser/parser-action.h"
25 |
26 | namespace sling {
27 | namespace nlp {
28 |
29 | // The action table is a set of parser actions indexed by id.
30 | class ActionTable {
31 | public:
32 | // Add action to the table.
33 | void Add(const ParserAction &action);
34 |
35 | // Return the index of action.
36 | int Index(const ParserAction &action) const {
37 | const auto &it = mapping_.find(action);
38 | return it == mapping_.end() ? -1 : it->second;
39 | }
40 |
41 | // Return the number of parser actions.
42 | int size() const { return actions_.size(); }
43 |
44 | // Return the ith parser action.
45 | const ParserAction &Action(int index) const { return actions_[index]; }
46 |
47 | // Return list of actions.
48 | const std::vector &list() const { return actions_; }
49 |
50 | // Read action table from frame.
51 | void Read(const Frame &frame);
52 |
53 | // Write action table to frame.
54 | void Write(Builder *frame) const;
55 |
56 | private:
57 | // List of actions.
58 | std::vector actions_;
59 |
60 | // Mapping from parser action to index.
61 | std::unordered_map mapping_;
62 | };
63 |
64 | } // namespace nlp
65 | } // namespace sling
66 |
67 | #endif // SLING_NLP_PARSER_ACTION_TABLE_H_
68 |
--------------------------------------------------------------------------------
/sling/nlp/parser/multiclass-delegate.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/parser/action-table.h"
16 | #include "sling/nlp/parser/parser.h"
17 |
18 | namespace sling {
19 | namespace nlp {
20 |
21 | using namespace myelin;
22 |
23 | // Deletegate for fixed action classification.
24 | class MultiClassDelegate : public Delegate {
25 | public:
26 | void Initialize(const Network &network, const Frame &spec) override {
27 | cell_ = network.GetCell(spec.GetString("cell"));
28 | input_ = cell_->GetParameter(cell_->name() + "/input");
29 | output_ = cell_->GetParameter(cell_->name() + "/output");
30 | actions_.Read(spec);
31 | }
32 |
33 | DelegateInstance *CreateInstance() override {
34 | return new MultiClassDelegateInstance(this);
35 | }
36 |
37 | // Multi-class delegate instance.
38 | class MultiClassDelegateInstance : public DelegateInstance {
39 | public:
40 | MultiClassDelegateInstance(MultiClassDelegate *delegate)
41 | : delegate_(delegate),
42 | data_(delegate->cell_) {}
43 |
44 | void Predict(float *activation, ParserAction *action) override {
45 | // Predict action from activations.
46 | data_.SetReference(delegate_->input_, activation);
47 | data_.Compute();
48 | int argmax = *data_.Get(delegate_->output_);
49 | *action = delegate_->actions_.Action(argmax);
50 | }
51 |
52 | private:
53 | MultiClassDelegate *delegate_;
54 | Instance data_;
55 | };
56 |
57 | private:
58 | ActionTable actions_; // action table for multi-class classification
59 |
60 | Cell *cell_ = nullptr; // cell for computation
61 | Tensor *input_ = nullptr; // input for activations
62 | Tensor *output_ = nullptr; // output prediction
63 | };
64 |
65 | REGISTER_DELEGATE("multiclass", MultiClassDelegate);
66 |
67 | } // namespace nlp
68 | } // namespace sling
69 |
70 |
--------------------------------------------------------------------------------
/sling/nlp/parser/ontonotes/shuffle.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Shuffle training corpus.
16 |
17 | import random
18 | import sling
19 | import sling.flags as flags
20 |
flags.define('--input',
             help='input file with documents')
flags.define('--output',
             help='output for shuffled documents')
flags.define('--seed',
             help='seed for shuffling the corpus',
             default=314159,
             type=int,
             metavar='NUM')

if __name__ == '__main__':
  flags.parse()

  # Read the whole input corpus into memory as (key, value) pairs. The reader
  # is closed even if reading fails.
  reader = sling.RecordReader(flags.arg.input)
  try:
    records = [(key, value) for key, value in reader]
  finally:
    reader.close()

  # Shuffle documents with a seeded generator so that the same seed always
  # yields the same document order.
  r = random.Random(flags.arg.seed)
  r.shuffle(records)

  # Write shuffled documents to output. The writer is closed even if writing
  # fails.
  writer = sling.RecordWriter(flags.arg.output)
  try:
    for key, value in records:
      writer.write(key, value)
  finally:
    writer.close()
48 |
49 |
--------------------------------------------------------------------------------
/sling/nlp/parser/parser-annotator.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/parser/parser.h"
16 | #include "sling/nlp/document/annotator.h"
17 |
18 | namespace sling {
19 | namespace nlp {
20 |
21 | // Document annotator for adding semantic parse annotations to document.
22 | class ParserAnnotator : public Annotator {
23 | public:
24 | void Init(task::Task *task, Store *commons) override {
25 | // Load parser model.
26 | string model = task->GetInputFile("parser");
27 | LOG(INFO) << "Loading parser model from " << model;
28 | parser_.Load(commons, model);
29 | }
30 |
31 | void Annotate(Document *document) override {
32 | // Parse document.
33 | parser_.Parse(document);
34 | }
35 |
36 | private:
37 | // Parser model.
38 | Parser parser_;
39 | };
40 |
41 | REGISTER_ANNOTATOR("parser", ParserAnnotator);
42 |
43 | // Document annotator for adding names to frame based on first mention.
44 | class MentionNameAnnotator : public Annotator {
45 | public:
46 | void Init(task::Task *task, Store *commons) override {
47 | names_.Bind(commons);
48 | }
49 |
50 | void Annotate(Document *document) override {
51 | Handles evoked(document->store());
52 | for (Span *span : document->spans()) {
53 | span->AllEvoked(&evoked);
54 | for (Handle h : evoked) {
55 | Frame f(document->store(), h);
56 | if (!f.Has(n_name_)) {
57 | f.Add(n_name_, span->GetText());
58 | }
59 | }
60 | }
61 | }
62 |
63 | private:
64 | Names names_;
65 | Name n_name_{names_, "name"};
66 | };
67 |
68 | REGISTER_ANNOTATOR("mention-name", MentionNameAnnotator);
69 |
70 |
71 | } // namespace nlp
72 | } // namespace sling
73 |
--------------------------------------------------------------------------------
/sling/nlp/parser/roles.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/parser/roles.h"
16 |
17 | namespace sling {
18 | namespace nlp {
19 |
20 | void RoleSet::Add(Handle role) {
21 | if (!role.IsNil() && roles_.find(role) == roles_.end()) {
22 | int index = roles_.size();
23 | roles_[role] = index;
24 | }
25 | }
26 |
27 | void RoleSet::Add(const std::vector &actions) {
28 | for (const ParserAction &action : actions) Add(action.role);
29 | }
30 |
31 | void RoleSet::GetList(std::vector *list) const {
32 | list->resize(roles_.size());
33 | for (auto &it : roles_) {
34 | (*list)[it.second] = it.first;
35 | }
36 | }
37 |
38 | void RoleGraph::Compute(const ParserState &state,
39 | int limit,
40 | const RoleSet &roles) {
41 | limit_ = limit;
42 | num_roles_ = roles.size();
43 | int k = limit_;
44 | edges_.clear();
45 | if (k > state.AttentionSize()) k = state.AttentionSize();
46 | for (int source = 0; source < k; ++source) {
47 | Handle handle = state.Attention(source).frame;
48 | const FrameDatum *frame = state.store()->GetFrame(handle);
49 | for (const Slot *slot = frame->begin(); slot < frame->end(); ++slot) {
50 | int target = -1;
51 | if (slot->value.IsLocalRef()) {
52 | target = state.AttentionIndex(slot->value, k);
53 | if (target == -1) continue;
54 | }
55 |
56 | int role = roles.Lookup(slot->name);
57 | if (role == -1) continue;
58 |
59 | edges_.emplace_back(source, role, target);
60 | }
61 | }
62 | }
63 |
64 | } // namespace nlp
65 | } // namespace sling
66 |
67 |
--------------------------------------------------------------------------------
/sling/nlp/parser/tools/BUILD:
--------------------------------------------------------------------------------
# Build rules for the parser tools. All targets are publicly visible.
package(default_visibility = ["//visibility:public"])

# Binary built from parse.cc. Besides the parser library itself, the
# multi-class delegate and the frame evaluation targets are linked in.
cc_binary(
  name = "parse",
  srcs = ["parse.cc"],
  deps = [
    "//sling/base",
    "//sling/base:clock",
    "//sling/file:posix",
    "//sling/frame:object",
    "//sling/frame:serialization",
    "//sling/myelin:profile",
    "//sling/nlp/document",
    "//sling/nlp/document:document-corpus",
    "//sling/nlp/document:document-tokenizer",
    "//sling/nlp/document:lex",
    "//sling/nlp/parser",
    "//sling/nlp/parser:multiclass-delegate",
    "//sling/nlp/parser:frame-evaluation",
    "//sling/string:printf",
  ],
)
23 |
24 |
--------------------------------------------------------------------------------
/sling/nlp/parser/tools/train_caspar.py:
--------------------------------------------------------------------------------
1 | import sling
2 | import sling.flags as flags
3 | import sling.task.workflow as workflow
4 |
# Flag for selecting the larger model configuration.
flags.define("--accurate", default=False, action='store_true')

flags.parse()

# Pick the model output file and the RNN size for the chosen configuration.
if not flags.arg.accurate:
  modelfn = "local/data/e/caspar/caspar.flow"
  rnn_layers = 1
  rnn_dim = 128
else:
  modelfn = "local/data/e/caspar/caspar-accurate.flow"
  rnn_layers = 3
  rnn_dim = 192

# The workflow system must be started before any workflow is created.
workflow.startup()

# Workflow for training the parser.
wf = workflow.Workflow("parser-training")

# Input corpora and word embeddings for the trainer.
training_corpus = wf.resource("local/data/corpora/caspar/train_shuffled.rec",
                              format="record/document")
evaluation_corpus = wf.resource("local/data/corpora/caspar/dev.rec",
                                format="record/document")
word_embeddings = wf.resource(
    "local/data/corpora/caspar/word2vec-32-embeddings.bin",
    format="embeddings")

# Output file for the trained model.
parser_model = wf.resource(modelfn, format="flow")

# Trainer hyperparameters: network settings first, then optimization settings.
hyperparams = {
  "rnn_type": 1,
  "rnn_dim": rnn_dim,
  "rnn_highways": True,
  "rnn_layers": rnn_layers,
  "dropout": 0.2,
  "ff_l2reg": 0.0001,

  "learning_rate": 1.0,
  "learning_rate_decay": 0.8,
  "clipping": 1,
  "optimizer": "sgd",
  "batch_size": 32,
  "rampup": 120,
  "report_interval": 1000,
  "learning_rate_cliff": 40000,
  "epochs": 50000,
}

# Set up the trainer task and connect it to its inputs and output.
trainer = wf.task("caspar-trainer")
trainer.add_params(hyperparams)
trainer.attach_input("training_corpus", training_corpus)
trainer.attach_input("evaluation_corpus", evaluation_corpus)
trainer.attach_input("word_embeddings", word_embeddings)
trainer.attach_output("model", parser_model)

# Run the trainer and shut the workflow system down again afterwards.
workflow.run(wf)
workflow.shutdown()
74 |
75 |
--------------------------------------------------------------------------------
/sling/nlp/parser/transition-generator.h:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_PARSER_TRANSITION_GENERATOR_H_
16 | #define SLING_NLP_PARSER_TRANSITION_GENERATOR_H_
17 |
18 | #include
19 |
20 | #include "sling/nlp/document/document.h"
21 | #include "sling/nlp/parser/parser-action.h"
22 |
23 | namespace sling {
24 | namespace nlp {
25 |
26 | // Generates transition sequences for [begin, end) token range in 'document',
27 | // calling 'callback' for every transition.
28 | void Generate(const Document &document,
29 | int begin, int end,
30 | std::function callback);
31 |
32 | // Generates transition sequences for all tokens in 'document', calling
33 | // 'callback' for every transition.
34 | void Generate(const Document &document,
35 | std::function callback);
36 |
37 | } // namespace nlp
38 | } // namespace sling
39 |
40 | #endif // SLING_NLP_PARSER_TRANSITION_GENERATOR_H_
41 |
--------------------------------------------------------------------------------
/sling/nlp/silver/corpus-split.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/nlp/document/document.h"
16 | #include "sling/task/documents.h"
17 | #include "sling/util/fingerprint.h"
18 |
19 | namespace sling {
20 | namespace nlp {
21 |
22 | using namespace task;
23 |
24 | // Split document corpus into training and evaluation data sets. The training
25 | // data is shuffled based on the contents of the document.
26 | class CorpusSplitter : public DocumentProcessor {
27 | public:
28 | void Startup(Task *task) override {
29 | // Get output chanels.
30 | train_ = task->GetSink("train");
31 | eval_ = task->GetSink("eval");
32 | CHECK(train_ != nullptr) << "train channel missing";
33 | CHECK(eval_ != nullptr) << "eval channel missing";
34 |
35 | // Get parameters.
36 | task->Fetch("split_ratio", &split_ratio_);
37 | }
38 |
39 | void Process(Slice key, const Document &document) override {
40 | uint64 fp = Fingerprint(document.text().data(), document.text().size());
41 | if (fp % split_ratio_ == (split_ratio_ - 1)) {
42 | // Output evaluation document.
43 | eval_->Send(CreateMessage(key, document.top()));
44 | } else {
45 | // Output training document.
46 | train_->Send(CreateMessage(std::to_string(fp), document.top()));
47 | }
48 | }
49 |
50 | private:
51 | // Channels for training and evaluation documents.
52 | Channel *train_ = nullptr;
53 | Channel *eval_ = nullptr;
54 |
55 | // Corpus split ratio, i.e. a corpus split ratio of 10 means that one in ten
56 | // documents is added to the evaluation set (90% train, 10% eval).
57 | int split_ratio_ = 10;
58 | };
59 |
60 | REGISTER_TASK_PROCESSOR("corpus-split", CorpusSplitter);
61 |
62 | } // namespace nlp
63 | } // namespace sling
64 |
65 |
--------------------------------------------------------------------------------
/sling/nlp/silver/idf.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_NLP_NER_IDF_H_
16 | #define SLING_NLP_NER_IDF_H_
17 |
18 | #include "sling/base/port.h"
19 | #include "sling/file/repository.h"
20 | #include "sling/util/unicode.h"
21 |
22 | namespace sling {
23 | namespace nlp {
24 |
// Word vocabulary table for inverse document frequency (IDF). The table is
// loaded from a repository file and words are looked up by fingerprint.
class IDFTable {
 public:
  // Load IDF repository from file.
  void Load(const string &filename);

  // Look up word fingerprint and return IDF for word.
  float GetIDF(uint64 fingerprint) const;

  // Get text normalization flags for IDF table.
  Normalization normalization() const { return normalization_; }

  // IDF repository header information.
  static const int VERSION = 1;
  struct Header {
    int version;             // presumably compared against VERSION -- confirm
    float num_docs;          // presumably the document count behind the IDFs
    char normalization[16];  // normalization flags in text form (presumably)
  };

 private:
  // Word entry. The struct is packed, so there is no padding after the idf.
  struct Word {
    uint64 fingerprint;
    float idf;
  } ABSL_ATTRIBUTE_PACKED;

  // Word index in repository.
  // NOTE(review): the template argument of RepositoryMap (and of GetObject
  // below) appears to be missing here; presumably Word -- confirm.
  class WordIndex : public RepositoryMap {
   public:
    // Initialize word index from the "IDF" block in the repository.
    void Initialize(const Repository &repository) { Init(repository, "IDF"); }

    // Return first element in bucket.
    const Word *GetBucket(int bucket) const { return GetObject(bucket); }
  };

  // Find word in word index.
  const Word *Find(uint64 fp) const;

  // Repository with name table.
  Repository repository_;

  // IDF header information.
  const Header *header_ = nullptr;

  // Word index.
  WordIndex index_;

  // IDF for out-of-vocabulary words.
  float oov_idf_ = 0.0;

  // Text normalization for fingerprints.
  Normalization normalization_ = NORMALIZE_DEFAULT;
};
80 |
81 | } // namespace nlp
82 | } // namespace sling
83 |
84 | #endif // SLING_NLP_NER_IDF_H_
85 |
--------------------------------------------------------------------------------
/sling/nlp/wikicat/app/appicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/nlp/wikicat/app/appicon.ico
--------------------------------------------------------------------------------
/sling/pyapi/pybase.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/pyapi/pybase.h"
16 |
17 | namespace sling {
18 |
19 | PyMethodTable::PyMethodTable() {
20 | // Add terminator element.
21 | table_.resize(1);
22 | table_[0].ml_name = nullptr;
23 | }
24 |
25 | void PyMethodTable::Add(const char *name, PyCFunction method, int flags) {
26 | // Set last element to new method.
27 | PyMethodDef &def = table_.back();
28 | def.ml_name = name;
29 | def.ml_meth = method;
30 | def.ml_flags = flags;
31 | def.ml_doc = "";
32 |
33 | // Add new terminator element.
34 | table_.resize(table_.size() + 1);
35 | table_[table_.size() - 1].ml_name = nullptr;
36 | }
37 |
38 | void PyBase::InitType(PyTypeObject *type,
39 | const char *name,
40 | size_t size,
41 | bool instantiable) {
42 | type->tp_name = name;
43 | type->tp_basicsize = size;
44 | if (instantiable) type->tp_new = PyType_GenericNew;
45 | type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;
46 | }
47 |
48 | void PyBase::RegisterType(PyTypeObject *type) {
49 | PyType_Ready(type);
50 | Py_INCREF(type);
51 | }
52 |
53 | void PyBase::RegisterType(PyTypeObject *type,
54 | PyObject *module,
55 | const char *name) {
56 | PyType_Ready(type);
57 | Py_INCREF(type);
58 | PyModule_AddObject(module, name, reinterpret_cast(type));
59 | }
60 |
61 | void PyBase::RegisterEnum(PyObject *module,
62 | const char *name,
63 | int value) {
64 | PyModule_AddIntConstant(module, name, value);
65 | }
66 |
67 | } // namespace sling
68 |
69 |
--------------------------------------------------------------------------------
/sling/pyapi/pydate.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_PYAPI_PYDATE_H_
16 | #define SLING_PYAPI_PYDATE_H_
17 |
18 | #include "sling/nlp/kb/calendar.h"
19 | #include "sling/pyapi/pybase.h"
20 | #include "sling/pyapi/pystore.h"
21 |
22 | namespace sling {
23 |
// Python wrapper for date. The wrapper object holds an nlp::Date value.
struct PyDate : public PyBase {
  // Initialize date wrapper.
  int Init(PyObject *args, PyObject *kwds);

  // Deallocate date wrapper.
  void Dealloc();

  // Convert date to string (YYYY-MM-DD).
  PyObject *Str();

  // Date in ISO 8601 format.
  PyObject *ISO();

  // Convert date to string or integer value.
  PyObject *Value();

  // Date object.
  nlp::Date date;

  // Registration: Python type object, member and method tables, and the
  // function that defines the type in a module.
  static PyTypeObject type;
  static PyMemberDef members[];
  static PyMethodTable methods;
  static void Define(PyObject *module);
};
50 |
// Python wrapper for calendar. The wrapper refers to a calendar object and
// the store wrapper for the calendar frames.
struct PyCalendar : public PyBase {
  // Initialize calendar wrapper.
  int Init(PyObject *args, PyObject *kwds);

  // Deallocate calendar wrapper.
  void Dealloc();

  // Convert date to human-readable string.
  PyObject *Str(PyObject *obj);

  // Return frames for date parts.
  PyObject *Day(PyObject *obj);
  PyObject *Month(PyObject *obj);
  PyObject *Year(PyObject *obj);
  PyObject *Decade(PyObject *obj);
  PyObject *Century(PyObject *obj);
  PyObject *Millennium(PyObject *obj);

  // Get date object.
  PyDate *GetDate(PyObject *obj);

  // Store for calendar frames.
  PyStore *pystore;

  // Calendar.
  nlp::Calendar *calendar;

  // Registration: Python type object, method table, and the function that
  // defines the type in a module.
  static PyTypeObject type;
  static PyMethodTable methods;
  static void Define(PyObject *module);
};
84 |
85 | } // namespace sling
86 |
87 | #endif // SLING_PYAPI_PYDATE_H_
88 |
89 |
--------------------------------------------------------------------------------
/sling/pyapi/pymisc.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_PYAPI_PYMISC_H_
16 | #define SLING_PYAPI_PYMISC_H_
17 |
18 | #include "sling/pyapi/pybase.h"
19 |
20 | namespace sling {
21 |
// Miscellaneous functions for the Python API. Each returns a Python object.

// Get list of registered command-line flags.
PyObject *PyGetFlags();

// Set value of command-line flag. The Python-level arguments are passed in
// 'args'.
PyObject *PySetFlag(PyObject *self, PyObject *args);

// Log message. The Python-level arguments are passed in 'args'.
PyObject *PyLogMessage(PyObject *self, PyObject *args);
30 |
31 | } // namespace sling
32 |
33 | #endif // SLING_PYAPI_PYMISC_H_
34 |
35 |
--------------------------------------------------------------------------------
/sling/stream/BUILD:
--------------------------------------------------------------------------------
# Build rules for the stream library. All targets are publicly visible.
package(default_visibility = ["//visibility:public"])

# Header-only target with the stream interfaces (stream.h) that the other
# targets in this package depend on.
cc_library(
  name = "stream",
  hdrs = ["stream.h"],
  deps = [
    "//sling/base",
  ],
)

# Streams on top of the file library.
cc_library(
  name = "file",
  srcs = ["file.cc"],
  hdrs = ["file.h"],
  deps = [
    ":stream",
    "//sling/base",
    "//sling/file",
  ],
)

# Memory streams.
cc_library(
  name = "memory",
  srcs = ["memory.cc"],
  hdrs = ["memory.h"],
  deps = [
    ":stream",
    "//sling/base",
  ],
)

# Input on top of a stream; uses the varint utilities.
cc_library(
  name = "input",
  srcs = ["input.cc"],
  hdrs = ["input.h"],
  deps = [
    ":stream",
    "//sling/base",
    "//sling/util:varint",
  ],
)

# Output on top of a stream; uses the text and varint utilities.
cc_library(
  name = "output",
  srcs = ["output.cc"],
  hdrs = ["output.h"],
  deps = [
    ":stream",
    "//sling/base",
    "//sling/string:text",
    "//sling/util:varint",
  ],
)

# File input; pulls in the file, input, bzip2, and gzip targets.
cc_library(
  name = "file-input",
  srcs = ["file-input.cc"],
  hdrs = ["file-input.h"],
  deps = [
    ":bzip2",
    ":file",
    ":gzip",
    ":input",
  ],
)

# Bounded input stream that limits the size of the input.
cc_library(
  name = "bounded",
  srcs = ["bounded.cc"],
  hdrs = ["bounded.h"],
  deps = [
    ":stream",
  ],
)

# BZIP2 stream compression and decompression using the third-party bz2lib.
cc_library(
  name = "bzip2",
  srcs = ["bzip2.cc"],
  hdrs = ["bzip2.h"],
  deps = [
    ":stream",
    "//sling/base",
    "//third_party/bz2lib",
  ],
)

# GZIP streams using the third-party zlib.
cc_library(
  name = "gzip",
  srcs = ["gzip.cc"],
  hdrs = ["gzip.h"],
  deps = [
    ":stream",
    "//sling/base",
    "//third_party/zlib",
  ],
)

# ZIP file support built from the bounded, file, file-input, and gzip targets.
cc_library(
  name = "zipfile",
  srcs = ["zipfile.cc"],
  hdrs = ["zipfile.h"],
  deps = [
    ":bounded",
    ":file",
    ":file-input",
    ":gzip",
    "//sling/base",
    "//sling/file",
  ],
)

# Unix file streams.
cc_library(
  name = "unix-file",
  srcs = ["unix-file.cc"],
  hdrs = ["unix-file.h"],
  deps = [
    ":stream",
    "//sling/base",
  ],
)
121 |
122 |
--------------------------------------------------------------------------------
/sling/stream/bounded.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/stream/bounded.h"
16 |
17 | namespace sling {
18 |
19 | BoundedInputStream::BoundedInputStream(InputStream *input, int64 limit)
20 | : input_(input), left_(limit) {
21 | start_ = input_->ByteCount();
22 | }
23 |
24 | BoundedInputStream::~BoundedInputStream() {
25 | // Back up if we overshot the size of the stream.
26 | if (left_ < 0) input_->BackUp(-left_);
27 | }
28 |
29 | bool BoundedInputStream::Next(const void **data, int *size) {
30 | // Check if we have reached the limit of the stream.
31 | if (left_ <= 0) return false;
32 |
33 | // Read next chunk from the underlying stream.
34 | if (!input_->Next(data, size)) return false;
35 |
36 | // Adjust size of we overshot the limit.
37 | left_ -= *size;
38 | if (left_ < 0) *size += left_;
39 |
40 | return true;
41 | }
42 |
43 | void BoundedInputStream::BackUp(int count) {
44 | if (left_ < 0) {
45 | // Include the overshoot when backing up in the underlying stream.
46 | input_->BackUp(count - left_);
47 | left_ = count;
48 | } else {
49 | // Back up in the underlying stream.
50 | input_->BackUp(count);
51 | left_ += count;
52 | }
53 | }
54 |
55 | bool BoundedInputStream::Skip(int count) {
56 | if (count > left_) {
57 | // Skip to end.
58 | if (left_ < 0) return false;
59 | input_->Skip(left_);
60 | left_ = 0;
61 | return false;
62 | } else {
63 | // Skip within limit.
64 | if (!input_->Skip(count)) return false;
65 | left_ -= count;
66 | return true;
67 | }
68 | }
69 |
70 | int64 BoundedInputStream::ByteCount() const {
71 | if (left_ < 0) {
72 | return input_->ByteCount() + left_ - start_;
73 | } else {
74 | return input_->ByteCount() - start_;
75 | }
76 | }
77 |
78 | } // namespace sling
79 |
80 |
--------------------------------------------------------------------------------
/sling/stream/bounded.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_STREAM_BOUNDED_H_
16 | #define SLING_STREAM_BOUNDED_H_
17 |
18 | #include "sling/base/macros.h"
19 | #include "sling/base/types.h"
20 | #include "sling/stream/stream.h"
21 |
22 | namespace sling {
23 |
24 | // A bounded input stream that limits the size of the input to a particular
25 | // size.
26 | class BoundedInputStream : public InputStream {
27 | public:
28 | BoundedInputStream(InputStream *input, int64 limit);
29 | ~BoundedInputStream();
30 |
31 | // InputStream interface.
32 | bool Next(const void **data, int *size);
33 | void BackUp(int count);
34 | bool Skip(int count);
35 | int64 ByteCount() const;
36 |
37 | private:
38 | // Underlying input stream.
39 | InputStream *input_;
40 |
41 | // Number of bytes left to read. This can be negative if have overshoot the
42 | // limit of the stream.
43 | int64 left_;
44 |
45 | // Initial position of the underlying stream.
46 | int64 start_;
47 |
48 | DISALLOW_IMPLICIT_CONSTRUCTORS(BoundedInputStream);
49 | };
50 |
51 | } // namespace sling
52 |
53 | #endif // SLING_STREAM_BOUNDED_H_
54 |
55 |
--------------------------------------------------------------------------------
/sling/stream/bzip2.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_STREAM_BZIP2_H_
16 | #define SLING_STREAM_BZIP2_H_
17 |
18 | #include "sling/base/types.h"
19 | #include "sling/stream/stream.h"
20 | #include "third_party/bz2lib/bzlib.h"
21 |
22 | namespace sling {
23 |
// BZIP2 stream compression. The compressor is an output stream that is set up
// with another output stream as its sink.
class BZip2Compressor : public OutputStream {
 public:
  // Initialize compressor. The block size defaults to 1 MB (1 << 20) and the
  // compression level defaults to 9.
  BZip2Compressor(OutputStream *sink,
                  int block_size = 1 << 20,
                  int compression_level = 9);
  ~BZip2Compressor() override;

  // Implementation of OutputStream interface.
  bool Next(void **data, int *size) override;
  void BackUp(int count) override;
  int64 ByteCount() const override;

 private:
  // Compressor.
  bz_stream stream_;
};
42 |
// BZIP2 stream decompression. The decompressor is an input stream that takes
// its compressed input from another input stream.
class BZip2Decompressor : public InputStream {
 public:
  // Initialize decompressor. The block size defaults to 1 MB (1 << 20).
  BZip2Decompressor(InputStream *source,
                    int block_size = 1 << 20);
  ~BZip2Decompressor() override;

  // Implementation of InputStream interface.
  bool Next(const void **data, int *size) override;
  void BackUp(int count) override;
  bool Skip(int count) override;
  int64 ByteCount() const override;

 private:
  // Decompress next chunk.
  bool NextChunk();

  // Source for compressed input.
  InputStream *source_;

  // Compression buffer.
  char *buffer_;
  int block_size_;

  // Decompressor.
  bz_stream stream_;

  // Number of bytes uncompressed.
  uint64 total_bytes_;

  // Reset decompressor on next chunk (for multi stream bzip2 files).
  bool reset_;

  // Number of bytes to back up.
  int backup_;
};
80 |
81 | } // namespace sling
82 |
83 | #endif // SLING_STREAM_BZIP2_H_
84 |
85 |
--------------------------------------------------------------------------------
/sling/stream/file-input.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_STREAM_FILE_INPUT_H_
16 | #define SLING_STREAM_FILE_INPUT_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "sling/base/macros.h"
22 | #include "sling/base/types.h"
23 | #include "sling/stream/input.h"
24 |
25 | namespace sling {
26 |
27 | // Input stream that runs a pipeline of input stream.
28 | class InputPipeline : public InputStream {
29 | public:
30 | InputPipeline();
31 | ~InputPipeline();
32 |
33 | // Return the last steam in the pipeline.
34 | InputStream *last() const { return last_; }
35 |
36 | // Add input stream to pipeline. Takes ownership of the stream.
37 | void Add(InputStream *stream);
38 |
39 | // Implementation of InputStream interface.
40 | bool Next(const void **data, int *size) override;
41 | void BackUp(int count) override;
42 | bool Skip(int count) override;
43 | int64 ByteCount() const override;
44 |
45 | private:
46 | // Final input stream.
47 | InputStream *last_ = nullptr;
48 |
49 | // Input stream pipeline.
50 | std::vector streams_;
51 | };
52 |
// File input class that supports decompression of the input stream based on
// the file extension. The stream created by Open() is passed to the Input
// base class in the constructor, and the destructor deletes whatever stream()
// returns (presumably that same stream -- confirm in Input).
class FileInput : public Input {
 public:
  // Open file. block_size defaults to 1 << 20 and is forwarded to Open().
  explicit FileInput(const string &filename, int block_size = 1 << 20)
      : Input(Open(filename, block_size)) {}

  // Delete the stream returned by stream().
  ~FileInput() { delete stream(); }

  // Open input file and add decompression for compressed input files.
  // Returns a raw pointer; FileInput itself pairs the call with a delete in
  // its destructor.
  static InputStream *Open(const string &filename, int block_size = 1 << 20);

 private:
  DISALLOW_IMPLICIT_CONSTRUCTORS(FileInput);
};
69 |
70 | } // namespace sling
71 |
72 | #endif // SLING_STREAM_FILE_INPUT_H_
73 |
74 |
--------------------------------------------------------------------------------
/sling/stream/gzip.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_STREAM_GZIP_H_
16 | #define SLING_STREAM_GZIP_H_
17 |
18 | #include "sling/base/types.h"
19 | #include "sling/stream/stream.h"
20 | #include "third_party/zlib/zlib.h"
21 |
22 | namespace sling {
23 |
// GZIP stream compression. Output stream that is constructed on top of
// another output stream (the sink).
// NOTE(review): the class declares no member for the sink or for a staging
// buffer; confirm in the implementation file how the constructor arguments
// are used.
class GZipCompressor : public OutputStream {
 public:
  // Initialize compressor.
  //   sink: output stream supplied by the caller.
  //   block_size: defaults to 1 << 20; presumably a buffer size in bytes --
  //     confirm against the implementation.
  //   compression_level: defaults to 9.
  GZipCompressor(OutputStream *sink,
                 int block_size = 1 << 20,
                 int compression_level = 9);
  ~GZipCompressor() override;

  // Implementation of OutputStream interface.
  bool Next(void **data, int *size) override;
  void BackUp(int count) override;
  int64 ByteCount() const override;

 private:
  // Compressor state (z_stream from zlib.h).
  z_stream stream_;
};
42 |
// GZIP stream decompression. Input stream that reads from another input
// stream (the source).
class GZipDecompressor : public InputStream {
 public:
  // Initialize decompressor.
  //   source: input stream supplied by the caller.
  //   block_size: defaults to 1 << 20; presumably the size of buffer_ in
  //     bytes -- confirm against the implementation.
  //   window_bits: defaults to 15 + 16. NOTE(review): under zlib's
  //     inflateInit2() convention this would mean a 15-bit window plus gzip
  //     header decoding; confirm that the value is passed to inflateInit2().
  GZipDecompressor(InputStream *source,
                   int block_size = 1 << 20,
                   int window_bits = 15 + 16);
  ~GZipDecompressor() override;

  // Implementation of InputStream interface.
  bool Next(const void **data, int *size) override;
  void BackUp(int count) override;
  bool Skip(int count) override;
  int64 ByteCount() const override;

 private:
  // Decompress next chunk.
  bool NextChunk();

  // Source for compressed input.
  InputStream *source_;

  // Decompression buffer.
  char *buffer_;
  int block_size_;

  // Decompressor state (z_stream from zlib.h).
  z_stream stream_;

  // Number of bytes uncompressed.
  uint64 total_bytes_;

  // Reset decompressor on next chunk (for multi stream gzip files).
  bool reset_;

  // Number of bytes to back up.
  int backup_;
};
81 |
82 | } // namespace sling
83 |
84 | #endif // SLING_STREAM_GZIP_H_
85 |
86 |
--------------------------------------------------------------------------------
/sling/string/BUILD:
--------------------------------------------------------------------------------
# String utility libraries. All targets in this package are publicly visible.
package(default_visibility = ["//visibility:public"])

# Umbrella target without sources of its own; depending on it pulls in all
# the string libraries below.
cc_library(
  name = "string",
  deps = [
    ":ctype",
    ":numbers",
    ":printf",
    ":strcat",
    ":text",
  ],
)

# Built from ctype.cc/ctype.h, with narrowing warnings disabled for this
# target.
cc_library(
  name = "ctype",
  hdrs = ["ctype.h"],
  srcs = ["ctype.cc"],
  copts = [
    "-Wno-narrowing",
  ]
)

# Built from numbers.cc/numbers.h.
cc_library(
  name = "numbers",
  hdrs = ["numbers.h"],
  srcs = ["numbers.cc"],
  deps = [
    ":ctype",
    "//sling/base",
  ],
)

# Printf variants that produce C++ strings (printf.cc/printf.h).
cc_library(
  name = "printf",
  hdrs = ["printf.h"],
  srcs = ["printf.cc"],
  deps = [
    "//sling/base",
  ],
)

# Built from strcat.cc/strcat.h; builds on ctype, numbers and text.
cc_library(
  name = "strcat",
  hdrs = ["strcat.h"],
  srcs = ["strcat.cc"],
  deps = [
    ":ctype",
    ":numbers",
    ":text",
    "//sling/base",
  ],
)

# Built from text.cc/text.h, with deprecation and sign-compare warnings
# disabled for this target.
cc_library(
  name = "text",
  hdrs = ["text.h"],
  srcs = ["text.cc"],
  deps = [
    "//sling/base",
    "//sling/util:city",
  ],
  copts = [
    "-Wno-deprecated",
    "-Wno-sign-compare",
  ],
)
67 |
68 |
--------------------------------------------------------------------------------
/sling/string/printf.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Printf variants that place their output in a C++ string.
16 | //
17 | // Usage:
18 | // string result = StringPrintf("%d %s\n", 10, "hello");
19 | // SStringPrintf(&result, "%d %s\n", 10, "hello");
20 | // StringAppendF(&result, "%d %s\n", 20, "there");
21 |
22 | #ifndef SLING_STRING_PRINTF_H_
23 | #define SLING_STRING_PRINTF_H_
24 |
25 | #include
26 | #include
27 |
28 | #include "sling/base/port.h"
29 | #include "sling/base/types.h"
30 |
31 | namespace sling {
32 |
// Format the arguments according to the printf-style format string and return
// the result as a C++ string.
// ABSL_PRINTF_ATTRIBUTE(1, 2) names the format string as parameter 1 and the
// variadic arguments as starting at parameter 2 (presumably enabling compiler
// format checking -- see the macro definition).
extern string StringPrintf(const char *format, ...) ABSL_PRINTF_ATTRIBUTE(1, 2);

// Store result into a supplied string and return it.
extern const string &SStringPrintf(string *dst, const char *format, ...)
    ABSL_PRINTF_ATTRIBUTE(2, 3);

// Append result to a supplied string.
extern void StringAppendF(string *dst, const char *format, ...)
    ABSL_PRINTF_ATTRIBUTE(2, 3);

// Lower-level routine that takes a va_list and appends to a specified string.
extern void StringAppendV(string *dst, const char *format, va_list ap);
47 | } // namespace sling
48 |
49 | #endif // SLING_STRING_PRINTF_H_
50 |
51 |
--------------------------------------------------------------------------------
/sling/task/app/appicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/task/app/appicon.ico
--------------------------------------------------------------------------------
/sling/task/app/dashboard.css:
--------------------------------------------------------------------------------
/* Register the "lcd" font family, loaded from digital-7.mono.ttf. */
@font-face {
  font-family: lcd;
  src: url(digital-7.mono.ttf);
}

/* Job table: 8px margin on every side except the left. */
.job-table {
  margin-top: 8px;
  margin-right: 8px;
  margin-bottom: 8px;
}

/* Channel table: full width minus 8px, with vertical margins. */
.channel-table {
  width: calc(100% - 8px);
  margin-top: 8px;
  margin-bottom: 8px;
}

/* Counter table: vertical margins only. */
.counter-table {
  margin-top: 8px;
  margin-bottom: 8px;
}

/* Fixed-width (160px) panel with rounded corners, an inset shadow and a
   grey-green background. */
.res-table {
  padding: 4px;
  width: 160px;
  margin-left: 8px;
  margin-bottom: 8px;
  margin-top: 8px;
  border-radius: 4px;
  box-shadow: inset 0px 0px 24px 2px rgba(0,0,0,0.2);
  color: #303060;
  background: #BAC2B6;
  font-family: arial;
  font-weight: normal;
  font-size: 12pt;
  text-shadow: 1px 1px 4px rgba(150, 150, 150, 1);
}

/* Align cell contents on the text baseline so that labels and the larger
   .lcd values line up. */
.res-table td {
  vertical-align: baseline;
}

/* Right-aligned values rendered in the "lcd" font declared above. */
.lcd {
  font-family: lcd;
  font-size: 20pt;
  font-weight: normal;
  text-align: right;
  width: 100%;
}
50 |
51 |
--------------------------------------------------------------------------------
/sling/task/app/digital-7.mono.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/sling/f408a148a06bc2d62e853a292a8ba7266c642839/sling/task/app/digital-7.mono.ttf
--------------------------------------------------------------------------------
/sling/task/documents.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/documents.h"
16 |
17 | namespace sling {
18 | namespace task {
19 |
// Register DocumentProcessor under the processor name "document-processor".
REGISTER_TASK_PROCESSOR("document-processor", DocumentProcessor);
21 |
// Initializes the annotation pipeline from the task and binds the document
// names to the commons store.
void DocumentProcessor::InitCommons(Task *task) {
  // Initialize document annotation pipeline.
  pipeline_.Init(task, commons_);

  // Bind document names. The object is released again in the destructor.
  docnames_ = new nlp::DocumentNames(commons_);
}
29 |
// Starts the underlying frame processor and looks up the statistics counters
// that Process() updates.
void DocumentProcessor::Start(Task *task) {
  // Initialize frame processor.
  FrameProcessor::Start(task);

  // Statistics.
  num_documents_ = task->GetCounter("documents");
  num_tokens_ = task->GetCounter("tokens");
  num_spans_ = task->GetCounter("spans");
}
39 |
40 | void DocumentProcessor::Process(Slice key, const Frame &frame) {
41 | // Create document from frame.
42 | nlp::Document document(frame, docnames_);
43 |
44 | // Run preprocessing pipeline on document.
45 | if (!pipeline_.empty()) {
46 | pipeline_.Annotate(&document);
47 | document.Update();
48 | }
49 |
50 | // Process document.
51 | Process(key, document);
52 |
53 | // Update statistics.
54 | num_documents_->Increment();
55 | num_tokens_->Increment(document.num_tokens());
56 | num_spans_->Increment(document.num_spans());
57 | }
58 |
// Default document handler: passes the document on to the output under the
// same key.
void DocumentProcessor::Process(Slice key, const nlp::Document &document) {
  Output(key, document);
}
62 |
// Outputs the top frame of the document under the given key.
void DocumentProcessor::Output(Text key, const nlp::Document &document) {
  FrameProcessor::Output(key, document.top());
}
66 |
// Outputs the top frame of the document without an explicit key.
void DocumentProcessor::Output(const nlp::Document &document) {
  FrameProcessor::Output(document.top());
}
70 |
71 | } // namespace task
72 | } // namespace sling
73 |
74 |
--------------------------------------------------------------------------------
/sling/task/documents.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_TASK_DOCUMENTS_H_
16 | #define SLING_TASK_DOCUMENTS_H_
17 |
18 | #include "sling/nlp/document/annotator.h"
19 | #include "sling/nlp/document/document.h"
20 | #include "sling/task/frames.h"
21 |
22 | namespace sling {
23 | namespace task {
24 |
25 | // Task processor for receiving and sending documents.
26 | class DocumentProcessor : public FrameProcessor {
27 | public:
28 | ~DocumentProcessor() { if (docnames_) docnames_->Release(); }
29 |
30 | void Process(Slice key, const Frame &frame) override;
31 |
32 | // Initialize commons store with document symbols.
33 | void InitCommons(Task *task) override;
34 |
35 | // Initialize document processor.
36 | void Start(Task *task) override;
37 |
38 | // Called for each document received on input.
39 | virtual void Process(Slice key, const nlp::Document &document);
40 |
41 | // Output document to output.
42 | void Output(Text key, const nlp::Document &document);
43 |
44 | // Output document to output using document id as key.
45 | void Output(const nlp::Document &document);
46 |
47 | // Document schema.
48 | const nlp::DocumentNames *docnames() const { return docnames_; }
49 |
50 | private:
51 | // Document symbol names.
52 | const nlp::DocumentNames *docnames_ = nullptr;
53 |
54 | // Document annotator pipeline for preprocessing incoming documents.
55 | nlp::Pipeline pipeline_;
56 |
57 | // Statistics.
58 | Counter *num_documents_;
59 | Counter *num_tokens_;
60 | Counter *num_spans_;
61 | };
62 |
63 | } // namespace task
64 | } // namespace sling
65 |
66 | #endif // SLING_TASK_DOCUMENTS_H_
67 |
68 |
--------------------------------------------------------------------------------
/sling/task/environment.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_TASK_ENVIRONMENT_H_
16 | #define SLING_TASK_ENVIRONMENT_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "sling/base/types.h"
22 |
23 | namespace sling {
24 | namespace task {
25 |
26 | class Channel;
27 | class Task;
28 |
29 | // Lock-free counter for statistics.
30 | class Counter {
31 | public:
32 | // Increment counter.
33 | void Increment() { ++value_; }
34 | void Increment(int64 delta) { value_ += delta; }
35 |
36 | // Reset counter.
37 | void Reset() { value_ = 0; }
38 |
39 | // Set counter value.
40 | void Set(int64 value) { value_ = value; }
41 |
42 | // Return counter value.
43 | int64 value() const { return value_; }
44 |
45 | private:
46 | std::atomic value_{0};
47 | };
48 |
// Container environment interface. Pure abstract interface that gives access
// to named statistics counters and receives completion notifications for
// channels and tasks.
class Environment {
 public:
  virtual ~Environment() = default;

  // Return statistics counter for the given name.
  virtual Counter *GetCounter(const string &name) = 0;

  // Notify that channel has completed.
  virtual void ChannelCompleted(Channel *channel) = 0;

  // Notify that task has completed.
  virtual void TaskCompleted(Task *task) = 0;
};
63 |
64 | } // namespace task
65 | } // namespace sling
66 |
67 | #endif // SLING_TASK_ENVIRONMENT_H_
68 |
69 |
--------------------------------------------------------------------------------
/sling/task/identity.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 |
17 | #include "sling/base/logging.h"
18 | #include "sling/task/task.h"
19 |
20 | namespace sling {
21 | namespace task {
22 |
23 | // Mapper that relays all input messages to the output channel.
24 | class IdentityMapper : public Processor {
25 | public:
26 | void Start(Task *task) override {
27 | output_ = task->GetSink("output");
28 | }
29 |
30 | void Receive(Channel *channel, Message *message) override {
31 | if (output_ != nullptr) {
32 | output_->Send(message);
33 | } else {
34 | delete message;
35 | }
36 | }
37 |
38 | private:
39 | Channel *output_ = nullptr;
40 | };
41 |
42 | REGISTER_TASK_PROCESSOR("identity-mapper", IdentityMapper);
43 |
44 | // Reducer that relays all input messages to the corresponding output channel.
45 | class IdentityReducer : public Processor {
46 | public:
47 | void Start(Task *task) override {
48 | outputs_ = task->GetSinks("output");
49 | }
50 |
51 | void Receive(Channel *channel, Message *message) override {
52 | int shard = channel->consumer().shard().part();
53 | CHECK_LT(shard, outputs_.size());
54 | outputs_[shard]->Send(message);
55 | }
56 |
57 | private:
58 | std::vector outputs_;
59 | };
60 |
61 | REGISTER_TASK_PROCESSOR("identity-reducer", IdentityReducer);
62 |
63 | } // namespace task
64 | } // namespace sling
65 |
66 |
--------------------------------------------------------------------------------
/sling/task/mapper.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/mapper.h"
16 |
17 | #include "sling/base/logging.h"
18 |
19 | namespace sling {
20 | namespace task {
21 |
22 | void Mapper::Start(Task *task) {
23 | // Get output channel.
24 | output_ = task->GetSink("output");
25 | if (output_ == nullptr) {
26 | LOG(ERROR) << "No output channel";
27 | return;
28 | }
29 | }
30 |
31 | void Mapper::Receive(Channel *channel, Message *message) {
32 | // Call Map() method on each input message.
33 | MapInput input(message->key(), message->value());
34 | Map(input);
35 |
36 | // Delete input message.
37 | delete message;
38 | }
39 |
40 | void Mapper::Done(Task *task) {
41 | // Close output channel.
42 | if (output_ != nullptr) output_->Close();
43 | }
44 |
45 | void Mapper::Output(Slice key, Slice value) {
46 | // Ignore if there is no output.
47 | if (output_ == nullptr) return;
48 |
49 | // Create new message and send it on the output channel.
50 | Message *message = new Message(key, value);
51 | output_->Send(message);
52 | }
53 |
54 | } // namespace task
55 | } // namespace sling
56 |
57 |
--------------------------------------------------------------------------------
/sling/task/mapper.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_TASK_REDUCER_H_
16 | #define SLING_TASK_REDUCER_H_
17 |
18 | #include
19 |
20 | #include "sling/base/slice.h"
21 | #include "sling/task/message.h"
22 | #include "sling/task/task.h"
23 |
24 | namespace sling {
25 | namespace task {
26 |
27 | // Input to mapper with a key and a value.
28 | class MapInput {
29 | public:
30 | MapInput(Slice key, Slice value)
31 | : key_(key), value_(value) {}
32 |
33 | // Key for message.
34 | Slice key() const { return key_; }
35 |
36 | // Value for message.
37 | Slice value() const { return value_; }
38 |
39 | private:
40 | Slice key_;
41 | Slice value_;
42 | };
43 |
44 | // A mapper processes all the input message in the Map() method and can output
45 | // new key/value pairs to the output.
46 | class Mapper : public Processor {
47 | public:
48 | void Start(Task *task) override;
49 | void Receive(Channel *channel, Message *message) override;
50 | void Done(Task *task) override;
51 |
52 | // The Map() method is called for each message in the input and can call the
53 | // Output() method to produce key/value pairs.
54 | virtual void Map(const MapInput &input) = 0;
55 |
56 | // Output key/value pair to output.
57 | void Output(Slice key, Slice value);
58 |
59 | // Return output channel.
60 | Channel *output() const { return output_; }
61 |
62 | private:
63 | // Output channel.
64 | Channel *output_ = nullptr;
65 | };
66 |
67 | } // namespace task
68 | } // namespace sling
69 |
70 | #endif // SLING_TASK_REDUCER_H_
71 |
72 |
--------------------------------------------------------------------------------
/sling/task/message-printer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 |
17 | #include "sling/base/logging.h"
18 | #include "sling/task/task.h"
19 |
20 | namespace sling {
21 | namespace task {
22 |
// Print incoming messages. Each message is written to the INFO log and then
// deleted; nothing is forwarded.
class MessagePrinter : public Processor {
 public:
  // Log the channel id, the producing task, and the key and value of the
  // message, then delete the message.
  void Receive(Channel *channel, Message *message) override {
    LOG(INFO) << "Message on channel " << channel->id()
              << " from " << channel->producer().task()->ToString()
              << " key: " << message->key()
              << " value: " << message->value();
    delete message;
  }
};

// Register the processor under the name "printer".
REGISTER_TASK_PROCESSOR("printer", MessagePrinter);
36 |
37 | } // namespace task
38 | } // namespace sling
39 |
40 |
--------------------------------------------------------------------------------
/sling/task/message.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/message.h"
16 |
17 | #include
18 |
19 | namespace sling {
20 | namespace task {
21 |
22 | Buffer::Buffer(Slice source) {
23 | if (source.empty()) {
24 | data_ = nullptr;
25 | size_ = 0;
26 | } else {
27 | size_ = source.size();
28 | data_ = new char[size_];
29 | memcpy(data_, source.data(), size_);
30 | }
31 | }
32 |
33 | } // namespace task
34 | } // namespace sling
35 |
36 |
--------------------------------------------------------------------------------
/sling/task/null-sink.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/task.h"
16 |
17 | namespace sling {
18 | namespace task {
19 |
// Silently discard all incoming messages.
class NullSink : public Processor {
 public:
  // Delete the message without looking at it.
  void Receive(Channel *channel, Message *message) override {
    delete message;
  }
};

// Register the processor under the name "null".
REGISTER_TASK_PROCESSOR("null", NullSink);
29 |
30 | } // namespace task
31 | } // namespace sling
32 |
33 |
--------------------------------------------------------------------------------
/sling/task/pipe-reader.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 | #include
17 |
18 | #include "sling/base/logging.h"
19 | #include "sling/base/types.h"
20 | #include "sling/stream/input.h"
21 | #include "sling/stream/unix-file.h"
22 | #include "sling/task/process.h"
23 | #include "sling/task/task.h"
24 |
25 | namespace sling {
26 | namespace task {
27 |
28 | // Run command and output lines to channel.
29 | class PipeReader : public Process {
30 | public:
31 | // Process input file.
32 | void Run(Task *task) override {
33 | // Get command.
34 | string command = task->Get("command", "");
35 |
36 | // Get output channel.
37 | Channel *output = task->GetSink("output");
38 | if (output == nullptr) {
39 | LOG(ERROR) << "No output channel";
40 | return;
41 | }
42 |
43 | // Run command.
44 | int buffer_size = task->Get("buffer_size", 1 << 16);
45 | FILE *pipe = popen(command.c_str(), "r");
46 | if (pipe == nullptr) {
47 | LOG(ERROR) << "Error running command: " << command;
48 | return;
49 | }
50 | StdFileInputStream stream(pipe, false, buffer_size);
51 | Input input(&stream);
52 |
53 | // Read lines from output of program and output to output channel.
54 | string line;
55 | while (input.ReadLine(&line)) {
56 | // Send message with line to output channel.
57 | output->Send(new Message(Slice(), Slice(line)));
58 | }
59 |
60 | // Close pipe and output channel.
61 | int status = pclose(pipe);
62 | CHECK(WIFEXITED(status)) << status;
63 | CHECK_EQ(WEXITSTATUS(status), 0);
64 | output->Close();
65 | }
66 | };
67 |
68 | REGISTER_TASK_PROCESSOR("pipe-reader", PipeReader);
69 |
70 | } // namespace task
71 | } // namespace sling
72 |
73 |
--------------------------------------------------------------------------------
/sling/task/record-file-writer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/logging.h"
16 | #include "sling/file/recordio.h"
17 | #include "sling/task/task.h"
18 | #include "sling/util/mutex.h"
19 |
20 | namespace sling {
21 | namespace task {
22 |
23 | // Write incoming messages to record file.
24 | class RecordFileWriter : public Processor {
25 | public:
26 | ~RecordFileWriter() override { delete writer_; }
27 |
28 | void Init(Task *task) override {
29 | // Get output file.
30 | Binding *output = task->GetOutput("output");
31 | if (output == nullptr) {
32 | LOG(ERROR) << "Output missing";
33 | return;
34 | }
35 |
36 | // Open record file writer.
37 | RecordFileOptions options;
38 | if (task->Get("indexed", false)) options.indexed = true;
39 | writer_ = new RecordWriter(output->resource()->name(), options);
40 | }
41 |
42 | void Receive(Channel *channel, Message *message) override {
43 | MutexLock lock(&mu_);
44 |
45 | // Write message to record file.
46 | CHECK(writer_->Write(message->key(), message->value()));
47 | delete message;
48 | }
49 |
50 | void Done(Task *task) override {
51 | MutexLock lock(&mu_);
52 |
53 | // Close writer.
54 | if (writer_ != nullptr) {
55 | CHECK(writer_->Close());
56 | delete writer_;
57 | writer_ = nullptr;
58 | }
59 | }
60 |
61 | private:
62 | // Record writer for writing to output.
63 | RecordWriter *writer_ = nullptr;
64 |
65 | // Mutex for record writer.
66 | Mutex mu_;
67 | };
68 |
69 | REGISTER_TASK_PROCESSOR("record-file-writer", RecordFileWriter);
70 |
71 | } // namespace task
72 | } // namespace sling
73 |
74 |
--------------------------------------------------------------------------------
/sling/task/reducer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/reducer.h"
16 |
17 | namespace sling {
18 | namespace task {
19 |
20 | Reducer::~Reducer() {
21 | for (auto *s : shards_) delete s;
22 | }
23 |
24 | void Reducer::Start(Task *task) {
25 | int num_shards = task->GetSources("input").size();
26 | shards_.reserve(num_shards);
27 | for (int i = 0; i < num_shards; ++i) {
28 | shards_.push_back(new Shard());
29 | }
30 | outputs_ = task->GetSinks("output");
31 | }
32 |
33 | void Reducer::Receive(Channel *channel, Message *message) {
34 | int shard = channel->consumer().shard().part();
35 | DCHECK_GE(shard, 0);
36 | DCHECK_LT(shard, shards_.size());
37 | Shard *s = shards_[shard];
38 |
39 | MutexLock lock(&s->mu);
40 | if (s->messages.empty()) {
41 | s->key = message->key();
42 | } else if (message->key() != s->key) {
43 | ReduceShard(shard);
44 | s->key = message->key();
45 | }
46 | s->messages.push_back(message);
47 | }
48 |
49 | void Reducer::ReduceShard(int shard) {
50 | Shard *s = shards_[shard];
51 | if (s->messages.empty()) return;
52 |
53 | ReduceInput input(shard, s->key, s->messages);
54 | Reduce(input);
55 | s->clear();
56 | }
57 |
58 | void Reducer::Done(Task *task) {
59 | for (int shard = 0; shard < shards_.size(); ++shard) {
60 | ReduceShard(shard);
61 | delete shards_[shard];
62 | }
63 | shards_.clear();
64 | }
65 |
66 | void Reducer::Output(int shard, Message *message) {
67 | DCHECK_GE(shard, 0);
68 | DCHECK_LT(shard, outputs_.size());
69 | outputs_[shard % outputs_.size()]->Send(message);
70 | }
71 |
72 | } // namespace task
73 | } // namespace sling
74 |
75 |
--------------------------------------------------------------------------------
/sling/task/rekey.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 |
17 | #include "sling/base/types.h"
18 | #include "sling/frame/store.h"
19 | #include "sling/task/task.h"
20 | #include "sling/task/frames.h"
21 |
22 | namespace sling {
23 | namespace task {
24 |
25 | // Output messages with new key from frame role.
26 | class RekeyTask : public Processor {
27 | public:
28 | void Start(Task *task) override {
29 | // Get output channel.
30 | output_ = task->GetSink("output");
31 | CHECK(output_ != nullptr) << "Output channel missing";
32 |
33 | // Initialize commons.
34 | role_ = commons_.Lookup(task->Get("key", "id"));
35 | commons_.Freeze();
36 |
37 | // Statistics.
38 | num_not_rekeyed_ = task->GetCounter("records_not_rekeyed");
39 | }
40 |
41 | void Receive(Channel *channel, Message *message) override {
42 | // Decode frame.
43 | Store store(&commons_);
44 | Frame f = DecodeMessage(&store, message);
45 | CHECK(f.valid());
46 |
47 | // Get key from role.
48 | Handle key = f.GetHandle(role_);
49 | if (!key.IsNil()) {
50 | // Update key in message.
51 | string keystr = store.DebugString(key);
52 | message->set_key(keystr);
53 | } else {
54 | num_not_rekeyed_->Increment();
55 | }
56 |
57 | // Output message on output channel.
58 | output_->Send(message);
59 | }
60 |
61 | private:
62 | // Output channel.
63 | Channel *output_ = nullptr;
64 |
65 | // Commons store.
66 | Store commons_;
67 |
68 | // Role for re-keying.
69 | Handle role_;
70 |
71 | // Statistics.
72 | Counter *num_not_rekeyed_ = nullptr;
73 | };
74 |
75 | REGISTER_TASK_PROCESSOR("rekey", RekeyTask);
76 |
77 | } // namespace task
78 | } // namespace sling
79 |
80 |
--------------------------------------------------------------------------------
/sling/task/sharder.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 |
17 | #include "sling/base/types.h"
18 | #include "sling/task/task.h"
19 | #include "sling/util/fingerprint.h"
20 |
21 | namespace sling {
22 | namespace task {
23 |
24 | // Shard input messages according to key fingerprint.
25 | class SharderTask : public Processor {
26 | public:
27 | void Start(Task *task) override {
28 | // Get output shard channels.
29 | shards_ = task->GetSinks("output");
30 | }
31 |
32 | void Receive(Channel *channel, Message *message) override {
33 | // Compute key fingerprint.
34 | uint64 fp = Fingerprint(message->key().data(), message->key().size());
35 | int shard = fp % shards_.size();
36 |
37 | // Output message on output shard channel.
38 | shards_[shard]->Send(message);
39 | }
40 |
41 | private:
42 | // Output shard channels.
43 | std::vector shards_;
44 | };
45 |
46 | REGISTER_TASK_PROCESSOR("sharder", SharderTask);
47 |
48 | } // namespace task
49 | } // namespace sling
50 |
51 |
--------------------------------------------------------------------------------
/sling/task/text-file-reader.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include
16 |
17 | #include "sling/base/logging.h"
18 | #include "sling/base/types.h"
19 | #include "sling/stream/file-input.h"
20 | #include "sling/task/process.h"
21 | #include "sling/task/task.h"
22 |
23 | namespace sling {
24 | namespace task {
25 |
26 | // Read text file and output lines to channel.
27 | class TextFileReader : public Process {
28 | public:
29 | // Process input file.
30 | void Run(Task *task) override {
31 | // Get input file.
32 | Binding *input = task->GetInput("input");
33 | if (input == nullptr) {
34 | LOG(ERROR) << "No input resource";
35 | return;
36 | }
37 |
38 | // Get output channel.
39 | Channel *output = task->GetSink("output");
40 | if (output == nullptr) {
41 | LOG(ERROR) << "No output channel";
42 | return;
43 | }
44 |
45 | // Open input file.
46 | int buffer_size = task->Get("buffer_size", 1 << 16);
47 | FileInput file(input->resource()->name(), buffer_size);
48 |
49 | // Statistics counters.
50 | Counter *lines_read = task->GetCounter("text_lines_read");
51 | Counter *bytes_read = task->GetCounter("text_bytes_read");
52 |
53 | // Read lines from file and output to output channel.
54 | int64 max_lines = task->Get("max_lines", 0);
55 | int64 num_lines = 0;
56 | string line;
57 | while (file.ReadLine(&line)) {
58 | // Update stats.
59 | lines_read->Increment();
60 | bytes_read->Increment(line.size());
61 |
62 | // Send message with line to output channel.
63 | output->Send(new Message(Slice(), Slice(line)));
64 |
65 | // Stop when max lines reached.
66 | if (max_lines > 0 && ++num_lines == max_lines) break;
67 | }
68 |
69 | // Close output channel.
70 | output->Close();
71 | }
72 | };
73 |
74 | REGISTER_TASK_PROCESSOR("text-file-reader", TextFileReader);
75 |
76 | } // namespace task
77 | } // namespace sling
78 |
79 |
--------------------------------------------------------------------------------
/sling/task/workers.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/task/task.h"
16 | #include "sling/util/threadpool.h"
17 |
18 | namespace sling {
19 | namespace task {
20 |
21 | // Create a pool of worker threads and distribute the incoming messages to
22 | // the output channel using the worker threads. This adds parallelism to the
23 | // processing of the message stream.
24 | class Workers : public Processor {
25 | public:
26 | ~Workers() override { delete pool_; }
27 |
28 | void Start(Task *task) override {
29 | // Get output port.
30 | output_ = task->GetSink("output");
31 |
32 | // Get worker pool parameters.
33 | int num_workers = task->Get("worker_threads", 5);
34 | int queue_size = task->Get("queue_size", num_workers * 2);
35 |
36 | // Start worker pool.
37 | pool_ = new ThreadPool(num_workers, queue_size);
38 | pool_->StartWorkers();
39 | }
40 |
41 | void Receive(Channel *channel, Message *message) override {
42 | if (output_ == nullptr) {
43 | // No receiver.
44 | delete message;
45 | } else {
46 | // Send message to output in one of the worker threads.
47 | pool_->Schedule([this, message]() {
48 | output_->Send(message);
49 | });
50 | }
51 | }
52 |
53 | void Done(Task *task) override {
54 | // Stop all worker threads.
55 | delete pool_;
56 | pool_ = nullptr;
57 | }
58 |
59 | private:
60 | // Thread pool for dispatching messages.
61 | ThreadPool *pool_ = nullptr;
62 |
63 | // Output channel.
64 | Channel *output_;
65 | };
66 |
67 | REGISTER_TASK_PROCESSOR("workers", Workers);
68 |
69 | } // namespace task
70 | } // namespace sling
71 |
72 |
--------------------------------------------------------------------------------
/sling/util/asset.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_ASSET_H_
16 | #define SLING_UTIL_ASSET_H_
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 |
23 | namespace sling {
24 |
// Shared asset. Types stored in an AssetManager derive from this class so that
// the manager can delete them through a base class pointer.
class Asset {
 public:
  virtual ~Asset() = default;
};

// Return unique identifier for type. The identifier is the address of a static
// variable that exists once per instantiation of the function template.
typedef size_t TypeID;
template<class T> inline TypeID TypeId() {
  static char signature;
  return reinterpret_cast<TypeID>(&signature);
}

// Asset manager that can hold one shared instance per type id and asset name
// combination. The manager owns the assets and deletes them when they are
// disposed or when the manager is destroyed.
class AssetManager {
 public:
  ~AssetManager() { DisposeAssets(); }

  // Delete all assets.
  void DisposeAssets() {
    for (auto &it : assets_) delete it.second;
    assets_.clear();
  }

  // Return asset for type and name, initializing a new instance the first
  // time the type and name pair is acquired. The init function is only called
  // when no asset is stored for the pair; the manager takes ownership of the
  // object it returns. T must derive from Asset.
  template<class T> const T *Acquire(
      const std::string &name,
      std::function<T *()> init) {
    Key key(TypeId<T>(), name);
    Asset *&a = assets_[key];
    if (a == nullptr) a = init();

    // The key contains the type id of T, so the stored asset was created as a
    // T. static_cast undoes the T* to Asset* conversion, including any pointer
    // adjustment, which reinterpret_cast would not do.
    return static_cast<const T *>(a);
  }

 private:
  // An asset key consists of a type id and an asset name.
  typedef std::pair<TypeID, std::string> Key;

  // Hash function for asset keys combining the hashes of both parts.
  struct KeyHash {
    size_t operator()(const Key &key) const {
      size_t h1 = std::hash<TypeID>()(key.first);
      size_t h2 = std::hash<std::string>()(key.second);
      return h1 ^ h2;
    }
  };

  // Mapping from type id and asset name to asset.
  std::unordered_map<Key, Asset *, KeyHash> assets_;
};
78 |
79 | } // namespace sling
80 |
81 | #endif // SLING_UTIL_ASSET_H_
82 |
83 |
--------------------------------------------------------------------------------
/sling/util/fingerprint.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2010-2014 Google
2 | // Licensed under the Apache License, Version 2.0 (the "License");
3 | // you may not use this file except in compliance with the License.
4 | // You may obtain a copy of the License at
5 | //
6 | // http://www.apache.org/licenses/LICENSE-2.0
7 | //
8 | // Unless required by applicable law or agreed to in writing, software
9 | // distributed under the License is distributed on an "AS IS" BASIS,
10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | // See the License for the specific language governing permissions and
12 | // limitations under the License.
13 |
14 | // This code comes from:
15 | // https://code.google.com/p/or-tools/source/browse/trunk/src/base/fingerprint2011.h
16 | // and was adapted to the needs of this project.
17 |
#include "sling/util/fingerprint.h"

#include <cstring>

#include "sling/base/types.h"
21 |
22 | namespace sling {
23 |
24 | uint64 FingerprintCat(uint64 fp1, uint64 fp2) {
25 | // Two big prime numbers.
26 | const uint64 mul1 = 0xC6A4A7935BD1E995u;
27 | const uint64 mul2 = 0x228876A7198B743u;
28 |
29 | const uint64 a = fp1 * mul1 + fp2 * mul2;
30 |
31 | // Note: The following line also makes sure we never return 0 or 1, because we
32 | // will only add something to 'a' if there are any MSBs (the remaining bits
33 | // after the shift) being 0, in which case wrapping around would not happen.
34 | return a + (~a >> 47);
35 | }
36 |
37 | // This should be better (collision-wise) than the default hash,
38 | // without being much slower. It never returns 0 or 1.
39 | uint64 Fingerprint(const char *bytes, size_t len) {
40 | // Some big prime number.
41 | uint64 fp = 0xA5B85C5E198ED849u;
42 | const char *end = bytes + len;
43 | while (bytes + sizeof(uint64) <= end) {
44 | fp = FingerprintCat(fp, *(reinterpret_cast(bytes)));
45 | bytes += sizeof(uint64);
46 | }
47 | uint64 residual = 0;
48 | while (bytes < end) {
49 | residual = residual << 8 | *reinterpret_cast(bytes);
50 | bytes++;
51 | }
52 |
53 | return FingerprintCat(fp, residual);
54 | }
55 |
56 | uint32 Fingerprint32(const char *bytes, size_t len) {
57 | uint64 fp = Fingerprint(bytes, len);
58 | return fp ^(fp >> 32);
59 | }
60 |
61 | } // namespace sling
62 |
63 |
--------------------------------------------------------------------------------
/sling/util/fingerprint.h:
--------------------------------------------------------------------------------
1 | // Copyright 2010-2014 Google
2 | // Licensed under the Apache License, Version 2.0 (the "License");
3 | // you may not use this file except in compliance with the License.
4 | // You may obtain a copy of the License at
5 | //
6 | // http://www.apache.org/licenses/LICENSE-2.0
7 | //
8 | // Unless required by applicable law or agreed to in writing, software
9 | // distributed under the License is distributed on an "AS IS" BASIS,
10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | // See the License for the specific language governing permissions and
12 | // limitations under the License.
13 |
14 | #include "sling/base/types.h"
15 |
16 | #ifndef SLING_UTIL_FINGERPRINT_H_
17 | #define SLING_UTIL_FINGERPRINT_H_
18 |
19 | namespace sling {
20 |
21 | // Concatenate two fingerprints.
22 | uint64 FingerprintCat(uint64 fp1, uint64 fp2);
23 |
24 | // Compute 64-bit fingerprint for data. This should be better (collision-wise)
25 | // than the default hash, without being much slower. It never returns
26 | // 0 or 1.
27 | uint64 Fingerprint(const char *bytes, size_t len);
28 |
29 | // Compute 32-bit fingerprint by folding 64-bit fingerprint.
30 | uint32 Fingerprint32(const char *bytes, size_t len);
31 |
32 | } // namespace sling
33 |
34 | #endif // SLING_UTIL_FINGERPRINT_H_
35 |
36 |
--------------------------------------------------------------------------------
/sling/util/mutex.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_MUTEX_H_
16 | #define SLING_UTIL_MUTEX_H_
17 |
18 | #include
19 |
20 | namespace sling {
21 |
// Basic mutex wrapper around a std::mutex that adds the Lock/Unlock/TryLock
// method names.
class Mutex : public std::mutex {
 public:
  // Wait for lock and acquire it.
  void Lock() { lock(); }

  // Release mutex.
  void Unlock() { unlock(); }

  // Try to acquire mutex. Returns true if the mutex was acquired.
  bool TryLock() { return try_lock(); }
};

// Lock guard that holds a mutex for the lifetime of the guard object.
class MutexLock {
 public:
  // Constructor that acquires mutex. The mutex must outlive the guard.
  explicit MutexLock(Mutex *lock) : lock_(lock) { lock_->Lock(); }

  // Destructor that releases mutex.
  ~MutexLock() { lock_->Unlock(); }

  // A copy of the guard would release the same mutex a second time, so the
  // guard can be neither copied nor assigned.
  MutexLock(const MutexLock &) = delete;
  MutexLock &operator=(const MutexLock &) = delete;

 private:
  // Lock for guard.
  Mutex *lock_;
};
48 |
49 | } // namespace sling
50 |
51 | #endif // SLING_UTIL_MUTEX_H_
52 |
53 |
--------------------------------------------------------------------------------
/sling/util/random.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_RANDOM_H_
16 | #define SLING_UTIL_RANDOM_H_
17 |
18 | #include
19 |
20 | namespace sling {
21 |
// Random number generator producing uniformly distributed numbers from a
// 64-bit Mersenne Twister.
class Random {
 public:
  // Initialize random number generator. The generator starts with the default
  // seed of std::mt19937_64 until seed() is called.
  Random() : dist_(0.0, 1.0) {}

  // Set seed for random number generator.
  void seed(int seed) { prng_.seed(seed); }

  // Return random number between 0.0 (inclusive) and 1.0 (exclusive).
  float UniformProb() {
    return dist_(prng_);
  }

  // Return uniformly distributed random number r=p*scale+bias, 0<=p<1.
  float UniformFloat(float scale, float bias) {
    return dist_(prng_) * scale + bias;
  }

  // Return uniformly distributed random number between 0 and n (exclusive).
  // The number is the raw generator output modulo n, so n must be positive.
  int UniformInt(int n) {
    return prng_() % n;
  }

 private:
  // Mersenne Twister pseudo-random generator of 64-bit numbers.
  std::mt19937_64 prng_;

  // Uniform distribution over [0.0, 1.0). The value type was lost in the
  // listing; float matches the return type of the methods that use it.
  std::uniform_real_distribution<float> dist_;
};
53 |
54 | } // namespace sling
55 |
56 | #endif // SLING_UTIL_RANDOM_H_
57 |
58 |
--------------------------------------------------------------------------------
/sling/util/snappy.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_SNAPPY_
16 | #define SLING_UTIL_SNAPPY_
17 |
18 | #include "third_party/snappy/snappy.h"
19 | #include "third_party/snappy/snappy-sinksource.h"
20 |
21 | #endif // SLING_UTIL_SNAPPY_
22 |
23 |
--------------------------------------------------------------------------------
/sling/util/sortmap.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_SORTMAP_H_
16 | #define SLING_UTIL_SORTMAP_H_
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | namespace sling {
23 |
// A hash map which can be sorted by value. This implementation is space
// efficient since the sorted array just keeps pointers to the internal
// nodes in the hash map.
//
// K is the key type, V the value type (ordered with operator<), and H the hash
// function for keys. The template header was truncated in the listing; the
// parameters are restored from their use in the body.
template<typename K, typename V, typename H = std::hash<K>>
struct SortableMap {
 public:
  typedef std::unordered_map<K, V, H> Map;
  typedef typename Map::value_type Node;
  typedef std::vector<Node *> Array;

  // Look up value in hash map. A value-initialized entry is inserted if the
  // key is not present.
  V &operator[](const K &key) { return map[key]; }

  // Sort hash map. Rebuilds the array with one pointer per map node, ordered
  // by ascending value. The pointers refer to nodes of the map, so the array
  // must be rebuilt after entries are removed from the map.
  void sort() {
    array.clear();
    array.reserve(map.size());
    for (Node &node : map) array.emplace_back(&node);
    std::sort(array.begin(), array.end(), [](const Node *n1, const Node *n2) {
      return n1->second < n2->second;
    });
  }

  // Key to value mapping.
  Map map;

  // Map nodes ordered by value; valid after sort().
  Array array;
};
49 |
50 | } // namespace sling
51 |
52 | #endif // SLING_UTIL_SORTMAP_H_
53 |
--------------------------------------------------------------------------------
/sling/util/thread.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2013 Google Inc. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/util/thread.h"
16 |
17 | #include "sling/base/logging.h"
18 |
19 | namespace sling {
20 |
21 | void *Thread::ThreadMain(void *arg) {
22 | Thread *thread = static_cast(arg);
23 | thread->Run();
24 | return nullptr;
25 | }
26 |
27 | Thread::Thread() : running_(false) {}
28 | Thread::~Thread() {}
29 |
30 | void Thread::Start() {
31 | CHECK(!running_);
32 | pthread_create(&thread_, nullptr, &ThreadMain, this);
33 | running_ = true;
34 |
35 | // Detach the thread if it is not joinable.
36 | if (!joinable_) {
37 | pthread_detach(thread_);
38 | }
39 | }
40 |
41 | void Thread::Join() {
42 | if (!running_) return;
43 | CHECK(joinable_);
44 |
45 | void *unused;
46 | pthread_join(thread_, &unused);
47 | running_ = false;
48 | }
49 |
50 | void Thread::SetJoinable(bool joinable) {
51 | CHECK(!running_) << "Can't SetJoinable() on a running thread";
52 | joinable_ = true;
53 | }
54 |
55 | bool Thread::IsSelf() const {
56 | return pthread_equal(thread_, pthread_self());
57 | }
58 |
59 | void ClosureThread::Run() {
60 | // Run closure.
61 | closure_();
62 | }
63 |
64 | void WorkerPool::Start(int num_workers, const Worker &worker) {
65 | // Create worker threads.
66 | int first = workers_.size();
67 | for (int i = 0; i < num_workers; ++i) {
68 | workers_.emplace_back([worker, i]() { worker(i); });
69 | }
70 |
71 | // Start worker threads.
72 | for (int i = first; i < workers_.size(); ++i) {
73 | workers_[i].SetJoinable(true);
74 | workers_[i].Start();
75 | }
76 | }
77 |
78 | void WorkerPool::Join() {
79 | for (auto &t : workers_) t.Join();
80 | }
81 |
82 | } // namespace sling
83 |
84 |
--------------------------------------------------------------------------------
/sling/util/threadpool.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "sling/base/logging.h"
16 | #include "sling/util/threadpool.h"
17 |
18 | namespace sling {
19 |
20 | ThreadPool::ThreadPool(int num_workers, int queue_size)
21 | : num_workers_(num_workers), queue_size_(queue_size) {}
22 |
23 | ThreadPool::~ThreadPool() {
24 | // Wait until all tasks have been completed.
25 | Shutdown();
26 |
27 | // Wait until all workers have terminated.
28 | for (auto &t : workers_) t.Join();
29 | }
30 |
31 | void ThreadPool::StartWorkers() {
32 | // Create worker threads.
33 | CHECK(workers_.empty());
34 | for (int i = 0; i < num_workers_; ++i) {
35 | workers_.emplace_back([this]() {
36 | // Keep processing tasks until done.
37 | Task task;
38 | while (FetchTask(&task)) task();
39 | });
40 | }
41 |
42 | // Start worker threads.
43 | for (auto &t : workers_) {
44 | t.SetJoinable(true);
45 | t.Start();
46 | }
47 | }
48 |
49 | void ThreadPool::Schedule(Task &&task) {
50 | std::unique_lock lock(mu_);
51 | while (tasks_.size() >= queue_size_) {
52 | nonfull_.wait(lock);
53 | }
54 | tasks_.push(std::move(task));
55 | nonempty_.notify_one();
56 | }
57 |
58 | bool ThreadPool::FetchTask(Task *task) {
59 | std::unique_lock lock(mu_);
60 | while (tasks_.empty()) {
61 | if (done_) return false;
62 | nonempty_.wait(lock);
63 | }
64 | *task = tasks_.front();
65 | tasks_.pop();
66 | nonfull_.notify_one();
67 | return true;
68 | }
69 |
70 | void ThreadPool::Shutdown() {
71 | // Notify all threads that we are done.
72 | std::lock_guard lock(mu_);
73 | done_ = true;
74 | nonempty_.notify_all();
75 | }
76 |
77 | } // namespace sling
78 |
79 |
--------------------------------------------------------------------------------
/sling/util/threadpool.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_UTIL_THREADPOOL_H_
16 | #define SLING_UTIL_THREADPOOL_H_
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 |
24 | #include "sling/util/thread.h"
25 |
26 | namespace sling {
27 |
28 | // Thread pool for executing tasks using a pool of worker threads.
29 | class ThreadPool {
30 | public:
31 | // Task that can be scheduled for execution. NOTE(review): the std::function signature is not visible here; confirm.
32 | typedef std::function Task;
33 |
34 | // Initialize thread pool.
35 | ThreadPool(int num_workers, int queue_size);
36 |
37 | // Wait for all workers to complete.
38 | ~ThreadPool();
39 |
40 | // Start the worker threads as joinable threads.
41 | void StartWorkers();
42 |
43 | // Schedule task for execution by a worker. Blocks while the queue is full.
44 | void Schedule(Task &&task);
45 |
46 | private:
47 | // Fetch next task. Returns false once shut down and the queue is empty.
48 | bool FetchTask(Task *task);
49 |
50 | // Mark the pool as done and wake all idle workers; does not wait for tasks.
51 | void Shutdown();
52 |
53 | // Worker threads. NOTE(review): element type of workers_ is not visible here; confirm.
54 | int num_workers_;
55 | std::vector workers_;
56 |
57 | // Task queue; Schedule() blocks once it holds queue_size_ pending tasks.
58 | int queue_size_;
59 | std::queue tasks_;
60 |
61 | // Set by Shutdown() to tell workers that no more tasks will be added.
62 | bool done_ = false;
63 |
64 | // Mutex guarding the task queue and done_.
65 | std::mutex mu_;
66 |
67 | // Signalled when a task is queued, and to all waiters on shutdown.
68 | std::condition_variable nonempty_;
69 |
70 | // Signalled when a task is removed, freeing space in the queue.
71 | std::condition_variable nonfull_;
72 | };
73 |
74 | } // namespace sling
75 |
76 | #endif // SLING_UTIL_THREADPOOL_H_
77 |
78 |
--------------------------------------------------------------------------------
/sling/web/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | cc_library(
4 | name = "xml-parser",
5 | srcs = ["xml-parser.cc"],
6 | hdrs = ["xml-parser.h"],
7 | deps = [
8 | ":entity-ref",
9 | "//sling/base",
10 | "//sling/stream:input",
11 | "//sling/string:ctype",
12 | "//sling/util:unicode",
13 | ],
14 | )
15 |
16 | cc_library(
17 | name = "entity-ref",
18 | srcs = ["entity-ref.cc"],
19 | hdrs = ["entity-ref.h"],
20 | deps = [
21 | "//sling/base",
22 | ],
23 | )
24 |
25 |
--------------------------------------------------------------------------------
/sling/web/entity-ref.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef SLING_WEB_ENTITY_REF_H_
16 | #define SLING_WEB_ENTITY_REF_H_
17 |
18 | #include <string>
19 |
20 | #include "sling/base/types.h"
21 |
22 | namespace sling {
23 |
24 | // Parse entity reference. Return -1 on errors.
25 | int ParseEntityRef(const char *str, int len, int *consumed);
26 | int ParseEntityRef(const string &str);
27 |
28 | } // namespace sling
29 |
30 | #endif // SLING_WEB_ENTITY_REF_H_
31 |
32 |
--------------------------------------------------------------------------------
/third_party/bz2lib/BUILD:
--------------------------------------------------------------------------------
1 | # Imported from http://bzip.org/downloads.html
2 |
3 | licenses(["notice"])
4 |
5 | cc_library(
6 | name = "bz2lib",
7 | visibility = ["//visibility:public"],
8 | srcs = [
9 | "blocksort.c",
10 | "bzlib.c",
11 | "compress.c",
12 | "crctable.c",
13 | "decompress.c",
14 | "huffman.c",
15 | "randtable.c",
16 | ],
17 | hdrs = [
18 | "bzlib.h",
19 | "bzlib_private.h",
20 | ],
21 | copts = [
22 | "-Wno-unknown-warning-option",
23 | "-Wno-unused-const-variable",
24 | "-Wno-unused-but-set-variable",
25 | "-Wno-unused-private-field",
26 | "-DBZ_NO_STDIO",
27 | ]
28 | )
29 |
30 |
--------------------------------------------------------------------------------
/third_party/bz2lib/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | --------------------------------------------------------------------------
3 |
4 | This program, "bzip2", the associated library "libbzip2", and all
5 | documentation, are copyright (C) 1996-2010 Julian R Seward. All
6 | rights reserved.
7 |
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions
10 | are met:
11 |
12 | 1. Redistributions of source code must retain the above copyright
13 | notice, this list of conditions and the following disclaimer.
14 |
15 | 2. The origin of this software must not be misrepresented; you must
16 | not claim that you wrote the original software. If you use this
17 | software in a product, an acknowledgment in the product
18 | documentation would be appreciated but is not required.
19 |
20 | 3. Altered source versions must be plainly marked as such, and must
21 | not be misrepresented as being the original software.
22 |
23 | 4. The name of the author may not be used to endorse or promote
24 | products derived from this software without specific prior written
25 | permission.
26 |
27 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
28 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
31 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
33 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 |
39 | Julian Seward, jseward@bzip.org
40 | bzip2/libbzip2 version 1.0.6 of 6 September 2010
41 |
42 | --------------------------------------------------------------------------
43 |
--------------------------------------------------------------------------------
/third_party/jit/BUILD:
--------------------------------------------------------------------------------
1 | # x64 jit assembler.
2 |
3 | package(default_visibility = ["//visibility:public"])
4 |
5 | licenses(["notice"]) # BSD
6 |
7 | exports_files(["LICENSE"])
8 |
9 | cc_library(
10 | name = "types",
11 | hdrs = ["types.h"],
12 | deps = [
13 | "//sling/base",
14 | ],
15 | )
16 |
17 | cc_library(
18 | name = "memory",
19 | hdrs = ["memory.h"],
20 | deps = [
21 | ":types",
22 | "//sling/base",
23 | ],
24 | )
25 |
26 | cc_library(
27 | name = "code",
28 | srcs = ["code.cc"],
29 | hdrs = ["code.h"],
30 | deps = [
31 | ":memory",
32 | ":types",
33 | "//sling/base",
34 | ],
35 | )
36 |
37 | cc_library(
38 | name = "cpu",
39 | srcs = ["cpu.cc"],
40 | hdrs = ["cpu.h"],
41 | deps = [
42 | "//sling/base",
43 | ],
44 | )
45 |
46 | cc_library(
47 | name = "assembler",
48 | srcs = ["assembler.cc"],
49 | hdrs = [
50 | "assembler.h",
51 | "instructions.h",
52 | "registers.h",
53 | "avx512.inc",
54 | ],
55 | deps = [
56 | ":code",
57 | ":cpu",
58 | ":memory",
59 | ":types",
60 | "//sling/base",
61 | ],
62 | )
63 |
64 |
--------------------------------------------------------------------------------
/third_party/jit/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 1994-2006 Sun Microsystems Inc.
2 | All Rights Reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | - Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | - Redistribution in binary form must reproduce the above copyright
12 | notice, this list of conditions and the following disclaimer in the
13 | documentation and/or other materials provided with the distribution.
14 |
15 | - Neither the name of Sun Microsystems or the names of contributors may
16 | be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | The original source code covered by the above license above has been
32 | modified significantly by Google Inc.
33 | Copyright 2012 the V8 project authors. All rights reserved.
34 | Copyright 2017 Google Inc. All rights reserved.
35 |
36 |
--------------------------------------------------------------------------------
/third_party/snappy/BUILD:
--------------------------------------------------------------------------------
1 | # Snappy compression library, see https://github.com/google/snappy
2 |
3 | package(default_visibility = ["//visibility:public"])
4 |
5 | licenses(["notice"]) # BSD
6 |
7 | exports_files(["LICENSE"])
8 |
9 | cc_library(
10 | name = "bits",
11 | hdrs = ["bits.h"],
12 | deps = [
13 | "//sling/base",
14 | ],
15 | )
16 |
17 | cc_library(
18 | name = "endian",
19 | hdrs = ["endian.h"],
20 | deps = [
21 | "//sling/base",
22 | ],
23 | )
24 |
25 | cc_library(
26 | name = "snappy",
27 | srcs = [
28 | "snappy.cc",
29 | "snappy-sinksource.cc",
30 | ],
31 | hdrs = [
32 | "snappy.h",
33 | "snappy-sinksource.h",
34 | ],
35 | deps = [
36 | ":bits",
37 | ":endian",
38 | "//sling/base",
39 | "//sling/util:varint",
40 | ],
41 | copts = [
42 | "-Wno-sign-compare",
43 | ],
44 | )
45 |
46 |
--------------------------------------------------------------------------------
/third_party/snappy/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2011, Google Inc.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 | * Redistributions in binary form must reproduce the above
11 | copyright notice, this list of conditions and the following disclaimer
12 | in the documentation and/or other materials provided with the
13 | distribution.
14 | * Neither the name of Google Inc. nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/third_party/zlib/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | licenses(["notice"]) # BSD/MIT-like license (for zlib)
4 |
5 | cc_library(
6 | name = "zlib",
7 | srcs = [
8 | "adler32.c",
9 | "compress.c",
10 | "crc32.c",
11 | "deflate.c",
12 | "gzclose.c",
13 | "gzlib.c",
14 | "gzread.c",
15 | "gzwrite.c",
16 | "infback.c",
17 | "inffast.c",
18 | "inflate.c",
19 | "inftrees.c",
20 | "trees.c",
21 | "uncompr.c",
22 | "zutil.c",
23 | ],
24 | hdrs = [
25 | "crc32.h",
26 | "deflate.h",
27 | "gzguts.h",
28 | "inffast.h",
29 | "inffixed.h",
30 | "inflate.h",
31 | "inftrees.h",
32 | "trees.h",
33 | "zconf.h",
34 | "zlib.h",
35 | "zutil.h",
36 | ],
37 | copts = [
38 | "-O3",
39 | "-Wno-unused-variable",
40 | "-Wno-unused-private-field",
41 | "-Wno-implicit-function-declaration",
42 | ],
43 | )
44 |
45 |
--------------------------------------------------------------------------------
/third_party/zlib/gzclose.c:
--------------------------------------------------------------------------------
1 | /* gzclose.c -- zlib gzclose() function
2 | * Copyright (C) 2004, 2010 Mark Adler
3 | * For conditions of distribution and use, see copyright notice in zlib.h
4 | */
5 |
6 | #include "gzguts.h"
7 |
8 | /* gzclose() is in a separate file so that it is linked in only if it is used.
9 | That way the other gzclose functions can be used instead to avoid linking in
10 | unneeded compression or decompression routines. */
11 | int ZEXPORT gzclose(file)
12 |     gzFile file;
13 | {
14 | #ifdef NO_GZCOMPRESS
15 |     /* NO_GZCOMPRESS build: every stream is closed through the read path. */
16 |     return gzclose_r(file);
17 | #else
18 |     /* Reject a null handle, then dispatch on the mode stored in the state. */
19 |     if (file == NULL)
20 |         return Z_STREAM_ERROR;
21 |     if (((gz_statep)file)->mode == GZ_READ)
22 |         return gzclose_r(file);
23 |     return gzclose_w(file);
24 | #endif
25 | }
26 |
--------------------------------------------------------------------------------
/third_party/zlib/inffast.h:
--------------------------------------------------------------------------------
1 | /* inffast.h -- header to use inffast.c
2 | * Copyright (C) 1995-2003, 2010 Mark Adler
3 | * For conditions of distribution and use, see copyright notice in zlib.h
4 | */
5 |
6 | /* WARNING: this file should *not* be used by applications. It is
7 | part of the implementation of the compression library and is
8 | subject to change. Applications should only use zlib.h.
9 | */
10 |
11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
12 |
--------------------------------------------------------------------------------
/tools/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | cc_binary(
4 | name = "embed-data",
5 | srcs = ["embed-data.cc"],
6 | deps = [
7 | "//sling/util:elf-writer",
8 | ],
9 | )
10 |
11 | cc_binary(
12 | name = "codex",
13 | srcs = ["codex.cc"],
14 | deps = [
15 | "//sling/base",
16 | "//sling/file",
17 | "//sling/file:recordio",
18 | "//sling/file:posix",
19 | "//sling/frame",
20 | "//sling/stream:memory",
21 | "//sling/string:printf",
22 | "//sling/util:fingerprint",
23 | ],
24 | )
25 |
26 | cc_binary(
27 | name = "index",
28 | srcs = ["index.cc"],
29 | deps = [
30 | "//sling/base",
31 | "//sling/file",
32 | "//sling/file:recordio",
33 | "//sling/file:posix",
34 | ],
35 | )
36 |
37 | cc_binary(
38 | name = "snaps",
39 | srcs = ["snaps.cc"],
40 | deps = [
41 | "//sling/base",
42 | "//sling/file",
43 | "//sling/file:posix",
44 | "//sling/frame:serialization",
45 | "//sling/frame:snapshot",
46 | "//sling/frame:store",
47 | ],
48 | )
49 |
50 |
--------------------------------------------------------------------------------
/tools/buildall.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Build all packages with -c opt; extra arguments are passed on to bazel.
3 | bazel build -c opt "$@" \
4 |   sling/base:* \
5 |   sling/file:* \
6 |   sling/frame:* \
7 |   sling/http:* \
8 |   sling/myelin:* \
9 |   sling/myelin/kernel:* \
10 |   sling/myelin/generator:* \
11 |   sling/myelin/cuda:* \
12 |   sling/nlp/document:* \
13 |   sling/nlp/embedding:* \
14 |   sling/nlp/kb:* \
15 |   sling/nlp/silver:* \
16 |   sling/nlp/parser:* \
17 |   sling/nlp/parser/tools:* \
18 |   sling/nlp/wiki:* \
19 |   sling/pyapi:* \
20 |   sling/stream:* \
21 |   sling/string:* \
22 |   sling/task:* \
23 |   sling/util:* \
24 |   sling/web:* \
25 |   tools:*
26 |
27 |
--------------------------------------------------------------------------------
/tools/docv1to2.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 |
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Convert SLING documents from version 1 to version 2.
17 |
18 | import sling
19 | import sys
20 |
21 | # Check arguments.
22 | if len(sys.argv) != 3:
23 | print("usage:", sys.argv[0], "", "")
24 | sys.exit(1)
25 |
26 | # Intialize commons store.
27 | commons = sling.Store()
28 | commons.parse("""
29 | {=document =/s/document}
30 | {=url =/s/document/url}
31 | {=title =/s/document/title}
32 | {=text =/s/document/text}
33 | {=tokens =/s/document/tokens}
34 | {=mention =/s/document/mention}
35 | {=theme =/s/document/theme}
36 | {=token =/s/token}
37 | {=index =/s/token/index}
38 | {=start =/s/token/start}
39 | {=size =/s/token/length}
40 | {=break =/s/token/break}
41 | {=word =/s/token/text}
42 | {=phrase =/s/phrase}
43 | {=begin =/s/phrase/begin}
44 | {=length =/s/phrase/length}
45 | {=evokes =/s/phrase/evokes}
46 | """)
47 | commons.freeze()
48 |
49 | # Convert documents.
50 | num_docs = 0
51 | fin = sling.RecordReader(sys.argv[1])
52 | fout = sling.RecordWriter(sys.argv[2])
53 | for key, value in fin:
54 | store = sling.Store(commons)
55 | f = store.parse(value)
56 | fout.write(key, f.data(binary=True))
57 | num_docs += 1
58 |
59 | fin.close()
60 | fout.close()
61 | print(num_docs, "documents converted")
62 |
63 |
--------------------------------------------------------------------------------
/tools/embed.bzl:
--------------------------------------------------------------------------------
1 | # Compile embedded data files into ELF object files.
2 |
3 | def _genembed_impl(ctx):
4 |   """Runs the embedded data compiler over srcs to produce the output object."""
5 |   # Paths of all files provided by the srcs labels, in label order.
6 |   src_paths = [f.path for f in ctx.files.srcs]
7 |
8 |   # Command line: <embed-data> -o <output> <src>...
9 |   output = ctx.outputs.out
10 |   ctx.actions.run(
11 |     executable = ctx.executable._embed_data_compiler,
12 |     arguments = ["-o", output.path] + src_paths,
13 |     inputs = ctx.files.srcs,
14 |     outputs = [output],
15 |     progress_message = "Embedding %s" % ctx.label.name,
16 |   )
17 |
18 | genembed = rule(  # Rule that turns srcs into a single <name>.o object file.
19 | implementation = _genembed_impl,
20 | attrs = {
21 | "srcs": attr.label_list(  # Data files to embed.
22 | allow_files = True
23 | ),
24 | "_embed_data_compiler": attr.label(  # Private attribute: the embedding tool.
25 | default = Label("//tools:embed-data"),
26 | cfg = "host",
27 | executable = True,
28 | ),
29 | },
30 | outputs = {  # Predeclared output, named after the target.
31 | "out": "%{name}.o"
32 | },
33 | )
34 |
35 | def embed_data(name, srcs):
36 |   """Embeds srcs in cc_library `name` through a `name`_genembed object target."""
37 |   obj = name + "_genembed"
38 |   genembed(name = obj, srcs = srcs)
39 |   # alwayslink: link the object even when no symbol in it is referenced.
40 |   native.cc_library(
41 |     name = name,
42 |     srcs = [obj],
43 |     alwayslink = True,
44 |     linkstatic = True,
45 |   )
46 |
47 |
--------------------------------------------------------------------------------
/tools/snaps.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Create SLING store snapshot files.
16 |
17 | #include <iostream>
18 | #include <vector>
19 |
20 | #include "sling/base/init.h"
21 | #include "sling/base/flags.h"
22 | #include "sling/base/logging.h"
23 | #include "sling/base/types.h"
24 | #include "sling/file/file.h"
25 | #include "sling/frame/serialization.h"
26 | #include "sling/frame/snapshot.h"
27 | #include "sling/frame/store.h"
28 |
29 | DEFINE_bool(check, false, "Check for valid snapshot");
30 | DEFINE_bool(verify, false, "Check snapshot by reading it into memory");
31 |
32 | using namespace sling;
33 |
34 | int main(int argc, char *argv[]) {
35 |   InitProgram(&argc, &argv);
36 |
37 |   // Collect the files to process from the command-line arguments.
38 |   std::vector<string> files;
39 |   for (int i = 1; i < argc; ++i) {
40 |     File::Match(argv[i], &files);
41 |   }
42 |   // Per file: --check validates, --verify reads, default writes a snapshot.
43 |   for (const string &file : files) {
44 |     if (FLAGS_check) {
45 |       bool valid = Snapshot::Valid(file);
46 |       std::cout << file << ": " << (valid ? "valid" : "INVALID") << "\n";
47 |     } else if (FLAGS_verify) {
48 |       std::cout << file << ": " << std::flush;
49 |       std::cout << "load " << std::flush;
50 |       Store store;
51 |       CHECK(Snapshot::Read(&store, file));  // Abort if the read fails.
52 |       std::cout << "done\n" << std::flush;
53 |     } else {
54 |       std::cout << file << ": " << std::flush;
55 |       File::Delete(Snapshot::Filename(file));  // Remove existing snapshot file.
56 |       std::cout << "load " << std::flush;
57 |       Store store;
58 |       LoadStore(file, &store);
59 |       std::cout << "freeze " << std::flush;
60 |       store.AllocateSymbolHeap();
61 |       store.Freeze();
62 |       std::cout << "snapshot " << std::flush;
63 |       CHECK(Snapshot::Write(&store, file));  // Abort if the write fails.
64 |       std::cout << "done\n" << std::flush;
65 |     }
66 |   }
67 |
68 |   return 0;
69 | }
70 |
--------------------------------------------------------------------------------