├── releases ├── .gitignore ├── pypi │ ├── .gitignore │ ├── MANIFEST.in │ ├── tests │ │ └── test_import.py │ ├── Changes │ ├── gen.sh │ └── setup.py ├── cpan │ ├── .gitignore │ ├── t │ │ └── import.t │ ├── UDPipe.xs │ ├── Changes │ ├── gen.sh │ └── README ├── test_data │ └── test.model ├── java │ └── include │ │ ├── darwin │ │ └── jni_md.h │ │ └── win32 │ │ └── jni_md.h └── release-model.sh ├── src ├── .clang_complete ├── utils │ ├── AUTHORS │ ├── README │ ├── new_unique_ptr.h │ ├── compressor.h │ ├── url_detector.h │ ├── getwhole.h │ ├── getpara.h │ ├── common.h │ └── iostreams.h ├── rest_server │ └── microrestd │ │ ├── AUTHORS │ │ ├── pugixml │ │ ├── README │ │ ├── AUTHORS │ │ └── LICENSE │ │ ├── libmicrohttpd │ │ ├── README │ │ └── autoinit_funcs.h │ │ ├── pugixml.h │ │ ├── rest_server │ │ ├── version.cpp │ │ ├── rest_service.h │ │ ├── version.h │ │ ├── response_generator.h │ │ ├── xml_response_generator.cpp │ │ ├── json_response_generator.cpp │ │ ├── xml_response_generator.h │ │ ├── json_response_generator.h │ │ └── string_piece.h │ │ ├── microrestd.h │ │ ├── README │ │ ├── Makefile.include │ │ └── CHANGES.md ├── .gitignore ├── parsito │ ├── AUTHORS │ ├── CHANGES │ ├── transition │ │ ├── transition_system_link2.h │ │ ├── transition_system_swap.h │ │ ├── transition_system_projective.h │ │ └── transition_system.h │ ├── configuration │ │ ├── configuration.h │ │ ├── configuration.cpp │ │ ├── value_extractor.h │ │ └── node_extractor.h │ ├── tree │ │ ├── tree.h │ │ └── node.h │ ├── version │ │ └── version.h │ ├── Makefile.include │ ├── network │ │ └── activation_function.h │ ├── parser │ │ ├── parser_nn_trainer.h │ │ └── parser.h │ └── README ├── unilib │ ├── AUTHORS │ ├── Makefile.include │ ├── version.cpp │ ├── README │ └── version.h ├── morphodita │ ├── AUTHORS │ ├── morpho │ │ ├── Makefile │ │ ├── external_morpho_encoder.h │ │ ├── morpho_prefix_guesser_encoder.h │ │ ├── english_morpho_encoder.h │ │ ├── morpho_statistical_guesser_encoder.h │ │ ├── 
english_morpho_guesser_encoder.h │ │ ├── czech_morpho_encoder.h │ │ ├── generic_morpho_encoder.h │ │ ├── raw_morpho_dictionary_reader.h │ │ ├── morpho_statistical_guesser.h │ │ ├── external_morpho_encoder.cpp │ │ ├── small_stringops.h │ │ ├── morpho_ids.h │ │ └── morpho_statistical_guesser_trainer.h │ ├── tokenizer │ │ ├── Makefile │ │ ├── generic_tokenizer_factory_encoder.cpp │ │ ├── generic_tokenizer_factory_encoder.h │ │ ├── czech_tokenizer_factory_encoder.cpp │ │ ├── czech_tokenizer_factory_encoder.h │ │ ├── vertical_tokenizer.h │ │ ├── generic_tokenizer.h │ │ ├── generic_tokenizer_factory.cpp │ │ ├── generic_tokenizer_factory.h │ │ ├── tokenizer_factory.h │ │ ├── english_tokenizer.h │ │ ├── czech_tokenizer_factory.h │ │ ├── gru_tokenizer_network.cpp │ │ ├── czech_tokenizer_factory.cpp │ │ ├── gru_tokenizer_factory.h │ │ ├── tokenizer_ids.h │ │ ├── tokenizer.cpp │ │ ├── gru_tokenizer_trainer.h │ │ └── czech_tokenizer.h │ ├── derivator │ │ ├── derivator_dictionary_encoder.h │ │ ├── derivator_dictionary.h │ │ └── derivator.h │ ├── tagset_converter │ │ ├── identity_tagset_converter.cpp │ │ ├── identity_tagset_converter.h │ │ ├── pdt_to_conll2009_tagset_converter.h │ │ ├── strip_lemma_id_tagset_converter.h │ │ └── strip_lemma_comment_tagset_converter.h │ ├── tagger │ │ └── elementary_features_encoder.h │ ├── version │ │ └── version.h │ └── README ├── .editorconfig ├── common.h ├── trainer │ ├── training_failure.cpp │ ├── training_failure.h │ └── trainer.h ├── tokenizer │ ├── multiword_splitter_trainer.h │ └── multiword_splitter.h ├── version │ └── version.h ├── sentence │ ├── multiword_token.h │ ├── empty_node.h │ └── word.h └── model │ ├── model.h │ ├── model.cpp │ └── pipeline.h ├── bindings ├── csharp │ ├── examples │ │ ├── .gitignore │ │ └── Makefile │ ├── .gitignore │ └── Makefile ├── java │ ├── examples │ │ ├── .gitignore │ │ └── Makefile │ ├── .gitignore │ └── udpipe_java.i ├── perl │ ├── .gitignore │ ├── udpipe_perl.i │ ├── std_common.i │ ├── Makefile │ └── 
examples │ │ └── run_udpipe.pl ├── python │ ├── .gitignore │ └── udpipe_python.i └── common │ ├── udpipe_stl.i │ └── Makefile.common ├── doc ├── .gitignore ├── t2t_align_percent_cells_right.conf ├── t2t_docsys │ ├── t2t_docsys_web.conf │ ├── t2t_docsys_manual.conf │ ├── t2t_docsys_striplevel.conf │ ├── AUTHORS │ ├── .editorconfig │ ├── t2t_docsys_html_addlevel.conf │ ├── t2t_docsys.conf │ ├── t2t_docsys.sty │ └── Makefile.include ├── manual_model_ud-1.2_readme.t2t ├── manual_model_ud-2.0_readme.t2t ├── manual_model_ud-2.3_readme.t2t ├── manual_model_ud-2.4_readme.t2t ├── manual_model_ud-2.5_readme.t2t ├── readme_md.t2t ├── manual_model_about.t2t ├── manual_bindings_csharp_install.t2t ├── manual_bindings_csharp_api.t2t ├── manual_online.t2t ├── manual_bindings_java.t2t ├── manual_bindings_perl.t2t ├── manual_bindings_csharp.t2t ├── manual_bindings_python.t2t ├── manual_bindings_perl_install.t2t ├── manual_bindings_python_api.t2t ├── manual_models.t2t ├── manual_bindings_java_install.t2t ├── manual_bindings_perl_api.t2t ├── readme.t2t ├── manual_bindings_python_install.t2t ├── manual_bindings_java_api.t2t ├── manual_about.t2t └── manual_model_ud-2.2-conll18.t2t ├── training ├── ud-1.2-raw-texts │ ├── .gitignore │ └── raw_texts.sh ├── models-ud-1.2 │ ├── .gitignore │ ├── binaries.sh │ ├── results.sh │ ├── train_all.sh │ └── train.sh ├── scripts │ ├── .gitignore │ └── Makefile ├── ud-1.2-embeddings │ ├── .gitignore │ ├── gen_all.sh │ └── gen.sh ├── ud-2.0-embeddings │ ├── .gitignore │ ├── gen_all.sh │ ├── binaries.sh │ └── gen.sh ├── ud-2.5-embeddings │ ├── .gitignore │ ├── gen_all.sh │ ├── binaries.sh │ └── gen.sh ├── models-ud-2.0 │ ├── binaries.sh │ ├── train_all.sh │ ├── .gitignore │ └── train.sh ├── ud-2.0-raw-texts │ ├── .gitignore │ ├── grc_proiel.readme │ └── raw_texts.sh ├── ud-2.5-raw-texts │ ├── grc_proiel.readme │ ├── .gitignore │ └── raw_texts.sh ├── ud-2.5 │ ├── langs_sizes_licenses.sh │ ├── langs_sizes.sh │ ├── langs_sizes_licenses_overview.sh │ ├── 
conllu_split.pl │ ├── get.sh │ ├── langs │ ├── de_hdt.fix │ └── .gitignore ├── ud-1.2 │ ├── stats.sh │ ├── .gitignore │ └── get.sh ├── models-ud-2.6-tokenizer-chosen │ └── from_ud_2.5.sh ├── ud-2.0 │ └── .gitignore ├── ud-2.2 │ ├── conllu_split.pl │ ├── get.sh │ └── .gitignore ├── ud-2.3 │ ├── conllu_split.pl │ ├── .gitignore │ └── get.sh ├── ud-2.4 │ ├── conllu_split.pl │ ├── get.sh │ └── .gitignore └── ud-2.6 │ ├── conllu_split.pl │ ├── get.sh │ └── langs ├── web ├── lindat-service │ ├── flags │ │ ├── .gitignore │ │ └── gen.sh │ ├── .gitignore │ ├── icon.png │ ├── footer.php │ ├── .htaccess │ ├── demo.php │ ├── Makefile │ └── info.php └── ufal │ ├── udpipe.png │ ├── udpipe_small.png │ ├── .gitignore │ ├── warning.hdr │ └── Makefile ├── tests ├── .gitignore ├── ascii_only.py ├── Makefile └── udpipe_bundle.cpp ├── src_lib_only ├── .gitignore └── Makefile ├── AUTHORS ├── .github └── workflows │ └── ascii_only.yml ├── Dockerfile └── scripts ├── update_microrestd.sh ├── update_unilib.sh ├── update_utils.sh └── wrap_morphodita_model.py /releases/.gitignore: -------------------------------------------------------------------------------- 1 | /releases/ 2 | -------------------------------------------------------------------------------- /src/.clang_complete: -------------------------------------------------------------------------------- 1 | -std=c++11 -I. 
2 | -------------------------------------------------------------------------------- /releases/pypi/.gitignore: -------------------------------------------------------------------------------- 1 | ufal.udpipe 2 | -------------------------------------------------------------------------------- /bindings/csharp/examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | -------------------------------------------------------------------------------- /bindings/java/examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.md 3 | *.txt 4 | -------------------------------------------------------------------------------- /training/ud-1.2-raw-texts/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | -------------------------------------------------------------------------------- /web/lindat-service/flags/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /training/models-ud-1.2/.gitignore: -------------------------------------------------------------------------------- 1 | /*/ 2 | udpipe 3 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | udpipe_bundle 3 | *.exe 4 | -------------------------------------------------------------------------------- /web/lindat-service/.gitignore: -------------------------------------------------------------------------------- 1 | branding 2 | about.html 3 | 
-------------------------------------------------------------------------------- /src/utils/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | -------------------------------------------------------------------------------- /training/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | normalize_form 3 | -------------------------------------------------------------------------------- /releases/cpan/.gitignore: -------------------------------------------------------------------------------- 1 | Ufal-UDPipe/ 2 | Ufal-UDPipe-*.tar.gz 3 | -------------------------------------------------------------------------------- /training/ud-1.2-embeddings/.gitignore: -------------------------------------------------------------------------------- 1 | word2vec 2 | *.vectors 3 | -------------------------------------------------------------------------------- /doc/t2t_align_percent_cells_right.conf: -------------------------------------------------------------------------------- 1 | %!preproc: '\| ([0-9.]*%)' '| \1' 2 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | -------------------------------------------------------------------------------- /bindings/perl/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | /Ufal/ 3 | /auto/ 4 | udpipe_perl.cpp 5 | -------------------------------------------------------------------------------- /src_lib_only/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | merge_sources/merge_sources 3 | udpipe.cpp 4 | -------------------------------------------------------------------------------- /web/ufal/udpipe.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/udpipe/HEAD/web/ufal/udpipe.png -------------------------------------------------------------------------------- /bindings/python/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | /ufal/ 3 | udpipe_python.cpp 4 | ufal_udpipe.so 5 | -------------------------------------------------------------------------------- /releases/cpan/t/import.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 1; 2 | 3 | require_ok('Ufal::UDPipe'); 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | Jana Straková 3 | -------------------------------------------------------------------------------- /bindings/csharp/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | /Ufal/ 3 | libudpipe_csharp.so 4 | udpipe_csharp.cpp 5 | -------------------------------------------------------------------------------- /training/ud-2.0-embeddings/.gitignore: -------------------------------------------------------------------------------- 1 | normalize_form 2 | udpipe 3 | word2vec 4 | *.vectors 5 | -------------------------------------------------------------------------------- /training/ud-2.5-embeddings/.gitignore: -------------------------------------------------------------------------------- 1 | *.vectors 2 | normalize_form 3 | udpipe 4 | word2vec 5 | -------------------------------------------------------------------------------- /web/ufal/udpipe_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/udpipe/HEAD/web/ufal/udpipe_small.png 
-------------------------------------------------------------------------------- /bindings/java/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | /cz/ 3 | libudpipe_java.so 4 | udpipe.jar 5 | udpipe_java.cpp 6 | -------------------------------------------------------------------------------- /doc/t2t_docsys/t2t_docsys_web.conf: -------------------------------------------------------------------------------- 1 | %!includeconf: t2t_docsys.conf 2 | 3 | %!preproc: '^%web% ' '' 4 | -------------------------------------------------------------------------------- /web/lindat-service/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/udpipe/HEAD/web/lindat-service/icon.png -------------------------------------------------------------------------------- /doc/t2t_docsys/t2t_docsys_manual.conf: -------------------------------------------------------------------------------- 1 | %!includeconf: t2t_docsys.conf 2 | 3 | %!preproc: '^%manual% ' '' 4 | -------------------------------------------------------------------------------- /doc/t2t_docsys/t2t_docsys_striplevel.conf: -------------------------------------------------------------------------------- 1 | %!preproc: '^==' '=' 2 | %!preproc: '==(|\[[^]]*])$' '=\1' 3 | -------------------------------------------------------------------------------- /releases/test_data/test.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/udpipe/HEAD/releases/test_data/test.model -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | /.build/ 2 | rest_server/udpipe_server 3 | libudpipe.* 4 | udpipe 5 | *.exe 6 | *.swp 7 | -------------------------------------------------------------------------------- 
/src/parsito/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | Jana Straková 3 | -------------------------------------------------------------------------------- /src/unilib/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | Jana Straková 3 | -------------------------------------------------------------------------------- /src/morphodita/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | Jana Straková 3 | -------------------------------------------------------------------------------- /training/models-ud-1.2/binaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make -C ../../src -j4 udpipe && cp ../../src/udpipe . 4 | -------------------------------------------------------------------------------- /training/models-ud-2.0/binaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make -C ../../src -j4 udpipe && cp ../../src/udpipe . 
4 | -------------------------------------------------------------------------------- /releases/pypi/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Changes 2 | include LICENSE 3 | graft examples 4 | graft tests 5 | graft ufal 6 | -------------------------------------------------------------------------------- /training/ud-2.0-raw-texts/.gitignore: -------------------------------------------------------------------------------- 1 | da.txt 2 | fi_ftb.txt 3 | grc_proiel.txt 4 | la_proiel.txt 5 | sl_sst.txt 6 | -------------------------------------------------------------------------------- /doc/t2t_docsys/AUTHORS: -------------------------------------------------------------------------------- 1 | Milan Straka 2 | 3 | txt2tags: Aurelio Jargas 4 | -------------------------------------------------------------------------------- /web/ufal/.gitignore: -------------------------------------------------------------------------------- 1 | api_reference.html 2 | install.html 3 | models.html 4 | online.html 5 | udpipe.html 6 | user.html 7 | -------------------------------------------------------------------------------- /training/ud-2.0-raw-texts/grc_proiel.readme: -------------------------------------------------------------------------------- 1 | Under CC BY-SA license, from Perseus Digital Library http://www.perseus.tufts.edu. 2 | -------------------------------------------------------------------------------- /training/ud-2.5-raw-texts/grc_proiel.readme: -------------------------------------------------------------------------------- 1 | Under CC BY-SA license, from Perseus Digital Library http://www.perseus.tufts.edu. 
2 | -------------------------------------------------------------------------------- /web/lindat-service/footer.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /training/ud-1.2-embeddings/gen_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for d in ../../ud-1.2/*/; do 4 | l=`basename $d` 5 | qsub -cwd -b y ./gen.sh $l 6 | done 7 | -------------------------------------------------------------------------------- /training/ud-2.0-embeddings/gen_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for d in ${@:-../ud-2.0/*/}; do 4 | l=`basename $d` 5 | qsub -cwd -b y -j y ./gen.sh $l 6 | done 7 | -------------------------------------------------------------------------------- /bindings/perl/udpipe_perl.i: -------------------------------------------------------------------------------- 1 | %module "Ufal::UDPipe" 2 | 3 | %runtime %{ 4 | #ifdef seed 5 | #undef seed 6 | #endif 7 | %} 8 | 9 | %include "../common/udpipe.i" 10 | -------------------------------------------------------------------------------- /releases/cpan/UDPipe.xs: -------------------------------------------------------------------------------- 1 | #include "EXTERN.h" 2 | #include "perl.h" 3 | #include "XSUB.h" 4 | 5 | MODULE = Ufal::UDPipe::XS PACKAGE = Ufal::UDPipe 6 | VERSIONCHECK: DISABLE 7 | -------------------------------------------------------------------------------- /training/ud-2.5-embeddings/gen_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for d in ${@:-../ud-2.5/*/}; do 4 | l=`basename $d` 5 | qsub -q cpu-troja.q -cwd -b y -j y ./gen.sh $l 6 | done 7 | -------------------------------------------------------------------------------- /src/.editorconfig: 
-------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{c,cpp,h}] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | insert_final_newline = true 9 | -------------------------------------------------------------------------------- /doc/t2t_docsys/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.py] 4 | indent_style = tab 5 | indent_size = 8 6 | end_of_line = lf 7 | charset = utf-8 8 | insert_final_newline = true 9 | -------------------------------------------------------------------------------- /training/ud-2.0-embeddings/binaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make -C ../../src -j4 udpipe && cp ../../src/udpipe . 4 | make -C ../scripts -j4 normalize_form && cp ../scripts/normalize_form . 5 | -------------------------------------------------------------------------------- /training/ud-2.5-embeddings/binaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make -C ../../src -j4 udpipe && cp ../../src/udpipe . 4 | make -C ../scripts -j4 normalize_form && cp ../scripts/normalize_form . 
5 | -------------------------------------------------------------------------------- /training/ud-2.5/langs_sizes_licenses.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for d in */; do 4 | echo $d >&2 5 | echo ${d%/} $(grep -cP "^\d+\t" $d*train.conllu) $(grep "^License:" $d/README.*) 6 | done 7 | -------------------------------------------------------------------------------- /web/lindat-service/.htaccess: -------------------------------------------------------------------------------- 1 | DirectoryIndex run.php 2 | 3 | RewriteEngine on 4 | 5 | RewriteRule api$ http://127.0.0.1:8001/ [QSA,P,L] 6 | RewriteRule api/(.*)$ http://127.0.0.1:8001/$1 [QSA,P,L] 7 | -------------------------------------------------------------------------------- /training/models-ud-1.2/results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for d in ${@:-*/}; do 4 | l=`basename $d` 5 | 6 | echo $l 7 | [ -f "$l/$l.test" ] && sed 's/^/ /' $l/$l.test 8 | echo 9 | done 10 | -------------------------------------------------------------------------------- /web/ufal/warning.hdr: -------------------------------------------------------------------------------- 1 |
Be aware that this page describes UDPipe 1. You might also be interested in visiting the UDPipe 2 page.
2 | -------------------------------------------------------------------------------- /doc/manual_model_ud-1.2_readme.t2t: -------------------------------------------------------------------------------- 1 | Universal Dependencies 1.2 Models for UDPipe 2 | ============================================ 3 | 4 | %!include: manual_model_about.t2t 5 | 6 | %!include: manual_model_ud-1.2.t2t 7 | -------------------------------------------------------------------------------- /doc/manual_model_ud-2.0_readme.t2t: -------------------------------------------------------------------------------- 1 | Universal Dependencies 2.0 Models for UDPipe 2 | ============================================ 3 | 4 | %!include: manual_model_about.t2t 5 | 6 | %!include: manual_model_ud-2.0.t2t 7 | -------------------------------------------------------------------------------- /doc/manual_model_ud-2.3_readme.t2t: -------------------------------------------------------------------------------- 1 | Universal Dependencies 2.3 Models for UDPipe 2 | ============================================ 3 | 4 | %!include: manual_model_about.t2t 5 | 6 | %!include: manual_model_ud-2.3.t2t 7 | -------------------------------------------------------------------------------- /doc/manual_model_ud-2.4_readme.t2t: -------------------------------------------------------------------------------- 1 | Universal Dependencies 2.4 Models for UDPipe 2 | ============================================ 3 | 4 | %!include: manual_model_about.t2t 5 | 6 | %!include: manual_model_ud-2.4.t2t 7 | -------------------------------------------------------------------------------- /doc/manual_model_ud-2.5_readme.t2t: -------------------------------------------------------------------------------- 1 | Universal Dependencies 2.5 Models for UDPipe 2 | ============================================ 3 | 4 | %!include: manual_model_about.t2t 5 | 6 | %!include: manual_model_ud-2.5.t2t 7 | 
-------------------------------------------------------------------------------- /doc/readme_md.t2t: -------------------------------------------------------------------------------- 1 | UDPipe 1 2 | [![Compile Status](https://github.com/ufal/udpipe/actions/workflows/compile.yml/badge.svg)](https://github.com/ufal/udpipe/actions/workflows/compile.yml) 3 | 4 | %!include: readme.t2t 5 | -------------------------------------------------------------------------------- /training/ud-2.5-raw-texts/.gitignore: -------------------------------------------------------------------------------- 1 | de_hdt.txt 2 | fi_ftb.txt 3 | fr_spoken.txt 4 | fro_srcmf.txt 5 | grc_proiel.txt 6 | ko_kaist.txt 7 | la_proiel.txt 8 | no_nynorsklia.txt 9 | sl_sst.txt 10 | te_mtg.txt 11 | -------------------------------------------------------------------------------- /training/ud-2.5/langs_sizes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for d in */; do 4 | echo $d >&2 5 | echo ${d%/} $(grep -cP "^\d+\t" $d*train.conllu) 6 | done | sort -rnk2 >langs_sizes 7 | 8 | cut -d" " -f1 langs_sizes >langs 9 | -------------------------------------------------------------------------------- /training/ud-1.2/stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for d in *; do 4 | [ -d "$d" ] || continue 5 | 6 | echo -n "$d" 7 | ../scripts/nonprojective_stats.pl $d/*.conllu | sed 's/^/\t/' | tr -d "\n" 8 | echo 9 | done 10 | -------------------------------------------------------------------------------- /doc/manual_model_about.t2t: -------------------------------------------------------------------------------- 1 | UDPipe 2 | ====== 3 | 4 | To use this model, you need UDPipe, an open-source tool for tokenization, 5 | tagging, lemmatization and parsing of CoNLL-U files. Please visit the UDPipe 6 | website http://ufal.mff.cuni.cz/udpipe for more information. 
7 | -------------------------------------------------------------------------------- /training/ud-2.5/langs_sizes_licenses_overview.sh: -------------------------------------------------------------------------------- 1 | sed 's/^\([^_]*\)[^ ]* \([^ ]*\)/\1 \2 &/' $(dirname $0)/langs_sizes_licenses | sort -nrk2 | sort -s -k1,1 | cut -d" " -f1,3- | sed 's/License:/ &/; s/ License:.*NC/NC&/; s/ /\t/; s/ /\t/; s/ /\t/;s/ /\t/' | column -nts " " 2 | 3 | -------------------------------------------------------------------------------- /releases/pypi/tests/test_import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import unittest 4 | 5 | class TestImport(unittest.TestCase): 6 | def test_import(self): 7 | import ufal.udpipe 8 | 9 | self.assertTrue(True) 10 | 11 | if __name__ == '__main__': 12 | unittest.main() 13 | -------------------------------------------------------------------------------- /doc/manual_bindings_csharp_install.t2t: -------------------------------------------------------------------------------- 1 | C# UDPipe Bindings 2 | ================== 3 | 4 | Binary C# bindings are available in UDPipe binary packages. 5 | 6 | To compile C# bindings manually, run ``make`` in the ``bindings/csharp`` 7 | directory, optionally with the options described in UDPipe Installation. 
8 | -------------------------------------------------------------------------------- /training/models-ud-1.2/train_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | [ -x udpipe ] || { echo Missing udpipe >&2; exit 1; } 4 | 5 | ls="$@" 6 | [ -z "$ls" ] && ls=`awk '{print $1}' params_tagger` 7 | for l in $ls; do 8 | mkdir -p $l 9 | qsub $SGE_ARGS -cwd -b y -o $l/$l.log -j y ./train.sh $l 10 | done 11 | -------------------------------------------------------------------------------- /training/models-ud-2.0/train_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | [ -x udpipe ] || { echo Missing udpipe >&2; exit 1; } 4 | 5 | ls="$@" 6 | [ -z "$ls" ] && ls=`awk '{print $1}' params_parser` 7 | for l in $ls; do 8 | mkdir -p $l 9 | qsub -q troja*@* $SGE_ARGS -cwd -b y -o $l/$l.log -j y ./train.sh $l 10 | done 11 | -------------------------------------------------------------------------------- /.github/workflows/ascii_only.yml: -------------------------------------------------------------------------------- 1 | name: ASCII test 2 | 3 | on: push 4 | 5 | jobs: 6 | ascii_only: 7 | name: ASCII test 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Test that sources are ASCII only 14 | run: python3 tests/ascii_only.py 15 | -------------------------------------------------------------------------------- /doc/t2t_docsys/t2t_docsys_html_addlevel.conf: -------------------------------------------------------------------------------- 1 | %!postproc: '<[Hh]5>([1-9][^<]*)' '
\1
' 2 | %!postproc: '<[Hh]4>([1-9][^<]*)' '
\1
' 3 | %!postproc: '<[Hh]3>([1-9][^<]*)' '

\1

' 4 | %!postproc: '<[Hh]2>([1-9][^<]*)' '

\1

' 5 | %!postproc: '<[Hh]1>([1-9][^<]*)' '

\1

' 6 | -------------------------------------------------------------------------------- /bindings/python/udpipe_python.i: -------------------------------------------------------------------------------- 1 | %module(package="ufal") udpipe 2 | 3 | %pythonbegin %{ 4 | # __version__ = 5 | %} 6 | 7 | #define HAVE_CUSTOM_BYTES 8 | %typemap(out) std::vector* { 9 | $result = PyBytes_FromStringAndSize((const char*) $1->data(), $1->size()); 10 | delete $1; 11 | } 12 | 13 | %include "../common/udpipe.i" 14 | -------------------------------------------------------------------------------- /doc/manual_bindings_csharp_api.t2t: -------------------------------------------------------------------------------- 1 | C# UDPipe Bindings 2 | ================== 3 | 4 | UDPipe library bindings is available in the ``Ufal.UDPipe`` namespace. 5 | 6 | The bindings is a straightforward conversion of the ``C++`` bindings API. 7 | The bindings requires native C++ library ``libudpipe_csharp`` (called 8 | ``udpipe_csharp`` on Windows). 9 | -------------------------------------------------------------------------------- /training/ud-2.0-raw-texts/raw_texts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | data() { 4 | case "$2" in 5 | none) echo;; 6 | *) zcat /net/data/W2C/W2C_WEB/2011-08/$2.txt.gz | head -c 500000;; 7 | esac > $1.txt 8 | } 9 | 10 | nodata() { 11 | true 12 | } 13 | 14 | data da dan 15 | data fi_ftb fin 16 | data la_proiel lat 17 | data sl_sst slv 18 | -------------------------------------------------------------------------------- /doc/manual_online.t2t: -------------------------------------------------------------------------------- 1 | UDPipe Online 2 | ============= 3 | 4 | UDPipe Web Application is available at http://lindat.mff.cuni.cz/services/udpipe/ 5 | using [LINDAT/CLARIN infrastructure http://lindat.cz]. 
6 | 7 | UDPipe REST Web Service is also available, with the API documentation available at 8 | http://lindat.mff.cuni.cz/services/udpipe/api-reference.php. 9 | -------------------------------------------------------------------------------- /doc/manual_bindings_java.t2t: -------------------------------------------------------------------------------- 1 | Java UDPipe Bindings 2 | ==================== 3 | 4 | == Installation ==[java_installation] 5 | %!include: manual_bindings_java_install.t2t 6 | 7 | 8 | == Java Bindings API ==[java_bindings_api] 9 | %!include: manual_bindings_java_api.t2t 10 | 11 | 12 | == C++ Bindings API ==[cpp_bindings_api] 13 | %!include: manual_bindings_api.t2t 14 | -------------------------------------------------------------------------------- /doc/manual_bindings_perl.t2t: -------------------------------------------------------------------------------- 1 | Perl UDPipe Bindings 2 | ==================== 3 | 4 | == Installation ==[perl_installation] 5 | %!include: manual_bindings_perl_install.t2t 6 | 7 | 8 | == Perl Bindings API ==[perl_bindings_api] 9 | %!include: manual_bindings_perl_api.t2t 10 | 11 | 12 | == C++ Bindings API ==[cpp_bindings_api] 13 | %!include: manual_bindings_api.t2t 14 | -------------------------------------------------------------------------------- /doc/manual_bindings_csharp.t2t: -------------------------------------------------------------------------------- 1 | C# UDPipe Bindings 2 | ================== 3 | 4 | == Installation ==[csharp_installation] 5 | %!include: manual_bindings_csharp_install.t2t 6 | 7 | 8 | == C# Bindings API ==[csharp_bindings_api] 9 | %!include: manual_bindings_csharp_api.t2t 10 | 11 | 12 | == C++ Bindings API ==[cpp_bindings_api] 13 | %!include: manual_bindings_api.t2t 14 | -------------------------------------------------------------------------------- /doc/manual_bindings_python.t2t: -------------------------------------------------------------------------------- 1 | Python UDPipe Bindings 2 
| ====================== 3 | 4 | == Installation ==[python_installation] 5 | %!include: manual_bindings_python_install.t2t 6 | 7 | 8 | == Python Bindings API ==[python_bindings_api] 9 | %!include: manual_bindings_python_api.t2t 10 | 11 | 12 | == C++ Bindings API ==[cpp_bindings_api] 13 | %!include: manual_bindings_api.t2t 14 | -------------------------------------------------------------------------------- /training/ud-2.0-embeddings/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | [ $# -lt 1 ] && { echo Usage: $0 language_code >&2; exit 1; } 4 | 5 | ./udpipe --output=horizontal none ../ud-2.0/"$1"/*train.conllu | ./normalize_form >"$1".in 6 | ./word2vec -train "$1".in -output "$1".skip.forms.50.vectors -cbow 0 -size 50 -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 1 -binary 0 -iter 15 -min-count 2 7 | rm "$1".in 8 | -------------------------------------------------------------------------------- /training/ud-2.5-embeddings/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | [ $# -lt 1 ] && { echo Usage: $0 language_code >&2; exit 1; } 4 | 5 | ./udpipe --output=horizontal none ../ud-2.5/"$1"/*train.conllu | ./normalize_form >"$1".in 6 | ./word2vec -train "$1".in -output "$1".skip.forms.50.vectors -cbow 0 -size 50 -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 1 -binary 0 -iter 15 -min-count 2 7 | rm "$1".in 8 | -------------------------------------------------------------------------------- /training/ud-1.2/.gitignore: -------------------------------------------------------------------------------- 1 | /ar/ 2 | /bg/ 3 | /cs/ 4 | /cu/ 5 | /da/ 6 | /de/ 7 | /el/ 8 | /en/ 9 | /es/ 10 | /et/ 11 | /eu/ 12 | /fa/ 13 | /fi/ 14 | /fi_ftb/ 15 | /fr/ 16 | /ga/ 17 | /got/ 18 | /grc/ 19 | /grc_proiel/ 20 | /he/ 21 | /hi/ 22 | /hr/ 23 | /hu/ 24 | /id/ 25 | /it/ 26 | /ja_ktc/ 27 | /la/ 28 | /la_itt/ 29 | /la_proiel/ 30 | /nl/ 31 | /no/ 32 | /pl/ 33 | 
/pt/ 34 | /ro/ 35 | /sl/ 36 | /sv/ 37 | /ta/ 38 | -------------------------------------------------------------------------------- /web/lindat-service/demo.php: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/pugixml/README: -------------------------------------------------------------------------------- 1 | Pugixml sources http://pugixml.org/ 2 | 3 | The pugixml is licensed under MIT. The pugixml license and the 4 | authors are in LICENSE and AUTHORS files. 5 | 6 | We have performed the following changes to the pugixml: 7 | - the ufal::microrestd namespace was added 8 | - name matching (nodes, attributes) was changed to ignore XML namespaces 9 | - XPATH and STL modules were completely removed 10 | -------------------------------------------------------------------------------- /training/ud-2.5-raw-texts/raw_texts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | data() { 4 | case "$2" in 5 | none) echo;; 6 | *) zcat /net/data/W2C/W2C_WEB/2011-08/$2.txt.gz | head -c 500000;; 7 | esac > $1.txt 8 | } 9 | 10 | data de_hdt deu 11 | data fi_ftb fin 12 | data fr_spoken fra 13 | data fro_srcmf fra 14 | data ko_kaist kor 15 | data la_proiel lat 16 | data no_bokmaal nor 17 | data no_nynorsklia nno 18 | data sl_sst slv 19 | data te_mtg tel 20 | -------------------------------------------------------------------------------- /training/ud-1.2/get.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wget --content-disposition 'https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-1548/ud-treebanks-v1.2.tgz?sequence=1&isAllowed=y' 4 | tar xf ud-treebanks-v1.2.tgz 5 | rm ud-treebanks-v1.2.tgz 6 | 7 | for tb in universal-dependencies-1.2/*/*-ud-dev.conllu; do 8 | dir=`dirname $tb` 9 | code=`basename $tb`
10 | code=${code%%-*} 11 | 12 | mv $dir $code 13 | done 14 | rmdir universal-dependencies-1.2 15 | -------------------------------------------------------------------------------- /releases/cpan/Changes: -------------------------------------------------------------------------------- 1 | Revision history for Ufal-UDPipe 2 | 3 | 1.4.0.1 [20 Nov 25] 4 | Update UDPipe to version 1.4.0. 5 | 6 | 1.3.1.1 [15 Nov 23] 7 | Update UDPipe to version 1.3.1. 8 | 9 | 1.3.0.1 [16 Feb 23] 10 | Update UDPipe to 1.3.0. 11 | 12 | 1.2.0.1 [01 Aug 17] 13 | Update UDPipe to 1.2.0. 14 | 15 | 1.1.0.1 [29 May 17] 16 | Update UDPipe to 1.1.0. 17 | 18 | 1.0.0.1 [27 May 16] 19 | First version of bindings for UDPipe 1.0.0. 20 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/libmicrohttpd/README: -------------------------------------------------------------------------------- 1 | Subset of libmicrohttpd sources http://www.gnu.org/software/libmicrohttpd/. 2 | 3 | The libmicrohttpd is licensed under LGPL. The libmicrohttpd license and the 4 | authors are in COPYING and AUTHORS files. 5 | 6 | We have performed the following changes to the libmicrohttpd: 7 | - code was converted to C++ 8 | - the ufal::microrestd::libmicrohttpd namespace was added 9 | - we use compile-time configuration (see MHD_config.h) instead 10 | of configure script 11 | -------------------------------------------------------------------------------- /doc/manual_bindings_perl_install.t2t: -------------------------------------------------------------------------------- 1 | Perl UDPipe Bindings 2 | ==================== 3 | 4 | The Perl bindings are available as ``Ufal-UDPipe`` package on CPAN. 5 | 6 | To compile Perl bindings manually, run ``make`` in the ``bindings/perl`` 7 | directory, optionally with the options described in UDPipe Installation. 8 | Perl 5.10 and later is supported. 
9 | 10 | Path to the include headers of the required Perl version must be specified 11 | in the ``PERL_INCLUDE`` variable using 12 | ``` make PERL_INCLUDE=path_to_Perl_includes 13 | -------------------------------------------------------------------------------- /doc/manual_bindings_python_api.t2t: -------------------------------------------------------------------------------- 1 | Python UDPipe Bindings 2 | ====================== 3 | 4 | UDPipe library bindings are available in the 5 | [``ufal.udpipe`` http://pypi.python.org/pypi/ufal.udpipe] module. 6 | 7 | The bindings are a straightforward conversion of the ``C++`` bindings API, 8 | only the native ``bytes`` type is used instead of the ``C++`` ``Bytes`` type. 9 | 10 | You might also be interested in a contributed package 11 | [spacy-udpipe https://github.com/TakeLab/spacy-udpipe] which wraps UDPipe 12 | with spaCy API. 13 | -------------------------------------------------------------------------------- /training/models-ud-2.6-tokenizer-chosen/from_ud_2.5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for d in ../ud-2.6/*/; do 4 | d=$(basename ${d%/}) 5 | lines=$(diff <(awk -F "[\t]" '/# new/{print} /^[0-9]*\t/{print $1,$2,(($10 ~ /SpaceAfter=No/) ? "SA=No" : "")}' ../ud-2.6/$d/*.conllu) <(awk -F "[\t]" '/# new/{print} /^[0-9]*\t/{print $1,$2,(($10 ~ /SpaceAfter=No/) ? "SA=No" : "")}' ../ud-2.5/$d/*.conllu) | wc -l) 6 | [ "$lines" -eq 0 ] && { echo -n Copying "" >&2; grep "^$d " ../models-ud-2.5-tokenizer-chosen/params_tokenizer; } 7 | echo $d $lines >&2 8 | done 9 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/pugixml.h: -------------------------------------------------------------------------------- 1 | // This file is part of MicroRestD .
2 | // 3 | // Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of 4 | // Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla Public 7 | // License, v. 2.0. If a copy of the MPL was not distributed with this 8 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #pragma once 11 | 12 | #include "pugixml/pugixml.h" 13 | -------------------------------------------------------------------------------- /training/models-ud-1.2/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | l="$1"; shift 4 | 5 | mkdir -p $l 6 | ./udpipe --train --tokenizer=`grep "^$l " params_tokenizer | sed "s/^$l //"` --tagger=`grep "^$l " params_tagger | sed "s/^$l //"` --parser=`grep "^$l " params_parser | sed "s/^$l //"` --heldout=../ud-1.2/$l/$l-ud-dev.conllu $l/$l.model ../ud-1.2/$l/$l-ud-train*.conllu 7 | ./udpipe --accuracy --tokenize --tag --parse "$l/$l.model" <../ud-1.2/$l/$l-ud-dev.conllu >"$l/$l.dev" 8 | ./udpipe --accuracy --tokenize --tag --parse "$l/$l.model" <../ud-1.2/$l/$l-ud-test.conllu >"$l/$l.test" 9 | -------------------------------------------------------------------------------- /doc/manual_models.t2t: -------------------------------------------------------------------------------- 1 | UDPipe Models 2 | ============= 3 | 4 | Like any supervised machine-learning tool, UDPipe needs a trained linguistic model. 5 | This section describes the available models. 
6 | 7 | 8 | %!include: manual_model_ud-2.5.t2t 9 | 10 | 11 | %!include: manual_model_ud-2.4.t2t 12 | 13 | 14 | %!include: manual_model_ud-2.3.t2t 15 | 16 | 17 | %!include: manual_model_ud-2.2-conll18.t2t 18 | 19 | 20 | %!include: manual_model_ud-2.0.t2t 21 | 22 | 23 | %!include: manual_model_ud-2.0-conll17.t2t 24 | 25 | 26 | %!include: manual_model_ud-1.2.t2t 27 | -------------------------------------------------------------------------------- /doc/manual_bindings_java_install.t2t: -------------------------------------------------------------------------------- 1 | Java UDPipe Bindings 2 | ==================== 3 | 4 | Binary Java bindings are available in UDPipe binary packages. 5 | 6 | To compile Java bindings manually, run ``make`` in the ``bindings/java`` 7 | directory, optionally with the options described in UDPipe Installation. 8 | Java 6 and newer is supported. 9 | 10 | The Java installation specified in the environment variable ``JAVA_HOME`` is 11 | used. If the environment variable does not exist, the ``JAVA_HOME`` can be 12 | specified using 13 | ``` make JAVA_HOME=path_to_Java_installation 14 | -------------------------------------------------------------------------------- /doc/manual_bindings_perl_api.t2t: -------------------------------------------------------------------------------- 1 | Perl UDPipe Bindings 2 | ==================== 3 | 4 | UDPipe library bindings are available in the 5 | [``Ufal::UDPipe`` http://search.cpan.org/~straka/Ufal-UDPipe/] package. 6 | The classes can be imported into the current namespace using the ``:all`` 7 | export tag. 8 | 9 | The bindings are a straightforward conversion of the ``C++`` bindings API. 10 | Vectors do not have a native Perl interface, see ``Ufal::UDPipe::Words`` for 11 | reference. Static methods and enumerations are available only through the 12 | module, not through an object instance.
13 | -------------------------------------------------------------------------------- /doc/t2t_docsys/t2t_docsys.conf: -------------------------------------------------------------------------------- 1 | %!options: --css-sugar 2 | 3 | % Allow links in verbatim. Keep in HTML, remove otherwise. 4 | %!postproc(html): '\[([^]#]*) (#[^]]*)]' '\1' 5 | %!postproc(tex): '\[([^]#]*) (#[^]]*)]' '¿\\href{\2}{\1}¡' 6 | %!postproc(tex): '_(?=[^}\\]*}¡)' '\\char95' 7 | %!postproc: '\[([^]#]*) (\\?#[^] ]*)]' '\1' 8 | 9 | % Remove local links in TXT target. 10 | %!preproc(txt): '\[([^]#]*) (#[^] ]*)]' '\1' 11 | 12 | % Remove empty lines 13 | %!postproc: '^\s*$' '' 14 | 15 | % Handle ' -- ' 16 | %!postproc(html): ' -- ' ' – ' 17 | %!preproc(txt): ' -- ' ' - ' 18 | -------------------------------------------------------------------------------- /src/unilib/Makefile.include: -------------------------------------------------------------------------------- 1 | # This file is part of UniLib . 2 | # 3 | # Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of 4 | # Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public 7 | # License, v. 2.0. If a copy of the MPL was not distributed with this 8 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | UNILIB_VERSION := 3.3.1 11 | UNILIB_UNICODE_VERSION := 15.0.0 12 | 13 | UNILIB_OBJECTS := unicode uninorms unistrip utf8 utf16 version 14 | -------------------------------------------------------------------------------- /bindings/common/udpipe_stl.i: -------------------------------------------------------------------------------- 1 | // This file is part of UDPipe . 2 | // 3 | // Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of 4 | // Mathematics and Physics, Charles University in Prague, Czech Republic.
5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla Public 7 | // License, v. 2.0. If a copy of the MPL was not distributed with this 8 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef UDPIPE_STL_SWG_ 11 | #define UDPIPE_STL_SWG_ 12 | 13 | %include "std_string.i" 14 | %include "std_vector.i" 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /doc/readme.t2t: -------------------------------------------------------------------------------- 1 | UDPipe 1 2 | ======== 3 | 4 | %!include: manual_about.t2t 5 | 6 | UDPipe website http://ufal.mff.cuni.cz/udpipe contains download links 7 | of both the released packages and trained models, hosts documentation and 8 | offers online web service. 9 | 10 | UDPipe development repository http://github.com/ufal/udpipe is hosted 11 | on GitHub. 12 | 13 | //Third-party contribution:// Instructions how to build UDPipe REST server as 14 | Docker image is here: http://github.com/samisalkosuo/udpipe-rest-server-docker. 15 | Instructions how to train UDPipe language models using a Docker image is also 16 | there. 17 | -------------------------------------------------------------------------------- /src/morphodita/morpho/Makefile: -------------------------------------------------------------------------------- 1 | # This file is part of MorphoDiTa . 2 | # 3 | # Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of 4 | # Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public 7 | # License, v. 2.0. If a copy of the MPL was not distributed with this 8 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | morpho/english_morpho_guesser.cpp: %.cpp: %.rl 11 | ragel $< -T0 -o $@ && sed '1d; /^#line [0-9]/d; /^static const int [^ ]*_en_main = [0-9]*;$$/d' -i $@ 12 | -------------------------------------------------------------------------------- /releases/java/include/darwin/jni_md.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#)jni_md.h 1.19 05/11/17 3 | * 4 | * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 5 | * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 6 | */ 7 | 8 | #ifndef _JAVASOFT_JNI_MD_H_ 9 | #define _JAVASOFT_JNI_MD_H_ 10 | 11 | #define JNIEXPORT __attribute__((visibility("default"))) 12 | #define JNIIMPORT 13 | #define JNICALL 14 | 15 | #if defined(__LP64__) && __LP64__ /* for -Wundef */ 16 | typedef int jint; 17 | #else 18 | typedef long jint; 19 | #endif 20 | typedef long long jlong; 21 | typedef signed char jbyte; 22 | 23 | #endif /* !_JAVASOFT_JNI_MD_H_ */ 24 | -------------------------------------------------------------------------------- /bindings/csharp/examples/Makefile: -------------------------------------------------------------------------------- 1 | # This file is part of UDPipe . 2 | # 3 | # Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of 4 | # Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public 7 | # License, v. 2.0. If a copy of the MPL was not distributed with this 8 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | TARGETS = RunUDPipe.exe 11 | 12 | all: $(TARGETS) 13 | 14 | %.exe: %.cs 15 | mcs $< $(wildcard ../Ufal/UDPipe/*) 16 | 17 | .PHONY: clean 18 | clean: 19 | rm -rf $(TARGETS) 20 | -------------------------------------------------------------------------------- /web/lindat-service/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS=.htaccess about.html api-reference.php bootstrap-select.min.css bootstrap-select.min.js 2 | TARGETS+=demo.php filesaver.min.js fill-using-params.js footer.php header.php js-treex-view.min.js icon.png info.php udpipe.css run.php 3 | all: $(TARGETS) 4 | 5 | refresh: 6 | $(MAKE) -C ../../doc/ web 7 | 8 | about.html: refresh 9 | sed -n '/^
]*>/
/' >$@ 10 | 11 | install: $(TARGETS) 12 | rsync -avc $(TARGETS) udpipe:udpipe/www 13 | 14 | install-flags: 15 | rsync -av flags/*.png udpipe:udpipe/www/flags/ 16 | 17 | .PHONY: clean 18 | clean: 19 | rm -f about.html 20 | -------------------------------------------------------------------------------- /releases/java/include/win32/jni_md.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1996, 1998, Oracle and/or its affiliates. All rights reserved. 3 | * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 4 | * 5 | * 6 | * 7 | * 8 | * 9 | * 10 | * 11 | * 12 | * 13 | * 14 | * 15 | * 16 | * 17 | * 18 | * 19 | * 20 | * 21 | * 22 | * 23 | * 24 | */ 25 | 26 | #ifndef _JAVASOFT_JNI_MD_H_ 27 | #define _JAVASOFT_JNI_MD_H_ 28 | 29 | #define JNIEXPORT __declspec(dllexport) 30 | #define JNIIMPORT __declspec(dllimport) 31 | #define JNICALL __stdcall 32 | 33 | typedef long jint; 34 | typedef __int64 jlong; 35 | typedef signed char jbyte; 36 | 37 | #endif /* !_JAVASOFT_JNI_MD_H_ */ 38 | -------------------------------------------------------------------------------- /training/ud-1.2-embeddings/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | [ $# -lt 1 ] && { echo Usage: $0 language_code >&2; exit 1; } 4 | 5 | cat ../../ud-1.2/"$1"/*train*.conllu | grep -v -e "^#" -e "^[0-9]*-" | cut -f2 | perl -e '$w=""; while(<>) {chomp; if (length $_) {$w .= (length $w ? " " : "") . 
$_;} else {print "$w\n"; $w="";}} print "$w\n" if length $w' > "$1".in 6 | ./word2vec -train "$1".in -output "$1".skip.forms.50.vectors -cbow 0 -size 50 -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 12 -binary 0 -iter 15 -min-count 2 7 | #./word2vec -train "$1".in -output "$1".cbow.forms.50.vectors -cbow 1 -size 50 -window 8 -negative 0 -hs 1 -sample 1e-1 -threads 12 -binary 0 -iter 15 -min-count 2 8 | rm "$1".in 9 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | // This file is part of UDPipe . 2 | // 3 | // Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of 4 | // Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla Public 7 | // License, v. 2.0. If a copy of the MPL was not distributed with this 8 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #pragma once 11 | 12 | #include "utils/common.h" 13 | #include "utils/string_piece.h" 14 | 15 | namespace ufal { 16 | namespace udpipe { 17 | 18 | using namespace utils; 19 | 20 | } // namespace udpipe 21 | } // namespace ufal 22 | -------------------------------------------------------------------------------- /src/utils/README: -------------------------------------------------------------------------------- 1 | UFAL C++ Utils 2 | ============== 3 | 4 | UFAL C++ Utils is a small C++ cross-platform library used in several UFAL 5 | projects. It is released under MPL 2.0 license (http://www.mozilla.org/MPL/2.0/) 6 | and is versioned using Semantic Versioning (http://semver.org/). 7 | 8 | The supported platforms are Linux, OS X and Windows, supported compilers are 9 | gcc, clang, tdm-gcc on Windows and Visual C++ 2015 or later on Windows. 
10 | 11 | Copyright 2015 by Institute of Formal and Applied Linguistics, Faculty of 12 | Mathematics and Physics, Charles University in Prague, Czech Republic. 13 | 14 | UFAL C++ Utils repository http://github.com/ufal/cpp_utils is hosted on GitHub. 15 | -------------------------------------------------------------------------------- /web/lindat-service/flags/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | git clone --depth=1 --branch=pages-source https://github.com/UniversalDependencies/docs 6 | git clone --depth=1 https://github.com/UniversalDependencies/docs-automation 7 | 8 | cat docs-automation/codes_and_flags.yaml | grep -e "^[^ ]" -e "flag:" | sed ' 9 | s/:$//; s/ /_/g; s/.*/\L&/; N; s/\n\s*flag://; s/["'"'"']//g; 10 | ' | while read name code; do 11 | cp docs/flags/svg/$code.svg $name.svg 12 | done 13 | 14 | rm -rf docs docs-automation 15 | 16 | for svg in *.svg; do 17 | echo $svg 18 | inkscape $svg -o ${svg%.svg}.png -h 32 19 | done 20 | rm *.svg 21 | 22 | cp old_east_slavic.png old_russian.png 23 | 24 | echo All done 25 | -------------------------------------------------------------------------------- /bindings/java/examples/Makefile: -------------------------------------------------------------------------------- 1 | # This file is part of UDPipe . 2 | # 3 | # Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of 4 | # Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public 7 | # License, v. 2.0. If a copy of the MPL was not distributed with this 8 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | TARGETS = RunUDPipe.class 11 | 12 | all: $(TARGETS) 13 | 14 | %.class: %.java 15 | javac -cp .. $< 16 | 17 | %: %.class 18 | LD_LIBRARY_PATH=.. java -cp .:.. 
$* $(ARGS) 19 | 20 | .PHONY: clean 21 | clean: 22 | rm -rf $(TARGETS) 23 | -------------------------------------------------------------------------------- /doc/manual_bindings_python_install.t2t: -------------------------------------------------------------------------------- 1 | Python UDPipe Bindings 2 | ====================== 3 | 4 | The Python bindings are available as ``ufal.udpipe`` package on PyPI. 5 | 6 | To compile Python bindings manually, run ``make`` in the ``bindings/python`` 7 | directory, optionally with options described in UDPipe Installation. Both 8 | Python 2.6+ and Python 3+ are supported. 9 | 10 | Path to the include headers of the required Python version must be specified 11 | in the ``PYTHON_INCLUDE`` variable using 12 | ``` make PYTHON_INCLUDE=path_to_Python_includes 13 | 14 | You might also be interested in a contributed package 15 | [spacy-udpipe https://github.com/TakeLab/spacy-udpipe] which wraps UDPipe 16 | with spaCy API. 17 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/pugixml/AUTHORS: -------------------------------------------------------------------------------- 1 | Arseny Kapoulkine 2 | 3 | Acknowledgments 4 | pugixml could not be developed without the help from many people; some of them 5 | are listed in this section. If you've played a part in pugixml development and 6 | you can not find yourself on this list, I'm truly sorry; please send me an 7 | e-mail so I can fix this. 8 | 9 | Thanks to Kristen Wegner for pugxml parser, which was used as a basis for 10 | pugixml. 11 | 12 | Thanks to Neville Franks for contributions to pugxml parser. 13 | 14 | Thanks to Artyom Palvelev for suggesting a lazy gap contraction approach. 15 | 16 | Thanks to Vyacheslav Egorov for documentation proofreading. 
17 | -------------------------------------------------------------------------------- /src/morphodita/tokenizer/Makefile: -------------------------------------------------------------------------------- 1 | # This file is part of MorphoDiTa . 2 | # 3 | # Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of 4 | # Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public 7 | # License, v. 2.0. If a copy of the MPL was not distributed with this 8 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | TOKENIZERS=$(patsubst %,tokenizer/%_tokenizer.cpp,czech english generic ragel) 11 | 12 | $(TOKENIZERS): %.cpp: %.rl tokenizer/ragel_tokenizer.rl 13 | ragel $< -T1 -o $@ && sed '1d; /^#line [0-9]/d; /^static const int [^ ]*_en_main = [0-9]*;$$/d' -i $@ 14 | -------------------------------------------------------------------------------- /src/rest_server/microrestd/rest_server/version.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of MicroRestD . 2 | // 3 | // Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of 4 | // Mathematics and Physics, Charles University in Prague, Czech Republic. 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla Public 7 | // License, v. 2.0. If a copy of the MPL was not distributed with this 8 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #include "version.h" 11 | 12 | namespace ufal { 13 | namespace microrestd { 14 | 15 | // Returns current version. 
16 | version version::current() { 17 | return {1, 2, 4, ""}; 18 | } 19 | 20 | } // namespace microrestd 21 | } // namespace ufal 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # build: `docker build -t udpipe/server .` 2 | # run: `docker run --rm -it -p 8080:8080 udpipe/server` 3 | # 4 | # see also https://github.com/samisalkosuo/udpipe-rest-server-docker 5 | # 6 | FROM ubuntu:18.04 7 | 8 | ENV MODEL_FILE_NAME test.model 9 | ENV MODEL_NAME test 10 | ENV MODEL_DESC test description 11 | 12 | RUN apt-get -q update && \ 13 | apt-get -q install -y curl gcc g++ build-essential && \ 14 | g++ --version 15 | 16 | ADD src /udpipe/src 17 | ADD releases/test_data/${MODEL_FILE_NAME} /models/ 18 | WORKDIR /udpipe/src/rest_server 19 | 20 | RUN cd /udpipe/src && BITS=64 MODE=release make -j4 server 21 | 22 | EXPOSE 8080 23 | CMD ./udpipe_server "8080" "${MODEL_NAME}" "${MODEL_NAME}" "/models/${MODEL_FILE_NAME}" "${MODEL_DESC}" 24 | -------------------------------------------------------------------------------- /src/parsito/CHANGES: -------------------------------------------------------------------------------- 1 | Version 1.1.1-devel 2 | ------------------- 3 | - Add Adam optimizer. 4 | - Add ReLU activation. 5 | - The created models are not supported by older versions. 6 | - Fix dropout implementation. 7 | - Add Xavier initialization. 8 | - Add single_root option. 9 | - On Windows, the file paths are now UTF-8 encoded, instead of ANSI. 10 | This change affects the API, binary arguments, and program outputs. 11 | - The Windows binaries are now compiled with VS 2019, older systems 12 | than Windows 7 are no longer supported. 13 | 14 | 15 | Version 1.1.0 [04 Jan 2016] 16 | --------------------------- 17 | - Implement optional beam search during decoding.
18 | 19 | 20 | Version 1.0.0 [04 Dec 2015] 21 | --------------------------- 22 | - Initial public release. 23 | -------------------------------------------------------------------------------- /web/ufal/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS=udpipe.html online.html install.html models.html user.html api_reference.html 2 | all: $(TARGETS) 3 | 4 | refresh: 5 | $(MAKE) -C ../../doc/ web 6 | 7 | udpipe.html: refresh 8 | sed -e '1rwarning.hdr' -e '1,//d; /
$@ 9 | 10 | online.html install.html models.html user.html api_reference.html:%: refresh 11 | sed -e '1rwarning.hdr' -e '1,//d; /^