├── perl
└── lib
│ ├── ISO
│ ├── LMF.pm
│ └── LMF
│ │ ├── EntityBase.pm
│ │ ├── LexicalResource.pm
│ │ ├── Lexicon.pm
│ │ ├── WordForm.pm
│ │ └── LexicalEntry.pm
│ ├── Lingua
│ └── AOT
│ │ ├── MorphDict
│ │ ├── FormSpec.pm
│ │ ├── AccentParadigm.pm
│ │ ├── Form.pm
│ │ ├── MorphVariant.pm
│ │ ├── Paradigm.pm
│ │ └── Gramtab.pm
│ │ └── test.pl
│ └── OpenCorpora
│ └── Dict
│ ├── SimpleReader.pm
│ └── Entry.pm
├── lib
├── .htaccess
├── timer.php
├── lib_mail.php
└── header_ajax.php
├── anaphora
├── NE_extract
│ ├── bad_noun.txt
│ ├── pronouns.txt
│ ├── run_toma.sh
│ ├── kwtypes.proto
│ ├── run_parse.sh
│ ├── maindic.gzt
│ ├── config.proto
│ └── facttypes.proto
├── ana_test.pairs
├── features
│ └── runF.sh
├── export_pairs.py
├── learning
│ ├── baseline.py
│ ├── learn.py
│ └── scorer.py
├── ana_test.groups
└── pairs.py
├── migrations
├── .htaccess
├── 20140101000000_initial_db.php
├── 20150703113519_add_pool_proto_name.php
├── 20160203212016_multiword_types.php
├── 20150610120658_merge_fails_comments.php
├── 20160124093035_add_prop_order.php
├── 20150628112302_turn_game_on_for_all.php
├── 20160130085413_more_on_multiwords.php
├── 20160110211650_remove_old_ne_prop.php
├── 20151209163951_add_ne_book_moderator.php
├── 20151210150743_add_moderator_column_to_ne_par.php
├── 20140930101915_add_timer.php
├── 20190504181939_add_sentence_quality.php
├── 20160716091852_ne_annot_number_per_tagset.php
├── 20160122154639_obj_property_multiple_values.php
├── 20200625120954_long_good_sentences.php
├── 20151028150709_move_object_type_to2_level.php
├── 20150720124123_last_dict_revision.php
├── 20160109110445_ne_objects_properties.php
├── 20140923233544_add_permission_ne_moderator.php
├── 20151105153227_many_entities2_many_mentions.php
├── 20171203130148_drop_sentence_check.php
├── 20140830004643_named_entities_event_log.php
├── 20140828143321_named_entities_comments.php
├── 20151107191437_color_for_types.php
├── 20190416073732_add_user_generated_dict_revisions.php
├── 20150420150929_new_achievements_table.php
├── 20150709223738_simplify_user_rating.php
├── 20150603164038_remove_old_achievements.php
├── 20150623182816_remove_user_meta_options.php
├── 20151108101851_many_books2_many_tagsets.php
├── 20160129161233_mw_basic_structure.php
├── 20150216164636_undelete_lemmata.php
├── 20150624200024_change_candidate_samples.php
├── 20151023153845_add_ne_tagsets.php
└── 20150314214304_add_tables_for_selective_backup.php
├── postagging
└── brill
│ └── unsupervised
│ ├── python
│ ├── learn_rules
│ │ └── __init__.py
│ ├── spearman_test
│ │ ├── 1.txt
│ │ ├── 1r.txt
│ │ ├── 2.txt
│ │ └── 3.txt
│ ├── check_disjoint.sh
│ ├── learning_test
│ │ ├── rand1.tab
│ │ └── rand0.tab
│ ├── apply.py
│ ├── pictures.py
│ └── sample_corpus.py
│ ├── cpp
│ ├── include
│ │ ├── corpora_io.h
│ │ ├── utils.h
│ │ ├── dict.h
│ │ └── sentence.h
│ ├── train
│ │ ├── Makefile
│ │ ├── aux.h
│ │ └── aux.cpp
│ ├── lemmatizer
│ │ └── Makefile
│ └── lib
│ │ └── brill.cpp
│ └── perl
│ └── diff_tab_markup.pl
├── scripts
├── oc2conllu
│ └── requirements.txt
├── train_tokenizer.sh
├── aot_import
│ ├── lists
│ │ ├── add_Arch_ADJF.txt
│ │ ├── add_Dist_PRTS.txt
│ │ ├── add_Infr_ablt_plur.txt
│ │ ├── add_Erro_PRTS.txt
│ │ ├── add_Dist_aux.txt
│ │ ├── add_Litr.txt
│ │ ├── abbr_del.txt
│ │ ├── remove_ANim.txt
│ │ ├── Del_anim-inan&Add_ANim.txt
│ │ ├── add_Coun_gent_plur.txt
│ │ ├── list_adjf_fixd_NOUN.txt
│ │ ├── add_Erro_ADJS.txt
│ │ ├── add_Infr_ADJS.txt
│ │ ├── add_Infr_COMP.txt
│ │ ├── add_Infr_ablt_sing.txt
│ │ ├── pred_to_intj.txt
│ │ ├── list_adjf_fixd_ADVB.txt
│ │ ├── adjs_forms_del.txt
│ │ ├── list_numr_dupl_gent.txt
│ │ └── Unite_Paras&Add_ANim.txt
│ ├── read_mrd.pl
│ ├── rgramtab.tab
│ ├── list_paradigm.pl
│ ├── morphs.mrd.patch
│ └── bad_lemma_grammems.txt
├── export_and_stats.sh
├── exports.sh
├── subst.txt
├── stats
│ └── update_stats.sh
├── ma_pools
│ ├── post_merge.php
│ ├── autopublish.php
│ └── unpublish_pools.py
├── tokenizer
│ ├── train.php
│ ├── tokenize.php
│ ├── cronrunner.pl
│ ├── tokenizer_exceptions.txt
│ └── check_sentence_split.pl
├── invalidate_auth_tokens.pl
├── run_validators.sh
├── mwords
│ ├── rules.txt
│ └── search.php
├── json2ini.py
├── cronrunner.pl
├── validators
│ ├── par_validator.py
│ └── url_validator.py
├── export
│ └── export_dict.sh
├── check_dog_achievement.php
├── backup.sh
├── consistency
│ ├── form2tf.pl
│ ├── form2lemma.pl
│ └── dict_update_forms.pl
└── delete_unused_files.pl
├── export
├── annot
│ ├── disamb_nonmod_tests
│ │ ├── pools.txt
│ │ ├── pool_158.tab
│ │ └── annot.opcorpora.canon_out.xml
│ └── no_homonymy_constants.py
├── pools
│ └── export_pools.sh
├── pools.pl
└── database
│ ├── backup.sh
│ └── copy_nulled_tables.sql
├── favicon.ico
├── doc
├── presentations
│ ├── 2012_September29_OnePage.sh
│ ├── 2012_May31_Dialog_RoundTable.sh
│ ├── img
│ │ ├── annotation-lifecycle.sh
│ │ ├── 2012_miem_1.png
│ │ ├── 2012_miem_2.png
│ │ ├── 2012_miem_3.png
│ │ ├── 2012_miem_4.png
│ │ ├── 2012_miem_5.png
│ │ ├── 2012_miem_6.png
│ │ ├── markupUI2.png
│ │ ├── markupUI2-part.png
│ │ ├── 2011_nlpseminar_1.png
│ │ ├── 2011_nlpseminar_2.png
│ │ ├── 2011_nlpseminar_3.png
│ │ └── annotation-lifecycle.png
│ └── 2012_September29_OnePage.tex
└── articles
│ └── img
│ ├── 2011_Dialog_img1.png
│ └── 2012_MIEM_img1.png
├── assets
├── img
│ ├── sf.ttf
│ ├── grey.png
│ ├── logo.png
│ ├── star.png
│ ├── appeal1.png
│ ├── appeal2.png
│ ├── fb-pic.png
│ ├── lang_en.png
│ ├── lang_ru.png
│ ├── robot.png
│ ├── icon_plus.png
│ ├── robot_big.png
│ ├── tiny_grid.png
│ ├── ajax-loader.gif
│ ├── icon_glass.png
│ ├── icon_smile.png
│ ├── icon_target.png
│ ├── wiki
│ │ ├── Markup.png
│ │ ├── Buttonanswer.png
│ │ ├── Buttonnomore.png
│ │ ├── Buttonother.png
│ │ ├── Buttonskip.png
│ │ ├── Tasklist_php.png
│ │ ├── Buttoncomment.png
│ │ ├── Buttonwantmore.png
│ │ ├── Leftcontextex.png
│ │ └── Rightcontextex.png
│ ├── icon_speed_60.png
│ ├── icon_star_gray.png
│ ├── icon_star_red.png
│ ├── icon_user_blue.png
│ ├── tiny_grid_blue.png
│ ├── icon_speedometer.png
│ ├── icon_star_green.png
│ ├── icon_star_orange.png
│ ├── icon_star_yellow.png
│ ├── icon_trophy_black.png
│ ├── icon_user_orange.png
│ └── badges
│ │ ├── aist-400x400.png
│ │ ├── bobr-400x400.png
│ │ ├── dog-400x400.png
│ │ ├── fish-400x400.png
│ │ ├── ainl2015-400x400.png
│ │ ├── chameleon-400x400.png
│ │ ├── wantmore-400x400.png
│ │ ├── aist-100x100-grayscale.png
│ │ ├── bobr-100x100-grayscale.png
│ │ ├── dog-100x100-grayscale.png
│ │ ├── fish-100x100-grayscale.png
│ │ ├── ainl2015-100x100-grayscale.png
│ │ ├── chameleon-100x100-grayscale.png
│ │ └── wantmore-100x100-grayscale.png
└── js
│ └── merge_fails.js
├── no_js.php
├── README.md
├── templates
├── error.tpl
├── static
│ ├── doc
│ │ └── annotation.tpl
│ ├── faq.tpl
│ └── downloads.row.tpl
├── qa
│ ├── tasks_guest.tpl
│ ├── empty_books.tpl
│ ├── sent_split.tpl
│ ├── unknowns.tpl
│ ├── good_sentences.tpl
│ ├── dl_urls.tpl
│ ├── pool_tabs.tpl
│ ├── game_status.tpl
│ ├── book_tags.tpl
│ ├── pool_candidates.tpl
│ ├── useful_pools.tpl
│ ├── tokenizer.tpl
│ └── pool_types.tpl
├── common_no_js.tpl
├── dict
│ ├── links_main.tpl
│ ├── absent.tpl
│ └── links_single.tpl
├── search.tpl
├── books.tpl
├── top100.tpl
├── openid_license.tpl
├── addtext.tpl
├── footer.tpl
├── ner
│ └── _partials
│ │ └── objects-modal.tpl
├── tag_stats.tpl
├── comments.tpl
├── sentence_syntax_groups_moderator.tpl
└── sentence_syntax_moderator.tpl
├── tokenizer_monitor.php
├── ajax
├── readonly.php
├── wantmore.php
├── clck_log.php
├── merge_tokens.php
├── tokenizer_monitor.php
├── add_book_tag.php
├── set_option.php
├── download_url.php
├── run_test.php
├── run_generator.php
├── set_token_text.php
├── game_mark_shown.php
├── lemma_search.php
├── select_book.php
├── publish_update.php
├── tag_autocomplete.php
├── get_context.php
├── paradigm_info.php
├── dict_pending.php
├── save_check.php
├── merge_fails.php
├── own_book.php
├── lastpar.php
├── anaphora.php
├── dict_reload.php
├── annot.php
├── post_comment.php
├── bind_book.php
├── get_comments.php
└── guess_wiki_categ.php
├── SECURITY.md
├── search.php
├── comments.php
├── .gitignore
├── dict_diff.php
├── dict_history.php
├── diff.php
├── robots.txt
├── revert.php
├── sources.php
├── user.php
├── add.php
├── history.php
├── generator_cp.php
├── manual.php
├── syntax.php
├── composer.json
├── ner.php
├── phinx.php
└── options.php
/perl/lib/ISO/LMF.pm:
--------------------------------------------------------------------------------
1 |
2 |
3 | 1;
4 |
--------------------------------------------------------------------------------
/lib/.htaccess:
--------------------------------------------------------------------------------
1 | Order allow,deny
2 | Deny from all
3 |
--------------------------------------------------------------------------------
/anaphora/NE_extract/bad_noun.txt:
--------------------------------------------------------------------------------
1 | быль
2 | были
3 | уж
4 |
--------------------------------------------------------------------------------
/migrations/.htaccess:
--------------------------------------------------------------------------------
1 | Order deny,allow
2 | Deny from all
3 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/learn_rules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/oc2conllu/requirements.txt:
--------------------------------------------------------------------------------
1 | xmltodict
2 | russian_tagsets
3 |
--------------------------------------------------------------------------------
/scripts/train_tokenizer.sh:
--------------------------------------------------------------------------------
1 | php /corpus/scripts/tokenizer/train.php
2 |
--------------------------------------------------------------------------------
/export/annot/disamb_nonmod_tests/pools.txt:
--------------------------------------------------------------------------------
1 | 158 NOUN&plur&gent@NOUN&plur&accs 4
2 |
--------------------------------------------------------------------------------
/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/favicon.ico
--------------------------------------------------------------------------------
/doc/presentations/2012_September29_OnePage.sh:
--------------------------------------------------------------------------------
1 | pdflatex 2012_September29_OnePage.tex
2 |
--------------------------------------------------------------------------------
/assets/img/sf.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/sf.ttf
--------------------------------------------------------------------------------
/no_js.php:
--------------------------------------------------------------------------------
1 | display('common_no_js.tpl');
4 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/spearman_test/1.txt:
--------------------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/spearman_test/1r.txt:
--------------------------------------------------------------------------------
1 | 6
2 | 5
3 | 4
4 | 3
5 | 2
6 | 1
7 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/spearman_test/2.txt:
--------------------------------------------------------------------------------
1 | 2
2 | 1
3 | 4
4 | 3
5 | 6
6 | 5
7 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/spearman_test/3.txt:
--------------------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 |
--------------------------------------------------------------------------------
/assets/img/grey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/grey.png
--------------------------------------------------------------------------------
/assets/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/logo.png
--------------------------------------------------------------------------------
/assets/img/star.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/star.png
--------------------------------------------------------------------------------
/doc/presentations/2012_May31_Dialog_RoundTable.sh:
--------------------------------------------------------------------------------
1 | pdflatex 2012_May31_Dialog_RoundTable.tex
2 |
--------------------------------------------------------------------------------
/assets/img/appeal1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/appeal1.png
--------------------------------------------------------------------------------
/assets/img/appeal2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/appeal2.png
--------------------------------------------------------------------------------
/assets/img/fb-pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/fb-pic.png
--------------------------------------------------------------------------------
/assets/img/lang_en.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/lang_en.png
--------------------------------------------------------------------------------
/assets/img/lang_ru.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/lang_ru.png
--------------------------------------------------------------------------------
/assets/img/robot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/robot.png
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Arch_ADJF.txt:
--------------------------------------------------------------------------------
1 | #Добавить этим полным прилагательным помету Устар.
2 | самоё
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Dist_PRTS.txt:
--------------------------------------------------------------------------------
1 | #Добавить этим кратким причастиям помету Искаж
2 | стера
3 |
--------------------------------------------------------------------------------
/assets/img/icon_plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_plus.png
--------------------------------------------------------------------------------
/assets/img/robot_big.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/robot_big.png
--------------------------------------------------------------------------------
/assets/img/tiny_grid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/tiny_grid.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | opencorpora
2 | ===========
3 |
4 | A web-based engine for creating and annotating textual corpora
5 |
--------------------------------------------------------------------------------
/assets/img/ajax-loader.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/ajax-loader.gif
--------------------------------------------------------------------------------
/assets/img/icon_glass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_glass.png
--------------------------------------------------------------------------------
/assets/img/icon_smile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_smile.png
--------------------------------------------------------------------------------
/assets/img/icon_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_target.png
--------------------------------------------------------------------------------
/assets/img/wiki/Markup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Markup.png
--------------------------------------------------------------------------------
/assets/img/icon_speed_60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_speed_60.png
--------------------------------------------------------------------------------
/assets/img/icon_star_gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_star_gray.png
--------------------------------------------------------------------------------
/assets/img/icon_star_red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_star_red.png
--------------------------------------------------------------------------------
/assets/img/icon_user_blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_user_blue.png
--------------------------------------------------------------------------------
/assets/img/tiny_grid_blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/tiny_grid_blue.png
--------------------------------------------------------------------------------
/assets/img/icon_speedometer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_speedometer.png
--------------------------------------------------------------------------------
/assets/img/icon_star_green.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_star_green.png
--------------------------------------------------------------------------------
/assets/img/icon_star_orange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_star_orange.png
--------------------------------------------------------------------------------
/assets/img/icon_star_yellow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_star_yellow.png
--------------------------------------------------------------------------------
/assets/img/icon_trophy_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_trophy_black.png
--------------------------------------------------------------------------------
/assets/img/icon_user_orange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/icon_user_orange.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttonanswer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttonanswer.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttonnomore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttonnomore.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttonother.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttonother.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttonskip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttonskip.png
--------------------------------------------------------------------------------
/assets/img/wiki/Tasklist_php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Tasklist_php.png
--------------------------------------------------------------------------------
/doc/presentations/img/annotation-lifecycle.sh:
--------------------------------------------------------------------------------
1 | dot img/annotation-lifecycle.dot -Tpng -o img/annotation-lifecycle.png
2 |
--------------------------------------------------------------------------------
/scripts/aot_import/read_mrd.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/scripts/aot_import/read_mrd.pl
--------------------------------------------------------------------------------
/scripts/aot_import/rgramtab.tab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/scripts/aot_import/rgramtab.tab
--------------------------------------------------------------------------------
/assets/img/badges/aist-400x400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/badges/aist-400x400.png
--------------------------------------------------------------------------------
/assets/img/badges/bobr-400x400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/badges/bobr-400x400.png
--------------------------------------------------------------------------------
/assets/img/badges/dog-400x400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/badges/dog-400x400.png
--------------------------------------------------------------------------------
/assets/img/badges/fish-400x400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/badges/fish-400x400.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttoncomment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttoncomment.png
--------------------------------------------------------------------------------
/assets/img/wiki/Buttonwantmore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Buttonwantmore.png
--------------------------------------------------------------------------------
/assets/img/wiki/Leftcontextex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Leftcontextex.png
--------------------------------------------------------------------------------
/assets/img/wiki/Rightcontextex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/assets/img/wiki/Rightcontextex.png
--------------------------------------------------------------------------------
/scripts/export_and_stats.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | /corpus/scripts/exports.sh
3 | /corpus/scripts/stats/update_stats.sh
4 |
--------------------------------------------------------------------------------
/doc/articles/img/2011_Dialog_img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/articles/img/2011_Dialog_img1.png
--------------------------------------------------------------------------------
/doc/articles/img/2012_MIEM_img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/articles/img/2012_MIEM_img1.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_1.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_2.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_3.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_4.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_5.png
--------------------------------------------------------------------------------
/doc/presentations/img/2012_miem_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/2012_miem_6.png
--------------------------------------------------------------------------------
/doc/presentations/img/markupUI2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/doc/presentations/img/markupUI2.png
--------------------------------------------------------------------------------
/scripts/aot_import/list_paradigm.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/scripts/aot_import/list_paradigm.pl
--------------------------------------------------------------------------------
/scripts/aot_import/morphs.mrd.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenCorpora/opencorpora/HEAD/scripts/aot_import/morphs.mrd.patch
--------------------------------------------------------------------------------
/templates/error.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 |
6 |
Ошибка
7 |
Для корректной работы сайта необходим JavaScript.
8 | {/block}
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Как сообщить об уязвимости
2 |
3 | Пожалуйста, пишите на granovsky@opencorpora.org и bocharov@opencorpora.org.
4 |
5 | # How to report a vulnerability
6 |
7 | Please contact granovsky@opencorpora.org and bocharov@opencorpora.org.
8 |
--------------------------------------------------------------------------------
/scripts/subst.txt:
--------------------------------------------------------------------------------
1 | общий
2 | такой
3 | особенный
4 | русский
5 | английский
6 | больной
7 | весь
8 | один
9 | дальнейший
10 | знакомый
11 | каждый
12 | бессознательный
13 | другой
14 | обычный
15 | хороший
16 | несовершеннолетний
17 | угловой
18 |
--------------------------------------------------------------------------------
/search.php:
--------------------------------------------------------------------------------
1 | assign('search', get_search_results($search, GET('exact_form', true)));
7 | $smarty->display('search.tpl');
8 | log_timing();
9 |
--------------------------------------------------------------------------------
/comments.php:
--------------------------------------------------------------------------------
1 | assign('comments', get_latest_comments($skip));
8 | $smarty->assign('skip', $skip);
9 | $smarty->display('comments.tpl');
10 | log_timing();
11 | ?>
12 |
--------------------------------------------------------------------------------
/scripts/stats/update_stats.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs the three statistics jobs one after another, each wrapped in
3 | # cronrunner.pl. The two Perl jobs receive the path to config.ini as their
4 | # argument; the sentence-quality job is a PHP script and takes none.
5 | # The explicit interpreter line matters because this file is executed
6 | # directly by path rather than through "bash <file>".
7 | /corpus/scripts/cronrunner.pl "/corpus/scripts/stats/update_stats.pl /corpus/config.ini"
8 | /corpus/scripts/cronrunner.pl "/corpus/scripts/stats/update_tag_stats.pl /corpus/config.ini"
9 | /corpus/scripts/cronrunner.pl "php /corpus/scripts/stats/sentence_quality.php"
10 |
--------------------------------------------------------------------------------
/templates/dict/links_main.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name='content'}
4 |
Виды связей
5 |
6 | {foreach item=name key=typeid from=$data}
7 | - {$name}
8 | {/foreach}
9 |
10 | {/block}
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .htaccess
2 | /assets/vendor
3 | /composer.phar
4 | /config.ini
5 | /config.json
6 | /doc/articles/*.pdf
7 | /doc/presentations/*.pdf
8 | /files
9 | /vendor
10 | /yadisk-auth
11 | /yandex_*.html
12 | /yandex_*.txt
13 | *.swp
14 | *.project
15 | *.prefs
16 | *.pydevproject
17 | *.pyc
18 |
--------------------------------------------------------------------------------
/anaphora/features/runF.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | # Runs the four feature extractors and pastes their outputs side by side on stdout.
3 | perl lineal.pl -m "$1" -g "$2" -p "$3" > lineal.tmp
4 | python morph.py "$2" "$1" < "$3" | cut -f 2- > morph.tmp
5 | perl synt.pl > synt.tmp
6 | python ParDistance.py "$3" "$2" "$1" | cut -f 2 > ParDistance.tmp
7 | paste lineal.tmp morph.tmp synt.tmp ParDistance.tmp
8 | rm -f lineal.tmp morph.tmp synt.tmp ParDistance.tmp  # only the intermediates created above, not every *.tmp in the cwd
9 |
--------------------------------------------------------------------------------
/ajax/wantmore.php:
--------------------------------------------------------------------------------
1 | emit(EventTypes::WANT_MORE);
11 |
12 | echo json_encode($result);
--------------------------------------------------------------------------------
/ajax/clck_log.php:
--------------------------------------------------------------------------------
1 |
14 |
--------------------------------------------------------------------------------
/scripts/ma_pools/post_merge.php:
--------------------------------------------------------------------------------
1 |
14 |
--------------------------------------------------------------------------------
/dict_diff.php:
--------------------------------------------------------------------------------
1 | assign('diff', dict_diff($lemma_id, $set_id));
8 | $smarty->display('dict/diff.tpl');
9 | log_timing();
10 | ?>
11 |
--------------------------------------------------------------------------------
/scripts/ma_pools/autopublish.php:
--------------------------------------------------------------------------------
1 |
15 |
--------------------------------------------------------------------------------
/dict_history.php:
--------------------------------------------------------------------------------
1 | assign('history', dict_history($lemma_id, $skip));
7 | $smarty->assign('skip', $skip);
8 | $smarty->display('dict/history.tpl');
9 | log_timing();
10 | ?>
11 |
--------------------------------------------------------------------------------
/scripts/aot_import/bad_lemma_grammems.txt:
--------------------------------------------------------------------------------
1 | * sing
2 | * plur
3 | * indc
4 | * nomn
5 | * gent
6 | * datv
7 | * accs
8 | * ablt
9 | * loct
10 | VERB excl
11 | VERB impr
12 | VERB pres
13 | VERB past
14 | VERB masc
15 | VERB neut
16 | GRND pres
17 | GRND past
18 | GRND V-sh
19 | ADJS neut
20 | PRTF masc
21 | PRTS femn
22 | COMP V-ej
23 |
--------------------------------------------------------------------------------
/ajax/set_option.php:
--------------------------------------------------------------------------------
1 |
15 |
--------------------------------------------------------------------------------
/diff.php:
--------------------------------------------------------------------------------
1 | assign('diff', main_diff($sent_id, $set_id, $rev_id));
9 | $smarty->display('diff.tpl');
10 | log_timing();
11 | ?>
12 |
--------------------------------------------------------------------------------
/ajax/run_test.php:
--------------------------------------------------------------------------------
1 |
17 |
--------------------------------------------------------------------------------
/ajax/run_generator.php:
--------------------------------------------------------------------------------
1 |
17 |
--------------------------------------------------------------------------------
/ajax/set_token_text.php:
--------------------------------------------------------------------------------
1 | execute(file_get_contents(__DIR__.'/initial_schema.sql'));
9 | }
10 |
11 | public function down() {
12 | throw new Exception("Not implemented");
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/ajax/game_mark_shown.php:
--------------------------------------------------------------------------------
1 | set_all_seen();
9 | }
10 | catch (Exception $e) {
11 | $result['error'] = 1;
12 | }
13 |
14 | log_timing(true);
15 | die(json_encode($result));
16 |
--------------------------------------------------------------------------------
/ajax/lemma_search.php:
--------------------------------------------------------------------------------
1 |
14 |
--------------------------------------------------------------------------------
/ajax/select_book.php:
--------------------------------------------------------------------------------
1 | $title) {
10 | $result['books'][] = array('id' => $id, 'title' => $title);
11 | }
12 |
13 | log_timing(true);
14 | die(json_encode($result));
15 | ?>
16 |
--------------------------------------------------------------------------------
/perl/lib/Lingua/AOT/MorphDict/FormSpec.pm:
--------------------------------------------------------------------------------
1 | package Lingua::AOT::MorphDict::FormSpec;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 | use Encode;
7 |
8 | our $VERSION = "0.01";
9 |
10 | # Constructor: FormSpec->new($flex, $ancode, $prefix).
11 | # Stores the three positional arguments under the keys flex, ancode and prefix.
12 | sub new {
13 |     my $self = {};
14 |     my $class;
15 |     ($class, $self->{flex}, $self->{ancode}, $self->{prefix}) = @_;
16 |
17 |     bless($self, $class);
18 |
19 |     return $self;
20 | }
21 |
22 | # Explicit true value for require/use, instead of relying on whatever
23 | # statement happens to be evaluated last in the file.
24 | 1;
25 |
--------------------------------------------------------------------------------
/ajax/publish_update.php:
--------------------------------------------------------------------------------
1 |
17 |
--------------------------------------------------------------------------------
/migrations/20150703113519_add_pool_proto_name.php:
--------------------------------------------------------------------------------
1 | table("morph_annot_pool_types");
10 | $types->addColumn('pool_proto_name', 'string', array('limit' => 120))
11 | ->update();
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/ajax/tag_autocomplete.php:
--------------------------------------------------------------------------------
1 |
12 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/list_adjf_fixd_NOUN.txt:
--------------------------------------------------------------------------------
1 | аллегри
2 | апаш
3 | апплике
4 | барокко
5 | беж
6 | бордо
7 | брокколи
8 | буфф
9 | валансьен
10 | дум-дум
11 | клёш
12 | либерти
13 | макси
14 | маренго
15 | миди
16 | модерн
17 | морзе
18 | онлайн
19 | пик
20 | пике
21 | плаке
22 | плиссе
23 | пралине
24 | рамбулье
25 | ретро
26 | рококо
27 | сомон
28 | фантази
29 | фри
30 | хаки
31 | экстра
32 | электрик
33 |
--------------------------------------------------------------------------------
/perl/lib/ISO/LMF/EntityBase.pm:
--------------------------------------------------------------------------------
1 | package OpenCorpora::ISO::LMF::EntityBase;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | our $VERSION = "0.01";
8 |
9 |
10 | # Constructor: returns an object blessed into $class that holds three
11 | # empty hash refs (xmlatt, feat, fsr). Named arguments are accepted but unused.
12 | sub new {
13 |     my ($class, %args) = @_;
14 |
15 |     my %fields = (
16 |         xmlatt => {},
17 |         feat   => {},
18 |         fsr    => {},
19 |     );
20 |
21 |     return bless(\%fields, $class);
22 | }
23 |
24 |
25 | 1;
26 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/learning_test/rand1.tab:
--------------------------------------------------------------------------------
1 | sent
2 | 100 муму 1 мумуа TAGA 2 мумуб TAGB
3 | 101 куку 3 куку TAGC
4 | 102 . 0 . PNCT
5 | /sent
6 |
7 | sent
8 | 103 пупу 1 пупуа TAGA
9 | 104 зузу 3 зузу TAGC
10 | 105 . 0 . PNCT
11 | /sent
12 |
13 | sent
14 | 106 фуфу 2 фуфуб TAGB
15 | 107 . 0 . PNCT
16 | /sent
17 |
18 | sent
19 | 108 муму 1 мумуа TAGA 2 мумуб TAGB
20 | 109 . 0 . PNCT
21 | /sent
22 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Erro_ADJS.txt:
--------------------------------------------------------------------------------
1 | взаимосвязанна
2 | взаимосвязанны
3 | взаимосвязанно
4 | излюбленна
5 | излюбленны
6 | излюбленно
7 | незаверенна
8 | незаверенны
9 | незаверенно
10 | приверженна
11 | приверженны
12 | приверженно
13 | самоотвержена
14 | самоотвержено
15 | самоотвержены
16 | безветрено
17 | завихренно
18 | закомплексованно
19 | неуверено
20 | отверженно
21 | превыспренно
22 | превыспренны
23 |
24 |
25 |
--------------------------------------------------------------------------------
/scripts/tokenizer/train.php:
--------------------------------------------------------------------------------
1 | train($limit);
16 |
--------------------------------------------------------------------------------
/migrations/20160203212016_multiword_types.php:
--------------------------------------------------------------------------------
1 | table("mw_main")
11 | ->addColumn('mw_type', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_TINY))
12 | ->save();
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Disallow: /books
3 | Disallow: /dict
4 | Disallow: /diff
5 | Disallow: /files/saved
6 | Disallow: /history
7 | Disallow: /login
8 | Disallow: /options
9 | Disallow: /pools
10 | Disallow: /revert
11 | Disallow: /scripts
12 | Disallow: /sentence
13 | Disallow: /tasks
14 | Disallow: /user.php
15 | Disallow: /w/
16 | Disallow: /wiki/Special:Search
17 | Disallow: /wiki/Special:Random
18 | Disallow: /?page=top100
19 |
--------------------------------------------------------------------------------
/scripts/invalidate_auth_tokens.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Deletes rows of user_tokens whose timestamp is more than seven days old.
3 | # Usage: invalidate_auth_tokens.pl /path/to/config.ini
4 | use strict;
5 | use warnings;
6 | use DBI;
7 | use Config::INI::Reader;
8 |
9 | my $config_path = shift @ARGV
10 |     or die "Usage: $0 /path/to/config.ini\n";
11 |
12 | #reading config
13 | my $conf = Config::INI::Reader->read_file($config_path);
14 | $conf = $conf->{mysql};
15 |
16 | # RaiseError makes a failed DELETE die instead of being silently ignored.
17 | my $dbh = DBI->connect('DBI:mysql:'.$conf->{'dbname'}.':'.$conf->{'host'}, $conf->{'user'}, $conf->{'passwd'}, { RaiseError => 1 }) or die $DBI::errstr;
18 | # The cutoff is computed locally (now minus 7 days in seconds), so no external input reaches the SQL text.
19 | $dbh->do("DELETE FROM user_tokens WHERE timestamp<".(time()-60*60*24*7));
20 | $dbh->disconnect;
21 |
--------------------------------------------------------------------------------
/scripts/run_validators.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | CONFIG_INI=/corpus/config.ini  # passed to the four Python validators
3 | CONFIG_JSON=/corpus/config.json  # passed to find_good_sentences.py
4 | # Run the validators, then find_good_sentences.py, one after another.
5 | python /corpus/scripts/validators/year_valid.py $CONFIG_INI
6 | python /corpus/scripts/validators/author_validator.py $CONFIG_INI
7 | python /corpus/scripts/validators/url_validator.py $CONFIG_INI
8 | python /corpus/scripts/validators/par_validator.py $CONFIG_INI
9 | /corpus/scripts/find_good_sentences.py $CONFIG_JSON
10 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/learning_test/rand0.tab:
--------------------------------------------------------------------------------
1 | sent
2 | 100 муму 1 мумуа TAGA 2 мумуб TAGB
3 | 101 куку 3 куку TAGC
4 | 102 . 0 . PNCT
5 | /sent
6 |
7 | sent
8 | 103 абаб 4 абаба TAGD
9 | 104 пупу 1 пупуа TAGA
10 | 105 зузу 3 зузу TAGC
11 | 106 . 0 . PNCT
12 | /sent
13 |
14 | sent
15 | 107 фуфу 2 фуфуб TAGB
16 | 108 . 0 . PNCT
17 | /sent
18 |
19 | sent
20 | 109 муму 1 мумуа TAGA 2 мумуб TAGB
21 | 110 . 0 . PNCT
22 | /sent
23 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/cpp/include/corpora_io.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "sentence.h"
6 | #include "tag.h"
7 |
8 | #ifndef __CORPORA_IO_H
9 | #define __CORPORA_IO_H
10 |
11 | typedef std::vector SentenceCollection;
12 |
13 | void readCorpus(const std::string &fn, SentenceCollection &sc);
14 |
15 | std::set makeVariants(const std::string &s);
16 |
17 | #endif
18 |
--------------------------------------------------------------------------------
/anaphora/NE_extract/run_toma.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | # Runs ./tomita-upd once per entry of PlaintextDirName and writes ResXmlDirName/<entry>.xml.
3 | if [ -z "$1" ] || [ -z "$2" ]
4 | then
5 |     echo "./run_toma.sh PlaintextDirName ResXmlDirName"
6 |     exit 1
7 | else
8 |     # config.proto is rewritten in place on every iteration to point Dir at the current entry.
9 |     mkdir -p "$2"
10 |     for d in "$1"/*
11 |     do
12 |         XML=$(basename "$d")
13 |         echo "$XML"
14 |         D=${d//\//\\/}  # escape each "/" as "\/" so the path fits inside s/.../.../
15 |         echo "$D"
16 |         sed -i "s/Dir = \".*\"/Dir = \"$D\"/" config.proto
17 |         ./tomita-upd config.proto > "$2/$XML.xml"
18 |     done
19 | fi
20 |
--------------------------------------------------------------------------------
/perl/lib/Lingua/AOT/MorphDict/AccentParadigm.pm:
--------------------------------------------------------------------------------
1 | package Lingua::AOT::MorphDict::AccentParadigm;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | our $VERSION = "0.01";
8 |
9 | # Constructor: splits $line on ';' and keeps the pieces in $self->{forms}.
10 | sub new {
11 |     my ($class, $line) = @_;
12 |     my $self = {};
13 |
14 |     @{$self->{forms}} = split(/;/, $line);
15 |
16 |     bless($self, $class);
17 |     return $self;
18 | }
19 |
20 | # Returns the index of the last stored form (-1 when there are none).
21 | sub GetLastFormNo {
22 |     my $self = shift;
23 |     return $#{$self->{forms}};
24 | }
25 |
26 | # Explicit true value for require/use, instead of relying on whatever
27 | # statement happens to be evaluated last in the file.
28 | 1;
29 |
--------------------------------------------------------------------------------
/templates/static/faq.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name='content'}
4 | О проекте
5 |
11 | {if isset($title)}{$title}
{/if}
12 | {$content}
13 | {/block}
14 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Infr_ADJS.txt:
--------------------------------------------------------------------------------
1 | #Добавить этим кратким прилагательным помету Разг.
2 | вёртка
3 | квела
4 | сажённа
5 | сверхлёгка
6 | хлёстка
7 | чётка
8 | весёлы
9 | волён
10 | выспренне
11 | выспренни
12 | далёки
13 | добродушно-весёлы
14 | жестки
15 | искренно
16 | искренны
17 | кисл
18 | шустр
19 | неискренно
20 | неудовлетворённо
21 | общо
22 | угнетённа
23 | угнетённо
24 | угнетённы
25 | черствы
26 | нетвёрды
27 | твёрды
28 | огорчённа
29 | огорчённо
30 | остер
31 |
--------------------------------------------------------------------------------
/ajax/get_context.php:
--------------------------------------------------------------------------------
1 |
17 |
--------------------------------------------------------------------------------
/templates/dict/absent.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name='content'}
4 |
5 | - Словарь /
6 | - Top несловарных токенов
7 |
8 | Top 500 токенов с UNKN
9 |
10 | {foreach from=$words item=word}
11 | - {$word.word|htmlspecialchars} [{$word.count}]
12 | {/foreach}
13 |
14 | {/block}
15 |
--------------------------------------------------------------------------------
/templates/dict/links_single.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name='content'}
4 | Всего пар лемм со связью типа {$data.name}: {$data.total}. Ниже не более 100 примеров.
5 |
14 | {/block}
15 |
--------------------------------------------------------------------------------
/anaphora/NE_extract/kwtypes.proto:
--------------------------------------------------------------------------------
1 | import "base.proto"; // подключаем описания protobuf-типов (TAuxDicArticle и прочих)
2 | import "articles_base.proto"; // Файлы base.proto и articles_base.proto встроены в компилятор.
3 |
4 | message complex_prep: TAuxDicArticle { };
5 | message complex_adv: TAuxDicArticle { };
6 | message complex_conj: TAuxDicArticle { };
7 | message introduct: TAuxDicArticle { };
8 | message bad_noun: TAuxDicArticle { };
9 | message pronoun: TAuxDicArticle { };
10 |
--------------------------------------------------------------------------------
/scripts/mwords/rules.txt:
--------------------------------------------------------------------------------
1 | # правила для поиска кандидатов в мультитокены
2 | # с символа # можно начинать строки с комментариями
3 |
4 | # поиск по умолчанию - по точной форме, без учёта регистра
5 |
6 | # через @ можно написать тип, он определяет надпись на кнопке у юзера,
7 | # по умолчанию там написано "мультитокен"
8 | # @1 - "сложный предлог"
9 | # @2 - "сложный союз"
10 |
11 |
12 | в течение @ 1
13 | в течении @ 1
14 | несмотря на @ 1
15 | т . к . @ 2
16 | так как @ 2
17 | хотя б
18 | хотя бы
19 |
--------------------------------------------------------------------------------
/templates/qa/empty_books.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Пустые тексты
5 | Это тексты, к которым не приписано ни одного раздела и ни одного предложения.
6 | Список обновляется при каждом обращении к этой странице.
7 |
8 | {foreach item=book from=$books}
9 | - {$book.name|htmlspecialchars}
10 | {foreachelse}
11 | Список пуст.
12 | {/foreach}
13 |
14 | {/block}
15 |
--------------------------------------------------------------------------------
/migrations/20150610120658_merge_fails_comments.php:
--------------------------------------------------------------------------------
1 | table('morph_annot_merge_comments', array('id' => false, 'primary_key' => array('sample_id')));
10 | $tbl->addColumn('sample_id', 'integer', array('signed' => false))
11 | ->addColumn('comment', 'text')
12 | ->create();
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/templates/search.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Результаты поиска
5 | Всего найдено: {$search.total}
6 | {foreach from=$search.results item=s name=m}
7 | {$smarty.foreach.m.index + 1}. {foreach from=$s.context item=word key=tid}{if $tid == $s.mainword}{$word|htmlspecialchars}{else}{$word|htmlspecialchars}{/if} {/foreach}
8 | {/foreach}
9 | {/block}
10 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/cpp/train/Makefile:
--------------------------------------------------------------------------------
1 | CC=g++
2 | INC_PATH=../include/
3 | LIB_PATH=../lib/
4 | CFLAGS=-c -O3 -std=c++0x -Wall -I$(INC_PATH)
5 | SOURCES=main.cpp aux.cpp $(LIB_PATH)corpora_io.cpp $(LIB_PATH)corpus_stat.cpp $(LIB_PATH)brill.cpp
6 | OBJECTS=$(SOURCES:.cpp=.o)
7 | INCLUDES=$(wildcard $(INC_PATH)/*.h) $(wildcard *.h)
8 | .PHONY: all clean
9 | all: train
10 |
11 | train: $(OBJECTS)
12 | 	$(CC) -O3 $(OBJECTS) -o train
13 | # Pattern rule instead of a ".cpp.o" suffix rule: suffix rules cannot carry prerequisites, so the header dependency was not honoured.
14 | %.o: %.cpp $(INCLUDES)
15 | 	$(CC) $(CFLAGS) $< -o $@
16 |
17 | clean:
18 | 	rm -rf *.o train
19 |
--------------------------------------------------------------------------------
/migrations/20160124093035_add_prop_order.php:
--------------------------------------------------------------------------------
1 | execute("alter table ne_object_props add column `order` int unsigned not null default 0");
14 | }
15 |
16 | /**
17 |  * Migrate Down: drops the `order` column that up() adds to ne_object_props.
18 |  */
19 | public function down()
20 | {
21 |     $this->table("ne_object_props")->removeColumn("order")->save(); // save() applies the pending change, as the sibling migrations do
22 | }
23 | }
--------------------------------------------------------------------------------
/perl/lib/ISO/LMF/LexicalResource.pm:
--------------------------------------------------------------------------------
1 | package OpenCorpora::ISO::LMF::LexicalResource;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | # Loaded explicitly because new() calls its constructor.
8 | use OpenCorpora::ISO::LMF::EntityBase;
9 |
10 | our $VERSION = "0.01";
11 |
12 | # Constructor: builds an EntityBase object, reblesses it into $class and
13 | # gives it an empty list of lexicons. Named arguments are accepted but unused.
14 | sub new {
15 |     my($class, %args) = @_;
16 |     my $base = OpenCorpora::ISO::LMF::EntityBase->new();
17 |     my $self = bless($base, $class);
18 |
19 |     # Empty array ref; "= ()" in a scalar assignment stores undef instead.
20 |     $self->{lexicon} = [];
21 |
22 |     return $self;
23 | }
24 |
25 | # Appends every given item to the lexicon list, in order.
26 | sub add_lexicon {
27 |     my ($self, @lexicon) = @_;
28 |     foreach my $l (@lexicon) {
29 |         push @{$self->{lexicon}}, $l;
30 |     }
31 | }
32 |
33 | 1;
34 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/python/apply.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import sys
4 |
5 | from learn_rules.utils import apply_rule, read_corpus, write_corpus, parse_rule
6 |
7 |
8 | if __name__ == '__main__':
9 | TYPES = {'tag': 0, 'word': 1}
10 | rules = []
11 | inc = read_corpus(sys.stdin)
12 | for line in open(sys.argv[1], 'r'):
13 | if not line:
14 | continue
15 | r = parse_rule(line)
16 | rules.append(r)
17 | inc = list(apply_rule(r, inc))
18 | write_corpus(inc)
19 |
--------------------------------------------------------------------------------
/scripts/json2ini.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import sys
3 | import json
4 |
5 | # Prints the JSON file at `path` as INI text: a [name] header per top-level key, then "key = value" lines.
6 | def main(path):
7 | with open(path) as fin:
8 | data = json.load(fin)
9 | for block_name, block_items in data.items():
10 | print("[{}]".format(block_name))
11 | for k, v in block_items.items():
12 | vstr = ','.join(list(map(str, v))) if isinstance(v, list) else v  # list values become a comma-separated string
13 | print("{} = {}".format(k, vstr))
14 | print()
15 |
16 | # Command-line entry point: the JSON path is the first argument.
17 | if __name__ == "__main__":
18 | main(sys.argv[1])
19 |
--------------------------------------------------------------------------------
/templates/qa/sent_split.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Странное деление предложений
5 | Список обновляется раз в час.
6 |
7 | | id | Текст | |
8 | {foreach from=$sentences item=s}
9 |
10 | | {$s.id} |
11 | {$s.text|htmlspecialchars} |
12 | исправить |
13 |
14 | {/foreach}
15 |
16 | {/block}
17 |
--------------------------------------------------------------------------------
/perl/lib/Lingua/AOT/MorphDict/Form.pm:
--------------------------------------------------------------------------------
1 | package Lingua::AOT::MorphDict::Form;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 | use Encode;
7 |
8 | our $VERSION = "0.01";
9 |
10 | # Constructor: keeps $text and $ancode; $ref_dic and $lemma_ancode are accepted but not stored.
11 | sub new {
12 |     my $self = {};
13 |     my ($class, $ref_dic, $text, $ancode, $lemma_ancode) = @_;
14 |     ($self->{text}, $self->{ancode}) = ($text, $ancode);
15 |
16 |     bless($self, $class);
17 |     return $self;
18 | }
19 |
20 | # Accessor for the stored text.
21 | sub Text {
22 |     my $self = shift;
23 |     return $self->{text};
24 | }
25 |
26 | # Accessor for the stored ancode.
27 | sub Ancode {
28 |     my $self = shift;
29 |     return $self->{ancode};
30 | }
31 |
32 | # Explicit true value for require/use, instead of relying on whatever
33 | # statement happens to be evaluated last in the file.
34 | 1;
35 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/cpp/include/utils.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | #ifndef __UTILS_H
7 | #define __UTILS_H
8 |
9 | inline std::vector &split(const std::string &s, char delim, std::vector &elems) {
10 | // std::cerr << "split(\"" << s << "\", \'" << delim << "\' ...)" << std::endl;
11 | std::stringstream ss(s);
12 | std::string item;
13 | while(std::getline(ss, item, delim)) {
14 | elems.push_back(item);
15 | }
16 | return elems;
17 | }
18 |
19 | #endif
20 |
--------------------------------------------------------------------------------
/ajax/paradigm_info.php:
--------------------------------------------------------------------------------
1 | $para['lemma_gram'], 'suffix' => $para['lemma_suffix_len']);
8 | $result['forms'] = array();
9 | foreach ($para['forms'] as $form) {
10 | $result['forms'][] = array('gram' => join(', ', $form['grm']), 'suffix' => $form['suffix']);
11 | }
12 | }
13 | else
14 | $result['error'] = 1;
15 |
16 | log_timing(true);
17 | die(json_encode($result));
18 | ?>
19 |
--------------------------------------------------------------------------------
/revert.php:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/scripts/tokenizer/tokenize.php:
--------------------------------------------------------------------------------
1 | tokenize($line) as $token) {
13 | echo implode("\t", array($token->start_pos, $token->end_pos, $token->get_feats_str_binary(), $token->border_weight)) . "\n";
14 | }
15 | }
16 |
17 | ?>
18 |
--------------------------------------------------------------------------------
/migrations/20150628112302_turn_game_on_for_all.php:
--------------------------------------------------------------------------------
1 | table("users");
11 | $users->removeColumn("show_game");
12 | }
13 |
14 | public function down()
15 | {
16 | $users = $this->table("users");
17 | $users->addColumn("show_game", "integer", array('signed' => false, 'limit' => MysqlAdapter::INT_TINY))
18 | ->save();
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Infr_COMP.txt:
--------------------------------------------------------------------------------
1 | # Поставить компаративу помету Разг
2 | бойчее
3 | врожденнее
4 | врожденней
5 | поврожденнее
6 | поврожденней
7 | дешевее
8 | дичее
9 | закаленнее
10 | закаленней
11 | позакаленней
12 | позакаленнее
13 | неудовлетворённее
14 | неудовлетворённей
15 | понеудовлетворённее
16 | понеудовлетворённей
17 | угнетенней
18 | угнетеннее
19 | поугнетенней
20 | поугнетеннее
21 | позднее
22 | поздней
23 | попозднее
24 | попоздней
25 | прирожденнее
26 | прирожденней
27 | поприрожденнее
28 | поприрожденней
29 | старее
30 | старей
31 | постарее
32 | постарей
33 | хлеще
34 | похлеще
35 |
--------------------------------------------------------------------------------
/templates/qa/unknowns.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Токены из словаря, но с разбором UNKN ({$tokens|sizeof})
5 |
6 | {foreach item=token from=$tokens}
7 |
8 | | {$token.text|htmlspecialchars} |
9 | {foreach from=$token.comments item=comment} {$comment.text|htmlspecialchars} ({$comment.author|htmlspecialchars}) {/foreach} |
10 |
11 | {/foreach}
12 |
13 | {/block}
14 |
--------------------------------------------------------------------------------
/scripts/cronrunner.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Runs the command line given as the first argument under an exclusive, non-blocking flock(1) lock.
3 | use strict;
4 | use warnings;
5 |
6 | my $cmd = shift or exit print qq{Usage: cronrunner.pl "cmd --args"};
7 | # Only the first two space-separated words of the command are inspected.
8 | my @pieces = grep defined, (split / /, $cmd)[0, 1];
9 | my $file = (grep -e $_, @pieces)[0];  # first of those words that exists as a path
10 | # Lock name: last path component of that existing path, otherwise the first word of the command.
11 | my $lock;
12 | if(defined $file) {
13 | $file = (split '/', $file)[-1];
14 | $lock = "/var/lock/$file.lock";
15 | }
16 | else {
17 | $lock = "/var/lock/$pieces[0].lock";
18 | }
19 |
20 | # NOTE(review): system() is true for any non-zero status, so a failing command is also reported as a lock failure; $! may not describe it - confirm.
21 | system qq{flock --exclusive --nonblock $lock --command "$cmd"}
22 | and exit print "Failed to acquire run lock: $!";
23 |
24 | exit 0;
25 |
--------------------------------------------------------------------------------
/migrations/20160130085413_more_on_multiwords.php:
--------------------------------------------------------------------------------
1 | table('mw_answers')
11 | ->addColumn('answer', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_TINY))
12 | ->save();
13 |
14 | $this->table('mw_main')
15 | ->addColumn('applied', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_TINY))
16 | ->save();
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/add_Infr_ablt_sing.txt:
--------------------------------------------------------------------------------
1 | # Поставить формам творительного падежа помету Разг
2 | баржой
3 | баржою
4 | бомжем
5 | военспецем
6 | главспецем
7 | голбцом
8 | гуляшем
9 | ильичем
10 | кешом
11 | киём
12 | кишмишом
13 | козьмичем
14 | кряжом
15 | кузьмичем
16 | кэшом
17 | ломтём
18 | лукичем
19 | мальцем
20 | мацей
21 | метакэшем
22 | миражом
23 | неровнёй
24 | неровнёю
25 | обжой
26 | петлей
27 | петлею
28 | пешней
29 | пешнею
30 | пращей
31 | ровнёй
32 | ровнёю
33 | ряжом
34 | сажем
35 | сазандарём
36 | слэшом
37 | спецем
38 | ставцем
39 | стукачем
40 | фомичем
41 | углем
42 | чавычой
43 | чувашом
44 | шлицом
45 |
46 |
--------------------------------------------------------------------------------
/scripts/tokenizer/cronrunner.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Runs the command line given as the first argument under an exclusive, non-blocking flock(1) lock.
3 | use strict;
4 | use warnings;
5 |
6 | my $cmd = shift or exit print qq{Usage: cronrunner.pl "cmd --args"};
7 | # Only the first two space-separated words of the command are inspected.
8 | my @pieces = grep defined, (split / /, $cmd)[0, 1];
9 | my $file = (grep -e $_, @pieces)[0];  # first of those words that exists as a path
10 | # Lock name: last path component of that existing path, otherwise the first word of the command.
11 | my $lock;
12 | if(defined $file) {
13 |     $file = (split '/', $file)[-1];
14 |     $lock = "/var/lock/$file.lock";
15 | }
16 | else {
17 |     $lock = "/var/lock/$pieces[0].lock";
18 | }
19 |
20 | # flock(1) spells the option --nonblock (or -n / --nb); "--non-block" is rejected as an unknown option.
21 | system qq{flock --exclusive --nonblock $lock --command "$cmd"}
22 |     and exit print "Failed to acquire run lock: $!";
23 |
24 | exit 0;
25 |
--------------------------------------------------------------------------------
/perl/lib/ISO/LMF/Lexicon.pm:
--------------------------------------------------------------------------------
1 | package OpenCorpora::ISO::LMF::Lexicon;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | use OpenCorpora::ISO::LMF::EntityBase;
8 |
9 | our $VERSION = "0.01";
10 |
11 | # Constructor: an EntityBase object reblessed into $class, with an empty list of lexical entries.
12 | sub new {
13 |     my($class, %args) = @_;
14 |     my $base = OpenCorpora::ISO::LMF::EntityBase->new();
15 |     my $self = bless($base, $class);
16 |     # Empty array ref; "= ()" in a scalar assignment stores undef instead.
17 |     $self->{lexical_entries} = [];
18 |
19 |     return $self;
20 | }
21 | # Appends every given item to the lexical_entries list, in order.
22 | sub add_lexical_entry {
23 |     my ($self, @lexical_entries) = @_;
24 |     foreach my $le (@lexical_entries) {
25 |         push @{$self->{lexical_entries}}, $le;
26 |     }
27 | }
28 |
29 | 1;
30 |
--------------------------------------------------------------------------------
/ajax/dict_pending.php:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/migrations/20160110211650_remove_old_ne_prop.php:
--------------------------------------------------------------------------------
1 | table("ne_objects")
10 | ->removeColumn("canon_name")
11 | ->removeColumn("wikidata_id")
12 | ->update();
13 | }
14 |
15 | public function down()
16 | {
17 | $this->table("ne_objects")
18 | ->addColumn("canon_name", "string", array("limit" => 255))
19 | ->addColumn("wikidata_id", "integer")
20 | ->save();
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/migrations/20151209163951_add_ne_book_moderator.php:
--------------------------------------------------------------------------------
1 | table("ne_books_tagsets")
12 | ->addColumn("moderator_id", "integer", array("default" => 0))
13 | ->update();
14 | }
15 |
16 | /**
17 | * Migrate Down.
18 | */
19 | public function down() {
20 | $this->table("ne_books_tagsets")
21 | ->removeColumn("moderator_id")
22 | ->update();
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/templates/qa/good_sentences.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Наименее омонимичные предложения
5 | Список обновляется раз в сутки.
6 | {if isset($smarty.get.no_zero)}показать неомонимичные{else}скрыть неомонимичные{/if}
7 |
8 | | # | Всего слов | Омонимичных слов |
9 | {foreach item=sentence from=$sentences}
10 | | {$sentence.id} | {$sentence.total} | {$sentence.homonymous} |
11 | {/foreach}
12 |
13 | {/block}
14 |
--------------------------------------------------------------------------------
/perl/lib/ISO/LMF/WordForm.pm:
--------------------------------------------------------------------------------
1 | package OpenCorpora::ISO::LMF::WordForm;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | use OpenCorpora::ISO::LMF::EntityBase;
8 |
9 | our $VERSION = "0.01";
10 |
11 |
# Construct a WordForm.
#
# Arguments: the class name, then optional key/value pairs (accepted but not
#            used by this constructor).
# Returns:   the object produced by EntityBase->new, blessed into $class,
#            with an empty form_representations list.
sub new {
    my ($class, %args) = @_;

    my $base = OpenCorpora::ISO::LMF::EntityBase->new();
    my $self = bless($base, $class);

    # This must be an array reference: assigning the empty list () to a
    # scalar slot stores undef, not an empty array.
    $self->{form_representations} = [];

    return $self;
}
21 |
# Append one or more form representations to this word form.
#
# Arguments: the object itself, followed by any number of representations;
#            they are stored as given, in call order.
# Returns:   nothing (empty list in list context, undef in scalar context).
sub add_form_representation {
    my ($self, @form_representations) = @_;

    # push takes a whole list, so no per-element loop is needed; the array
    # reference is autovivified if the slot is still undefined.
    push @{ $self->{form_representations} }, @form_representations;

    # A sub whose last statement is a foreach has an unspecified return
    # value, so return explicitly.
    return;
}
28 |
29 | 1;
30 |
--------------------------------------------------------------------------------
/lib/timer.php:
--------------------------------------------------------------------------------
1 | 0)
12 | $user_id = $_SESSION['user_id'];
13 |
14 | $page = $_SERVER['REQUEST_URI'];
15 | sql_pe(
16 | "INSERT INTO timing (user_id, page, total_time, is_ajax) VALUES (?, ?, ?, ?)",
17 | array($user_id, $page, $total_time, $is_ajax ? 1 : 0)
18 | );
19 | }
20 | ?>
21 |
--------------------------------------------------------------------------------
/migrations/20151210150743_add_moderator_column_to_ne_par.php:
--------------------------------------------------------------------------------
1 | table("ne_paragraphs")
12 | ->addColumn("is_moderator", "boolean", array("default" => false))
13 | ->update();
14 | }
15 |
/**
 * Migrate Down: drop the is_moderator column from ne_paragraphs.
 */
public function down() {
    $paragraphs = $this->table("ne_paragraphs");
    $paragraphs->removeColumn("is_moderator");
    $paragraphs->update();
}
24 | }
25 |
--------------------------------------------------------------------------------
/perl/lib/Lingua/AOT/MorphDict/MorphVariant.pm:
--------------------------------------------------------------------------------
1 | package Lingua::AOT::MorphDict::MorphVariant;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | use Lingua::AOT::MorphDict::FormSpec;
8 | use Lingua::AOT::MorphDict::Form;
9 | use Lingua::AOT::MorphDict::Paradigm;
10 |
11 | our $VERSION = "0.01";
12 |
13 |
# Construct a MorphVariant from a lemma id and an ancode.
#
# Arguments: class name, lemma id, ancode.
# Returns:   a blessed hash with the values stored under lid and ancode.
sub new {
    my ($class, $lemma_id, $ancode) = @_;

    return bless({ lid => $lemma_id, ancode => $ancode }, $class);
}
20 |
# Accessor: returns the lemma id stored under the lid key.
sub LemmaId {
    my ($self) = @_;

    return $self->{lid};
}
25 |
# Accessor: returns the value stored under the ancode key.
sub Ancode {
    my ($self) = @_;

    return $self->{ancode};
}
30 |
--------------------------------------------------------------------------------
/ajax/save_check.php:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/pred_to_intj.txt:
--------------------------------------------------------------------------------
1 | аллилуйя
2 | аминь
3 | брык
4 | верть
5 | вот
6 | дёрг
7 | да
8 | динь-динь-динь
9 | добро
10 | дрыг
11 | дудки
12 | кувырк
13 | мерси
14 | молчок
15 | морг
16 | на-ка
17 | нате-ка
18 | нет
19 | неужели
20 | неужто
21 | ни-ни
22 | паф
23 | пиф-паф
24 | плюс-минус
25 | пожалуйста
26 | прыг
27 | растудыть
28 | скок
29 | спасибо
30 | стук
31 | то-то
32 | топ
33 | трух-трух
34 | трюх-трюх
35 | тык
36 | тырк
37 | тю-тю
38 | умора
39 | фук
40 | фырк
41 | хап
42 | хлысть
43 | хлясть
44 | хоп
45 | цап
46 | цап-царап
47 | царап
48 | цоп
49 | чебурах
50 | чик-чирик
51 | шабаш
52 | шарк
53 | швах
54 | швырк
55 | шмыг
56 | шмяк
57 | щип
58 | юрк
59 |
--------------------------------------------------------------------------------
/ajax/merge_fails.php:
--------------------------------------------------------------------------------
1 | $TMPDIR/pools.txt
12 |
# Download the samples of every pool listed in pools.txt (the pool id is the
# first whitespace-separated field) into one tab file per pool.
for id in $(gawk '{ print $1 }' "$TMPDIR/pools.txt")
do
    wget -q "http://localhost/pools.php?act=samples&pool_id=$id&tabs=1&mod_ans" --output-document="$TMPDIR/pool_$id.tab"
done

# Archive from inside the temp dir so the archives hold bare file names.
# Abort if the directory is unavailable: otherwise tar --remove-files would
# archive and delete whatever matches in the current directory instead.
cd "$TMPDIR" || exit 1
zip -q9 "$EXPORT_DIR/pools.zip" pool*.t*
tar -cjf "$EXPORT_DIR/pools.tar.bz2" pool*.tab pools.txt --remove-files

rm -rf "$TMPDIR"
23 |
--------------------------------------------------------------------------------
/migrations/20140930101915_add_timer.php:
--------------------------------------------------------------------------------
1 | execute("CREATE TABLE timing (
10 | `timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
11 | `user_id` SMALLINT UNSIGNED NOT NULL DEFAULT 0,
12 | `page` VARCHAR(255) NOT NULL,
13 | `total_time` FLOAT NOT NULL,
14 | `is_ajax` TINYINT UNSIGNED NOT NULL
15 | ) ENGINE=INNODB");
16 | }
17 |
/**
 * Revert the migration by dropping the timing table that up() creates.
 */
public function down()
{
    $tableName = "timing";
    $this->dropTable($tableName);
}
22 | }
23 |
--------------------------------------------------------------------------------
/sources.php:
--------------------------------------------------------------------------------
1 | assign('sources', get_sources_page($skip, $what, $src));
17 | $smarty->assign('what', $what);
18 | $smarty->assign('skip', $skip);
19 | $smarty->display('templates/sources.tpl');
20 | }
21 | log_timing();
22 | ?>
23 |
--------------------------------------------------------------------------------
/user.php:
--------------------------------------------------------------------------------
1 | assign('user', get_user_info($id));
10 | $smarty->assign('user_id', $id);
11 |
12 | $smarty->assign('complexity', array(
13 | 0 => 'Сложность неизвестна',
14 | 1 => 'Очень простые задания',
15 | 2 => 'Простые задания',
16 | 3 => 'Сложные задания',
17 | 4 => 'Очень сложные задания'));
18 |
19 | $am2 = new AchievementsManager($id);
20 | $smarty->assign('achievements', $a = $am2->pull_all());
21 |
22 | $smarty->display('user.tpl');
23 | log_timing();
24 |
--------------------------------------------------------------------------------
/anaphora/NE_extract/run_parse.sh:
--------------------------------------------------------------------------------
#! /bin/bash
#
# Usage: ./run_parse.sh MorphDirName XmlDirName ResDirName
#
# Runs parse_xmlfacts.pl once per entry of MorphDirName, pairing it with the
# XML file of the same base name in XmlDirName, and writes a de-duplicated,
# sorted TSV per entry into ResDirName. All per-entry TSVs are then merged
# into groups.tsv, and the lines containing a tab followed by "17" are copied
# to pronouns.tsv. Anything parse_xmlfacts.pl prints on stderr is appended to
# the file "lost" in the current directory.

# All three arguments are required. Without the third one the cleanup below
# would expand to "rm /*".
if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]
then
    echo "./run_parse.sh MorphDirName XmlDirName ResDirName"
    exit 1
fi

morph_dir=$1
xml_dir=$2
res_dir=$3

mkdir -p "$res_dir"
# Start from an empty result directory; -f keeps rm quiet when it is already empty.
rm -f -- "$res_dir"/*

for d in "$morph_dir"/*
do
    XML=$(basename "$d")
    perl parse_xmlfacts.pl -m "$d" -x "$xml_dir/$XML.xml" > "$res_dir/$XML.tsv" 2>>lost
    # Braces are required: "$XML_sorted" would be read as the (unset)
    # variable XML_sorted, making the temporary file "<dir>/.tsv".
    sort -u -k2,2 "$res_dir/$XML.tsv" | sort -n > "$res_dir/${XML}_sorted.tsv"
    mv "$res_dir/${XML}_sorted.tsv" "$res_dir/$XML.tsv"
    cat "$res_dir/$XML.tsv" >> "$res_dir/groups.tsv"
done

sort -u -k2,2 "$res_dir/groups.tsv" | sort -n > "$res_dir/groups_sorted.tsv"
mv "$res_dir/groups_sorted.tsv" "$res_dir/groups.tsv"
grep -P "\t17" "$res_dir/groups.tsv" > "$res_dir/pronouns.tsv"
24 |
--------------------------------------------------------------------------------
/migrations/20190504181939_add_sentence_quality.php:
--------------------------------------------------------------------------------
1 | table('sentence_quality', array('id' => false, 'engine' => 'InnoDB'));
11 | $sq->addColumn('length', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_SMALL))
12 | ->addColumn('status', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_TINY))
13 | ->addColumn('count', 'integer', array('signed' => false, 'limit' => MysqlAdapter::INT_MEDIUM))
14 | ->create();
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/doc/presentations/2012_September29_OnePage.tex:
--------------------------------------------------------------------------------
1 | \documentclass{beamer}
2 | \usepackage{cmap}
3 | \usepackage[utf8]{inputenc}
4 | \usepackage[russian]{babel}
5 | \usepackage{listings}
6 | \usetheme{Antibes}
7 | \usecolortheme{beaver}
8 | \usepackage{graphicx}
9 | \graphicspath{{img/}}
10 |
11 | \title{Проект Открытый корпус / Морфологическая разметка}
12 | \begin{document}
13 |
14 | %slide 01
15 | \begin{frame}
16 | \frametitle{Присоединяйтесь к созданию разметки}
17 | \large{http://opencorpora.org}
18 | \hspace{1.4cm}
19 | \small{\color{gray}\{vk.com|twitter.com\}/opencorpora}
20 | \begin{figure}
21 | \center{\includegraphics[width=1\linewidth]{markupUI2-part.png}}
22 | \end{figure}
23 | \end{frame}
24 |
25 | \end{document}
26 |
--------------------------------------------------------------------------------
/templates/qa/dl_urls.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Сохранённые копии текстов источников
5 | Список обновляется при каждом обращении к этой странице.
6 |
16 | {/block}
17 |
--------------------------------------------------------------------------------
/ajax/own_book.php:
--------------------------------------------------------------------------------
1 | 0 ? $_SESSION['user_id'] : 0;
16 |
17 | sql_pe("UPDATE sources SET user_id=? WHERE source_id=? LIMIT 1", array($user_id, $sid));
18 | }
19 | catch (Exception $e) {
20 | $result['error'] = 1;
21 | }
22 | log_timing(true);
23 | die(json_encode($result));
24 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/list_adjf_fixd_ADVB.txt:
--------------------------------------------------------------------------------
1 | адажио
2 | алегретто
3 | аллегро
4 | альсекко
5 | альфреско
6 | анданте
7 | андантино
8 | аппассионато
9 | арпеджио
10 | арпеджо
11 | брутто
12 | вибрато
13 | виваче
14 | виво
15 | глиссандо
16 | граве
17 | декрешендо
18 | декрещендо
19 | диминуэндо
20 | дольче
21 | ин-кварто
22 | ин-октаво
23 | ин-фолио
24 | кантабиле
25 | крешендо
26 | крещендо
27 | ларгетто
28 | ларго
29 | легато
30 | ленто
31 | маэстозо
32 | модерато
33 | неглиже
34 | нетто
35 | пианиссимо
36 | пиано
37 | пиццикато
38 | пиччикато
39 | престиссимо
40 | престо
41 | ритенуто
42 | соло
43 | стаккато
44 | стретто
45 | субито
46 | сфорцандо
47 | сфорцато
48 | тремоландо
49 | факсимиле
50 | форте
51 | фортиссимо
52 | фуриозо
53 |
--------------------------------------------------------------------------------
/add.php:
--------------------------------------------------------------------------------
1 | assign('check', addtext_check(POST('txt'), POST('book_id', 0)));
14 | $smarty->display('addtext_check.tpl');
15 | break;
16 | default:
17 | check_permission(PERM_ADDER);
18 | $smarty->assign('txt', POST('txt', ''));
19 | $smarty->display('addtext.tpl');
20 | }
21 |
22 | log_timing();
23 |
--------------------------------------------------------------------------------
/templates/books.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Тексты
5 | Всего книг: {$books.num}{if $user_permission_adder},
6 | добавить:
7 |
8 | {/if}
9 |
10 | {foreach item=book from=$books.list}
11 | - {$book.title}
12 | {/foreach}
13 |
14 | {/block}
15 |
--------------------------------------------------------------------------------
/history.php:
--------------------------------------------------------------------------------
1 | setCaching(Smarty::CACHING_LIFETIME_SAVED);
12 | $smarty->setCacheLifetime(90);
13 | $cache_id = "$sent_id@$set_id@$skip@$maa@$user_id";
14 |
15 | if (!is_cached('history.tpl', $cache_id)) {
16 | $smarty->assign('history', main_history($sent_id, $set_id, $skip, $maa, $user_id));
17 | $smarty->assign('skip', $skip);
18 | $smarty->assign('maa', $maa);
19 | $smarty->assign('user_id', $user_id);
20 | }
21 | $smarty->display('history.tpl', $cache_id);
22 | log_timing();
23 | ?>
24 |
--------------------------------------------------------------------------------
/migrations/20160716091852_ne_annot_number_per_tagset.php:
--------------------------------------------------------------------------------
1 | table("ne_tagsets")
11 | ->addColumn('annots_per_text', 'integer', array(
12 | 'signed' => false,
13 | 'limit' => MysqlAdapter::INT_TINY,
14 | 'default' => 4
15 | ))
16 | ->addColumn('active_texts', 'integer', array(
17 | 'signed' => false,
18 | 'limit' => MysqlAdapter::INT_SMALL,
19 | 'default' => 10
20 | ))
21 | ->save();
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/templates/qa/pool_tabs.tpl:
--------------------------------------------------------------------------------
1 | {foreach from=$pool.samples item=sample}
2 | {$sample.id} {$sample.token_id} {strip}
3 | {foreach $sample.context as $token_id => $word}{if $token_id == $sample.mainword}[[{$word}]]{else}{$word}{/if} {/foreach}
4 | {/strip} {strip}
5 | {foreach from=$sample.comments item=comment}
6 | {$comment.text|replace:"\n":'\n'} ({$comment.author}, {$comment.timestamp|date_format:"%d.%m.%Y, %H:%M"});
7 | {/foreach}
8 | {/strip} {strip}
9 | {foreach from=$sample.instances item=instance}{if $instance.answer_num == $smarty.const.MA_ANSWER_OTHER}Other {elseif $instance.answer_num > 0}{$instance.answer_gram} {/if}
10 | {/foreach}
11 | {/strip}{if isset($smarty.get.mod_ans) && isset($sample.moder_answer_gram)} {$sample.moder_answer_gram}{else}{/if}
12 |
13 | {/foreach}
14 |
--------------------------------------------------------------------------------
/templates/top100.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name='content'}
4 |
5 | {if $smarty.get.what == 'colloc'}
6 | | # | | Абс. частота 1 | Абс. частота 2 | Совм. частота | Коэфф. |
7 | {foreach $stats as $i=>$s}
8 | | {$i+1} | {$s.lterm|htmlspecialchars} {$s.rterm|htmlspecialchars} | {$s.lfreq} | {$s.rfreq} | {$s.cfreq} | {$s.coeff} |
9 | {/foreach}
10 | {else}
11 | | # | Токен | Абс. частота | ipm (частота на миллион) |
12 | {foreach $stats as $i=>$s}
13 | | {$i+1} | {$s.token|htmlspecialchars} | {$s.abs} | {$s.ipm} |
14 | {/foreach}
15 | {/if}
16 |
17 | {/block}
18 |
--------------------------------------------------------------------------------
/generator_cp.php:
--------------------------------------------------------------------------------
1 | assign('status', $current['status']);
10 | $smarty->assign('since', $current['since']);
11 | $smarty->assign('tag', $current['tag']);
12 | $smarty->assign('next', $current['next']);
13 |
14 | switch ($action) {
15 | case 'toggle':
16 | $new = toggle_generator_status();
17 | $smarty->assign('status', $new['status']);
18 | $smarty->assign('since', $new['since']);
19 | $smarty->assign('tag', $new['tag']);
20 | $smarty->assign('next', $new['next']);
21 |
22 | break;
23 | }
24 |
25 | $smarty->display('generator_cp.tpl');
26 | log_timing();
27 | ?>
28 |
--------------------------------------------------------------------------------
/migrations/20160122154639_obj_property_multiple_values.php:
--------------------------------------------------------------------------------
1 | execute("alter table ne_object_prop_vals drop primary key");
12 | $this->execute("alter table ne_object_prop_vals add column val_id int unsigned not null auto_increment first, add primary key (val_id)");
13 | }
14 |
/**
 * Migrate Down: remove the val_id column, then put the composite primary
 * key (object_id, prop_id) back on ne_object_prop_vals.
 */
public function down() {
    $propVals = $this->table("ne_object_prop_vals");
    $propVals->removeColumn("val_id");
    $propVals->update();

    $restoreKeySql = "alter table ne_object_prop_vals add primary key (object_id, prop_id)";
    $this->execute($restoreKeySql);
}
22 | }
23 |
--------------------------------------------------------------------------------
/templates/qa/game_status.tpl:
--------------------------------------------------------------------------------
1 |
2 |
3 |
{if $user_rating.remaining_percent < 85}{$user_rating.current}{/if}
4 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/templates/openid_license.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Согласие с лицензией
5 | Вы входите на наш сайт в первый раз. Для того, чтобы продолжить работу, вы должны подтвердить свое согласие с лицензией.
6 |
10 | {/block}
11 |
--------------------------------------------------------------------------------
/perl/lib/ISO/LMF/LexicalEntry.pm:
--------------------------------------------------------------------------------
1 | package OpenCorpora::ISO::LMF::LexicalEntry;
2 |
3 | use strict;
4 | use warnings;
5 | use utf8;
6 |
7 | our $VERSION = "0.01";
8 |
9 |
10 |
11 |
# Construct an empty LexicalEntry.
#
# Arguments: class name, then optional key/value pairs. They are accepted
#            but ignored; the handler support they were meant for is not
#            implemented.
# Returns:   a blessed hash holding a lemma with empty text and an empty
#            gram hash, plus an empty forms hash.
sub new {
    my ($class, %args) = @_;

    my %fields = (
        lemma => { text => "", gram => {} },
        forms => {},
    );

    return bless({ %fields }, $class);
}
28 |
# Combined getter/setter for the lemma text.
#
# Called with an argument, stores it as the lemma text first; in either case
# returns the current lemma text.
sub lemma_text {
    my ($self, @new_value) = @_;

    $self->{lemma}->{text} = $new_value[0] if @new_value;

    return $self->{lemma}->{text};
}
38 |
39 | 1;
40 |
--------------------------------------------------------------------------------
/manual.php:
--------------------------------------------------------------------------------
1 | assign('content', get_wiki_page("Инструкция для модераторов"));
11 | break;
12 | case 'newslist_announce':
13 | $smarty->assign('content', get_wiki_page("Newslist opencorpora-dev"));
14 | break;
15 | default:
16 | if ($pool_type)
17 | $smarty->assign('content', get_wiki_page(get_pool_manual_page($pool_type)));
18 | else
19 | $smarty->assign('content', get_wiki_page("Инструкция по интерфейсу для снятия омонимии"));
20 | }
21 |
22 | $smarty->display('static/doc/annotation.tpl');
23 | log_timing();
24 | ?>
25 |
--------------------------------------------------------------------------------
/templates/addtext.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 | {extends file='common.tpl'}
3 | {block name=content}
4 | Добавляем текст
5 |
17 | {/block}
18 |
--------------------------------------------------------------------------------
/assets/js/merge_fails.js:
--------------------------------------------------------------------------------
// Wires up the merge-fails table: ticking an approval checkbox or leaving a
// comment cell posts the change to ajax/merge_fails.php, and the enclosing
// table cell is marked green once the request succeeds.
$(document).ready(function() {
    var ENDPOINT = "ajax/merge_fails.php";

    // POST `payload`, then add the 'bggreen' class to the <td> around `source`.
    function sendAndMark(source, payload) {
        var $cell = $(source).closest('td');
        $.post(ENDPOINT, payload, function() {
            $cell.addClass('bggreen');
        });
    }

    $(".approve-sample").change(function(event) {
        var $box = $(this);
        sendAndMark(event.target, {
            act: "approve",
            value: $box.is(":checked") ? 1 : 0,
            id: $box.attr("data-id")
        });
    });

    $(".comment-cell").blur(function(event) {
        var $comment = $(this);
        sendAndMark(event.target, {
            act: "comment",
            id: $comment.attr("data-id"),
            text: $comment.text()
        });
    });
});
26 |
--------------------------------------------------------------------------------
/scripts/aot_import/lists/adjs_forms_del.txt:
--------------------------------------------------------------------------------
1 | #Удалить из парадигмы кратких прилагательных следующие формы
2 | вынуждена
3 | вынуждено
4 | вынуждены
5 | изощрена
6 | изощрено
7 | изощрены
8 | искажена
9 | искажено
10 | искажены
11 | обижена
12 | обижено
13 | обижены
14 | обнажена
15 | обнажено
16 | обнажены
17 | обречена
18 | обречено
19 | обречены
20 | огорчена
21 | огорчено
22 | огорчены
23 | обособлена
24 | обособлено
25 | обособлены
26 | одарена
27 | одарено
28 | одарены
29 | предана
30 | предано
31 | преданы
32 | связана
33 | смущена
34 | убеждена
35 | уверена
36 | углублена
37 | удалена
38 | удручена
39 | утомлена
40 | связано
41 | смущено
42 | убеждено
43 | уверено
44 | углублено
45 | удалено
46 | удручено
47 | утомлено
48 | связаны
49 | смущены
50 | убеждены
51 | уверены
52 | углублены
53 | удалены
54 | удручены
55 | утомлены
56 |
--------------------------------------------------------------------------------
/syntax.php:
--------------------------------------------------------------------------------
1 | assign('page', get_books_with_syntax());
25 | $smarty->display('syntax/main.tpl');
26 | }
27 | log_timing();
28 | ?>
29 |
--------------------------------------------------------------------------------
/export/annot/disamb_nonmod_tests/pool_158.tab:
--------------------------------------------------------------------------------
1 | 122834 332515 После подсчёта голосов 10 % избирательных участков , результаты выборов , в нижнюю палату парламента — Палату [[депутатов]] , выглядят так : NOUN & plur & gent NOUN & plur & gent NOUN & plur & accs
2 | 122769 124015 Также следует учесть , что в Австралии , с одной стороны , значительная часть аборигенов продолжала жить в таких же условиях , как жили их предки лет сто назад , с другой стороны , появилась небольшая городская прослойка [[аборигенов]] , проживавших в гетто и имевшие хорошее представление о том , что такое рок-музыка . NOUN & plur & gent NOUN & plur & gent NOUN & plur & gent
3 | 122850 473525 Рустэм [[Хамитов]] опроверг появившуюся в СМИ информацию о необычно высокой пенсии Муртазы Рахимова . Хамитов - ед.ч., именительный (quorax, 01.09.2012, 12:13); Other Other Other
4 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/cpp/lemmatizer/Makefile:
--------------------------------------------------------------------------------
# Builds the lemmatizer binary from main.cpp plus the corpora_io and dict
# sources in $(LIB_PATH); headers are taken from $(INC_PATH).
CC=g++
LD=g++
INC_PATH=../include/
LIB_PATH=../lib/
CFLAGS=-c -O3 -Wall -std=c++0x -I$(INC_PATH) `pkg-config --cflags glibmm-2.4`
LDFLAGS=`pkg-config --libs glibmm-2.4`

# "all" names no file; declaring it phony keeps a stray file called "all"
# from silently disabling the build.
.PHONY: all

all: lemmatizer

lemmatizer: main.o corpora_io.o dict.o
	$(LD) -O3 main.o corpora_io.o dict.o $(LDFLAGS) -o lemmatizer

main.o: main.cpp $(INC_PATH)tag.h $(INC_PATH)token.h $(INC_PATH)utils.h $(INC_PATH)sentence.h $(INC_PATH)corpora_io.h $(INC_PATH)dict.h
	$(CC) $(CFLAGS) main.cpp

corpora_io.o: $(LIB_PATH)corpora_io.cpp $(INC_PATH)corpora_io.h $(INC_PATH)sentence.h $(INC_PATH)token.h $(INC_PATH)tag.h $(INC_PATH)utils.h
	$(CC) $(CFLAGS) $(LIB_PATH)corpora_io.cpp

dict.o: $(LIB_PATH)dict.cpp $(INC_PATH)dict.h
	$(CC) $(CFLAGS) $(LIB_PATH)dict.cpp
21 |
--------------------------------------------------------------------------------
/templates/footer.tpl:
--------------------------------------------------------------------------------
1 | {* Smarty *}
2 |
13 |
--------------------------------------------------------------------------------
/export/pools.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | use strict;
4 | use utf8;
5 | use DBI;
6 | use Config::INI::Reader;
7 |
# The only argument is the path to an INI file with a [mysql] section
# providing dbname, host, user and passwd.
$ARGV[0] or die "Usage: $0 CONFIG_FILE\n";
#reading config
my $conf = Config::INI::Reader->read_file($ARGV[0]);
$conf = $conf->{mysql}
    or die "No [mysql] section found in $ARGV[0]\n";

#main
my $dbh = DBI->connect('DBI:mysql:'.$conf->{'dbname'}.':'.$conf->{'host'}, $conf->{'user'}, $conf->{'passwd'});
if (!$dbh) {
    die $DBI::errstr;
}

# One row per annotation pool, ordered by pool id.
my $scan = $dbh->prepare("
    SELECT pool_id, status, grammemes
    FROM morph_annot_pools p
    LEFT JOIN morph_annot_pool_types t
    ON (p.pool_type = t.type_id)
    ORDER BY pool_id
") or die $dbh->errstr;

# Stop on a failed query instead of silently printing nothing.
$scan->execute() or die $scan->errstr;

# Output format: pool_id <TAB> grammemes <TAB> status, one pool per line.
while (my $r = $scan->fetchrow_hashref()) {
    # Because of the LEFT JOIN grammemes may be NULL (undef) - presumably
    # when a pool's type has no matching row; print an empty field then.
    my $grammemes = defined $r->{'grammemes'} ? $r->{'grammemes'} : '';
    printf "%d\t%s\t%d\n",
        $r->{'pool_id'}, $grammemes, $r->{'status'};
}

$dbh->disconnect();
31 |
--------------------------------------------------------------------------------
/anaphora/export_pairs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # -*- coding: utf-8 -*-
3 | import sys
4 | sys.path.append('/corpus/python')
5 | from Annotation import AnnotationEditor
6 |
7 | CONFIG_PATH = "/corpus/config.ini"
8 |
def do_export(dbh):
    """Print every anaphora row as a tab-separated line.

    Runs one SELECT through ``dbh`` (anything with ``execute`` and
    ``fetchall``; rows must support lookup by column name) and writes one
    line per row to stdout in the order book_id, token_id, group_id.
    """
    query = """
        SELECT token_id, group_id, book_id
        FROM anaphora
        LEFT JOIN tokens ON (anaphora.token_id = tokens.tf_id)
        JOIN sentences USING (sent_id)
        JOIN paragraphs USING (par_id)
        ORDER BY book_id, group_id, token_id
    """
    dbh.execute(query)

    # Positional template: {0}=token_id, {1}=group_id, {2}=book_id.
    line_template = "{2}\t{0}\t{1}"
    for record in dbh.fetchall():
        token_id = record['token_id']
        group_id = record['group_id']
        book_id = record['book_id']
        print(line_template.format(token_id, group_id, book_id))
21 |
def main():
    """Open the annotation editor from CONFIG_PATH and export through its cursor."""
    cursor = AnnotationEditor(CONFIG_PATH).db_cursor
    do_export(cursor)
25 |
26 | if __name__ == "__main__":
27 | main()
28 |
--------------------------------------------------------------------------------
/migrations/20200625120954_long_good_sentences.php:
--------------------------------------------------------------------------------
1 | table('good_sentences');
11 | $gs->changeColumn('num_words', 'integer', ['signed' => false, 'limit' => MysqlAdapter::INT_SMALL]);
12 | $gs->changeColumn('num_homonymous', 'integer', ['signed' => false, 'limit' => MysqlAdapter::INT_SMALL]);
13 | }
14 |
/**
 * Revert the migration: shrink num_words and num_homonymous on
 * good_sentences back to unsigned TINYINT.
 */
public function down()
{
    // changeColumn() only queues the change on current Phinx versions;
    // update() is what sends it to the database (and is harmless on
    // versions that apply the change immediately).
    $this->table('good_sentences')
        ->changeColumn('num_words', 'integer', ['signed' => false, 'limit' => MysqlAdapter::INT_TINY])
        ->changeColumn('num_homonymous', 'integer', ['signed' => false, 'limit' => MysqlAdapter::INT_TINY])
        ->update();
}
21 | }
22 |
--------------------------------------------------------------------------------
/migrations/20151028150709_move_object_type_to2_level.php:
--------------------------------------------------------------------------------
1 | table("ne_objects")
12 | ->removeColumn("object_type_id")
13 | ->update();
14 |
15 | $this->table("ne_mentions")
16 | ->addColumn('object_type_id', 'integer')
17 | ->update();
18 | }
19 |
/**
 * Migrate Down: move object_type_id back from ne_mentions to ne_objects
 * (the column is dropped on the former and re-added, as an integer, on the
 * latter).
 */
public function down() {
    $mentions = $this->table("ne_mentions");
    $mentions->removeColumn("object_type_id");
    $mentions->update();

    $objects = $this->table("ne_objects");
    $objects->addColumn('object_type_id', 'integer');
    $objects->update();
}
32 | }
33 |
--------------------------------------------------------------------------------
/postagging/brill/unsupervised/cpp/include/dict.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include