├── .gitignore ├── README.md ├── compute-pre-train-distance.sh ├── data └── .gitignore ├── fix_special_symbols.py ├── pre-trained └── zhwiki-latest-pages-articles.0620 │ ├── chs.normalized.wordseg.w2v │ └── chs.normalized.wordseg.w2v.vocab ├── requirement.txt ├── t2s.json ├── tensorflow_word2vec ├── README.md ├── requirements.txt ├── result.txt ├── text8.zip └── word2vec_basic.py ├── tfidf_plain.py ├── tfidf_sklearn.py ├── word2vec_c_format_build_dict.sh ├── word2vec_c_format_train.sh ├── word2vec_gensim_similarity.py └── wordseg.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/README.md -------------------------------------------------------------------------------- /compute-pre-train-distance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/compute-pre-train-distance.sh -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/data/.gitignore -------------------------------------------------------------------------------- /fix_special_symbols.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/fix_special_symbols.py -------------------------------------------------------------------------------- /pre-trained/zhwiki-latest-pages-articles.0620/chs.normalized.wordseg.w2v: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/pre-trained/zhwiki-latest-pages-articles.0620/chs.normalized.wordseg.w2v -------------------------------------------------------------------------------- /pre-trained/zhwiki-latest-pages-articles.0620/chs.normalized.wordseg.w2v.vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/pre-trained/zhwiki-latest-pages-articles.0620/chs.normalized.wordseg.w2v.vocab -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/requirement.txt -------------------------------------------------------------------------------- /t2s.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/t2s.json -------------------------------------------------------------------------------- /tensorflow_word2vec/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tensorflow_word2vec/README.md -------------------------------------------------------------------------------- /tensorflow_word2vec/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tensorflow_word2vec/requirements.txt -------------------------------------------------------------------------------- /tensorflow_word2vec/result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tensorflow_word2vec/result.txt -------------------------------------------------------------------------------- /tensorflow_word2vec/text8.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tensorflow_word2vec/text8.zip -------------------------------------------------------------------------------- /tensorflow_word2vec/word2vec_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tensorflow_word2vec/word2vec_basic.py -------------------------------------------------------------------------------- /tfidf_plain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tfidf_plain.py -------------------------------------------------------------------------------- /tfidf_sklearn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/tfidf_sklearn.py -------------------------------------------------------------------------------- /word2vec_c_format_build_dict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/word2vec_c_format_build_dict.sh -------------------------------------------------------------------------------- /word2vec_c_format_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/word2vec_c_format_train.sh -------------------------------------------------------------------------------- /word2vec_gensim_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/word2vec_gensim_similarity.py -------------------------------------------------------------------------------- /wordseg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chatopera/wikidata-corpus/HEAD/wordseg.py --------------------------------------------------------------------------------