├── demo-word.sh
├── demo-classes.sh
├── demo-word-accuracy.sh
├── .gitignore
├── demo-analogy.sh
├── demo-phrases.sh
├── demo-phrase-accuracy.sh
├── Makefile
├── win32-port.h
├── README.txt
├── vs2017
    ├── word2vec.sln
    ├── distance.vcxproj
    ├── word2vec.vcxproj
    ├── word-analogy.vcxproj
    ├── word2phrase.vcxproj
    ├── compute-accuracy.vcxproj
    └── word2vec-doc2vec.vcxproj
├── vs2015
    ├── word2vec.sln
    ├── distance.vcxproj
    ├── word2vec.vcxproj
    ├── word2phrase.vcxproj
    ├── word-analogy.vcxproj
    ├── compute-accuracy.vcxproj
    └── word2vec-doc2vec.vcxproj
├── distance.c
├── word-analogy.c
├── demo-train-big-model-v1.sh
├── compute-accuracy.c
├── word2phrase.c
├── LICENSE.txt
├── word2vec-doc2vec.c
└── word2vec.c


/demo-word.sh:
--------------------------------------------------------------------------------
1 | make
2 | if [ ! -e text8 ]; then
3 |   wget http://mattmahoney.net/dc/text8.zip -O text8.gz
4 |   gzip -d text8.gz -f
5 | fi
6 | time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15
7 | ./distance vectors.bin
8 | 


--------------------------------------------------------------------------------
/demo-classes.sh:
--------------------------------------------------------------------------------
1 | make
2 | if [ ! -e text8 ]; then
3 |   wget http://mattmahoney.net/dc/text8.zip -O text8.gz
4 |   gzip -d text8.gz -f
5 | fi
6 | time ./word2vec -train text8 -output classes.txt -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -iter 15 -classes 500
7 | sort classes.txt -k 2 -n > classes.sorted.txt
8 | echo The word classes were saved to file classes.sorted.txt
9 | 


--------------------------------------------------------------------------------
/demo-word-accuracy.sh:
--------------------------------------------------------------------------------
1 | make
2 | if [ ! -e text8 ]; then
3 |   wget http://mattmahoney.net/dc/text8.zip -O text8.gz
4 |   gzip -d text8.gz -f
5 | fi
6 | time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15
7 | ./compute-accuracy vectors.bin 30000 < questions-words.txt
8 | # to compute accuracy with the full vocabulary, use: ./compute-accuracy vectors.bin < questions-words.txt
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | 
 3 | # Compiled Object files
 4 | *.slo
 5 | *.lo
 6 | *.o
 7 | *.co
 8 | *.obj
 9 | *.iobj
10 | *.ipdb
11 | *.pdb
12 | 
13 | # Visual Studio
14 | *sdf
15 | *.suo
16 | *.ncb
17 | *.user
18 | Debug
19 | Release
20 | 
21 | *.exe
22 | text8
23 | text8-phrase
24 | vectors.bin
25 | vectors-phrase.bin
26 | classes.txt
27 | classes.sorted.txt
28 | 
29 | compute-accuracy
30 | distance
31 | word-analogy
32 | word2phrase
33 | word2vec
34 | word2vec-blas
35 | word2vec-doc2vec
36 | *.opendb
37 | 


--------------------------------------------------------------------------------
/demo-analogy.sh:
--------------------------------------------------------------------------------
 1 | make
 2 | if [ ! -e text8 ]; then
 3 |   wget http://mattmahoney.net/dc/text8.zip -O text8.gz
 4 |   gzip -d text8.gz -f
 5 | fi
 6 | echo ---------------------------------------------------------------------------------------------------
 7 | echo Note that for the word analogy to perform well, the model should be trained on much larger data set
 8 | echo Example input: paris france berlin
 9 | echo ---------------------------------------------------------------------------------------------------
10 | time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15
11 | ./word-analogy vectors.bin
12 | 


--------------------------------------------------------------------------------
/demo-phrases.sh:
--------------------------------------------------------------------------------
 1 | make
 2 | if [ ! -e news.2012.en.shuffled ]; then
 3 |   wget http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2012.en.shuffled.gz
 4 |   gzip -d news.2012.en.shuffled.gz -f
 5 | fi
 6 | sed -e "s/’/'/g" -e "s/′/'/g" -e "s/''/ /g" < news.2012.en.shuffled | tr -c "A-Za-z'_ \n" " " > news.2012.en.shuffled-norm0
 7 | time ./word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2
 8 | time ./word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2
 9 | tr A-Z a-z < news.2012.en.shuffled-norm0-phrase1 > news.2012.en.shuffled-norm1-phrase1
10 | time ./word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15
11 | ./distance vectors-phrase.bin
12 | 


--------------------------------------------------------------------------------
/demo-phrase-accuracy.sh:
--------------------------------------------------------------------------------
 1 | make
 2 | if [ ! -e news.2012.en.shuffled ]; then
 3 |   wget http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2012.en.shuffled.gz
 4 |   gzip -d news.2012.en.shuffled.gz -f
 5 | fi
 6 | sed -e "s/’/'/g" -e "s/′/'/g" -e "s/''/ /g" < news.2012.en.shuffled | tr -c "A-Za-z'_ \n" " " > news.2012.en.shuffled-norm0
 7 | time ./word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2
 8 | time ./word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2
 9 | tr A-Z a-z < news.2012.en.shuffled-norm0-phrase1 > news.2012.en.shuffled-norm1-phrase1
10 | time ./word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15
11 | ./compute-accuracy vectors-phrase.bin < questions-phrases.txt
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CC = gcc
 2 | #Using -Ofast instead of -O3 might result in faster code, but is supported only by newer GCC versions
 3 | CFLAGS = -lm -O3 -march=native -mtune=native -Wall -funroll-loops -Wno-unused-result -DNDEBUG
 4 | 
 5 | all: word2vec word2vec-doc2vec word2phrase distance word-analogy compute-accuracy
 6 | 
 7 | word2vec : word2vec.c
 8 | 	$(CC) word2vec.c -o word2vec $(CFLAGS) -pthread
 9 | word2vec-doc2vec : word2vec-doc2vec.c
10 | 	$(CC) word2vec-doc2vec.c -o word2vec-doc2vec $(CFLAGS) -pthread
11 | word2phrase : word2phrase.c
12 | 	$(CC) word2phrase.c -o word2phrase $(CFLAGS)
13 | distance : distance.c
14 | 	$(CC) distance.c -o distance $(CFLAGS)
15 | word-analogy : word-analogy.c
16 | 	$(CC) word-analogy.c -o word-analogy $(CFLAGS)
17 | compute-accuracy : compute-accuracy.c
18 | 	$(CC) compute-accuracy.c -o compute-accuracy $(CFLAGS)
19 | word2vec-blas : word2vec.c
20 | 	$(CC) word2vec.c -o word2vec-blas $(CFLAGS) -pthread -DHAVE_CBLAS=1 -lopenblas
21 | 
22 | clean:
23 | 	rm -f word2vec word2vec-doc2vec word2vec-blas word2phrase distance word-analogy compute-accuracy
24 | 	rm -f word2vec.exe word2vec-doc2vec.exe word2vec-blas.exe word2phrase.exe distance.exe word-analogy.exe compute-accuracy.exe
25 | 


--------------------------------------------------------------------------------
/win32-port.h:
--------------------------------------------------------------------------------
 1 | #if !defined WIN32_LEAN_AND_MEAN
 2 | #define WIN32_LEAN_AND_MEAN
 3 | #endif
 4 | #include <Windows.h>
 5 | #include <process.h>
 6 | #include <assert.h>
 7 | 
 8 | typedef struct {
 9 |   void *(*pthread_routine)(void *);
10 |   void *pthread_arg;
11 |   HANDLE handle;
12 | } pthread_t;
13 | 
14 | static unsigned __stdcall win32_start_routine(void *arg) {
15 |   pthread_t *p = (pthread_t *)arg;
16 |   p->pthread_routine(p->pthread_arg);
17 |   return 0;
18 | }
19 | 
20 | static int pthread_create(pthread_t *id, void *attr,
21 |                           void *(*start_routine)(void *), void *arg) {
22 |   assert(attr == 0);
23 |   id->pthread_routine = start_routine;
24 |   id->pthread_arg = arg;
25 |   id->handle =
26 |       (HANDLE)_beginthreadex(0, 0, win32_start_routine, (void *)id, 0, 0);
27 |   if (id->handle != 0) return 0;
28 |   return -1;
29 | }
30 | 
31 | static int pthread_join(pthread_t thread, void **retval) {
32 |   WaitForSingleObject(thread.handle, INFINITE);
33 |   if (retval) {
34 |     *retval = 0;
35 |   }
36 |   return 0;
37 | }
38 | 
39 | static void pthread_exit(void *p) { _endthreadex(0); }
40 | 
41 | static int posix_memalign(void **memptr, size_t alignment, size_t size) {
42 |   assert(memptr);
43 |   *memptr = _aligned_malloc(size, alignment);
44 |   if (*memptr) {
45 |     return 0;
46 |   } else {
47 |     return -1;
48 |   }
49 | }
50 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | Tools for computing distributed representtion of words
 2 | ------------------------------------------------------
 3 | 
 4 | We provide an implementation of the Continuous Bag-of-Words (CBOW) and the Skip-gram model (SG), as well as several demo scripts.
 5 | 
 6 | Given a text corpus, the word2vec tool learns a vector for every word in the vocabulary using the Continuous
 7 | Bag-of-Words or the Skip-Gram neural network architectures. The user should to specify the following:
 8 |  - desired vector dimensionality
 9 |  - the size of the context window for either the Skip-Gram or the Continuous Bag-of-Words model
10 |  - training algorithm: hierarchical softmax and / or negative sampling
11 |  - threshold for downsampling the frequent words 
12 |  - number of threads to use
13 |  - the format of the output word vector file (text or binary)
14 | 
15 | Usually, the other hyper-parameters such as the learning rate do not need to be tuned for different training sets. 
16 | 
17 | The script demo-word.sh downloads a small (100MB) text corpus from the web, and trains a small word vector model. After the training
18 | is finished, the user can interactively explore the similarity of the words.
19 | 
20 | More information about the scripts is provided at https://code.google.com/p/word2vec/
21 | 
22 | ------------------------------------------------------
23 | In order to get Python wrapper (https://github.com/danielfrg/word2vec) working 
24 | added word2vec-doc2vec tool support (https://github.com/nliu86/word2vec-doc2vec)
25 | 


--------------------------------------------------------------------------------
/vs2017/word2vec.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 15
 4 | VisualStudioVersion = 15.0.26403.3
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2vec", "word2vec.vcxproj", "{3665E45D-8606-4F60-B864-2AD85FB18CA1}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "distance", "distance.vcxproj", "{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}"
 9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2phrase", "word2phrase.vcxproj", "{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}"
11 | EndProject
12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compute-accuracy", "compute-accuracy.vcxproj", "{78698725-BA8A-410B-9971-2BF28562B2D1}"
13 | EndProject
14 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word-analogy", "word-analogy.vcxproj", "{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}"
15 | EndProject
16 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2vec-doc2vec", "word2vec-doc2vec.vcxproj", "{4192BAE0-FC98-4AE4-819A-65C0B896C38E}"
17 | EndProject
18 | Global
19 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
20 | 		Debug|x64 = Debug|x64
21 | 		Release|x64 = Release|x64
22 | 	EndGlobalSection
23 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
24 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|x64.ActiveCfg = Debug|x64
25 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|x64.Build.0 = Debug|x64
26 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|x64.ActiveCfg = Release|x64
27 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|x64.Build.0 = Release|x64
28 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|x64.ActiveCfg = Debug|x64
29 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|x64.Build.0 = Debug|x64
30 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|x64.ActiveCfg = Release|x64
31 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|x64.Build.0 = Release|x64
32 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|x64.ActiveCfg = Debug|x64
33 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|x64.Build.0 = Debug|x64
34 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|x64.ActiveCfg = Release|x64
35 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|x64.Build.0 = Release|x64
36 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|x64.ActiveCfg = Debug|x64
37 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|x64.Build.0 = Debug|x64
38 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|x64.ActiveCfg = Release|x64
39 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|x64.Build.0 = Release|x64
40 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|x64.ActiveCfg = Debug|x64
41 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|x64.Build.0 = Debug|x64
42 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|x64.ActiveCfg = Release|x64
43 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|x64.Build.0 = Release|x64
44 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|x64.ActiveCfg = Debug|x64
45 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|x64.Build.0 = Debug|x64
46 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|x64.ActiveCfg = Release|x64
47 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|x64.Build.0 = Release|x64
48 | 	EndGlobalSection
49 | 	GlobalSection(SolutionProperties) = preSolution
50 | 		HideSolutionNode = FALSE
51 | 	EndGlobalSection
52 | EndGlobal
53 | 


--------------------------------------------------------------------------------
/vs2015/word2vec.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 14
 4 | VisualStudioVersion = 14.0.25420.1
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2vec", "word2vec.vcxproj", "{3665E45D-8606-4F60-B864-2AD85FB18CA1}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "distance", "distance.vcxproj", "{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}"
 9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2phrase", "word2phrase.vcxproj", "{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}"
11 | EndProject
12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compute-accuracy", "compute-accuracy.vcxproj", "{78698725-BA8A-410B-9971-2BF28562B2D1}"
13 | EndProject
14 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word-analogy", "word-analogy.vcxproj", "{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}"
15 | EndProject
16 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "word2vec-doc2vec", "word2vec-doc2vec.vcxproj", "{4192BAE0-FC98-4AE4-819A-65C0B896C38E}"
17 | EndProject
18 | Global
19 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
20 | 		Debug|Win32 = Debug|Win32
21 | 		Debug|x64 = Debug|x64
22 | 		Release|Win32 = Release|Win32
23 | 		Release|x64 = Release|x64
24 | 	EndGlobalSection
25 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
26 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|Win32.ActiveCfg = Debug|Win32
27 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|Win32.Build.0 = Debug|Win32
28 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|x64.ActiveCfg = Debug|x64
29 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Debug|x64.Build.0 = Debug|x64
30 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|Win32.ActiveCfg = Release|Win32
31 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|Win32.Build.0 = Release|Win32
32 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|x64.ActiveCfg = Release|x64
33 | 		{3665E45D-8606-4F60-B864-2AD85FB18CA1}.Release|x64.Build.0 = Release|x64
34 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|Win32.ActiveCfg = Debug|Win32
35 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|Win32.Build.0 = Debug|Win32
36 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|x64.ActiveCfg = Debug|x64
37 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Debug|x64.Build.0 = Debug|x64
38 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|Win32.ActiveCfg = Release|Win32
39 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|Win32.Build.0 = Release|Win32
40 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|x64.ActiveCfg = Release|x64
41 | 		{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}.Release|x64.Build.0 = Release|x64
42 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|Win32.ActiveCfg = Debug|Win32
43 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|Win32.Build.0 = Debug|Win32
44 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|x64.ActiveCfg = Debug|x64
45 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Debug|x64.Build.0 = Debug|x64
46 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|Win32.ActiveCfg = Release|Win32
47 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|Win32.Build.0 = Release|Win32
48 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|x64.ActiveCfg = Release|x64
49 | 		{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}.Release|x64.Build.0 = Release|x64
50 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|Win32.ActiveCfg = Debug|Win32
51 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|Win32.Build.0 = Debug|Win32
52 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|x64.ActiveCfg = Debug|x64
53 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Debug|x64.Build.0 = Debug|x64
54 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|Win32.ActiveCfg = Release|Win32
55 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|Win32.Build.0 = Release|Win32
56 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|x64.ActiveCfg = Release|x64
57 | 		{78698725-BA8A-410B-9971-2BF28562B2D1}.Release|x64.Build.0 = Release|x64
58 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|Win32.ActiveCfg = Debug|Win32
59 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|Win32.Build.0 = Debug|Win32
60 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|x64.ActiveCfg = Debug|x64
61 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Debug|x64.Build.0 = Debug|x64
62 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|Win32.ActiveCfg = Release|Win32
63 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|Win32.Build.0 = Release|Win32
64 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|x64.ActiveCfg = Release|x64
65 | 		{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}.Release|x64.Build.0 = Release|x64
66 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|Win32.ActiveCfg = Debug|Win32
67 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|Win32.Build.0 = Debug|Win32
68 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|x64.ActiveCfg = Debug|x64
69 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Debug|x64.Build.0 = Debug|x64
70 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|Win32.ActiveCfg = Release|Win32
71 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|Win32.Build.0 = Release|Win32
72 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|x64.ActiveCfg = Release|x64
73 | 		{4192BAE0-FC98-4AE4-819A-65C0B896C38E}.Release|x64.Build.0 = Release|x64
74 | 	EndGlobalSection
75 | 	GlobalSection(SolutionProperties) = preSolution
76 | 		HideSolutionNode = FALSE
77 | 	EndGlobalSection
78 | EndGlobal
79 | 


--------------------------------------------------------------------------------
/distance.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #include <malloc.h>
 19 | 
 20 | #define max_size 2000  // max length of strings
 21 | #define N 40           // number of closest words that will be shown
 22 | #define max_w 50       // max length of vocabulary entries
 23 | 
 24 | int main(int argc, char **argv) {
 25 |   FILE *f;
 26 |   char st1[max_size];
 27 |   char *bestw[N];
 28 |   char file_name[max_size], st[100][max_size];
 29 |   float dist, len, bestd[N], vec[max_size];
 30 |   long long words, size, a, b, c, d, cn, bi[100];
 31 |   float *M;
 32 |   char *vocab;
 33 |   if (argc < 2) {
 34 |     printf(
 35 |         "Usage: ./distance <FILE>\nwhere FILE contains word projections in the "
 36 |         "BINARY FORMAT\n");
 37 |     return 0;
 38 |   }
 39 |   strcpy(file_name, argv[1]);
 40 |   f = fopen(file_name, "rb");
 41 |   if (f == NULL) {
 42 |     printf("Input file not found\n");
 43 |     return -1;
 44 |   }
 45 |   fscanf(f, "%lld", &words);
 46 |   fscanf(f, "%lld", &size);
 47 |   vocab = (char *)malloc((long long)words * max_w * sizeof(char));
 48 |   for (a = 0; a < N; a++) bestw[a] = (char *)malloc(max_size * sizeof(char));
 49 |   M = (float *)malloc((long long)words * (long long)size * sizeof(float));
 50 |   if (M == NULL) {
 51 |     printf("Cannot allocate memory: %lld MB    %lld  %lld\n",
 52 |            (long long)words * size * sizeof(float) / 1048576, words, size);
 53 |     return -1;
 54 |   }
 55 |   for (b = 0; b < words; b++) {
 56 |     a = 0;
 57 |     while (1) {
 58 |       vocab[b * max_w + a] = fgetc(f);
 59 |       if (feof(f) || (vocab[b * max_w + a] == ' ')) break;
 60 |       if ((a < max_w) && (vocab[b * max_w + a] != '\n')) a++;
 61 |     }
 62 |     vocab[b * max_w + a] = 0;
 63 |     for (a = 0; a < size; a++) fread(&M[a + b * size], sizeof(float), 1, f);
 64 |     len = 0;
 65 |     for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
 66 |     len = sqrt(len);
 67 |     for (a = 0; a < size; a++) M[a + b * size] /= len;
 68 |   }
 69 |   fclose(f);
 70 |   while (1) {
 71 |     for (a = 0; a < N; a++) bestd[a] = 0;
 72 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
 73 |     printf("Enter word or sentence (EXIT to break): ");
 74 |     a = 0;
 75 |     while (1) {
 76 |       st1[a] = fgetc(stdin);
 77 |       if ((st1[a] == '\n') || (a >= max_size - 1)) {
 78 |         st1[a] = 0;
 79 |         break;
 80 |       }
 81 |       a++;
 82 |     }
 83 |     if (!strcmp(st1, "EXIT")) break;
 84 |     cn = 0;
 85 |     b = 0;
 86 |     c = 0;
 87 |     while (1) {
 88 |       st[cn][b] = st1[c];
 89 |       b++;
 90 |       c++;
 91 |       st[cn][b] = 0;
 92 |       if (st1[c] == 0) break;
 93 |       if (st1[c] == ' ') {
 94 |         cn++;
 95 |         b = 0;
 96 |         c++;
 97 |       }
 98 |     }
 99 |     cn++;
100 |     for (a = 0; a < cn; a++) {
101 |       for (b = 0; b < words; b++)
102 |         if (!strcmp(&vocab[b * max_w], st[a])) break;
103 |       if (b == words) b = -1;
104 |       bi[a] = b;
105 |       printf("\nWord: %s  Position in vocabulary: %lld\n", st[a], bi[a]);
106 |       if (b == -1) {
107 |         printf("Out of dictionary word!\n");
108 |         break;
109 |       }
110 |     }
111 |     if (b == -1) continue;
112 |     printf(
113 |         "\n                                              Word       Cosine "
114 |         "distance\n------------------------------------------------------------"
115 |         "------------\n");
116 |     for (a = 0; a < size; a++) vec[a] = 0;
117 |     for (b = 0; b < cn; b++) {
118 |       if (bi[b] == -1) continue;
119 |       for (a = 0; a < size; a++) vec[a] += M[a + bi[b] * size];
120 |     }
121 |     len = 0;
122 |     for (a = 0; a < size; a++) len += vec[a] * vec[a];
123 |     len = sqrt(len);
124 |     for (a = 0; a < size; a++) vec[a] /= len;
125 |     for (a = 0; a < N; a++) bestd[a] = -1;
126 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
127 |     for (c = 0; c < words; c++) {
128 |       a = 0;
129 |       for (b = 0; b < cn; b++)
130 |         if (bi[b] == c) a = 1;
131 |       if (a == 1) continue;
132 |       dist = 0;
133 |       for (a = 0; a < size; a++) dist += vec[a] * M[a + c * size];
134 |       for (a = 0; a < N; a++) {
135 |         if (dist > bestd[a]) {
136 |           for (d = N - 1; d > a; d--) {
137 |             bestd[d] = bestd[d - 1];
138 |             strcpy(bestw[d], bestw[d - 1]);
139 |           }
140 |           bestd[a] = dist;
141 |           strcpy(bestw[a], &vocab[c * max_w]);
142 |           break;
143 |         }
144 |       }
145 |     }
146 |     for (a = 0; a < N; a++) printf("%50s\t\t%f\n", bestw[a], bestd[a]);
147 |   }
148 |   return 0;
149 | }
150 | 


--------------------------------------------------------------------------------
/word-analogy.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #include <malloc.h>
 19 | 
 20 | #define max_size 2000  // max length of strings
 21 | #define N 40           // number of closest words that will be shown
 22 | #define max_w 50       // max length of vocabulary entries
 23 | 
 24 | int main(int argc, char **argv) {
 25 |   FILE *f;
 26 |   char st1[max_size];
 27 |   char bestw[N][max_size];
 28 |   char file_name[max_size], st[100][max_size];
 29 |   float dist, len, bestd[N], vec[max_size];
 30 |   long long words, size, a, b, c, d, cn, bi[100];
 31 |   float *M;
 32 |   char *vocab;
 33 |   if (argc < 2) {
 34 |     printf(
 35 |         "Usage: ./word-analogy <FILE>\nwhere FILE contains word projections in "
 36 |         "the BINARY FORMAT\n");
 37 |     return 0;
 38 |   }
 39 |   strcpy(file_name, argv[1]);
 40 |   f = fopen(file_name, "rb");
 41 |   if (f == NULL) {
 42 |     printf("Input file not found\n");
 43 |     return -1;
 44 |   }
 45 |   fscanf(f, "%lld", &words);
 46 |   fscanf(f, "%lld", &size);
 47 |   vocab = (char *)malloc((long long)words * max_w * sizeof(char));
 48 |   M = (float *)malloc((long long)words * (long long)size * sizeof(float));
 49 |   if (M == NULL) {
 50 |     printf("Cannot allocate memory: %lld MB    %lld  %lld\n",
 51 |            (long long)words * size * sizeof(float) / 1048576, words, size);
 52 |     return -1;
 53 |   }
 54 |   for (b = 0; b < words; b++) {
 55 |     a = 0;
 56 |     while (1) {
 57 |       vocab[b * max_w + a] = fgetc(f);
 58 |       if (feof(f) || (vocab[b * max_w + a] == ' ')) break;
 59 |       if ((a < max_w) && (vocab[b * max_w + a] != '\n')) a++;
 60 |     }
 61 |     vocab[b * max_w + a] = 0;
 62 |     for (a = 0; a < size; a++) fread(&M[a + b * size], sizeof(float), 1, f);
 63 |     len = 0;
 64 |     for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
 65 |     len = sqrt(len);
 66 |     for (a = 0; a < size; a++) M[a + b * size] /= len;
 67 |   }
 68 |   fclose(f);
 69 |   while (1) {
 70 |     for (a = 0; a < N; a++) bestd[a] = 0;
 71 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
 72 |     printf("Enter three words (EXIT to break): ");
 73 |     a = 0;
 74 |     while (1) {
 75 |       st1[a] = fgetc(stdin);
 76 |       if ((st1[a] == '\n') || (a >= max_size - 1)) {
 77 |         st1[a] = 0;
 78 |         break;
 79 |       }
 80 |       a++;
 81 |     }
 82 |     if (!strcmp(st1, "EXIT")) break;
 83 |     cn = 0;
 84 |     b = 0;
 85 |     c = 0;
 86 |     while (1) {
 87 |       st[cn][b] = st1[c];
 88 |       b++;
 89 |       c++;
 90 |       st[cn][b] = 0;
 91 |       if (st1[c] == 0) break;
 92 |       if (st1[c] == ' ') {
 93 |         cn++;
 94 |         b = 0;
 95 |         c++;
 96 |       }
 97 |     }
 98 |     cn++;
 99 |     if (cn < 3) {
100 |       printf(
101 |           "Only %lld words were entered.. three words are needed at the input "
102 |           "to perform the calculation\n",
103 |           cn);
104 |       continue;
105 |     }
106 |     for (a = 0; a < cn; a++) {
107 |       for (b = 0; b < words; b++)
108 |         if (!strcmp(&vocab[b * max_w], st[a])) break;
109 |       if (b == words) b = 0;
110 |       bi[a] = b;
111 |       printf("\nWord: %s  Position in vocabulary: %lld\n", st[a], bi[a]);
112 |       if (b == 0) {
113 |         printf("Out of dictionary word!\n");
114 |         break;
115 |       }
116 |     }
117 |     if (b == 0) continue;
118 |     printf(
119 |         "\n                                              Word              "
120 |         "Distance\n------------------------------------------------------------"
121 |         "------------\n");
122 |     for (a = 0; a < size; a++)
123 |       vec[a] = M[a + bi[1] * size] - M[a + bi[0] * size] + M[a + bi[2] * size];
124 |     len = 0;
125 |     for (a = 0; a < size; a++) len += vec[a] * vec[a];
126 |     len = sqrt(len);
127 |     for (a = 0; a < size; a++) vec[a] /= len;
128 |     for (a = 0; a < N; a++) bestd[a] = 0;
129 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
130 |     for (c = 0; c < words; c++) {
131 |       if (c == bi[0]) continue;
132 |       if (c == bi[1]) continue;
133 |       if (c == bi[2]) continue;
134 |       a = 0;
135 |       for (b = 0; b < cn; b++)
136 |         if (bi[b] == c) a = 1;
137 |       if (a == 1) continue;
138 |       dist = 0;
139 |       for (a = 0; a < size; a++) dist += vec[a] * M[a + c * size];
140 |       for (a = 0; a < N; a++) {
141 |         if (dist > bestd[a]) {
142 |           for (d = N - 1; d > a; d--) {
143 |             bestd[d] = bestd[d - 1];
144 |             strcpy(bestw[d], bestw[d - 1]);
145 |           }
146 |           bestd[a] = dist;
147 |           strcpy(bestw[a], &vocab[c * max_w]);
148 |           break;
149 |         }
150 |       }
151 |     }
152 |     for (a = 0; a < N; a++) printf("%50s\t\t%f\n", bestw[a], bestd[a]);
153 |   }
154 |   return 0;
155 | }
156 | 


--------------------------------------------------------------------------------
/demo-train-big-model-v1.sh:
--------------------------------------------------------------------------------
  1 | ###############################################################################################
  2 | #
  3 | # Script for training good word and phrase vector model using public corpora, version 1.0.
  4 | # The training time will be from several hours to about a day.
  5 | #
  6 | # Downloads about 8 billion words, makes phrases using two runs of word2phrase, trains
  7 | # a 500-dimensional vector model and evaluates it on word and phrase analogy tasks.
  8 | #
  9 | ###############################################################################################
 10 | 
 11 | # This function will convert text to lowercase and remove special characters
 12 | normalize_text() {
 13 |   awk '{print tolower($0);}' | sed -e "s/’/'/g" -e "s/′/'/g" -e "s/''/ /g" -e "s/'/ ' /g" -e "s/“/\"/g" -e "s/”/\"/g" \
 14 |   -e 's/"/ " /g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' -e 's/, / , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
 15 |   -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' -e 's/-/ - /g' -e 's/=/ /g' -e 's/=/ /g' -e 's/*/ /g' -e 's/|/ /g' \
 16 |   -e 's/«/ /g' | tr 0-9 " "
 17 | }
 18 | 
 19 | mkdir word2vec
 20 | cd word2vec
 21 | 
 22 | wget http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2012.en.shuffled.gz
 23 | wget http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2013.en.shuffled.gz
 24 | gzip -d news.2012.en.shuffled.gz
 25 | gzip -d news.2013.en.shuffled.gz
 26 | normalize_text < news.2012.en.shuffled > data.txt
 27 | normalize_text < news.2013.en.shuffled >> data.txt
 28 | 
 29 | wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
 30 | tar -xvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz
 31 | for i in `ls 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled`; do
 32 |   normalize_text < 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/$i >> data.txt
 33 | done
 34 | 
 35 | wget http://ebiquity.umbc.edu/redirect/to/resource/id/351/UMBC-webbase-corpus
 36 | tar -zxvf umbc_webbase_corpus.tar.gz webbase_all/*.txt
 37 | for i in `ls webbase_all`; do
 38 |   normalize_text < webbase_all/$i >> data.txt
 39 | done
 40 | 
 41 | wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
 42 | bzip2 -c -d enwiki-latest-pages-articles.xml.bz2 | awk '{print tolower($0);}' | perl -e '
 43 | # Program to filter Wikipedia XML dumps to "clean" text consisting only of lowercase
 44 | # letters (a-z, converted from A-Z), and spaces (never consecutive)...
 45 | # All other characters are converted to spaces.  Only text which normally appears.
 46 | # in the web browser is displayed.  Tables are removed.  Image captions are.
 47 | # preserved.  Links are converted to normal text.  Digits are spelled out.
 48 | # *** Modified to not spell digits or throw away non-ASCII characters ***
 49 | 
 50 | # Written by Matt Mahoney, June 10, 2006.  This program is released to the public domain.
 51 | 
 52 | $/=">";                     # input record separator
 53 | while (<>) {
 54 |   if (/<text /) {$text=1;}  # remove all but between <text> ... </text>
 55 |   if (/#redirect/i) {$text=0;}  # remove #REDIRECT
 56 |   if ($text) {
 57 | 
 58 |     # Remove any text not normally visible
 59 |     if (/<\/text>/) {$text=0;}
 60 |     s/<.*>//;               # remove xml tags
 61 |     s/&amp;/&/g;            # decode URL encoded chars
 62 |     s/&lt;/</g;
 63 |     s/&gt;/>/g;
 64 |     s/<ref[^<]*<\/ref>//g;  # remove references <ref...> ... </ref>
 65 |     s/<[^>]*>//g;           # remove xhtml tags
 66 |     s/\[http:[^] ]*/[/g;    # remove normal url, preserve visible text
 67 |     s/\|thumb//ig;          # remove images links, preserve caption
 68 |     s/\|left//ig;
 69 |     s/\|right//ig;
 70 |     s/\|\d+px//ig;
 71 |     s/\[\[image:[^\[\]]*\|//ig;
 72 |     s/\[\[category:([^|\]]*)[^]]*\]\]/[[$1]]/ig;  # show categories without markup
 73 |     s/\[\[[a-z\-]*:[^\]]*\]\]//g;  # remove links to other languages
 74 |     s/\[\[[^\|\]]*\|/[[/g;  # remove wiki url, preserve visible text
 75 |     s/{{[^}]*}}//g;         # remove {{icons}} and {tables}
 76 |     s/{[^}]*}//g;
 77 |     s/\[//g;                # remove [ and ]
 78 |     s/\]//g;
 79 |     s/&[^;]*;/ /g;          # remove URL encoded chars
 80 | 
 81 |     $_=" $_ ";
 82 |     chop;
 83 |     print $_;
 84 |   }
 85 | }
 86 | ' | normalize_text | awk '{if (NF>1) print;}' >> data.txt
 87 | 
 88 | wget http://word2vec.googlecode.com/svn/trunk/word2vec.c
 89 | wget http://word2vec.googlecode.com/svn/trunk/word2phrase.c
 90 | wget http://word2vec.googlecode.com/svn/trunk/compute-accuracy.c
 91 | wget http://word2vec.googlecode.com/svn/trunk/questions-words.txt
 92 | wget http://word2vec.googlecode.com/svn/trunk/questions-phrases.txt
 93 | gcc word2vec.c -o word2vec -lm -pthread -O3 -march=native -funroll-loops
 94 | gcc word2phrase.c -o word2phrase -lm -pthread -O3 -march=native -funroll-loops
 95 | gcc compute-accuracy.c -o compute-accuracy -lm -pthread -O3 -march=native -funroll-loops
 96 | ./word2phrase -train data.txt -output data-phrase.txt -threshold 200 -debug 2
 97 | ./word2phrase -train data-phrase.txt -output data-phrase2.txt -threshold 100 -debug 2
 98 | ./word2vec -train data-phrase2.txt -output vectors.bin -cbow 1 -size 500 -window 10 -negative 10 -hs 0 -sample 1e-5 -threads 40 -binary 1 -iter 3 -min-count 10
 99 | ./compute-accuracy vectors.bin 400000 < questions-words.txt     # should get to almost 78% accuracy on 99.7% of questions
100 | ./compute-accuracy vectors.bin 1000000 < questions-phrases.txt  # about 78% accuracy with 77% coverage
101 | 


--------------------------------------------------------------------------------
/vs2017/distance.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\distance.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>distance</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/vs2017/word2vec.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\word2vec.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{3665E45D-8606-4F60-B864-2AD85FB18CA1}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>word2vec</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/vs2017/word-analogy.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\word-analogy.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>word-analogy</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/vs2017/word2phrase.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\word2phrase.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>word2phrase</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/vs2017/compute-accuracy.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\word-analogy.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{78698725-BA8A-410B-9971-2BF28562B2D1}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>compute-accuracy</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/vs2017/word2vec-doc2vec.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <ItemGroup>
 14 |     <ClCompile Include="..\word2vec-doc2vec.c" />
 15 |   </ItemGroup>
 16 |   <PropertyGroup Label="Globals">
 17 |     <ProjectGuid>{4192BAE0-FC98-4AE4-819A-65C0B896C38E}</ProjectGuid>
 18 |     <Keyword>Win32Proj</Keyword>
 19 |     <RootNamespace>test</RootNamespace>
 20 |     <ProjectName>word2vec-doc2vec</ProjectName>
 21 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 22 |   </PropertyGroup>
 23 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <UseDebugLibraries>true</UseDebugLibraries>
 27 |     <PlatformToolset>v141</PlatformToolset>
 28 |     <CharacterSet>NotSet</CharacterSet>
 29 |   </PropertyGroup>
 30 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 31 |     <ConfigurationType>Application</ConfigurationType>
 32 |     <UseDebugLibraries>false</UseDebugLibraries>
 33 |     <PlatformToolset>v141</PlatformToolset>
 34 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 35 |     <CharacterSet>NotSet</CharacterSet>
 36 |   </PropertyGroup>
 37 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 38 |   <ImportGroup Label="ExtensionSettings">
 39 |   </ImportGroup>
 40 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |   </ImportGroup>
 43 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 44 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 45 |   </ImportGroup>
 46 |   <PropertyGroup Label="UserMacros" />
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <LinkIncremental>false</LinkIncremental>
 49 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 50 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <LinkIncremental>false</LinkIncremental>
 54 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 55 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <PrecompiledHeader>
 60 |       </PrecompiledHeader>
 61 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 62 |       <Optimization>Disabled</Optimization>
 63 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 64 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 65 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 66 |       <PrecompiledHeaderFile>
 67 |       </PrecompiledHeaderFile>
 68 |       <PrecompiledHeaderOutputFile>
 69 |       </PrecompiledHeaderOutputFile>
 70 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 71 |       <CompileAs>Default</CompileAs>
 72 |       <DisableSpecificWarnings>
 73 |       </DisableSpecificWarnings>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <SubSystem>Console</SubSystem>
 77 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 78 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
 79 |     </Link>
 80 |   </ItemDefinitionGroup>
 81 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <ClCompile>
 83 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 84 |       <PrecompiledHeader>
 85 |       </PrecompiledHeader>
 86 |       <Optimization>MaxSpeed</Optimization>
 87 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 88 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 89 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <DisableSpecificWarnings>
 91 |       </DisableSpecificWarnings>
 92 |       <CompileAs>Default</CompileAs>
 93 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 95 |     </ClCompile>
 96 |     <Link>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <OptimizeReferences>true</OptimizeReferences>
101 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
102 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
103 |     </Link>
104 |   </ItemDefinitionGroup>
105 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
106 |   <ImportGroup Label="ExtensionTargets">
107 |   </ImportGroup>
108 | </Project>


--------------------------------------------------------------------------------
/compute-accuracy.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <math.h>
 19 | #include <malloc.h>
 20 | #include <ctype.h>
 21 | 
 22 | const long long max_size = 2000;  // max length of strings
 23 | const long long N = 1;            // number of closest words
 24 | const long long max_w = 50;       // max length of vocabulary entries
 25 | 
 26 | int main(int argc, char **argv) {
 27 |   FILE *f;
 28 |   char st1[max_size], st2[max_size], st3[max_size], st4[max_size],
 29 |       bestw[N][max_size], file_name[max_size];
 30 |   float dist, len, bestd[N], vec[max_size];
 31 |   long long words, size, a, b, c, d, b1, b2, b3, threshold = 0;
 32 |   float *M;
 33 |   char *vocab;
 34 |   int TCN, CCN = 0, TACN = 0, CACN = 0, SECN = 0, SYCN = 0, SEAC = 0, SYAC = 0,
 35 |            QID = 0, TQ = 0, TQS = 0;
 36 |   if (argc < 2) {
 37 |     printf(
 38 |         "Usage: ./compute-accuracy <FILE> <threshold>\nwhere FILE contains "
 39 |         "word projections, and threshold is used to reduce vocabulary of the "
 40 |         "model for fast approximate evaluation (0 = off, otherwise typical "
 41 |         "value is 30000)\n");
 42 |     return 0;
 43 |   }
 44 |   strcpy(file_name, argv[1]);
 45 |   if (argc > 2) threshold = atoi(argv[2]);
 46 |   f = fopen(file_name, "rb");
 47 |   if (f == NULL) {
 48 |     printf("Input file not found\n");
 49 |     return -1;
 50 |   }
 51 |   fscanf(f, "%lld", &words);
 52 |   if (threshold)
 53 |     if (words > threshold) words = threshold;
 54 |   fscanf(f, "%lld", &size);
 55 |   vocab = (char *)malloc(words * max_w * sizeof(char));
 56 |   M = (float *)malloc(words * size * sizeof(float));
 57 |   if (M == NULL) {
 58 |     printf("Cannot allocate memory: %lld MB\n",
 59 |            words * size * sizeof(float) / 1048576);
 60 |     return -1;
 61 |   }
 62 |   for (b = 0; b < words; b++) {
 63 |     a = 0;
 64 |     while (1) {
 65 |       vocab[b * max_w + a] = fgetc(f);
 66 |       if (feof(f) || (vocab[b * max_w + a] == ' ')) break;
 67 |       if ((a < max_w) && (vocab[b * max_w + a] != '\n')) a++;
 68 |     }
 69 |     vocab[b * max_w + a] = 0;
 70 |     for (a = 0; a < max_w; a++)
 71 |       vocab[b * max_w + a] = toupper(vocab[b * max_w + a]);
 72 |     for (a = 0; a < size; a++) fread(&M[a + b * size], sizeof(float), 1, f);
 73 |     len = 0;
 74 |     for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
 75 |     len = sqrt(len);
 76 |     for (a = 0; a < size; a++) M[a + b * size] /= len;
 77 |   }
 78 |   fclose(f);
 79 |   TCN = 0;
 80 |   while (1) {
 81 |     for (a = 0; a < N; a++) bestd[a] = 0;
 82 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
 83 |     scanf("%s", st1);
 84 |     for (a = 0; a < strlen(st1); a++) st1[a] = toupper(st1[a]);
 85 |     if ((!strcmp(st1, ":")) || (!strcmp(st1, "EXIT")) || feof(stdin)) {
 86 |       if (TCN == 0) TCN = 1;
 87 |       if (QID != 0) {
 88 |         printf("ACCURACY TOP1: %.2f %%  (%d / %d)\n", CCN / (float)TCN * 100,
 89 |                CCN, TCN);
 90 |         printf(
 91 |             "Total accuracy: %.2f %%   Semantic accuracy: %.2f %%   Syntactic "
 92 |             "accuracy: %.2f %% \n",
 93 |             CACN / (float)TACN * 100, SEAC / (float)SECN * 100,
 94 |             SYAC / (float)SYCN * 100);
 95 |       }
 96 |       QID++;
 97 |       scanf("%s", st1);
 98 |       if (feof(stdin)) break;
 99 |       printf("%s:\n", st1);
100 |       TCN = 0;
101 |       CCN = 0;
102 |       continue;
103 |     }
104 |     if (!strcmp(st1, "EXIT")) break;
105 |     scanf("%s", st2);
106 |     for (a = 0; a < strlen(st2); a++) st2[a] = toupper(st2[a]);
107 |     scanf("%s", st3);
108 |     for (a = 0; a < strlen(st3); a++) st3[a] = toupper(st3[a]);
109 |     scanf("%s", st4);
110 |     for (a = 0; a < strlen(st4); a++) st4[a] = toupper(st4[a]);
111 |     for (b = 0; b < words; b++)
112 |       if (!strcmp(&vocab[b * max_w], st1)) break;
113 |     b1 = b;
114 |     for (b = 0; b < words; b++)
115 |       if (!strcmp(&vocab[b * max_w], st2)) break;
116 |     b2 = b;
117 |     for (b = 0; b < words; b++)
118 |       if (!strcmp(&vocab[b * max_w], st3)) break;
119 |     b3 = b;
120 |     for (a = 0; a < N; a++) bestd[a] = 0;
121 |     for (a = 0; a < N; a++) bestw[a][0] = 0;
122 |     TQ++;
123 |     if (b1 == words) continue;
124 |     if (b2 == words) continue;
125 |     if (b3 == words) continue;
126 |     for (b = 0; b < words; b++)
127 |       if (!strcmp(&vocab[b * max_w], st4)) break;
128 |     if (b == words) continue;
129 |     for (a = 0; a < size; a++)
130 |       vec[a] = (M[a + b2 * size] - M[a + b1 * size]) + M[a + b3 * size];
131 |     TQS++;
132 |     for (c = 0; c < words; c++) {
133 |       if (c == b1) continue;
134 |       if (c == b2) continue;
135 |       if (c == b3) continue;
136 |       dist = 0;
137 |       for (a = 0; a < size; a++) dist += vec[a] * M[a + c * size];
138 |       for (a = 0; a < N; a++) {
139 |         if (dist > bestd[a]) {
140 |           for (d = N - 1; d > a; d--) {
141 |             bestd[d] = bestd[d - 1];
142 |             strcpy(bestw[d], bestw[d - 1]);
143 |           }
144 |           bestd[a] = dist;
145 |           strcpy(bestw[a], &vocab[c * max_w]);
146 |           break;
147 |         }
148 |       }
149 |     }
150 |     if (!strcmp(st4, bestw[0])) {
151 |       CCN++;
152 |       CACN++;
153 |       if (QID <= 5)
154 |         SEAC++;
155 |       else
156 |         SYAC++;
157 |     }
158 |     if (QID <= 5)
159 |       SECN++;
160 |     else
161 |       SYCN++;
162 |     TCN++;
163 |     TACN++;
164 |   }
165 |   printf("Questions seen / total: %d %d   %.2f %% \n", TQS, TQ,
166 |          TQS / (float)TQ * 100);
167 |   return 0;
168 | }
169 | 


--------------------------------------------------------------------------------
/vs2015/distance.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\distance.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{FE73759F-BDF7-4118-9CC9-BA2AE2D7D9A7}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>distance</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/vs2015/word2vec.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\word2vec.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{3665E45D-8606-4F60-B864-2AD85FB18CA1}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>word2vec</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/vs2015/word2phrase.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\word2phrase.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{66CAAE3C-A752-4FD1-BE30-8F65DAD73137}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>word2phrase</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/vs2015/word-analogy.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\word-analogy.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{8C667D06-771F-441F-B94B-4DBE6D5BE3B6}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>word-analogy</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/vs2015/compute-accuracy.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\word-analogy.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{78698725-BA8A-410B-9971-2BF28562B2D1}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>compute-accuracy</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/vs2015/word2vec-doc2vec.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="..\word2vec-doc2vec.c" />
 23 |   </ItemGroup>
 24 |   <PropertyGroup Label="Globals">
 25 |     <ProjectGuid>{4192BAE0-FC98-4AE4-819A-65C0B896C38E}</ProjectGuid>
 26 |     <Keyword>Win32Proj</Keyword>
 27 |     <RootNamespace>test</RootNamespace>
 28 |     <ProjectName>word2vec-doc2vec</ProjectName>
 29 |     <WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
 30 |   </PropertyGroup>
 31 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <PlatformToolset>v141</PlatformToolset>
 36 |     <CharacterSet>NotSet</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v141</PlatformToolset>
 42 |     <CharacterSet>NotSet</CharacterSet>
 43 |   </PropertyGroup>
 44 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 45 |     <ConfigurationType>Application</ConfigurationType>
 46 |     <UseDebugLibraries>false</UseDebugLibraries>
 47 |     <PlatformToolset>v140</PlatformToolset>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>NotSet</CharacterSet>
 50 |   </PropertyGroup>
 51 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 52 |     <ConfigurationType>Application</ConfigurationType>
 53 |     <UseDebugLibraries>false</UseDebugLibraries>
 54 |     <PlatformToolset>v140</PlatformToolset>
 55 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 56 |     <CharacterSet>NotSet</CharacterSet>
 57 |   </PropertyGroup>
 58 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 59 |   <ImportGroup Label="ExtensionSettings">
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 71 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 72 |   </ImportGroup>
 73 |   <PropertyGroup Label="UserMacros" />
 74 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 77 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 78 |   </PropertyGroup>
 79 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 80 |     <LinkIncremental>false</LinkIncremental>
 81 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 82 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 83 |   </PropertyGroup>
 84 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <LinkIncremental>false</LinkIncremental>
 86 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 87 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 90 |     <LinkIncremental>false</LinkIncremental>
 91 |     <OutDir>$(SolutionDir)\$(Configuration)\</OutDir>
 92 |     <IntDir>$(SolutionDir)\$(Configuration)\$(ProjectName)\</IntDir>
 93 |   </PropertyGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 95 |     <ClCompile>
 96 |       <PrecompiledHeader>
 97 |       </PrecompiledHeader>
 98 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
102 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
103 |       <PrecompiledHeaderFile />
104 |       <PrecompiledHeaderOutputFile />
105 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
106 |       <CompileAs>Default</CompileAs>
107 |       <DisableSpecificWarnings>
108 |       </DisableSpecificWarnings>
109 |     </ClCompile>
110 |     <Link>
111 |       <SubSystem>Console</SubSystem>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
114 |     </Link>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
117 |     <ClCompile>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
121 |       <Optimization>Disabled</Optimization>
122 |       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
124 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
125 |       <PrecompiledHeaderFile>
126 |       </PrecompiledHeaderFile>
127 |       <PrecompiledHeaderOutputFile>
128 |       </PrecompiledHeaderOutputFile>
129 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
130 |       <CompileAs>Default</CompileAs>
131 |       <DisableSpecificWarnings>
132 |       </DisableSpecificWarnings>
133 |     </ClCompile>
134 |     <Link>
135 |       <SubSystem>Console</SubSystem>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
138 |     </Link>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
141 |     <ClCompile>
142 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
143 |       <PrecompiledHeader>
144 |       </PrecompiledHeader>
145 |       <Optimization>MaxSpeed</Optimization>
146 |       <FunctionLevelLinking>true</FunctionLevelLinking>
147 |       <IntrinsicFunctions>true</IntrinsicFunctions>
148 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
149 |       <DisableSpecificWarnings>
150 |       </DisableSpecificWarnings>
151 |       <CompileAs>Default</CompileAs>
152 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
153 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
154 |     </ClCompile>
155 |     <Link>
156 |       <SubSystem>Console</SubSystem>
157 |       <GenerateDebugInformation>true</GenerateDebugInformation>
158 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
159 |       <OptimizeReferences>true</OptimizeReferences>
160 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
161 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
162 |     </Link>
163 |   </ItemDefinitionGroup>
164 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
165 |     <ClCompile>
166 |       <WarningLevel>TurnOffAllWarnings</WarningLevel>
167 |       <PrecompiledHeader>
168 |       </PrecompiledHeader>
169 |       <Optimization>MaxSpeed</Optimization>
170 |       <FunctionLevelLinking>true</FunctionLevelLinking>
171 |       <IntrinsicFunctions>true</IntrinsicFunctions>
172 |       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
173 |       <DisableSpecificWarnings>
174 |       </DisableSpecificWarnings>
175 |       <CompileAs>Default</CompileAs>
176 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
177 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
178 |     </ClCompile>
179 |     <Link>
180 |       <SubSystem>Console</SubSystem>
181 |       <GenerateDebugInformation>true</GenerateDebugInformation>
182 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
183 |       <OptimizeReferences>true</OptimizeReferences>
184 |       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
185 |       <OutputFile>..\$(TargetName)$(TargetExt)</OutputFile>
186 |     </Link>
187 |   </ItemDefinitionGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/word2phrase.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <math.h>
 19 | 
 20 | #define MAX_STRING 60
 21 | 
 22 | const int vocab_hash_size =
 23 |     500000000;  // Maximum 500M entries in the vocabulary
 24 | 
 25 | typedef float real;  // Precision of float numbers
 26 | 
 27 | struct vocab_word {
 28 |   long long cn;
 29 |   char *word;
 30 | };
 31 | 
 32 | char train_file[MAX_STRING], output_file[MAX_STRING];
 33 | struct vocab_word *vocab;
 34 | int debug_mode = 2, min_count = 5, *vocab_hash, min_reduce = 1;
 35 | long long vocab_max_size = 10000, vocab_size = 0;
 36 | long long train_words = 0;
 37 | real threshold = 100;
 38 | 
 39 | unsigned long long next_random = 1;
 40 | 
 41 | // Reads a single word from a file, assuming space + tab + EOL to be word
 42 | // boundaries
 43 | void ReadWord(char *word, FILE *fin) {
 44 |   int a = 0, ch;
 45 |   while (!feof(fin)) {
 46 |     ch = fgetc(fin);
 47 |     if (ch == 13) continue;
 48 |     if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
 49 |       if (a > 0) {
 50 |         if (ch == '\n') ungetc(ch, fin);
 51 |         break;
 52 |       }
 53 |       if (ch == '\n') {
 54 |         strcpy(word, (char *)"</s>");
 55 |         return;
 56 |       } else
 57 |         continue;
 58 |     }
 59 |     word[a] = ch;
 60 |     a++;
 61 |     if (a >= MAX_STRING - 1) a--;  // Truncate too long words
 62 |   }
 63 |   word[a] = 0;
 64 | }
 65 | 
 66 | // Returns hash value of a word
 67 | int GetWordHash(char *word) {
 68 |   unsigned long long a, hash = 1;
 69 |   for (a = 0; a < strlen(word); a++) hash = hash * 257 + word[a];
 70 |   hash = hash % vocab_hash_size;
 71 |   return hash;
 72 | }
 73 | 
 74 | // Returns position of a word in the vocabulary; if the word is not found,
 75 | // returns -1
 76 | int SearchVocab(char *word) {
 77 |   unsigned int hash = GetWordHash(word);
 78 |   while (1) {
 79 |     if (vocab_hash[hash] == -1) return -1;
 80 |     if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
 81 |     hash = (hash + 1) % vocab_hash_size;
 82 |   }
 83 |   return -1;
 84 | }
 85 | 
 86 | // Reads a word and returns its index in the vocabulary
 87 | int ReadWordIndex(FILE *fin) {
 88 |   char word[MAX_STRING];
 89 |   ReadWord(word, fin);
 90 |   if (feof(fin)) return -1;
 91 |   return SearchVocab(word);
 92 | }
 93 | 
 94 | // Adds a word to the vocabulary
 95 | int AddWordToVocab(char *word) {
 96 |   unsigned int hash, length = strlen(word) + 1;
 97 |   if (length > MAX_STRING) length = MAX_STRING;
 98 |   vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
 99 |   strcpy(vocab[vocab_size].word, word);
100 |   vocab[vocab_size].cn = 0;
101 |   vocab_size++;
102 |   // Reallocate memory if needed
103 |   if (vocab_size + 2 >= vocab_max_size) {
104 |     vocab_max_size += 10000;
105 |     vocab = (struct vocab_word *)realloc(
106 |         vocab, vocab_max_size * sizeof(struct vocab_word));
107 |   }
108 |   hash = GetWordHash(word);
109 |   while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
110 |   vocab_hash[hash] = vocab_size - 1;
111 |   return vocab_size - 1;
112 | }
113 | 
114 | // Used later for sorting by word counts
115 | int VocabCompare(const void *a, const void *b) {
116 |   return ((struct vocab_word *)b)->cn - ((struct vocab_word *)a)->cn;
117 | }
118 | 
119 | // Sorts the vocabulary by frequency using word counts
120 | void SortVocab() {
121 |   int a;
122 |   unsigned int hash;
123 |   // Sort the vocabulary and keep </s> at the first position
124 |   qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
125 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
126 |   for (a = 0; a < vocab_size; a++) {
127 |     // Words occuring less than min_count times will be discarded from the vocab
128 |     if (vocab[a].cn < min_count) {
129 |       vocab_size--;
130 |       free(vocab[vocab_size].word);
131 |     } else {
132 |       // Hash will be re-computed, as after the sorting it is not actual
133 |       hash = GetWordHash(vocab[a].word);
134 |       while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
135 |       vocab_hash[hash] = a;
136 |     }
137 |   }
138 |   vocab = (struct vocab_word *)realloc(vocab,
139 |                                        vocab_size * sizeof(struct vocab_word));
140 | }
141 | 
142 | // Reduces the vocabulary by removing infrequent tokens
143 | void ReduceVocab() {
144 |   int a, b = 0;
145 |   unsigned int hash;
146 |   for (a = 0; a < vocab_size; a++)
147 |     if (vocab[a].cn > min_reduce) {
148 |       vocab[b].cn = vocab[a].cn;
149 |       vocab[b].word = vocab[a].word;
150 |       b++;
151 |     } else
152 |       free(vocab[a].word);
153 |   vocab_size = b;
154 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
155 |   for (a = 0; a < vocab_size; a++) {
156 |     // Hash will be re-computed, as it is not actual
157 |     hash = GetWordHash(vocab[a].word);
158 |     while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
159 |     vocab_hash[hash] = a;
160 |   }
161 |   fflush(stdout);
162 |   min_reduce++;
163 | }
164 | 
165 | void LearnVocabFromTrainFile() {
166 |   char word[MAX_STRING], last_word[MAX_STRING], bigram_word[MAX_STRING * 2];
167 |   FILE *fin;
168 |   long long a, i, start = 1;
169 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
170 |   fin = fopen(train_file, "rb");
171 |   if (fin == NULL) {
172 |     printf("ERROR: training data file not found!\n");
173 |     exit(1);
174 |   }
175 |   vocab_size = 0;
176 |   AddWordToVocab((char *)"</s>");
177 |   while (1) {
178 |     ReadWord(word, fin);
179 |     if (feof(fin)) break;
180 |     if (!strcmp(word, "</s>")) {
181 |       start = 1;
182 |       continue;
183 |     } else
184 |       start = 0;
185 |     train_words++;
186 |     if ((debug_mode > 1) && (train_words % 100000 == 0)) {
187 |       printf("Words processed: %lldK     Vocab size: %lldK  %c",
188 |              train_words / 1000, vocab_size / 1000, 13);
189 |       fflush(stdout);
190 |     }
191 |     i = SearchVocab(word);
192 |     if (i == -1) {
193 |       a = AddWordToVocab(word);
194 |       vocab[a].cn = 1;
195 |     } else
196 |       vocab[i].cn++;
197 |     if (start) continue;
198 |     sprintf(bigram_word, "%s_%s", last_word, word);
199 |     bigram_word[MAX_STRING - 1] = 0;
200 |     strcpy(last_word, word);
201 |     i = SearchVocab(bigram_word);
202 |     if (i == -1) {
203 |       a = AddWordToVocab(bigram_word);
204 |       vocab[a].cn = 1;
205 |     } else
206 |       vocab[i].cn++;
207 |     if (vocab_size > vocab_hash_size * 0.7) ReduceVocab();
208 |   }
209 |   SortVocab();
210 |   if (debug_mode > 0) {
211 |     printf("\nVocab size (unigrams + bigrams): %lld\n", vocab_size);
212 |     printf("Words in train file: %lld\n", train_words);
213 |   }
214 |   fclose(fin);
215 | }
216 | 
217 | void TrainModel() {
218 |   long long pa = 0, pb = 0, pab = 0, oov, i, li = -1, cn = 0;
219 |   char word[MAX_STRING], last_word[MAX_STRING], bigram_word[MAX_STRING * 2];
220 |   real score;
221 |   FILE *fo, *fin;
222 |   printf("Starting training using file %s\n", train_file);
223 |   LearnVocabFromTrainFile();
224 |   fin = fopen(train_file, "rb");
225 |   fo = fopen(output_file, "wb");
226 |   word[0] = 0;
227 |   while (1) {
228 |     strcpy(last_word, word);
229 |     ReadWord(word, fin);
230 |     if (feof(fin)) break;
231 |     if (!strcmp(word, "</s>")) {
232 |       fprintf(fo, "\n");
233 |       continue;
234 |     }
235 |     cn++;
236 |     if ((debug_mode > 1) && (cn % 100000 == 0)) {
237 |       printf("Words written: %lldK%c", cn / 1000, 13);
238 |       fflush(stdout);
239 |     }
240 |     oov = 0;
241 |     i = SearchVocab(word);
242 |     if (i == -1)
243 |       oov = 1;
244 |     else
245 |       pb = vocab[i].cn;
246 |     if (li == -1) oov = 1;
247 |     li = i;
248 |     sprintf(bigram_word, "%s_%s", last_word, word);
249 |     bigram_word[MAX_STRING - 1] = 0;
250 |     i = SearchVocab(bigram_word);
251 |     if (i == -1)
252 |       oov = 1;
253 |     else
254 |       pab = vocab[i].cn;
255 |     if (pa < min_count) oov = 1;
256 |     if (pb < min_count) oov = 1;
257 |     if (oov)
258 |       score = 0;
259 |     else
260 |       score = (pab - min_count) / (real)pa / (real)pb * (real)train_words;
261 |     if (score > threshold) {
262 |       fprintf(fo, "_%s", word);
263 |       pb = 0;
264 |     } else
265 |       fprintf(fo, " %s", word);
266 |     pa = pb;
267 |   }
268 |   fclose(fo);
269 |   fclose(fin);
270 | }
271 | 
272 | int ArgPos(char *str, int argc, char **argv) {
273 |   int a;
274 |   for (a = 1; a < argc; a++)
275 |     if (!strcmp(str, argv[a])) {
276 |       if (a == argc - 1) {
277 |         printf("Argument missing for %s\n", str);
278 |         exit(1);
279 |       }
280 |       return a;
281 |     }
282 |   return -1;
283 | }
284 | 
285 | int main(int argc, char **argv) {
286 |   int i;
287 |   if (argc == 1) {
288 |     printf("WORD2PHRASE tool v0.1a\n\n");
289 |     printf("Options:\n");
290 |     printf("Parameters for training:\n");
291 |     printf("\t-train <file>\n");
292 |     printf("\t\tUse text data from <file> to train the model\n");
293 |     printf("\t-output <file>\n");
294 |     printf(
295 |         "\t\tUse <file> to save the resulting word vectors / word clusters / "
296 |         "phrases\n");
297 |     printf("\t-min-count <int>\n");
298 |     printf(
299 |         "\t\tThis will discard words that appear less than <int> times; "
300 |         "default is 5\n");
301 |     printf("\t-threshold <float>\n");
302 |     printf(
303 |         "\t\t The <float> value represents threshold for forming the phrases "
304 |         "(higher means less phrases); default 100\n");
305 |     printf("\t-debug <int>\n");
306 |     printf(
307 |         "\t\tSet the debug mode (default = 2 = more info during training)\n");
308 |     printf("\nExamples:\n");
309 |     printf(
310 |         "./word2phrase -train text.txt -output phrases.txt -threshold 100 "
311 |         "-debug 2\n\n");
312 |     return 0;
313 |   }
314 |   if ((i = ArgPos((char *)"-train", argc, argv)) > 0)
315 |     strcpy(train_file, argv[i + 1]);
316 |   if ((i = ArgPos((char *)"-debug", argc, argv)) > 0)
317 |     debug_mode = atoi(argv[i + 1]);
318 |   if ((i = ArgPos((char *)"-output", argc, argv)) > 0)
319 |     strcpy(output_file, argv[i + 1]);
320 |   if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0)
321 |     min_count = atoi(argv[i + 1]);
322 |   if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0)
323 |     threshold = atof(argv[i + 1]);
324 |   vocab =
325 |       (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
326 |   vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
327 |   TrainModel();
328 |   return 0;
329 | }
330 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/word2vec-doc2vec.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <math.h>
 19 | #include <time.h>
 20 | #if defined _WIN32
 21 | #include "win32-port.h"
 22 | #else
 23 | #include <pthread.h>
 24 | #endif
 25 | 
 26 | #define MAX_STRING 100
 27 | #define EXP_TABLE_SIZE 1000
 28 | #define MAX_EXP 6
 29 | #define MAX_SENTENCE_LENGTH 1000
 30 | #define MAX_CODE_LENGTH 40
 31 | 
 32 | const int vocab_hash_size =
 33 |     30000000;  // Maximum 30 * 0.7 = 21M words in the vocabulary
 34 | 
 35 | typedef float real;  // Precision of float numbers
 36 | 
 37 | struct vocab_word {
 38 |   long long cn;
 39 |   int *point;
 40 |   char *word, *code, codelen;
 41 | };
 42 | 
 43 | char train_file[MAX_STRING], output_file[MAX_STRING];
 44 | char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
 45 | struct vocab_word *vocab;
 46 | int binary = 0, cbow = 1, debug_mode = 2, window = 5, min_count = 5,
 47 |     num_threads = 12, min_reduce = 1;
 48 | int *vocab_hash;
 49 | long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
 50 | long long sentence_vectors = 0;
 51 | long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0,
 52 |           classes = 0;
 53 | real alpha = 0.025, starting_alpha, sample = 1e-3;
 54 | real *syn0, *syn1, *syn1neg, *expTable;
 55 | clock_t start;
 56 | 
 57 | int hs = 0, negative = 5;
 58 | const int table_size = 1e8;
 59 | int *table;
 60 | 
 61 | void InitUnigramTable() {
 62 |   int a, i;
 63 |   double train_words_pow = 0;
 64 |   double d1, power = 0.75;
 65 |   table = (int *)malloc(table_size * sizeof(int));
 66 |   for (a = 1; a < vocab_size; a++) train_words_pow += pow(vocab[a].cn, power);
 67 |   i = 1;
 68 |   d1 = pow(vocab[i].cn, power) / train_words_pow;
 69 |   for (a = 0; a < table_size; a++) {
 70 |     table[a] = i;
 71 |     if (a / (double)table_size > d1) {
 72 |       i++;
 73 |       d1 += pow(vocab[i].cn, power) / train_words_pow;
 74 |     }
 75 |     if (i >= vocab_size) i = vocab_size - 1;
 76 |   }
 77 | }
 78 | 
 79 | // Reads a single word from a file, assuming space + tab + EOL to be word
 80 | // boundaries
 81 | void ReadWord(char *word, FILE *fin) {
 82 |   int a = 0, ch;
 83 |   while (!feof(fin)) {
 84 |     ch = fgetc(fin);
 85 |     if (ch == 13) continue;
 86 |     if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
 87 |       if (a > 0) {
 88 |         if (ch == '\n') ungetc(ch, fin);
 89 |         break;
 90 |       }
 91 |       if (ch == '\n') {
 92 |         strcpy(word, (char *)"</s>");
 93 |         return;
 94 |       } else
 95 |         continue;
 96 |     }
 97 |     word[a] = ch;
 98 |     a++;
 99 |     if (a >= MAX_STRING - 1) a--;  // Truncate too long words
100 |   }
101 |   word[a] = 0;
102 | }
103 | 
104 | // Returns hash value of a word
105 | int GetWordHash(char *word) {
106 |   unsigned long long a, hash = 0;
107 |   for (a = 0; a < strlen(word); a++) hash = hash * 257 + word[a];
108 |   hash = hash % vocab_hash_size;
109 |   return hash;
110 | }
111 | 
112 | // Returns position of a word in the vocabulary; if the word is not found,
113 | // returns -1
114 | int SearchVocab(char *word) {
115 |   unsigned int hash = GetWordHash(word);
116 |   while (1) {
117 |     if (vocab_hash[hash] == -1) return -1;
118 |     if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
119 |     hash = (hash + 1) % vocab_hash_size;
120 |   }
121 |   return -1;
122 | }
123 | 
124 | // Reads a word and returns its index in the vocabulary
125 | int ReadWordIndex(FILE *fin) {
126 |   char word[MAX_STRING];
127 |   ReadWord(word, fin);
128 |   if (feof(fin)) return -1;
129 |   return SearchVocab(word);
130 | }
131 | 
132 | // Adds a word to the vocabulary
133 | int AddWordToVocab(char *word) {
134 |   unsigned int hash, length = strlen(word) + 1;
135 |   if (length > MAX_STRING) length = MAX_STRING;
136 |   vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
137 |   strcpy(vocab[vocab_size].word, word);
138 |   vocab[vocab_size].cn = 0;
139 |   vocab_size++;
140 |   // Reallocate memory if needed
141 |   if (vocab_size + 2 >= vocab_max_size) {
142 |     vocab_max_size += 1000;
143 |     vocab = (struct vocab_word *)realloc(
144 |         vocab, vocab_max_size * sizeof(struct vocab_word));
145 |   }
146 |   hash = GetWordHash(word);
147 |   while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
148 |   vocab_hash[hash] = vocab_size - 1;
149 |   return vocab_size - 1;
150 | }
151 | 
152 | // Used later for sorting by word counts
153 | int VocabCompare(const void *a, const void *b) {
154 |   return ((struct vocab_word *)b)->cn - ((struct vocab_word *)a)->cn;
155 | }
156 | 
157 | // Sorts the vocabulary by frequency using word counts
158 | void SortVocab() {
159 |   int a, size;
160 |   unsigned int hash;
161 |   // Sort the vocabulary and keep </s> at the first position
162 |   qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
163 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
164 |   size = vocab_size;
165 |   train_words = 0;
166 |   for (a = 0; a < size; a++) {
167 |     // Words occuring less than min_count times will be discarded from the vocab
168 |     if ((vocab[a].cn < min_count) && (a != 0)) {
169 |       vocab_size--;
170 |       free(vocab[a].word);
171 |     } else {
172 |       // Hash will be re-computed, as after the sorting it is not actual
173 |       hash = GetWordHash(vocab[a].word);
174 |       while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
175 |       vocab_hash[hash] = a;
176 |       train_words += vocab[a].cn;
177 |     }
178 |   }
179 |   vocab = (struct vocab_word *)realloc(
180 |       vocab, (vocab_size + 1) * sizeof(struct vocab_word));
181 |   // Allocate memory for the binary tree construction
182 |   for (a = 0; a < vocab_size; a++) {
183 |     vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
184 |     vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
185 |   }
186 | }
187 | 
188 | // Reduces the vocabulary by removing infrequent tokens
189 | void ReduceVocab() {
190 |   int a, b = 0;
191 |   unsigned int hash;
192 |   for (a = 0; a < vocab_size; a++)
193 |     if (vocab[a].cn > min_reduce) {
194 |       vocab[b].cn = vocab[a].cn;
195 |       vocab[b].word = vocab[a].word;
196 |       b++;
197 |     } else
198 |       free(vocab[a].word);
199 |   vocab_size = b;
200 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
201 |   for (a = 0; a < vocab_size; a++) {
202 |     // Hash will be re-computed, as it is not actual
203 |     hash = GetWordHash(vocab[a].word);
204 |     while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
205 |     vocab_hash[hash] = a;
206 |   }
207 |   fflush(stdout);
208 |   min_reduce++;
209 | }
210 | 
211 | // Create binary Huffman tree using the word counts
212 | // Frequent words will have short uniqe binary codes
213 | void CreateBinaryTree() {
214 |   long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
215 |   char code[MAX_CODE_LENGTH];
216 |   long long *count = (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
217 |   long long *binary =
218 |       (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
219 |   long long *parent_node =
220 |       (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
221 |   for (a = 0; a < vocab_size; a++) count[a] = vocab[a].cn;
222 |   for (a = vocab_size; a < vocab_size * 2; a++) count[a] = 1e15;
223 |   pos1 = vocab_size - 1;
224 |   pos2 = vocab_size;
225 |   // Following algorithm constructs the Huffman tree by adding one node at a
226 |   // time
227 |   for (a = 0; a < vocab_size - 1; a++) {
228 |     // First, find two smallest nodes 'min1, min2'
229 |     if (pos1 >= 0) {
230 |       if (count[pos1] < count[pos2]) {
231 |         min1i = pos1;
232 |         pos1--;
233 |       } else {
234 |         min1i = pos2;
235 |         pos2++;
236 |       }
237 |     } else {
238 |       min1i = pos2;
239 |       pos2++;
240 |     }
241 |     if (pos1 >= 0) {
242 |       if (count[pos1] < count[pos2]) {
243 |         min2i = pos1;
244 |         pos1--;
245 |       } else {
246 |         min2i = pos2;
247 |         pos2++;
248 |       }
249 |     } else {
250 |       min2i = pos2;
251 |       pos2++;
252 |     }
253 |     count[vocab_size + a] = count[min1i] + count[min2i];
254 |     parent_node[min1i] = vocab_size + a;
255 |     parent_node[min2i] = vocab_size + a;
256 |     binary[min2i] = 1;
257 |   }
258 |   // Now assign binary code to each vocabulary word
259 |   for (a = 0; a < vocab_size; a++) {
260 |     b = a;
261 |     i = 0;
262 |     while (1) {
263 |       code[i] = binary[b];
264 |       point[i] = b;
265 |       i++;
266 |       b = parent_node[b];
267 |       if (b == vocab_size * 2 - 2) break;
268 |     }
269 |     vocab[a].codelen = i;
270 |     vocab[a].point[0] = vocab_size - 2;
271 |     for (b = 0; b < i; b++) {
272 |       vocab[a].code[i - b - 1] = code[b];
273 |       vocab[a].point[i - b] = point[b] - vocab_size;
274 |     }
275 |   }
276 |   free(count);
277 |   free(binary);
278 |   free(parent_node);
279 | }
280 | 
281 | void LearnVocabFromTrainFile() {
282 |   char word[MAX_STRING];
283 |   FILE *fin;
284 |   long long a, i;
285 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
286 |   fin = fopen(train_file, "rb");
287 |   if (fin == NULL) {
288 |     printf("ERROR: training data file not found!\n");
289 |     exit(1);
290 |   }
291 |   vocab_size = 0;
292 |   AddWordToVocab((char *)"</s>");
293 |   while (1) {
294 |     ReadWord(word, fin);
295 |     if (feof(fin)) break;
296 |     train_words++;
297 |     if ((debug_mode > 1) && (train_words % 100000 == 0)) {
298 |       printf("%lldK%c", train_words / 1000, 13);
299 |       fflush(stdout);
300 |     }
301 |     i = SearchVocab(word);
302 |     if (i == -1) {
303 |       a = AddWordToVocab(word);
304 |       vocab[a].cn = 1;
305 |     } else
306 |       vocab[i].cn++;
307 |     if (vocab_size > vocab_hash_size * 0.7) ReduceVocab();
308 |   }
309 |   SortVocab();
310 |   if (debug_mode > 0) {
311 |     printf("Vocab size: %lld\n", vocab_size);
312 |     printf("Words in train file: %lld\n", train_words);
313 |   }
314 |   file_size = ftell(fin);
315 |   fclose(fin);
316 | }
317 | 
318 | void SaveVocab() {
319 |   long long i;
320 |   FILE *fo = fopen(save_vocab_file, "wb");
321 |   for (i = 0; i < vocab_size; i++)
322 |     fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
323 |   fclose(fo);
324 | }
325 | 
326 | void ReadVocab() {
327 |   long long a, i = 0;
328 |   char c;
329 |   char word[MAX_STRING];
330 |   FILE *fin = fopen(read_vocab_file, "rb");
331 |   if (fin == NULL) {
332 |     printf("Vocabulary file not found\n");
333 |     exit(1);
334 |   }
335 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
336 |   vocab_size = 0;
337 |   while (1) {
338 |     ReadWord(word, fin);
339 |     if (feof(fin)) break;
340 |     a = AddWordToVocab(word);
341 |     fscanf(fin, "%lld%c", &vocab[a].cn, &c);
342 |     i++;
343 |   }
344 |   SortVocab();
345 |   if (debug_mode > 0) {
346 |     printf("Vocab size: %lld\n", vocab_size);
347 |     printf("Words in train file: %lld\n", train_words);
348 |   }
349 |   fin = fopen(train_file, "rb");
350 |   if (fin == NULL) {
351 |     printf("ERROR: training data file not found!\n");
352 |     exit(1);
353 |   }
354 |   fseek(fin, 0, SEEK_END);
355 |   file_size = ftell(fin);
356 |   fclose(fin);
357 | }
358 | 
359 | void InitNet() {
360 |   long long a, b;
361 |   unsigned long long next_random = 1;
362 |   a = posix_memalign((void **)&syn0, 128,
363 |                      (long long)vocab_size * layer1_size * sizeof(real));
364 |   if (syn0 == NULL) {
365 |     printf("Memory allocation failed\n");
366 |     exit(1);
367 |   }
368 |   if (hs) {
369 |     a = posix_memalign((void **)&syn1, 128,
370 |                        (long long)vocab_size * layer1_size * sizeof(real));
371 |     if (syn1 == NULL) {
372 |       printf("Memory allocation failed\n");
373 |       exit(1);
374 |     }
375 |     for (a = 0; a < vocab_size; a++)
376 |       for (b = 0; b < layer1_size; b++) syn1[a * layer1_size + b] = 0;
377 |   }
378 |   if (negative > 0) {
379 |     a = posix_memalign((void **)&syn1neg, 128,
380 |                        (long long)vocab_size * layer1_size * sizeof(real));
381 |     if (syn1neg == NULL) {
382 |       printf("Memory allocation failed\n");
383 |       exit(1);
384 |     }
385 |     for (a = 0; a < vocab_size; a++)
386 |       for (b = 0; b < layer1_size; b++) syn1neg[a * layer1_size + b] = 0;
387 |   }
388 |   for (a = 0; a < vocab_size; a++)
389 |     for (b = 0; b < layer1_size; b++) {
390 |       next_random = next_random * (unsigned long long)25214903917 + 11;
391 |       syn0[a * layer1_size + b] =
392 |           (((next_random & 0xFFFF) / (real)65536) - 0.5) / layer1_size;
393 |     }
394 |   CreateBinaryTree();
395 | }
396 | 
397 | void *TrainModelThread(void *id) {
398 |   long long a, b, d, cw, word, last_word, sentence_length = 0,
399 |                                           sentence_position = 0;
400 |   long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
401 |   long long l1, l2, c, target, label, local_iter = iter;
402 |   unsigned long long next_random = (long long)id;
403 |   real f, g;
404 |   clock_t now;
405 |   real *neu1 = (real *)calloc(layer1_size, sizeof(real));
406 |   real *neu1e = (real *)calloc(layer1_size, sizeof(real));
407 |   FILE *fi = fopen(train_file, "rb");
408 |   fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
409 |   while (1) {
410 |     if (word_count - last_word_count > 10000) {
411 |       word_count_actual += word_count - last_word_count;
412 |       last_word_count = word_count;
413 |       if ((debug_mode > 1)) {
414 |         now = clock();
415 |         printf("%cAlpha: %f  Progress: %.2f%%  Words/thread/sec: %.2fk  ", 13,
416 |                alpha, word_count_actual / (real)(iter * train_words + 1) * 100,
417 |                word_count_actual /
418 |                    ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000));
419 |         fflush(stdout);
420 |       }
421 |       alpha = starting_alpha *
422 |               (1 - word_count_actual / (real)(iter * train_words + 1));
423 |       if (alpha < starting_alpha * 0.0001) alpha = starting_alpha * 0.0001;
424 |     }
425 |     if (sentence_length == 0) {
426 |       while (1) {
427 |         word = ReadWordIndex(fi);
428 |         if (feof(fi)) break;
429 |         if (word == -1) continue;
430 |         word_count++;
431 |         if (word == 0) break;
432 |         // The subsampling randomly discards frequent words while keeping the
433 |         // ranking same
434 |         if (sample > 0) {
435 |           real ran = (sqrt(vocab[word].cn / (sample * train_words)) + 1) *
436 |                      (sample * train_words) / vocab[word].cn;
437 |           next_random = next_random * (unsigned long long)25214903917 + 11;
438 |           if (ran < (next_random & 0xFFFF) / (real)65536) continue;
439 |         }
440 |         sen[sentence_length] = word;
441 |         sentence_length++;
442 |         if (sentence_length >= MAX_SENTENCE_LENGTH) break;
443 |       }
444 |       sentence_position = 0;
445 |     }
446 |     if (feof(fi) || (word_count > train_words / num_threads)) {
447 |       word_count_actual += word_count - last_word_count;
448 |       local_iter--;
449 |       if (local_iter == 0) break;
450 |       word_count = 0;
451 |       last_word_count = 0;
452 |       sentence_length = 0;
453 |       fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
454 |       continue;
455 |     }
456 |     word = sen[sentence_position];
457 |     if (word == -1) continue;
458 |     for (c = 0; c < layer1_size; c++) neu1[c] = 0;
459 |     for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
460 |     next_random = next_random * (unsigned long long)25214903917 + 11;
461 |     b = next_random % window;
462 |     if (cbow) {  // train the cbow architecture
463 |       // in -> hidden
464 |       cw = 0;
465 |       for (a = b; a < window * 1 + 1 - b; a++)
466 |         if (a != window) {
467 |           c = sentence_position - window + a;
468 |           if (c < 0) continue;
469 |           if (c >= sentence_length) continue;
470 |           if (sentence_vectors && (c == 0)) continue;
471 |           last_word = sen[c];
472 |           if (last_word == -1) continue;
473 |           for (c = 0; c < layer1_size; c++)
474 |             neu1[c] += syn0[c + last_word * layer1_size];
475 |           cw++;
476 |         }
477 |       if (sentence_vectors) {
478 |         last_word = sen[0];
479 |         if (last_word == -1) continue;
480 |         for (c = 0; c < layer1_size; c++)
481 |           neu1[c] += syn0[c + last_word * layer1_size];
482 |         cw++;
483 |       }
484 |       if (cw) {
485 |         for (c = 0; c < layer1_size; c++) neu1[c] /= cw;
486 |         if (hs)
487 |           for (d = 0; d < vocab[word].codelen; d++) {
488 |             f = 0;
489 |             l2 = vocab[word].point[d] * layer1_size;
490 |             // Propagate hidden -> output
491 |             for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1[c + l2];
492 |             if (f <= -MAX_EXP)
493 |               continue;
494 |             else if (f >= MAX_EXP)
495 |               continue;
496 |             else
497 |               f = expTable[(int)((f + MAX_EXP) *
498 |                                  (EXP_TABLE_SIZE / MAX_EXP / 2))];
499 |             // 'g' is the gradient multiplied by the learning rate
500 |             g = (1 - vocab[word].code[d] - f) * alpha;
501 |             // Propagate errors output -> hidden
502 |             for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
503 |             // Learn weights hidden -> output
504 |             for (c = 0; c < layer1_size; c++) syn1[c + l2] += g * neu1[c];
505 |           }
506 |         // NEGATIVE SAMPLING
507 |         if (negative > 0)
508 |           for (d = 0; d < negative + 1; d++) {
509 |             if (d == 0) {
510 |               target = word;
511 |               label = 1;
512 |             } else {
513 |               next_random = next_random * (unsigned long long)25214903917 + 11;
514 |               target = table[(next_random >> 16) % table_size];
515 |               if (target == word) continue;
516 |               label = 0;
517 |             }
518 |             l2 = target * layer1_size;
519 |             f = 0;
520 |             for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1neg[c + l2];
521 |             if (f > MAX_EXP)
522 |               g = (label - 1) * alpha;
523 |             else if (f < -MAX_EXP)
524 |               g = (label - 0) * alpha;
525 |             else
526 |               g = (label - expTable[(int)((f + MAX_EXP) *
527 |                                           (EXP_TABLE_SIZE / MAX_EXP / 2))]) *
528 |                   alpha;
529 |             for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
530 |             for (c = 0; c < layer1_size; c++) syn1neg[c + l2] += g * neu1[c];
531 |           }
532 |         // hidden -> in
533 |         for (a = b; a < window * 1 + 1 - b; a++)
534 |           if (a != window) {
535 |             c = sentence_position - window + a;
536 |             if (c < 0) continue;
537 |             if (c >= sentence_length) continue;
538 |             if (sentence_vectors && (c == 0)) continue;
539 |             last_word = sen[c];
540 |             if (last_word == -1) continue;
541 |             for (c = 0; c < layer1_size; c++)
542 |               syn0[c + last_word * layer1_size] += neu1e[c];
543 |           }
544 |         if (sentence_vectors) {
545 |           last_word = sen[0];
546 |           if (last_word == -1) continue;
547 |           for (c = 0; c < layer1_size; c++)
548 |             syn0[c + last_word * layer1_size] += neu1e[c];
549 |         }
550 |       }
551 |     } else {  // train skip-gram
552 |       for (a = b; a < window * 2 + 1 + sentence_vectors - b; a++)
553 |         if (a != window) {
554 |           c = sentence_position - window + a;
555 |           if (sentence_vectors)
556 |             if (a >= window * 2 + sentence_vectors - b) c = 0;
557 |           if (c < 0) continue;
558 |           if (c >= sentence_length) continue;
559 |           last_word = sen[c];
560 |           if (last_word == -1) continue;
561 |           l1 = last_word * layer1_size;
562 |           for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
563 |           // HIERARCHICAL SOFTMAX
564 |           if (hs)
565 |             for (d = 0; d < vocab[word].codelen; d++) {
566 |               f = 0;
567 |               l2 = vocab[word].point[d] * layer1_size;
568 |               // Propagate hidden -> output
569 |               for (c = 0; c < layer1_size; c++)
570 |                 f += syn0[c + l1] * syn1[c + l2];
571 |               if (f <= -MAX_EXP)
572 |                 continue;
573 |               else if (f >= MAX_EXP)
574 |                 continue;
575 |               else
576 |                 f = expTable[(int)((f + MAX_EXP) *
577 |                                    (EXP_TABLE_SIZE / MAX_EXP / 2))];
578 |               // 'g' is the gradient multiplied by the learning rate
579 |               g = (1 - vocab[word].code[d] - f) * alpha;
580 |               // Propagate errors output -> hidden
581 |               for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
582 |               // Learn weights hidden -> output
583 |               for (c = 0; c < layer1_size; c++)
584 |                 syn1[c + l2] += g * syn0[c + l1];
585 |             }
586 |           // NEGATIVE SAMPLING
587 |           if (negative > 0)
588 |             for (d = 0; d < negative + 1; d++) {
589 |               if (d == 0) {
590 |                 target = word;
591 |                 label = 1;
592 |               } else {
593 |                 next_random =
594 |                     next_random * (unsigned long long)25214903917 + 11;
595 |                 target = table[(next_random >> 16) % table_size];
596 |                 if (target == word) continue;
597 |                 label = 0;
598 |               }
599 |               l2 = target * layer1_size;
600 |               f = 0;
601 |               for (c = 0; c < layer1_size; c++)
602 |                 f += syn0[c + l1] * syn1neg[c + l2];
603 |               if (f > MAX_EXP)
604 |                 g = (label - 1) * alpha;
605 |               else if (f < -MAX_EXP)
606 |                 g = (label - 0) * alpha;
607 |               else
608 |                 g = (label - expTable[(int)((f + MAX_EXP) *
609 |                                             (EXP_TABLE_SIZE / MAX_EXP / 2))]) *
610 |                     alpha;
611 |               for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
612 |               for (c = 0; c < layer1_size; c++)
613 |                 syn1neg[c + l2] += g * syn0[c + l1];
614 |             }
615 |           // Learn weights input -> hidden
616 |           for (c = 0; c < layer1_size; c++) syn0[c + l1] += neu1e[c];
617 |         }
618 |     }
619 |     sentence_position++;
620 |     if (sentence_position >= sentence_length) {
621 |       sentence_length = 0;
622 |       continue;
623 |     }
624 |   }
625 |   fclose(fi);
626 |   free(neu1);
627 |   free(neu1e);
628 |   pthread_exit(NULL);
629 | }
630 | 
631 | void TrainModel() {
632 |   long a, b, c, d;
633 |   FILE *fo;
634 |   pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
635 |   printf("Starting training using file %s\n", train_file);
636 |   starting_alpha = alpha;
637 |   if (read_vocab_file[0] != 0)
638 |     ReadVocab();
639 |   else
640 |     LearnVocabFromTrainFile();
641 |   if (save_vocab_file[0] != 0) SaveVocab();
642 |   if (output_file[0] == 0) return;
643 |   InitNet();
644 |   if (negative > 0) InitUnigramTable();
645 |   start = clock();
646 |   for (a = 0; a < num_threads; a++)
647 |     pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
648 |   for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
649 |   fo = fopen(output_file, "wb");
650 |   if (classes == 0) {
651 |     // Save the word vectors
652 |     fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
653 |     for (a = 0; a < vocab_size; a++) {
654 |       fprintf(fo, "%s ", vocab[a].word);
655 |       if (binary)
656 |         for (b = 0; b < layer1_size; b++)
657 |           fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
658 |       else
659 |         for (b = 0; b < layer1_size; b++)
660 |           fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
661 |       fprintf(fo, "\n");
662 |     }
663 |   } else {
664 |     // Run K-means on the word vectors
665 |     int clcn = classes, iter = 10, closeid;
666 |     int *centcn = (int *)malloc(classes * sizeof(int));
667 |     int *cl = (int *)calloc(vocab_size, sizeof(int));
668 |     real closev, x;
669 |     real *cent = (real *)calloc(classes * layer1_size, sizeof(real));
670 |     for (a = 0; a < vocab_size; a++) cl[a] = a % clcn;
671 |     for (a = 0; a < iter; a++) {
672 |       for (b = 0; b < clcn * layer1_size; b++) cent[b] = 0;
673 |       for (b = 0; b < clcn; b++) centcn[b] = 1;
674 |       for (c = 0; c < vocab_size; c++) {
675 |         for (d = 0; d < layer1_size; d++)
676 |           cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
677 |         centcn[cl[c]]++;
678 |       }
679 |       for (b = 0; b < clcn; b++) {
680 |         closev = 0;
681 |         for (c = 0; c < layer1_size; c++) {
682 |           cent[layer1_size * b + c] /= centcn[b];
683 |           closev += cent[layer1_size * b + c] * cent[layer1_size * b + c];
684 |         }
685 |         closev = sqrt(closev);
686 |         for (c = 0; c < layer1_size; c++) cent[layer1_size * b + c] /= closev;
687 |       }
688 |       for (c = 0; c < vocab_size; c++) {
689 |         closev = -10;
690 |         closeid = 0;
691 |         for (d = 0; d < clcn; d++) {
692 |           x = 0;
693 |           for (b = 0; b < layer1_size; b++)
694 |             x += cent[layer1_size * d + b] * syn0[c * layer1_size + b];
695 |           if (x > closev) {
696 |             closev = x;
697 |             closeid = d;
698 |           }
699 |         }
700 |         cl[c] = closeid;
701 |       }
702 |     }
703 |     // Save the K-means classes
704 |     for (a = 0; a < vocab_size; a++)
705 |       fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
706 |     free(centcn);
707 |     free(cent);
708 |     free(cl);
709 |   }
710 |   fclose(fo);
711 | }
712 | 
713 | int ArgPos(char *str, int argc, char **argv) {
714 |   int a;
715 |   for (a = 1; a < argc; a++)
716 |     if (!strcmp(str, argv[a])) {
717 |       if (a == argc - 1) {
718 |         printf("Argument missing for %s\n", str);
719 |         exit(1);
720 |       }
721 |       return a;
722 |     }
723 |   return -1;
724 | }
725 | 
726 | int main(int argc, char **argv) {
727 |   int i;
728 |   if (argc == 1) {
729 |     printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
730 |     printf("Options:\n");
731 |     printf("Parameters for training:\n");
732 |     printf("\t-train <file>\n");
733 |     printf("\t\tUse text data from <file> to train the model\n");
734 |     printf("\t-output <file>\n");
735 |     printf(
736 |         "\t\tUse <file> to save the resulting word vectors / word clusters\n");
737 |     printf("\t-size <int>\n");
738 |     printf("\t\tSet size of word vectors; default is 100\n");
739 |     printf("\t-window <int>\n");
740 |     printf("\t\tSet max skip length between words; default is 5\n");
741 |     printf("\t-sample <float>\n");
742 |     printf(
743 |         "\t\tSet threshold for occurrence of words. Those that appear with "
744 |         "higher frequency in the training data\n");
745 |     printf(
746 |         "\t\twill be randomly down-sampled; default is 1e-3, useful range is "
747 |         "(0, 1e-5)\n");
748 |     printf("\t-hs <int>\n");
749 |     printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
750 |     printf("\t-negative <int>\n");
751 |     printf(
752 |         "\t\tNumber of negative examples; default is 5, common values are 3 - "
753 |         "10 (0 = not used)\n");
754 |     printf("\t-threads <int>\n");
755 |     printf("\t\tUse <int> threads (default 12)\n");
756 |     printf("\t-iter <int>\n");
757 |     printf("\t\tRun more training iterations (default 5)\n");
758 |     printf("\t-min-count <int>\n");
759 |     printf(
760 |         "\t\tThis will discard words that appear less than <int> times; "
761 |         "default is 5\n");
762 |     printf("\t-alpha <float>\n");
763 |     printf(
764 |         "\t\tSet the starting learning rate; default is 0.025 for skip-gram "
765 |         "and 0.05 for CBOW\n");
766 |     printf("\t-classes <int>\n");
767 |     printf(
768 |         "\t\tOutput word classes rather than word vectors; default number of "
769 |         "classes is 0 (vectors are written)\n");
770 |     printf("\t-debug <int>\n");
771 |     printf(
772 |         "\t\tSet the debug mode (default = 2 = more info during training)\n");
773 |     printf("\t-binary <int>\n");
774 |     printf(
775 |         "\t\tSave the resulting vectors in binary moded; default is 0 (off)\n");
776 |     printf("\t-save-vocab <file>\n");
777 |     printf("\t\tThe vocabulary will be saved to <file>\n");
778 |     printf("\t-read-vocab <file>\n");
779 |     printf(
780 |         "\t\tThe vocabulary will be read from <file>, not constructed from the "
781 |         "training data\n");
782 |     printf("\t-cbow <int>\n");
783 |     printf(
784 |         "\t\tUse the continuous bag of words model; default is 1 (use 0 for "
785 |         "skip-gram model)\n");
786 |     printf("\t-sentence-vectors <int>\n");
787 |     printf(
788 |         "\t\tAssume the first token at the beginning of each line is a "
789 |         "sentence ID. This token will be trained\n");
790 |     printf(
791 |         "\t\twith full sentence context instead of just the window. Use 1 to "
792 |         "turn on.\n");
793 |     printf("\nExamples:\n");
794 |     printf(
795 |         "./word2vec -train data.txt -output vec.txt -size 200 -window 5 "
796 |         "-sample 1e-4 -negative 5 -hs 0 -binary 0 -cbow 1 -iter 3\n\n");
797 |     return 0;
798 |   }
799 |   output_file[0] = 0;
800 |   save_vocab_file[0] = 0;
801 |   read_vocab_file[0] = 0;
802 |   if ((i = ArgPos((char *)"-size", argc, argv)) > 0)
803 |     layer1_size = atoi(argv[i + 1]);
804 |   if ((i = ArgPos((char *)"-train", argc, argv)) > 0)
805 |     strcpy(train_file, argv[i + 1]);
806 |   if ((i = ArgPos((char *)"-save-vocab", argc, argv)) > 0)
807 |     strcpy(save_vocab_file, argv[i + 1]);
808 |   if ((i = ArgPos((char *)"-read-vocab", argc, argv)) > 0)
809 |     strcpy(read_vocab_file, argv[i + 1]);
810 |   if ((i = ArgPos((char *)"-debug", argc, argv)) > 0)
811 |     debug_mode = atoi(argv[i + 1]);
812 |   if ((i = ArgPos((char *)"-binary", argc, argv)) > 0)
813 |     binary = atoi(argv[i + 1]);
814 |   if ((i = ArgPos((char *)"-cbow", argc, argv)) > 0) cbow = atoi(argv[i + 1]);
815 |   if (cbow) alpha = 0.05;
816 |   if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
817 |   if ((i = ArgPos((char *)"-output", argc, argv)) > 0)
818 |     strcpy(output_file, argv[i + 1]);
819 |   if ((i = ArgPos((char *)"-window", argc, argv)) > 0)
820 |     window = atoi(argv[i + 1]);
821 |   if ((i = ArgPos((char *)"-sample", argc, argv)) > 0)
822 |     sample = atof(argv[i + 1]);
823 |   if ((i = ArgPos((char *)"-hs", argc, argv)) > 0) hs = atoi(argv[i + 1]);
824 |   if ((i = ArgPos((char *)"-negative", argc, argv)) > 0)
825 |     negative = atoi(argv[i + 1]);
826 |   if ((i = ArgPos((char *)"-threads", argc, argv)) > 0)
827 |     num_threads = atoi(argv[i + 1]);
828 |   if ((i = ArgPos((char *)"-iter", argc, argv)) > 0) iter = atoi(argv[i + 1]);
829 |   if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0)
830 |     min_count = atoi(argv[i + 1]);
831 |   if ((i = ArgPos((char *)"-classes", argc, argv)) > 0)
832 |     classes = atoi(argv[i + 1]);
833 |   if ((i = ArgPos((char *)"-sentence-vectors", argc, argv)) > 0)
834 |     sentence_vectors = atoi(argv[i + 1]);
835 |   vocab =
836 |       (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
837 |   vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
838 |   expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
839 |   for (i = 0; i < EXP_TABLE_SIZE; i++) {
840 |     expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) *
841 |                       MAX_EXP);  // Precompute the exp() table
842 |     expTable[i] =
843 |         expTable[i] / (expTable[i] + 1);  // Precompute f(x) = x / (x + 1)
844 |   }
845 |   TrainModel();
846 |   return 0;
847 | }
848 | 


--------------------------------------------------------------------------------
/word2vec.c:
--------------------------------------------------------------------------------
  1 | //  Copyright 2013 Google Inc. All Rights Reserved.
  2 | //
  3 | //  Licensed under the Apache License, Version 2.0 (the "License");
  4 | //  you may not use this file except in compliance with the License.
  5 | //  You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | //  Unless required by applicable law or agreed to in writing, software
 10 | //  distributed under the License is distributed on an "AS IS" BASIS,
 11 | //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | //  See the License for the specific language governing permissions and
 13 | //  limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <math.h>
 19 | #include <time.h>
 20 | #if defined _WIN32
 21 | #include "win32-port.h"
 22 | #else
 23 | #include <pthread.h>
 24 | #endif
 25 | 
 26 | #if HAVE_CBLAS == 1
 27 | // CBLAS declaration
 28 | extern void cblas_scopy(const int n, const float *x, const int incx, float *y,
 29 |                         const int incy);
 30 | extern void cblas_saxpy(const int n, const float alpha, const float *x,
 31 |                         const int incx, float *y, const int incy);
 32 | extern float cblas_sdot(const int n, const float *x, const int incx,
 33 |                         const float *y, const int incy);
 34 | extern void cblas_sscal(const int n, const float alpha, float *x,
 35 |                         const int incx);
 36 | static const float zero = 0;
 37 | #endif
 38 | 
 39 | #define MAX_STRING 100
 40 | #define EXP_TABLE_SIZE 1000
 41 | #define MAX_EXP 6
 42 | #define MAX_SENTENCE_LENGTH 1000
 43 | #define MAX_CODE_LENGTH 40
 44 | 
 45 | const int vocab_hash_size =
 46 |     30000000;  // Maximum 30 * 0.7 = 21M words in the vocabulary
 47 | 
 48 | typedef float real;  // Precision of float numbers
 49 | 
 50 | struct vocab_word {
 51 |   long long cn;
 52 |   int *point;
 53 |   char *word, *code, codelen;
 54 | };
 55 | 
 56 | char train_file[MAX_STRING], output_file[MAX_STRING];
 57 | char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
 58 | struct vocab_word *vocab;
 59 | int binary = 0, cbow = 1, debug_mode = 2, window = 5, min_count = 5,
 60 |     num_threads = 12, min_reduce = 1;
 61 | int *vocab_hash;
 62 | long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
 63 | long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0,
 64 |           classes = 0;
 65 | real alpha = 0.025, starting_alpha, sample = 1e-3;
 66 | real *syn0, *syn1, *syn1neg, *expTable;
 67 | clock_t start;
 68 | 
 69 | int hs = 0, negative = 5;
 70 | const int table_size = 1e8;
 71 | int *table;
 72 | 
 73 | void InitUnigramTable() {
 74 |   int a, i;
 75 |   double train_words_pow = 0;
 76 |   double d1, power = 0.75;
 77 |   table = (int *)malloc(table_size * sizeof(int));
 78 |   for (a = 1; a < vocab_size; a++) train_words_pow += pow(vocab[a].cn, power);
 79 |   i = 1;
 80 |   d1 = pow(vocab[i].cn, power) / train_words_pow;
 81 |   for (a = 0; a < table_size; a++) {
 82 |     table[a] = i;
 83 |     if (a / (double)table_size > d1) {
 84 |       i++;
 85 |       d1 += pow(vocab[i].cn, power) / train_words_pow;
 86 |     }
 87 |     if (i >= vocab_size) i = vocab_size - 1;
 88 |   }
 89 | }
 90 | 
 91 | // Reads a single word from a file, assuming space + tab + EOL to be word
 92 | // boundaries
 93 | void ReadWord(char *word, FILE *fin) {
 94 |   int a = 0, ch;
 95 |   while (!feof(fin)) {
 96 |     ch = fgetc(fin);
 97 |     if (ch == 13) continue;
 98 |     if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
 99 |       if (a > 0) {
100 |         if (ch == '\n') ungetc(ch, fin);
101 |         break;
102 |       }
103 |       if (ch == '\n') {
104 |         strcpy(word, (char *)"</s>");
105 |         return;
106 |       } else
107 |         continue;
108 |     }
109 |     word[a] = ch;
110 |     a++;
111 |     if (a >= MAX_STRING - 1) a--;  // Truncate too long words
112 |   }
113 |   word[a] = 0;
114 | }
115 | 
116 | // Returns hash value of a word
117 | int GetWordHash(char *word) {
118 |   unsigned long long a, hash = 0;
119 |   for (a = 0; a < strlen(word); a++) hash = hash * 257 + word[a];
120 |   hash = hash % vocab_hash_size;
121 |   return hash;
122 | }
123 | 
124 | // Returns position of a word in the vocabulary; if the word is not found,
125 | // returns -1
126 | int SearchVocab(char *word) {
127 |   unsigned int hash = GetWordHash(word);
128 |   while (1) {
129 |     if (vocab_hash[hash] == -1) return -1;
130 |     if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
131 |     hash = (hash + 1) % vocab_hash_size;
132 |   }
133 |   return -1;
134 | }
135 | 
136 | // Reads a word and returns its index in the vocabulary
137 | int ReadWordIndex(FILE *fin) {
138 |   char word[MAX_STRING];
139 |   ReadWord(word, fin);
140 |   if (feof(fin)) return -1;
141 |   return SearchVocab(word);
142 | }
143 | 
144 | // Adds a word to the vocabulary
145 | int AddWordToVocab(char *word) {
146 |   unsigned int hash, length = strlen(word) + 1;
147 |   if (length > MAX_STRING) length = MAX_STRING;
148 |   vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
149 |   strcpy(vocab[vocab_size].word, word);
150 |   vocab[vocab_size].cn = 0;
151 |   vocab_size++;
152 |   // Reallocate memory if needed
153 |   if (vocab_size + 2 >= vocab_max_size) {
154 |     vocab_max_size += 1000;
155 |     vocab = (struct vocab_word *)realloc(
156 |         vocab, vocab_max_size * sizeof(struct vocab_word));
157 |   }
158 |   hash = GetWordHash(word);
159 |   while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
160 |   vocab_hash[hash] = vocab_size - 1;
161 |   return vocab_size - 1;
162 | }
163 | 
164 | // Used later for sorting by word counts
165 | int VocabCompare(const void *a, const void *b) {
166 |   return ((struct vocab_word *)b)->cn - ((struct vocab_word *)a)->cn;
167 | }
168 | 
169 | // Sorts the vocabulary by frequency using word counts
170 | void SortVocab() {
171 |   int a, size;
172 |   unsigned int hash;
173 |   // Sort the vocabulary and keep </s> at the first position
174 |   qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
175 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
176 |   size = vocab_size;
177 |   train_words = 0;
178 |   for (a = 0; a < size; a++) {
179 |     // Words occuring less than min_count times will be discarded from the vocab
180 |     if ((vocab[a].cn < min_count) && (a != 0)) {
181 |       vocab_size--;
182 |       free(vocab[a].word);
183 |     } else {
184 |       // Hash will be re-computed, as after the sorting it is not actual
185 |       hash = GetWordHash(vocab[a].word);
186 |       while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
187 |       vocab_hash[hash] = a;
188 |       train_words += vocab[a].cn;
189 |     }
190 |   }
191 |   vocab = (struct vocab_word *)realloc(
192 |       vocab, (vocab_size + 1) * sizeof(struct vocab_word));
193 |   // Allocate memory for the binary tree construction
194 |   for (a = 0; a < vocab_size; a++) {
195 |     vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
196 |     vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
197 |   }
198 | }
199 | 
200 | // Reduces the vocabulary by removing infrequent tokens
201 | void ReduceVocab() {
202 |   int a, b = 0;
203 |   unsigned int hash;
204 |   for (a = 0; a < vocab_size; a++)
205 |     if (vocab[a].cn > min_reduce) {
206 |       vocab[b].cn = vocab[a].cn;
207 |       vocab[b].word = vocab[a].word;
208 |       b++;
209 |     } else
210 |       free(vocab[a].word);
211 |   vocab_size = b;
212 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
213 |   for (a = 0; a < vocab_size; a++) {
214 |     // Hash will be re-computed, as it is not actual
215 |     hash = GetWordHash(vocab[a].word);
216 |     while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
217 |     vocab_hash[hash] = a;
218 |   }
219 |   fflush(stdout);
220 |   min_reduce++;
221 | }
222 | 
223 | // Create binary Huffman tree using the word counts
224 | // Frequent words will have short uniqe binary codes
225 | void CreateBinaryTree() {
226 |   long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
227 |   char code[MAX_CODE_LENGTH];
228 |   long long *count = (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
229 |   long long *binary =
230 |       (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
231 |   long long *parent_node =
232 |       (long long *)calloc(vocab_size * 2 + 1, sizeof(long long));
233 |   for (a = 0; a < vocab_size; a++) count[a] = vocab[a].cn;
234 |   for (a = vocab_size; a < vocab_size * 2; a++) count[a] = 1e15;
235 |   pos1 = vocab_size - 1;
236 |   pos2 = vocab_size;
237 |   // Following algorithm constructs the Huffman tree by adding one node at a
238 |   // time
239 |   for (a = 0; a < vocab_size - 1; a++) {
240 |     // First, find two smallest nodes 'min1, min2'
241 |     if (pos1 >= 0) {
242 |       if (count[pos1] < count[pos2]) {
243 |         min1i = pos1;
244 |         pos1--;
245 |       } else {
246 |         min1i = pos2;
247 |         pos2++;
248 |       }
249 |     } else {
250 |       min1i = pos2;
251 |       pos2++;
252 |     }
253 |     if (pos1 >= 0) {
254 |       if (count[pos1] < count[pos2]) {
255 |         min2i = pos1;
256 |         pos1--;
257 |       } else {
258 |         min2i = pos2;
259 |         pos2++;
260 |       }
261 |     } else {
262 |       min2i = pos2;
263 |       pos2++;
264 |     }
265 |     count[vocab_size + a] = count[min1i] + count[min2i];
266 |     parent_node[min1i] = vocab_size + a;
267 |     parent_node[min2i] = vocab_size + a;
268 |     binary[min2i] = 1;
269 |   }
270 |   // Now assign binary code to each vocabulary word
271 |   for (a = 0; a < vocab_size; a++) {
272 |     b = a;
273 |     i = 0;
274 |     while (1) {
275 |       code[i] = binary[b];
276 |       point[i] = b;
277 |       i++;
278 |       b = parent_node[b];
279 |       if (b == vocab_size * 2 - 2) break;
280 |     }
281 |     vocab[a].codelen = i;
282 |     vocab[a].point[0] = vocab_size - 2;
283 |     for (b = 0; b < i; b++) {
284 |       vocab[a].code[i - b - 1] = code[b];
285 |       vocab[a].point[i - b] = point[b] - vocab_size;
286 |     }
287 |   }
288 |   free(count);
289 |   free(binary);
290 |   free(parent_node);
291 | }
292 | 
293 | void LearnVocabFromTrainFile() {
294 |   char word[MAX_STRING];
295 |   FILE *fin;
296 |   long long a, i;
297 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
298 |   fin = fopen(train_file, "rb");
299 |   if (fin == NULL) {
300 |     printf("ERROR: training data file not found!\n");
301 |     exit(1);
302 |   }
303 |   vocab_size = 0;
304 |   AddWordToVocab((char *)"</s>");
305 |   while (1) {
306 |     ReadWord(word, fin);
307 |     if (feof(fin)) break;
308 |     train_words++;
309 |     if ((debug_mode > 1) && (train_words % 100000 == 0)) {
310 |       printf("%lldK%c", train_words / 1000, 13);
311 |       fflush(stdout);
312 |     }
313 |     i = SearchVocab(word);
314 |     if (i == -1) {
315 |       a = AddWordToVocab(word);
316 |       vocab[a].cn = 1;
317 |     } else
318 |       vocab[i].cn++;
319 |     if (vocab_size > vocab_hash_size * 0.7) ReduceVocab();
320 |   }
321 |   SortVocab();
322 |   if (debug_mode > 0) {
323 |     printf("Vocab size: %lld\n", vocab_size);
324 |     printf("Words in train file: %lld\n", train_words);
325 |   }
326 |   file_size = ftell(fin);
327 |   fclose(fin);
328 | }
329 | 
330 | void SaveVocab() {
331 |   long long i;
332 |   FILE *fo = fopen(save_vocab_file, "wb");
333 |   for (i = 0; i < vocab_size; i++)
334 |     fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
335 |   fclose(fo);
336 | }
337 | 
338 | void ReadVocab() {
339 |   long long a, i = 0;
340 |   char c;
341 |   char word[MAX_STRING];
342 |   FILE *fin = fopen(read_vocab_file, "rb");
343 |   if (fin == NULL) {
344 |     printf("Vocabulary file not found\n");
345 |     exit(1);
346 |   }
347 |   for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
348 |   vocab_size = 0;
349 |   while (1) {
350 |     ReadWord(word, fin);
351 |     if (feof(fin)) break;
352 |     a = AddWordToVocab(word);
353 |     fscanf(fin, "%lld%c", &vocab[a].cn, &c);
354 |     i++;
355 |   }
356 |   SortVocab();
357 |   if (debug_mode > 0) {
358 |     printf("Vocab size: %lld\n", vocab_size);
359 |     printf("Words in train file: %lld\n", train_words);
360 |   }
361 |   fin = fopen(train_file, "rb");
362 |   if (fin == NULL) {
363 |     printf("ERROR: training data file not found!\n");
364 |     exit(1);
365 |   }
366 |   fseek(fin, 0, SEEK_END);
367 |   file_size = ftell(fin);
368 |   fclose(fin);
369 | }
370 | 
371 | void InitNet() {
372 |   long long a, b;
373 |   unsigned long long next_random = 1;
374 |   a = posix_memalign((void **)&syn0, 128,
375 |                      (long long)vocab_size * layer1_size * sizeof(real));
376 |   if (syn0 == NULL) {
377 |     printf("Memory allocation failed\n");
378 |     exit(1);
379 |   }
380 |   if (hs) {
381 |     a = posix_memalign((void **)&syn1, 128,
382 |                        (long long)vocab_size * layer1_size * sizeof(real));
383 |     if (syn1 == NULL) {
384 |       printf("Memory allocation failed\n");
385 |       exit(1);
386 |     }
387 |     for (a = 0; a < vocab_size; a++)
388 | #if HAVE_CBLAS == 1
389 |       cblas_scopy(layer1_size, &zero, 1, syn1 + a * layer1_size, 1);
390 | #else
391 |       for (b = 0; b < layer1_size; b++) syn1[a * layer1_size + b] = 0;
392 | #endif
393 |   }
394 |   if (negative > 0) {
395 |     a = posix_memalign((void **)&syn1neg, 128,
396 |                        (long long)vocab_size * layer1_size * sizeof(real));
397 |     if (syn1neg == NULL) {
398 |       printf("Memory allocation failed\n");
399 |       exit(1);
400 |     }
401 |     for (a = 0; a < vocab_size; a++)
402 | #if HAVE_CBLAS == 1
403 |       cblas_scopy(layer1_size, &zero, 0, syn1neg + a * layer1_size, 1);
404 | #else
405 |       for (b = 0; b < layer1_size; b++) syn1neg[a * layer1_size + b] = 0;
406 | #endif
407 |   }
408 |   for (a = 0; a < vocab_size; a++)
409 |     for (b = 0; b < layer1_size; b++) {
410 |       next_random = next_random * (unsigned long long)25214903917 + 11;
411 |       syn0[a * layer1_size + b] =
412 |           (((next_random & 0xFFFF) / (real)65536) - 0.5) / layer1_size;
413 |     }
414 |   CreateBinaryTree();
415 | }
416 | 
417 | void *TrainModelThread(void *id) {
418 |   long long a, b, d, cw, word, last_word, sentence_length = 0,
419 |                                           sentence_position = 0;
420 |   long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
421 |   long long l1, l2, c, target, label, local_iter = iter;
422 |   unsigned long long next_random = (long long)id;
423 |   real f, g;
424 |   clock_t now;
425 |   real *neu1 = (real *)calloc(layer1_size, sizeof(real));
426 |   real *neu1e = (real *)calloc(layer1_size, sizeof(real));
427 |   FILE *fi = fopen(train_file, "rb");
428 |   fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
429 |   while (1) {
430 |     if (word_count - last_word_count > 10000) {
431 |       word_count_actual += word_count - last_word_count;
432 |       last_word_count = word_count;
433 |       if ((debug_mode > 1)) {
434 |         now = clock();
435 |         printf("%cAlpha: %f  Progress: %.2f%%  Words/thread/sec: %.2fk  ", 13,
436 |                alpha, word_count_actual / (real)(iter * train_words + 1) * 100,
437 |                word_count_actual /
438 |                    ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000));
439 |         fflush(stdout);
440 |       }
441 |       alpha = starting_alpha *
442 |               (1 - word_count_actual / (real)(iter * train_words + 1));
443 |       if (alpha < starting_alpha * 0.0001) alpha = starting_alpha * 0.0001;
444 |     }
445 |     if (sentence_length == 0) {
446 |       while (1) {
447 |         word = ReadWordIndex(fi);
448 |         if (feof(fi)) break;
449 |         if (word == -1) continue;
450 |         word_count++;
451 |         if (word == 0) break;
452 |         // The subsampling randomly discards frequent words while keeping the
453 |         // ranking same
454 |         if (sample > 0) {
455 |           real ran = (sqrt(vocab[word].cn / (sample * train_words)) + 1) *
456 |                      (sample * train_words) / vocab[word].cn;
457 |           next_random = next_random * (unsigned long long)25214903917 + 11;
458 |           if (ran < (next_random & 0xFFFF) / (real)65536) continue;
459 |         }
460 |         sen[sentence_length] = word;
461 |         sentence_length++;
462 |         if (sentence_length >= MAX_SENTENCE_LENGTH) break;
463 |       }
464 |       sentence_position = 0;
465 |     }
466 |     if (feof(fi) || (word_count > train_words / num_threads)) {
467 |       word_count_actual += word_count - last_word_count;
468 |       local_iter--;
469 |       if (local_iter == 0) break;
470 |       word_count = 0;
471 |       last_word_count = 0;
472 |       sentence_length = 0;
473 |       fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
474 |       continue;
475 |     }
476 |     word = sen[sentence_position];
477 |     if (word == -1) continue;
478 | #if HAVE_CBLAS == 1
479 |     cblas_scopy(layer1_size, &zero, 0, neu1, 1);
480 |     cblas_scopy(layer1_size, &zero, 0, neu1e, 1);
481 | #else
482 |     for (c = 0; c < layer1_size; c++) neu1[c] = 0;
483 |     for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
484 | #endif
485 |     next_random = next_random * (unsigned long long)25214903917 + 11;
486 |     b = next_random % window;
487 |     if (cbow) {  // train the cbow architecture
488 |       // in -> hidden
489 |       cw = 0;
490 |       for (a = b; a < window * 2 + 1 - b; a++)
491 |         if (a != window) {
492 |           c = sentence_position - window + a;
493 |           if (c < 0) continue;
494 |           if (c >= sentence_length) continue;
495 |           last_word = sen[c];
496 |           if (last_word == -1) continue;
497 | #if HAVE_CBLAS == 1
498 |           cblas_saxpy(layer1_size, 1.0f, syn0 + last_word * layer1_size, 1,
499 |                       neu1, 1);
500 | #else
501 |           for (c = 0; c < layer1_size; c++)
502 |             neu1[c] += syn0[c + last_word * layer1_size];
503 | #endif
504 |           cw++;
505 |         }
506 |       if (cw) {
507 | #if HAVE_CBLAS == 1
508 |         cblas_sscal(layer1_size, 1.0f / cw, neu1, 1);
509 | #else
510 |         for (c = 0; c < layer1_size; c++) neu1[c] /= cw;
511 | #endif
512 |         if (hs)
513 |           for (d = 0; d < vocab[word].codelen; d++) {
514 |             l2 = vocab[word].point[d] * layer1_size;
515 | #if HAVE_CBLAS == 1
516 |             // Propagate hidden -> output
517 |             f = cblas_sdot(layer1_size, neu1, 1, syn1 + l2, 1);
518 | #else
519 |             // Propagate hidden -> output
520 |             f = 0;
521 |             for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1[c + l2];
522 | #endif
523 |             if (f <= -MAX_EXP)
524 |               continue;
525 |             else if (f >= MAX_EXP)
526 |               continue;
527 |             else
528 |               f = expTable[(int)((f + MAX_EXP) *
529 |                                  (EXP_TABLE_SIZE / MAX_EXP / 2))];
530 |             // 'g' is the gradient multiplied by the learning rate
531 |             g = (1 - vocab[word].code[d] - f) * alpha;
532 | #if HAVE_CBLAS == 1
533 |             // Propagate errors output -> hidden
534 |             cblas_saxpy(layer1_size, g, syn1 + l2, 1, neu1e, 1);
535 |             // Learn weights hidden -> output
536 |             cblas_saxpy(layer1_size, g, neu1, 1, syn1 + l2, 1);
537 | #else
538 |             // Propagate errors output -> hidden
539 |             for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
540 |             // Learn weights hidden -> output
541 |             for (c = 0; c < layer1_size; c++) syn1[c + l2] += g * neu1[c];
542 | #endif
543 |           }
544 |         // NEGATIVE SAMPLING
545 |         if (negative > 0)
546 |           for (d = 0; d < negative + 1; d++) {
547 |             if (d == 0) {
548 |               target = word;
549 |               label = 1;
550 |             } else {
551 |               next_random = next_random * (unsigned long long)25214903917 + 11;
552 |               target = table[(next_random >> 16) % table_size];
553 |               if (target == word) continue;
554 |               label = 0;
555 |             }
556 |             l2 = target * layer1_size;
557 | #if HAVE_CBLAS == 1
558 |             f = cblas_sdot(layer1_size, neu1, 1, syn1neg + l2, 1);
559 | #else
560 |             f = 0;
561 |             for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1neg[c + l2];
562 | #endif
563 |             if (f > MAX_EXP)
564 |               g = (label - 1) * alpha;
565 |             else if (f < -MAX_EXP)
566 |               g = (label - 0) * alpha;
567 |             else
568 |               g = (label - expTable[(int)((f + MAX_EXP) *
569 |                                           (EXP_TABLE_SIZE / MAX_EXP / 2))]) *
570 |                   alpha;
571 | #if HAVE_CBLAS == 1
572 |             cblas_saxpy(layer1_size, g, syn1neg + l2, 1, neu1e, 1);
573 |             cblas_saxpy(layer1_size, g, neu1, 1, syn1neg + l2, 1);
574 | #else
575 |             for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
576 |             for (c = 0; c < layer1_size; c++) syn1neg[c + l2] += g * neu1[c];
577 | #endif
578 |           }
579 |         // hidden -> in
580 |         for (a = b; a < window * 2 + 1 - b; a++)
581 |           if (a != window) {
582 |             c = sentence_position - window + a;
583 |             if (c < 0) continue;
584 |             if (c >= sentence_length) continue;
585 |             last_word = sen[c];
586 |             if (last_word == -1) continue;
587 | #if HAVE_CBLAS == 1
588 |             cblas_saxpy(layer1_size, 1, neu1e, 1,
589 |                         syn0 + last_word * layer1_size, 1);
590 | #else
591 |             for (c = 0; c < layer1_size; c++)
592 |               syn0[c + last_word * layer1_size] += neu1e[c];
593 | #endif
594 |           }
595 |       }
596 |     } else {  // train skip-gram
597 |       for (a = b; a < window * 2 + 1 - b; a++)
598 |         if (a != window) {
599 |           c = sentence_position - window + a;
600 |           if (c < 0) continue;
601 |           if (c >= sentence_length) continue;
602 |           last_word = sen[c];
603 |           if (last_word == -1) continue;
604 |           l1 = last_word * layer1_size;
605 | #if HAVE_CBLAS == 1
606 |           cblas_scopy(layer1_size, &zero, 0, neu1e, 1);
607 | #else
608 |           for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
609 | #endif
610 |           // HIERARCHICAL SOFTMAX
611 |           if (hs)
612 |             for (d = 0; d < vocab[word].codelen; d++) {
613 |               l2 = vocab[word].point[d] * layer1_size;
614 | #if HAVE_CBLAS == 1
615 |               // Propagate hidden -> output
616 |               f = cblas_sdot(layer1_size, syn0 + l1, 1, syn1 + l2, 1);
617 | #else
618 |               // Propagate hidden -> output
619 |               f = 0;
620 |               for (c = 0; c < layer1_size; c++)
621 |                 f += syn0[c + l1] * syn1[c + l2];
622 | #endif
623 |               if (f <= -MAX_EXP)
624 |                 continue;
625 |               else if (f >= MAX_EXP)
626 |                 continue;
627 |               else
628 |                 f = expTable[(int)((f + MAX_EXP) *
629 |                                    (EXP_TABLE_SIZE / MAX_EXP / 2))];
630 |               // 'g' is the gradient multiplied by the learning rate
631 |               g = (1 - vocab[word].code[d] - f) * alpha;
632 | #if HAVE_CBLAS == 1
633 |               // Propagate errors output -> hidden
634 |               cblas_saxpy(layer1_size, g, syn1 + l2, 1, neu1e, 1);
635 |               // Learn weights hidden -> output
636 |               cblas_saxpy(layer1_size, g, syn0 + l1, 1, syn1 + l2, 1);
637 | #else
638 |               // Propagate errors output -> hidden
639 |               for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
640 |               // Learn weights hidden -> output
641 |               for (c = 0; c < layer1_size; c++)
642 |                 syn1[c + l2] += g * syn0[c + l1];
643 | #endif
644 |             }
645 |           // NEGATIVE SAMPLING
646 |           if (negative > 0)
647 |             for (d = 0; d < negative + 1; d++) {
648 |               if (d == 0) {
649 |                 target = word;
650 |                 label = 1;
651 |               } else {
652 |                 next_random =
653 |                     next_random * (unsigned long long)25214903917 + 11;
654 |                 target = table[(next_random >> 16) % table_size];
655 |                 if (target == word) continue;
656 |                 label = 0;
657 |               }
658 |               l2 = target * layer1_size;
659 | #if HAVE_CBLAS == 1
660 |               f = cblas_sdot(layer1_size, syn0 + l1, 1, syn1neg + l2, 1);
661 | #else
662 |               f = 0;
663 |               for (c = 0; c < layer1_size; c++)
664 |                 f += syn0[c + l1] * syn1neg[c + l2];
665 | #endif
666 |               if (f > MAX_EXP)
667 |                 g = (label - 1) * alpha;
668 |               else if (f < -MAX_EXP)
669 |                 g = (label - 0) * alpha;
670 |               else
671 |                 g = (label - expTable[(int)((f + MAX_EXP) *
672 |                                             (EXP_TABLE_SIZE / MAX_EXP / 2))]) *
673 |                     alpha;
674 | #if HAVE_CBLAS == 1
675 |               cblas_saxpy(layer1_size, g, syn1neg + l2, 1, neu1e, 1);
676 |               cblas_saxpy(layer1_size, g, syn0 + l1, 1, syn1neg + l2, 1);
677 | #else
678 |               for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
679 |               for (c = 0; c < layer1_size; c++)
680 |                 syn1neg[c + l2] += g * syn0[c + l1];
681 | #endif
682 |             }
683 | #if HAVE_CBLAS == 1
684 |           // Learn weights input -> hidden
685 |           cblas_saxpy(layer1_size, 1, neu1e, 1, syn0 + l1, 1);
686 | #else
687 |           // Learn weights input -> hidden
688 |           for (c = 0; c < layer1_size; c++) syn0[c + l1] += neu1e[c];
689 | #endif
690 |         }
691 |     }
692 |     sentence_position++;
693 |     if (sentence_position >= sentence_length) {
694 |       sentence_length = 0;
695 |       continue;
696 |     }
697 |   }
698 |   fclose(fi);
699 |   free(neu1);
700 |   free(neu1e);
701 |   pthread_exit(NULL);
702 | }
703 | 
704 | void TrainModel() {
705 |   long a, b, c, d;
706 |   FILE *fo;
707 |   pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
708 |   printf("Starting training using file %s\n", train_file);
709 |   starting_alpha = alpha;
710 |   if (read_vocab_file[0] != 0)
711 |     ReadVocab();
712 |   else
713 |     LearnVocabFromTrainFile();
714 |   if (save_vocab_file[0] != 0) SaveVocab();
715 |   if (output_file[0] == 0) return;
716 |   InitNet();
717 |   if (negative > 0) InitUnigramTable();
718 |   start = clock();
719 |   for (a = 0; a < num_threads; a++)
720 |     pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
721 |   for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
722 |   fo = fopen(output_file, "wb");
723 |   if (classes == 0) {
724 |     // Save the word vectors
725 |     fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
726 |     for (a = 0; a < vocab_size; a++) {
727 |       fprintf(fo, "%s ", vocab[a].word);
728 |       if (binary)
729 |         for (b = 0; b < layer1_size; b++)
730 |           fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
731 |       else
732 |         for (b = 0; b < layer1_size; b++)
733 |           fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
734 |       fprintf(fo, "\n");
735 |     }
736 |   } else {
737 |     // Run K-means on the word vectors
738 |     int clcn = classes, iter = 10, closeid;
739 |     int *centcn = (int *)malloc(classes * sizeof(int));
740 |     int *cl = (int *)calloc(vocab_size, sizeof(int));
741 |     real closev, x;
742 |     real *cent = (real *)calloc(classes * layer1_size, sizeof(real));
743 |     for (a = 0; a < vocab_size; a++) cl[a] = a % clcn;
744 |     for (a = 0; a < iter; a++) {
745 |       for (b = 0; b < clcn * layer1_size; b++) cent[b] = 0;
746 |       for (b = 0; b < clcn; b++) centcn[b] = 1;
747 |       for (c = 0; c < vocab_size; c++) {
748 |         for (d = 0; d < layer1_size; d++)
749 |           cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
750 |         centcn[cl[c]]++;
751 |       }
752 |       for (b = 0; b < clcn; b++) {
753 |         closev = 0;
754 |         for (c = 0; c < layer1_size; c++) {
755 |           cent[layer1_size * b + c] /= centcn[b];
756 |           closev += cent[layer1_size * b + c] * cent[layer1_size * b + c];
757 |         }
758 |         closev = sqrt(closev);
759 |         for (c = 0; c < layer1_size; c++) cent[layer1_size * b + c] /= closev;
760 |       }
761 |       for (c = 0; c < vocab_size; c++) {
762 |         closev = -10;
763 |         closeid = 0;
764 |         for (d = 0; d < clcn; d++) {
765 |           x = 0;
766 |           for (b = 0; b < layer1_size; b++)
767 |             x += cent[layer1_size * d + b] * syn0[c * layer1_size + b];
768 |           if (x > closev) {
769 |             closev = x;
770 |             closeid = d;
771 |           }
772 |         }
773 |         cl[c] = closeid;
774 |       }
775 |     }
776 |     // Save the K-means classes
777 |     for (a = 0; a < vocab_size; a++)
778 |       fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
779 |     free(centcn);
780 |     free(cent);
781 |     free(cl);
782 |   }
783 |   fclose(fo);
784 | }
785 | 
786 | int ArgPos(char *str, int argc, char **argv) {
787 |   int a;
788 |   for (a = 1; a < argc; a++)
789 |     if (!strcmp(str, argv[a])) {
790 |       if (a == argc - 1) {
791 |         printf("Argument missing for %s\n", str);
792 |         exit(1);
793 |       }
794 |       return a;
795 |     }
796 |   return -1;
797 | }
798 | 
799 | int main(int argc, char **argv) {
800 |   int i;
801 |   if (argc == 1) {
802 |     printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
803 |     printf("Options:\n");
804 |     printf("Parameters for training:\n");
805 |     printf("\t-train <file>\n");
806 |     printf("\t\tUse text data from <file> to train the model\n");
807 |     printf("\t-output <file>\n");
808 |     printf(
809 |         "\t\tUse <file> to save the resulting word vectors / word clusters\n");
810 |     printf("\t-size <int>\n");
811 |     printf("\t\tSet size of word vectors; default is 100\n");
812 |     printf("\t-window <int>\n");
813 |     printf("\t\tSet max skip length between words; default is 5\n");
814 |     printf("\t-sample <float>\n");
815 |     printf(
816 |         "\t\tSet threshold for occurrence of words. Those that appear with "
817 |         "higher frequency in the training data\n");
818 |     printf(
819 |         "\t\twill be randomly down-sampled; default is 1e-3, useful range is "
820 |         "(0, 1e-5)\n");
821 |     printf("\t-hs <int>\n");
822 |     printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
823 |     printf("\t-negative <int>\n");
824 |     printf(
825 |         "\t\tNumber of negative examples; default is 5, common values are 3 - "
826 |         "10 (0 = not used)\n");
827 |     printf("\t-threads <int>\n");
828 |     printf("\t\tUse <int> threads (default 12)\n");
829 |     printf("\t-iter <int>\n");
830 |     printf("\t\tRun more training iterations (default 5)\n");
831 |     printf("\t-min-count <int>\n");
832 |     printf(
833 |         "\t\tThis will discard words that appear less than <int> times; "
834 |         "default is 5\n");
835 |     printf("\t-alpha <float>\n");
836 |     printf(
837 |         "\t\tSet the starting learning rate; default is 0.025 for skip-gram "
838 |         "and 0.05 for CBOW\n");
839 |     printf("\t-classes <int>\n");
840 |     printf(
841 |         "\t\tOutput word classes rather than word vectors; default number of "
842 |         "classes is 0 (vectors are written)\n");
843 |     printf("\t-debug <int>\n");
844 |     printf(
845 |         "\t\tSet the debug mode (default = 2 = more info during training)\n");
846 |     printf("\t-binary <int>\n");
847 |     printf(
848 |         "\t\tSave the resulting vectors in binary moded; default is 0 (off)\n");
849 |     printf("\t-save-vocab <file>\n");
850 |     printf("\t\tThe vocabulary will be saved to <file>\n");
851 |     printf("\t-read-vocab <file>\n");
852 |     printf(
853 |         "\t\tThe vocabulary will be read from <file>, not constructed from the "
854 |         "training data\n");
855 |     printf("\t-cbow <int>\n");
856 |     printf(
857 |         "\t\tUse the continuous bag of words model; default is 1 (use 0 for "
858 |         "skip-gram model)\n");
859 |     printf("\nExamples:\n");
860 |     printf(
861 |         "./word2vec -train data.txt -output vec.txt -size 200 -window 5 "
862 |         "-sample 1e-4 -negative 5 -hs 0 -binary 0 -cbow 1 -iter 3\n\n");
863 |     return 0;
864 |   }
865 |   output_file[0] = 0;
866 |   save_vocab_file[0] = 0;
867 |   read_vocab_file[0] = 0;
868 |   if ((i = ArgPos((char *)"-size", argc, argv)) > 0)
869 |     layer1_size = atoi(argv[i + 1]);
870 |   if ((i = ArgPos((char *)"-train", argc, argv)) > 0)
871 |     strcpy(train_file, argv[i + 1]);
872 |   if ((i = ArgPos((char *)"-save-vocab", argc, argv)) > 0)
873 |     strcpy(save_vocab_file, argv[i + 1]);
874 |   if ((i = ArgPos((char *)"-read-vocab", argc, argv)) > 0)
875 |     strcpy(read_vocab_file, argv[i + 1]);
876 |   if ((i = ArgPos((char *)"-debug", argc, argv)) > 0)
877 |     debug_mode = atoi(argv[i + 1]);
878 |   if ((i = ArgPos((char *)"-binary", argc, argv)) > 0)
879 |     binary = atoi(argv[i + 1]);
880 |   if ((i = ArgPos((char *)"-cbow", argc, argv)) > 0) cbow = atoi(argv[i + 1]);
881 |   if (cbow) alpha = 0.05;
882 |   if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
883 |   if ((i = ArgPos((char *)"-output", argc, argv)) > 0)
884 |     strcpy(output_file, argv[i + 1]);
885 |   if ((i = ArgPos((char *)"-window", argc, argv)) > 0)
886 |     window = atoi(argv[i + 1]);
887 |   if ((i = ArgPos((char *)"-sample", argc, argv)) > 0)
888 |     sample = atof(argv[i + 1]);
889 |   if ((i = ArgPos((char *)"-hs", argc, argv)) > 0) hs = atoi(argv[i + 1]);
890 |   if ((i = ArgPos((char *)"-negative", argc, argv)) > 0)
891 |     negative = atoi(argv[i + 1]);
892 |   if ((i = ArgPos((char *)"-threads", argc, argv)) > 0)
893 |     num_threads = atoi(argv[i + 1]);
894 |   if ((i = ArgPos((char *)"-iter", argc, argv)) > 0) iter = atoi(argv[i + 1]);
895 |   if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0)
896 |     min_count = atoi(argv[i + 1]);
897 |   if ((i = ArgPos((char *)"-classes", argc, argv)) > 0)
898 |     classes = atoi(argv[i + 1]);
899 |   vocab =
900 |       (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
901 |   vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
902 |   expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
903 |   for (i = 0; i < EXP_TABLE_SIZE; i++) {
904 |     expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) *
905 |                       MAX_EXP);  // Precompute the exp() table
906 |     expTable[i] =
907 |         expTable[i] / (expTable[i] + 1);  // Precompute f(x) = x / (x + 1)
908 |   }
909 |   TrainModel();
910 |   return 0;
911 | }
912 | 


--------------------------------------------------------------------------------