├── models
│   └── .gitkeep
├── normalize.py
├── plot-correlation.py
├── plot-reduction.py
├── plot-tsne.py
├── project-tsne.sh
├── readme.md
├── setup.sh
├── tokens-to-correlation.py
├── tokens-to-vectors.py
└── words-to-vectors.py

/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kylemcdonald/EmbeddingScripts/953f7958e8eb5c9c9c88f6cbf240cbdd41833c5c/models/.gitkeep
--------------------------------------------------------------------------------
/normalize.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import numpy

# rescale every column of the input to the [0, 1] range
d = numpy.loadtxt("/dev/stdin")
d -= d.min(axis=0)
d /= d.max(axis=0)
numpy.savetxt("/dev/stdout", d, fmt="%.8f", delimiter="\t")
--------------------------------------------------------------------------------
/plot-correlation.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse
from numpy import loadtxt
from scipy.spatial.distance import cdist
from pylab import figure, pcolor, savefig
from tsp_solver.greedy import solve_tsp

parser = argparse.ArgumentParser(
    description='Plot the vectors as a matrix, with rows sorted by similarity.')
parser.add_argument('-i', '--input', default='data')
args = parser.parse_args()

data = loadtxt('{0}/vectors'.format(args.input))

labels = []
with open('{}/words'.format(args.input)) as f:
    for line in f:
        labels.append(line.strip())

# order the rows by solving a travelling salesperson problem over pairwise distances
distanceMatrix = cdist(data, data, 'euclidean')
path = solve_tsp(distanceMatrix)
figure(figsize=(4, 4))
pcolor(data[path], cmap='binary')
savefig('{}/correlation.png'.format(args.input), dpi=600)

for i in path:
    print(labels[i])
--------------------------------------------------------------------------------
/plot-reduction.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse
from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import (manifold, decomposition, ensemble, random_projection)

parser = argparse.ArgumentParser(
    description='Plot many kinds of dimensionality reduction algorithms.')
parser.add_argument('-i', '--input', default='data')
args = parser.parse_args()

y = []
with open('{}/words'.format(args.input)) as f:
    for line in f:
        y.append(line.strip())

X = np.loadtxt('{}/vectors'.format(args.input))
n_samples, n_features = X.shape
n_neighbors = 30


#----------------------------------------------------------------------
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)

    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 # color=plt.cm.Set1(y[i] / 10.),
                 fontdict={'weight': 'bold', 'size': 9})

    if hasattr(offsetbox, 'AnnotationBbox'):
        # only print thumbnails with matplotlib > 1.0
        shown_images = np.array([[1., 1.]])  # just something big
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # don't show points that are too close
                continue
            # shown_images = np.r_[shown_images, [X[i]]]
            # imagebox = offsetbox.AnnotationBbox(
            #     offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r),
            #     X[i])
            # ax.add_artist(imagebox)
    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)


#----------------------------------------------------------------------
# Plot images
# n_img_per_row = 20
# img = np.zeros((10 * n_img_per_row, 10 * n_img_per_row))
# for i in range(n_img_per_row):
#     ix = 10 * i + 1
#     for j in range(n_img_per_row):
#         iy = 10 * j + 1
#         img[ix:ix + 8, iy:iy + 8] = X[i * n_img_per_row + j].reshape((8, 8))

# plt.imshow(img, cmap=plt.cm.binary)
# plt.xticks([])
# plt.yticks([])
# plt.title('A selection from the 64-dimensional digits dataset')


#----------------------------------------------------------------------
# Random 2D projection using a random unitary matrix
# print("Computing random projection")
# rp = random_projection.SparseRandomProjection(n_components=2, random_state=42)
# X_projected = rp.fit_transform(X)
# plot_embedding(X_projected, "Random Projection")


#----------------------------------------------------------------------
# Projection onto the first 2 principal components

try:
    print("Computing PCA projection")
    t0 = time()
    X_pca = decomposition.TruncatedSVD(n_components=2).fit_transform(X)
    plot_embedding(X_pca,
                   "Principal Components projection (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Projection onto the first 2 linear discriminant components

try:
    print("Computing LDA projection")
    from sklearn import lda
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    X_lda = lda.LDA(n_components=2).fit_transform(X2, y)
    plot_embedding(X_lda,
                   "Linear Discriminant projection (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Isomap projection
try:
    print("Computing Isomap embedding")
    t0 = time()
    X_iso = manifold.Isomap(n_neighbors, n_components=2).fit_transform(X)
    print("Done.")
    plot_embedding(X_iso,
                   "Isomap projection (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Locally linear embedding
try:
    print("Computing LLE embedding")
    clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2,
                                          method='standard')
    t0 = time()
    X_lle = clf.fit_transform(X)
    print("Done. Reconstruction error: %g" % clf.reconstruction_error_)
    plot_embedding(X_lle,
                   "Locally Linear Embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Modified Locally linear embedding
try:
    print("Computing modified LLE embedding")
    clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2,
                                          method='modified')
    t0 = time()
    X_mlle = clf.fit_transform(X)
    print("Done. Reconstruction error: %g" % clf.reconstruction_error_)
    plot_embedding(X_mlle,
                   "Modified Locally Linear Embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Hessian LLE embedding
try:
    print("Computing Hessian LLE embedding")
    clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2,
                                          method='hessian')
    t0 = time()
    X_hlle = clf.fit_transform(X)
    print("Done. Reconstruction error: %g" % clf.reconstruction_error_)
    plot_embedding(X_hlle,
                   "Hessian Locally Linear Embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# LTSA embedding
try:
    print("Computing LTSA embedding")
    clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2,
                                          method='ltsa')
    t0 = time()
    X_ltsa = clf.fit_transform(X)
    print("Done. Reconstruction error: %g" % clf.reconstruction_error_)
    plot_embedding(X_ltsa,
                   "Local Tangent Space Alignment (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# MDS embedding
try:
    print("Computing MDS embedding")
    clf = manifold.MDS(n_components=2, n_init=1, max_iter=100)
    t0 = time()
    X_mds = clf.fit_transform(X)
    print("Done. Stress: %f" % clf.stress_)
    plot_embedding(X_mds,
                   "MDS embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Random Trees embedding
try:
    print("Computing Totally Random Trees embedding")
    hasher = ensemble.RandomTreesEmbedding(n_estimators=200, random_state=0,
                                           max_depth=5)
    t0 = time()
    X_transformed = hasher.fit_transform(X)
    pca = decomposition.TruncatedSVD(n_components=2)
    X_reduced = pca.fit_transform(X_transformed)

    plot_embedding(X_reduced,
                   "Random forest embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# Spectral embedding
try:
    print("Computing Spectral embedding")
    embedder = manifold.SpectralEmbedding(n_components=2, random_state=0,
                                          eigen_solver="arpack")
    t0 = time()
    X_se = embedder.fit_transform(X)

    plot_embedding(X_se,
                   "Spectral embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

#----------------------------------------------------------------------
# t-SNE embedding
try:
    print("Computing t-SNE embedding")
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    t0 = time()
    X_tsne = tsne.fit_transform(X)

    plot_embedding(X_tsne,
                   "t-SNE embedding (time %.2fs)" %
                   (time() - t0))
except Exception:
    pass

plt.show()
--------------------------------------------------------------------------------
/plot-tsne.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from scipy.spatial import Voronoi

parser = argparse.ArgumentParser(
    description='Plot tsne output.')
parser.add_argument('-i', '--input', default='data')
parser.add_argument('-p', '--perplexity', default=1)
args = parser.parse_args()

labels = []
with open('{}/words'.format(args.input)) as f:
    for line in f:
        labels.append(line.strip())

data2d = np.loadtxt('{0}/{1}.2d.tsne'.format(args.input, args.perplexity))
data3d = np.loadtxt('{0}/{1}.3d.tsne'.format(args.input, args.perplexity))

plt.figure(figsize=(10, 10), dpi=100)

# color each Voronoi cell by the normalized 3d embedding of its point
vor = Voronoi(data2d)
for i in range(len(data2d)):
    if vor.point_region[i] != -1:
        region = vor.regions[vor.point_region[i]]
        if not -1 in region:
            polygon = [vor.vertices[j] for j in region]
            plt.fill(*zip(*polygon), color=data3d[i])

for label, x, y in zip(labels, data2d[:, 0], data2d[:, 1]):
    plt.annotate(label, xy=(x, y), size=2, va='center', ha='center')

plt.axis('off')
plt.xlim([0, 1])
plt.ylim([0, 1])

pp = PdfPages('{0}/{1}-plot.pdf'.format(args.input, args.perplexity))
plt.savefig(pp, format='pdf', bbox_inches='tight', pad_inches=0)
pp.close()
--------------------------------------------------------------------------------
/project-tsne.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -x

function tsne {
    # skip perplexities that have already been computed
    if [ ! -f $1/$2.2d.tsne ]; then
        python bh_tsne/bhtsne.py -v -d 2 -p $2 -i $1/vectors -o $1/cache
        cat $1/cache | python normalize.py > $1/$2.2d.tsne
        python bh_tsne/bhtsne.py -v -d 3 -p $2 -i $1/vectors -o $1/cache
        cat $1/cache | python normalize.py > $1/$2.3d.tsne
        rm $1/cache
    fi
}

if [ ! -f $1/vectors ]; then
    # if there are no vectors
    if [ ! -f $1/tokens ]; then
        # and no tokens, use word2vec to create vectors
        python3 words-to-vectors.py -i $1
    else
        # otherwise create vectors from tokens
        # cp $1/wordlist $1/words
        # python tokens-to-vectors.py -i $1
        # or create correlation vectors
        python tokens-to-correlation.py -i $1
    fi
fi

tsne $1 1
tsne $1 5
tsne $1 10
tsne $1 50
tsne $1 100
tsne $1 500
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Embedding Scripts

A small collection of scripts to project/embed high dimensional data in two dimensions.

First run `./setup.sh`, which will make sure Python has the necessary libraries. It will also compile Barnes-Hut t-SNE from source, and download a word2vec model trained on the Google News dataset (a very large file that will decompress to ~3.6GB).

Each dataset is stored in a folder. Inside the folder you might have the following files (a small example follows this list):

- `tokens` is a tab-separated file of samples, where each column holds one token. For example, one line of `cocktails/tokens` might look like `whiskey\tginger ale\tlemon`
- `wordlist` is a list of words to be projected using word2vec. For example `moods/wordlist` might read `happy\nsad\nhungry\ndelighted\n`
- `vectors` is a tab-separated list of high dimensional vectors used as input to the nonlinear projection algorithms.
- `words` is a list of labels for each of the lines in `vectors`. If the `vectors` are generated from `wordlist`, some words may not have word2vec definitions and `words` will be a subset of `wordlist`.
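
As a concrete illustration, a tiny, made-up dataset might look like this. The file names and layout follow the conventions above; the contents are invented:

```
cocktails/tokens        one drink per line, ingredients separated by tabs
whiskey	ginger ale	lemon
gin	tonic	lime
rum	cola	lime

moods/wordlist          one word per line
happy
sad
hungry
delighted
```

Running the scripts below on such a folder produces the corresponding `vectors` (and, for word2vec, `words`) files next to the inputs.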

## Scripts

All Python scripts take `-i` as an argument for your input folder.

### words-to-vectors.py

This will generate `vectors` from `wordlist` using word2vec. It will also generate `words`, which may be a subset of `wordlist`.

### tokens-to-vectors.py

This will generate binary `vectors` from `tokens`. So if you have 600 cocktails with 3-8 ingredients each, and 180 unique ingredients, the output will be 600 vectors of length 180 with 3-8 values set to 1.

### tokens-to-correlation.py

This will generate floating point `vectors` from `tokens` using the correlation/co-occurrence between different tokens. If you have 600 cocktails with 3-8 ingredients each, and 180 unique ingredients, the output will be 180 vectors of length 180, and ingredients that co-occur more often get higher values. Except for very complex datasets, most elements will be 0.

### plot-reduction.py

After generating `vectors` using one of the above techniques or by providing them directly, this script will attempt to run a collection of dimensionality reduction algorithms from scikit-learn on the input data. This is usually a good way to figure out what direction to head next.

### plot-correlation.py

This plots the `vectors` as a basic correlation-style matrix, with the rows sorted by solving a travelling salesperson problem over pairwise distances. It will also print a list of labels "sorted by similarity". Output is stored in the input folder as a png file.

### project-tsne.sh

This takes one argument for the input folder, and will generate `vectors` if they don't exist, either using `words-to-vectors.py` or `tokens-to-correlation.py` depending on which files are present (`tokens-to-vectors.py` is available as a commented-out alternative), and then run `bh_tsne` with perplexities of 1, 5, 10, 50, 100 and 500 for both 2d and 3d projections. The results are stored in the input folder.

### plot-tsne.py

Besides the argument for the input folder, this script also takes an argument for the perplexity to process using `-p`. It takes the results of `bh_tsne` and uses the 2d projection to place labels and the 3d projection to choose colors for Voronoi cells in the background, which can provide a high dimensional intuition for distances in some cases: if two adjacent vectors are "strongly" similar they have similar colors (i.e., they are still adjacent in a higher dimensional space). If they are "weakly" similar they have different colors (they become separated in a higher dimensional space). The output image is saved in the input folder as a pdf file.
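
To tie the pieces together, here is a rough end-to-end session, assuming a hypothetical `cocktails` folder that contains a `tokens` file like the example above:

```sh
./setup.sh                               # install dependencies, build bh_tsne, download the word2vec model
./project-tsne.sh cocktails              # build vectors, then run bh_tsne at several perplexities
python plot-tsne.py -i cocktails -p 10   # render the perplexity-10 result to cocktails/10-plot.pdf
python plot-correlation.py -i cocktails  # similarity-sorted matrix saved to cocktails/correlation.png
```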
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
echo "Setting up Python 3"
pip3 install numpy gensim

echo "Setting up Python 2"
pip install numpy scipy scikit-learn matplotlib
pip install git+https://github.com/dmishin/tsp-solver.git

echo "Downloading bh_tsne from http://lvdmaaten.github.io/tsne/"
curl -O http://lvdmaaten.github.io/tsne/code/bh_tsne.tar.gz
tar -zxvf bh_tsne.tar.gz
rm bh_tsne.tar.gz
cd bh_tsne

if [ -d /Applications/Xcode.app ]; then
    echo "Building bh_tsne for OSX..."
    g++ sptree.cpp tsne.cpp -o bh_tsne -O3 -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ -lcblas
else
    echo "Building bh_tsne for Linux..."
    g++ sptree.cpp tsne.cpp -o bh_tsne -O3 -I./CBLAS/include -L./ -lcblas
fi
echo "Done building bh_tsne"
# return to the repository root so the model lands in models/
cd ..

echo "Downloading GoogleNews-vectors-negative300.bin.gz from https://code.google.com/p/word2vec/"
echo "(exit now if you don't want to use word2vec)"
curl -o "models/GoogleNews-vectors-negative300.bin.gz" -Lk "https://googledrive.com/host/0B7XkCwpI5KDYNlNUTTlSS21pQmM"
echo "Extracting GoogleNews-vectors-negative300.bin.gz"
gunzip models/GoogleNews-vectors-negative300.bin.gz
--------------------------------------------------------------------------------
/tokens-to-correlation.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse, numpy
from collections import defaultdict

parser = argparse.ArgumentParser(
    description='Generate a .tsv from tab-separated tokens using co-occurrence counts of the tokens.')
parser.add_argument('-i', '--input', default='data')
args = parser.parse_args()

# count how often every pair of tokens appears together on a line
unique = set()
counts = defaultdict(
    lambda: defaultdict(
        lambda: 0.0))
with open('{}/tokens'.format(args.input)) as f:
    for line in f:
        tokens = line.strip().split('\t')
        unique.update(tokens)
        for a in tokens:
            for b in tokens:
                counts[a][b] += 1
                counts[b][a] += 1

# one vector per unique token, normalized by its largest co-occurrence count
words = []
vectors = []
for a in unique:
    words.append(a)
    vector = []
    for b in unique:
        vector.append(counts[a][b])
    vectors.append(vector / numpy.max(vector))

numpy.savetxt('{}/words'.format(args.input), words, fmt='%s')
numpy.savetxt('{}/vectors'.format(args.input), vectors, fmt='%.8f', delimiter='\t')
--------------------------------------------------------------------------------
/tokens-to-vectors.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse, numpy

parser = argparse.ArgumentParser(
    description='Generate a .tsv from tab-separated tokens.')
parser.add_argument('-i', '--input', default='data')
args = parser.parse_args()

# read through file once to get all tokens
unique = set()
with open('{}/tokens'.format(args.input)) as f:
    for line in f:
        tokens = line.strip().split('\t')
        unique.update(tokens)

# read through file again to output one binary vector per line
vectors = []
with open('{}/tokens'.format(args.input)) as f:
    for line in f:
        vector = []
        tokens = line.strip().split('\t')
        for ref in unique:
            if ref in tokens:
                vector.append(1)
            else:
                vector.append(0)
        vectors.append(vector)

numpy.savetxt('{}/vectors'.format(args.input), vectors, fmt='%.1f', delimiter='\t')
--------------------------------------------------------------------------------
/words-to-vectors.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import argparse, numpy
from gensim.models import Word2Vec

parser = argparse.ArgumentParser(
    description='Generate a .tsv of word2vec vectors for a word list.')
parser.add_argument('-i', '--input', default='data')
parser.add_argument('-m', '--model', default='models/GoogleNews-vectors-negative300.bin')
args = parser.parse_args()

print('Loading model from ' + args.model)
# note: on newer gensim releases this loader has moved to KeyedVectors.load_word2vec_format
model = Word2Vec.load_word2vec_format(args.model, binary=True)
wordlist = numpy.genfromtxt('{}/wordlist'.format(args.input), dtype='str')
words = []
vectors = []
print('Looking up {} words.'.format(len(wordlist)))
for word in wordlist:
    if word in model:
        words.append(word)
        vectors.append(model[word])
print('Saving {:.2%} of the words.'.format(len(words) / len(wordlist)))
numpy.savetxt('{}/words'.format(args.input), words, fmt='%s')
print('Saving word vectors.')
numpy.savetxt('{}/vectors'.format(args.input), vectors, fmt='%.8f', delimiter='\t')
--------------------------------------------------------------------------------