├── requirements.txt
├── README.md
├── project_helper.py
├── tests.py
└── project_tests.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | alphalens==0.3.2
2 | nltk==3.4.5
3 | numpy==1.13.3
4 | ratelimit==2.2.0
5 | requests==2.20.0
6 | scikit-learn==0.19.1
7 | six==1.11.0
8 | tqdm==4.19.5
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nlp_10-ks
2 | Code for Project 5 from Udacity's AI for Trading nanodegree program. 
3 | 
4 | Original source code from https://github.com/udacity/artificial-intelligence-for-trading
5 | 


--------------------------------------------------------------------------------
/project_helper.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import requests
 3 | 
 4 | from ratelimit import limits, sleep_and_retry
 5 | 
 6 | 
 7 | class SecAPI(object):
 8 |     SEC_CALL_LIMIT = {'calls': 10, 'seconds': 1}
 9 | 
10 |     @staticmethod
11 |     @sleep_and_retry
12 |     # Dividing the call limit by half to avoid coming close to the limit
13 |     @limits(calls=SEC_CALL_LIMIT['calls'] / 2, period=SEC_CALL_LIMIT['seconds'])
14 |     def _call_sec(url):
15 |         return requests.get(url)
16 | 
17 |     def get(self, url):
18 |         return self._call_sec(url).text
19 | 
20 | 
21 | def print_ten_k_data(ten_k_data, fields, field_length_limit=50):
22 |     indentation = '  '
23 | 
24 |     print('[')
25 |     for ten_k in ten_k_data:
26 |         print_statement = '{}{{'.format(indentation)
27 |         for field in fields:
28 |             value = str(ten_k[field])
29 | 
30 |             # Show return lines in output
31 |             if isinstance(value, str):
32 |                 value_str = '\'{}\''.format(value.replace('\n', '\\n'))
33 |             else:
34 |                 value_str = str(value)
35 | 
36 |             # Cut off the string if it gets too long
37 |             if len(value_str) > field_length_limit:
38 |                 value_str = value_str[:field_length_limit] + '...'
39 | 
40 |             print_statement += '\n{}{}: {}'.format(indentation * 2, field, value_str)
41 | 
42 |         print_statement += '},'
43 |         print(print_statement)
44 |     print(']')
45 | 
46 | 
47 | def plot_similarities(similarities_list, dates, title, labels):
48 |     assert len(similarities_list) == len(labels)
49 | 
50 |     plt.figure(1, figsize=(10, 7))
51 |     for similarities, label in zip(similarities_list, labels):
52 |         plt.title(title)
53 |         plt.plot(dates, similarities, label=label)
54 |         plt.legend()
55 |         plt.xticks(rotation=90)
56 | 
57 |     plt.show()
58 | 


--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | from collections import OrderedDict
  3 | import copy
  4 | import pandas as pd
  5 | import numpy as np
  6 | from datetime import date, timedelta
  7 | 
  8 | 
  9 | pd.options.display.float_format = '{:.8f}'.format
 10 | 
 11 | 
 12 | def _generate_output_error_msg(fn_name, fn_inputs, fn_outputs, fn_expected_outputs):
 13 |     formatted_inputs = []
 14 |     formatted_outputs = []
 15 |     formatted_expected_outputs = []
 16 | 
 17 |     for input_name, input_value in fn_inputs.items():
 18 |         formatted_outputs.append('INPUT {}:\n{}\n'.format(
 19 |             input_name, str(input_value)))
 20 |     for output_name, output_value in fn_outputs.items():
 21 |         formatted_outputs.append('OUTPUT {}:\n{}\n'.format(
 22 |             output_name, str(output_value)))
 23 |     for expected_output_name, expected_output_value in fn_expected_outputs.items():
 24 |         formatted_expected_outputs.append('EXPECTED OUTPUT FOR {}:\n{}\n'.format(
 25 |             expected_output_name, str(expected_output_value)))
 26 | 
 27 |     return 'Wrong value for {}.\n' \
 28 |            '{}\n' \
 29 |            '{}\n' \
 30 |            '{}' \
 31 |         .format(
 32 |             fn_name,
 33 |             '\n'.join(formatted_inputs),
 34 |             '\n'.join(formatted_outputs),
 35 |             '\n'.join(formatted_expected_outputs))
 36 | 
 37 | 
 38 | def _is_equal(x, y):
 39 |     is_equal = False
 40 | 
 41 |     if isinstance(x, pd.DataFrame) or isinstance(y, pd.Series):
 42 |         is_equal = x.equals(y)
 43 |     elif isinstance(x, np.ndarray):
 44 |         is_equal = np.array_equal(x, y)
 45 |     elif isinstance(x, list):
 46 |         if len(x) == len(y):
 47 |             for x_item, y_item in zip(x, y):
 48 |                 if not _is_equal(x_item, y_item):
 49 |                     break
 50 |             else:
 51 |                 is_equal = True
 52 |     else:
 53 |         is_equal = x == y
 54 | 
 55 |     return is_equal
 56 | 
 57 | 
 58 | def project_test(func):
 59 |     def func_wrapper(*args):
 60 |         result = func(*args)
 61 |         print('Tests Passed')
 62 |         return result
 63 | 
 64 |     return func_wrapper
 65 | 
 66 | 
 67 | def generate_random_tickers(n_tickers=None):
 68 |     min_ticker_len = 3
 69 |     max_ticker_len = 5
 70 |     tickers = []
 71 | 
 72 |     if not n_tickers:
 73 |         n_tickers = np.random.randint(8, 14)
 74 | 
 75 |     ticker_symbol_random = np.random.randint(ord('A'), ord('Z')+1, (n_tickers, max_ticker_len))
 76 |     ticker_symbol_lengths = np.random.randint(min_ticker_len, max_ticker_len, n_tickers)
 77 |     for ticker_symbol_rand, ticker_symbol_length in zip(ticker_symbol_random, ticker_symbol_lengths):
 78 |         ticker_symbol = ''.join([chr(c_id) for c_id in ticker_symbol_rand[:ticker_symbol_length]])
 79 |         tickers.append(ticker_symbol)
 80 | 
 81 |     return tickers
 82 | 
 83 | 
 84 | def generate_random_dates(n_days=None):
 85 |     if not n_days:
 86 |         n_days = np.random.randint(14, 20)
 87 | 
 88 |     start_year = np.random.randint(1999, 2017)
 89 |     start_month = np.random.randint(1, 12)
 90 |     start_day = np.random.randint(1, 29)
 91 |     start_date = date(start_year, start_month, start_day)
 92 | 
 93 |     dates = []
 94 |     for i in range(n_days):
 95 |         dates.append(start_date + timedelta(days=i))
 96 | 
 97 |     return dates
 98 | 
 99 | 
100 | def assert_structure(received_obj, expected_obj, obj_name):
101 |     assert isinstance(received_obj, type(expected_obj)), \
102 |         'Wrong type for output {}. Got {}, expected {}'.format(obj_name, type(received_obj), type(expected_obj))
103 | 
104 |     if hasattr(expected_obj, 'shape'):
105 |         assert received_obj.shape == expected_obj.shape, \
106 |             'Wrong shape for output {}. Got {}, expected {}'.format(obj_name, received_obj.shape, expected_obj.shape)
107 |     elif hasattr(expected_obj, '__len__'):
108 |         assert len(received_obj) == len(expected_obj), \
109 |             'Wrong len for output {}. Got {}, expected {}'.format(obj_name, len(received_obj), len(expected_obj))
110 | 
111 |     if type(expected_obj) == pd.DataFrame:
112 |         assert set(received_obj.columns) == set(expected_obj.columns), \
113 |             'Incorrect columns for output {}\n' \
114 |             'COLUMNS:          {}\n' \
115 |             'EXPECTED COLUMNS: {}'.format(obj_name, sorted(received_obj.columns), sorted(expected_obj.columns))
116 | 
117 |         # This is to catch a case where __equal__ says it's equal between different types
118 |         assert set([type(i) for i in received_obj.columns]) == set([type(i) for i in expected_obj.columns]), \
119 |             'Incorrect types in columns for output {}\n' \
120 |             'COLUMNS:          {}\n' \
121 |             'EXPECTED COLUMNS: {}'.format(obj_name, sorted(received_obj.columns), sorted(expected_obj.columns))
122 | 
123 |         for column in expected_obj.columns:
124 |             assert received_obj[column].dtype == expected_obj[column].dtype, \
125 |                 'Incorrect type for output {}, column {}\n' \
126 |                 'Type:          {}\n' \
127 |                 'EXPECTED Type: {}'.format(obj_name, column, received_obj[column].dtype, expected_obj[column].dtype)
128 | 
129 |     if type(expected_obj) in {pd.DataFrame, pd.Series}:
130 |         assert set(received_obj.index) == set(expected_obj.index), \
131 |             'Incorrect indices for output {}\n' \
132 |             'INDICES:          {}\n' \
133 |             'EXPECTED INDICES: {}'.format(obj_name, sorted(received_obj.index), sorted(expected_obj.index))
134 | 
135 |         # This is to catch a case where __equal__ says it's equal between different types
136 |         assert set([type(i) for i in received_obj.index]) == set([type(i) for i in expected_obj.index]), \
137 |             'Incorrect types in indices for output {}\n' \
138 |             'INDICES:          {}\n' \
139 |             'EXPECTED INDICES: {}'.format(obj_name, sorted(received_obj.index), sorted(expected_obj.index))
140 | 
141 | 
142 | def does_data_match(obj_a, obj_b):
143 |     if type(obj_a) == pd.DataFrame:
144 |         # Sort Columns
145 |         obj_b = obj_b.sort_index(1)
146 |         obj_a = obj_a.sort_index(1)
147 | 
148 |     if type(obj_a) in {pd.DataFrame, pd.Series}:
149 |         # Sort Indices
150 |         obj_b = obj_b.sort_index()
151 |         obj_a = obj_a.sort_index()
152 |     try:
153 |         data_is_close = np.isclose(obj_b, obj_a, equal_nan=True)
154 |     except TypeError:
155 |         data_is_close = obj_b == obj_a
156 |     else:
157 |         if isinstance(obj_a, collections.Iterable):
158 |             data_is_close = data_is_close.all()
159 | 
160 |     return data_is_close
161 | 
162 | 
163 | def assert_output(fn, fn_inputs, fn_expected_outputs, check_parameter_changes=True):
164 |     assert type(fn_expected_outputs) == OrderedDict
165 | 
166 |     if check_parameter_changes:
167 |         fn_inputs_passed_in = copy.deepcopy(fn_inputs)
168 |     else:
169 |         fn_inputs_passed_in = fn_inputs
170 | 
171 |     fn_raw_out = fn(**fn_inputs_passed_in)
172 | 
173 |     # Check if inputs have changed
174 |     if check_parameter_changes:
175 |         for input_name, input_value in fn_inputs.items():
176 |             passed_in_unchanged = _is_equal(input_value, fn_inputs_passed_in[input_name])
177 | 
178 |             assert passed_in_unchanged, 'Input parameter "{}" has been modified inside the function. ' \
179 |                                         'The function shouldn\'t modify the function parameters.'.format(input_name)
180 | 
181 |     fn_outputs = OrderedDict()
182 |     if len(fn_expected_outputs) == 1:
183 |         fn_outputs[list(fn_expected_outputs)[0]] = fn_raw_out
184 |     elif len(fn_expected_outputs) > 1:
185 |         assert type(fn_raw_out) == tuple,\
186 |             'Expecting function to return tuple, got type {}'.format(type(fn_raw_out))
187 |         assert len(fn_raw_out) == len(fn_expected_outputs),\
188 |             'Expected {} outputs in tuple, only found {} outputs'.format(len(fn_expected_outputs), len(fn_raw_out))
189 |         for key_i, output_key in enumerate(fn_expected_outputs.keys()):
190 |             fn_outputs[output_key] = fn_raw_out[key_i]
191 | 
192 |     err_message = _generate_output_error_msg(
193 |         fn.__name__,
194 |         fn_inputs,
195 |         fn_outputs,
196 |         fn_expected_outputs)
197 | 
198 |     for fn_out, (out_name, expected_out) in zip(fn_outputs.values(), fn_expected_outputs.items()):
199 |         assert_structure(fn_out, expected_out, out_name)
200 |         correct_data = does_data_match(expected_out, fn_out)
201 | 
202 |         assert correct_data, err_message
203 | 


--------------------------------------------------------------------------------
/project_tests.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | 
  4 | from collections import OrderedDict
  5 | 
  6 | from tests import assert_output, project_test, assert_structure
  7 | 
  8 | 
  9 | @project_test
 10 | def test_get_documents(fn):
 11 |     # Test 1
 12 |     doc = '\nThis is inside the document\n' \
 13 |           'This is the text that should be copied'
 14 |     text = 'This is before the test document<DOCUMENT>{}</DOCUMENT>\n' \
 15 |            'This is after the document\n' \
 16 |            'This shouldn\t be included.'.format(doc)
 17 | 
 18 |     fn_inputs = {
 19 |         'text': text}
 20 |     fn_correct_outputs = OrderedDict([
 21 |         (
 22 |             'extracted_docs', [doc])])
 23 | 
 24 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
 25 | 
 26 |     # Test 2
 27 |     ten_k_real_compressed_doc = '\n' \
 28 |         '<TYPE>10-K\n' \
 29 |         '<SEQUENCE>1\n' \
 30 |         '<FILENAME>test-20171231x10k.htm\n' \
 31 |         '<DESCRIPTION>10-K\n' \
 32 |         '<TEXT>\n' \
 33 |         '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
 34 |         '<html>\n' \
 35 |         '	<head>\n' \
 36 |         '		<title>Document</title>\n' \
 37 |         '	</head>\n' \
 38 |         '	<body style="font-family:Times New Roman;font-size:10pt;">\n' \
 39 |         '...\n' \
 40 |         '<td><strong> Data Type:</strong></td>\n' \
 41 |         '<td>xbrli:sharesItemType</td>\n' \
 42 |         '</tr>\n' \
 43 |         '<tr>\n' \
 44 |         '<td><strong> Balance Type:</strong></td>\n' \
 45 |         '<td>na</td>\n' \
 46 |         '</tr>\n' \
 47 |         '<tr>\n' \
 48 |         '<td><strong> Period Type:</strong></td>\n' \
 49 |         '<td>duration</td>\n' \
 50 |         '</tr>\n' \
 51 |         '</table></div>\n' \
 52 |         '</div></td></tr>\n' \
 53 |         '</table>\n' \
 54 |         '</div>\n' \
 55 |         '</body>\n' \
 56 |         '</html>\n' \
 57 |         '</TEXT>\n'
 58 |     excel_real_compressed_doc = '\n' \
 59 |         '<TYPE>EXCEL\n' \
 60 |         '<SEQUENCE>106\n' \
 61 |         '<FILENAME>Financial_Report.xlsx\n' \
 62 |         '<DESCRIPTION>IDEA: XBRL DOCUMENT\n' \
 63 |         '<TEXT>\n' \
 64 |         'begin 644 Financial_Report.xlsx\n' \
 65 |         'M4$L#!!0    ( %"E04P?(\\#P    !,"   +    7W)E;,O+G)E;.MDD^+\n' \
 66 |         'MPD ,Q;]*F?L:5\#8CUYZ6U9_ )Q)OU#.Y,A$[%^>X>];+=44/ 87O+>CT?V\n' \
 67 |         '...\n' \
 68 |         'M,C,Q7V1E9BYX;6Q02P$"% ,4    " !0I4%,>V7[]F0L 0!(@A  %0\n' \
 69 |         'M        @ %N9@, 86UZ;BTR,#$W,3(S,5]L86(N>&UL4$L! A0#%     @\n' \
 70 |         'M4*5!3*U*Q:W#O0  U=\) !4              ( !!9,$ &%M>FXM,C Q-S$R\n' \
 71 |         '@,S%?<)E+GAM;%!+!08     !@ & (H!  #[4 4    !\n' \
 72 |         '\n' \
 73 |         'end\n' \
 74 |         '</TEXT>\n'
 75 |     real_compressed_text = '<SEC-DOCUMENT>0002014754-18-050402.txt : 20180202\n' \
 76 |         '<SEC-HEADER>00002014754-18-050402.hdr.sgml : 20180202\n' \
 77 |         '<ACCEPTANCE-DATETIME>20180201204115\n' \
 78 |         'ACCESSION NUMBER:		0002014754-18-050402\n' \
 79 |         'CONFORMED SUBMISSION TYPE:	10-K\n' \
 80 |         'PUBLIC DOCUMENT COUNT:		110\n' \
 81 |         'CONFORMED PERIOD OF REPORT:	20171231\n' \
 82 |         'FILED AS OF DATE:		20180202\n' \
 83 |         'DATE AS OF CHANGE:		20180201\n' \
 84 |         '\n' \
 85 |         'FILER:\n' \
 86 |         '\n' \
 87 |         '	COMPANY DATA:	\n' \
 88 |         '		COMPANY CONFORMED NAME:			TEST\n' \
 89 |         '		CENTRAL INDEX KEY:			0001018724\n' \
 90 |         '		STANDARD INDUSTRIAL CLASSIFICATION:	RANDOM [2357234]\n' \
 91 |         '		IRS NUMBER:				91236464620\n' \
 92 |         '		STATE OF INCORPORATION:			DE\n' \
 93 |         '		FISCAL YEAR END:			1231\n' \
 94 |         '\n' \
 95 |         '	FILING VALUES:\n' \
 96 |         '		FORM TYPE:		10-K\n' \
 97 |         '		SEC ACT:		1934 Act\n' \
 98 |         '		SEC FILE NUMBER:	000-2225413\n' \
 99 |         '		FILM NUMBER:		13822526583969\n' \
100 |         '\n' \
101 |         '	BUSINESS ADDRESS:	\n' \
102 |         '		STREET 1:		422320 PLACE AVENUE\n' \
103 |         '		CITY:			SEATTLE\n' \
104 |         '		STATE:			WA\n' \
105 |         '		ZIP:			234234\n' \
106 |         '		BUSINESS PHONE:		306234534246600\n' \
107 |         '\n' \
108 |         '	MAIL ADDRESS:	\n' \
109 |         '		STREET 1:		422320 PLACE AVENUE\n' \
110 |         '		CITY:			SEATTLE\n' \
111 |         '		STATE:			WA\n' \
112 |         '		ZIP:			234234\n' \
113 |         '</SEC-HEADER>\n' \
114 |         '<DOCUMENT>{}</DOCUMENT>\n' \
115 |         '<DOCUMENT>{}</DOCUMENT>\n' \
116 |         '</SEC-DOCUMENT>\n'.format(ten_k_real_compressed_doc, excel_real_compressed_doc)
117 | 
118 |     fn_inputs = {
119 |         'text': real_compressed_text}
120 |     fn_correct_outputs = OrderedDict([
121 |         (
122 |             'extracted_docs', [ten_k_real_compressed_doc, excel_real_compressed_doc])])
123 | 
124 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
125 | 
126 | 
127 | @project_test
128 | def test_get_document_type(fn):
129 |     doc = '\n' \
130 |         '<TYPE>10-K\n' \
131 |         '<SEQUENCE>1\n' \
132 |         '<FILENAME>test-20171231x10k.htm\n' \
133 |         '<DESCRIPTION>10-K\n' \
134 |         '<TEXT>\n' \
135 |         '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
136 |         '...'
137 | 
138 |     fn_inputs = {
139 |         'doc': doc}
140 |     fn_correct_outputs = OrderedDict([
141 |         (
142 |             'doc_type', '10-k')])
143 | 
144 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
145 | 
146 | 
147 | @project_test
148 | def test_lemmatize_words(fn):
149 |     fn_inputs = {
150 |         'words': ['cow', 'running', 'jeep', 'swimmers', 'tackle', 'throw', 'driven']}
151 |     fn_correct_outputs = OrderedDict([
152 |         (
153 |             'lemmatized_words', ['cow', 'run', 'jeep', 'swimmers', 'tackle', 'throw', 'drive'])])
154 | 
155 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
156 | 
157 | 
158 | @project_test
159 | def test_get_bag_of_words(fn):
160 |     def sort_ndarray(array):
161 |         hashes = [hash(str(x)) for x in array]
162 |         sotred_indicies = sorted(range(len(hashes)), key=lambda k: hashes[k])
163 | 
164 |         return array[sotred_indicies]
165 | 
166 |     fn_inputs = {
167 |         'sentiment_words': pd.Series(['one', 'last', 'second']),
168 |         'docs': [
169 |             'this is a document',
170 |             'this document is the second document',
171 |             'last one']}
172 |     fn_correct_outputs = OrderedDict([
173 |         (
174 |             'bag_of_words', np.array([
175 |                 [0, 0, 0],
176 |                 [1, 0, 0],
177 |                 [0, 1, 1]]))])
178 | 
179 |     fn_out = fn(**fn_inputs)
180 |     assert_structure(fn_out, fn_correct_outputs['bag_of_words'], 'bag_of_words')
181 |     assert np.array_equal(sort_ndarray(fn_out.T), sort_ndarray(fn_correct_outputs['bag_of_words'].T)), \
182 |         'Wrong value for bag_of_words.\n' \
183 |         'INPUT docs:\n{}\n\n' \
184 |         'OUTPUT bag_of_words:\n{}\n\n' \
185 |         'A POSSIBLE CORRECT OUTPUT FOR bag_of_words:\n{}\n'\
186 |         .format(fn_inputs['docs'], fn_out, fn_correct_outputs['bag_of_words'])
187 | 
188 | 
189 | @project_test
190 | def test_get_jaccard_similarity(fn):
191 |     fn_inputs = {
192 |         'bag_of_words_matrix': np.array([
193 |                 [0, 1, 1, 0, 0, 0, 1],
194 |                 [0, 1, 2, 0, 1, 1, 1],
195 |                 [1, 0, 0, 1, 0, 0, 0]])}
196 |     fn_correct_outputs = OrderedDict([
197 |         (
198 |             'jaccard_similarities', [0.7142857142857143, 0.0])])
199 | 
200 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
201 | 
202 | 
203 | @project_test
204 | def test_get_tfidf(fn):
205 |     def sort_ndarray(array):
206 |         hashes = [hash(str(x)) for x in array]
207 |         sotred_indicies = sorted(range(len(hashes)), key=lambda k: hashes[k])
208 | 
209 |         return array[sotred_indicies]
210 | 
211 |     fn_inputs = {
212 |         'sentiment_words': pd.Series(['one', 'last', 'second']),
213 |         'docs': [
214 |             'this is a document',
215 |             'this document is the second document',
216 |             'last one']}
217 |     fn_correct_outputs = OrderedDict([
218 |         (
219 |             'tfidf', np.array([
220 |                 [0.0, 0.0, 0.0],
221 |                 [1.0, 0.0, 0.0],
222 |                 [0.0, 0.70710678, 0.70710678]]))])
223 | 
224 |     fn_out = fn(**fn_inputs)
225 |     assert_structure(fn_out, fn_correct_outputs['tfidf'], 'tfidf')
226 |     assert np.isclose(sort_ndarray(fn_out.T), sort_ndarray(fn_correct_outputs['tfidf'].T)).all(), \
227 |         'Wrong value for tfidf.\n' \
228 |         'INPUT docs:\n{}\n\n' \
229 |         'OUTPUT tfidf:\n{}\n\n' \
230 |         'A POSSIBLE CORRECT OUTPUT FOR tfidf:\n{}\n'\
231 |         .format(fn_inputs['docs'], fn_out, fn_correct_outputs['tfidf'])
232 | 
233 | 
234 | @project_test
235 | def test_get_cosine_similarity(fn):
236 |     fn_inputs = {
237 |         'tfidf_matrix': np.array([
238 |                 [0.0,           0.57735027, 0.57735027, 0.0,        0.0,        0.0,        0.57735027],
239 |                 [0.0,           0.32516555, 0.6503311,  0.0,        0.42755362, 0.42755362, 0.32516555],
240 |                 [0.70710678,    0.0,        0.0,        0.70710678, 0.0,        0.0,        0.0]])}
241 |     fn_correct_outputs = OrderedDict([
242 |         (
243 |             'cosine_similarities', [0.75093766927060945, 0.0])])
244 | 
245 |     assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
246 | 


--------------------------------------------------------------------------------