├── __init__.py ├── pnas-abstract.tar.gz ├── .gitignore ├── README.md ├── launch_profiler.py ├── inferencer.py ├── launch_test.py ├── launch_resume.py ├── launch_train.py └── variational_bayes.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pnas-abstract.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzhai/PyCTM/HEAD/pnas-abstract.tar.gz -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PyCTM 2 | ========== 3 | 4 | PyCTM is a Correlated Topic Modeling package, please download the latest version from our [GitHub repository](https://github.com/kzhai/PyCTM). 5 | 6 | Please send any bugs of problems to Ke Zhai (kzhai@umd.edu). 7 | 8 | Install and Build 9 | ---------- 10 | 11 | This package depends on many external python libraries, such as numpy, scipy and nltk. 
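For example, assuming a working pip installation, these dependencies can be installed with

    pip install numpy scipy nltk scikit-learn

Note that ```variational_bayes.py``` also imports ```sklearn``` (scikit-learn); exact version requirements are not pinned in this package.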
12 | 13 | Launch and Execute 14 | ---------- 15 | 16 | Assume the PyCTM package is downloaded under directory ```$PROJECT_SPACE/src/```, i.e., 17 | 18 | $PROJECT_SPACE/src/PyCTM 19 | 20 | To prepare the example dataset, 21 | 22 | tar zxvf pnas-abstract.tar.gz 23 | 24 | To launch PyCTM, first redirect to the directory of PyCTM source code, 25 | 26 | cd $PROJECT_SPACE/src/PyCTM 27 | 28 | and run the following command on example dataset, 29 | 30 | python -m launch_train --input_directory=./pnas-abstract --output_directory=./ --number_of_topics=10 --training_iterations=50 31 | 32 | The generic argument to run PyCTM is 33 | 34 | python -m launch_train --input_directory=$INPUT_DIRECTORY/$CORPUS_NAME --output_directory=$OUTPUT_DIRECTORY --number_of_topics=$NUMBER_OF_TOPICS --training_iterations=$NUMBER_OF_ITERATIONS 35 | 36 | You should be able to find the output at directory ```$OUTPUT_DIRECTORY/$CORPUS_NAME```. 37 | 38 | Under any circumstances, you may also get help information and usage hints by running the following command 39 | 40 | python -m launch_train --help 41 | -------------------------------------------------------------------------------- /launch_profiler.py: -------------------------------------------------------------------------------- 1 | import pickle, string, numpy, getopt, sys, random, time, re, pprint 2 | import datetime, os; 3 | 4 | import nltk; 5 | import numpy; 6 | import cProfile 7 | 8 | 9 | def main(): 10 | # parameter set 1 11 | input_directory = "./nips-abstract" 12 | 13 | input_directory = input_directory.rstrip("/"); 14 | # corpus_name = os.path.basename(input_directory); 15 | 16 | ''' 17 | output_directory = options.output_directory; 18 | if not os.path.exists(output_directory): 19 | os.mkdir(output_directory); 20 | output_directory = os.path.join(output_directory, corpus_name); 21 | if not os.path.exists(output_directory): 22 | os.mkdir(output_directory); 23 | ''' 24 | 25 | # Document 26 | train_docs_path = os.path.join(input_directory, 'train.dat') 27 | input_doc_stream = open(train_docs_path, 'r'); 28 | train_docs = []; 29 | for line in input_doc_stream: 30 | train_docs.append(line.strip().lower()); 31 | print("successfully load all training docs from %s..." % (os.path.abspath(train_docs_path))); 32 | 33 | # Vocabulary 34 | vocabulary_path = os.path.join(input_directory, 'voc.dat'); 35 | input_voc_stream = open(vocabulary_path, 'r'); 36 | vocab = []; 37 | for line in input_voc_stream: 38 | vocab.append(line.strip().lower().split()[0]); 39 | vocab = list(set(vocab)); 40 | print("successfully load all the words from %s..." 
% (os.path.abspath(vocabulary_path))); 41 | 42 | # parameter 2 43 | number_of_topics = 10; 44 | alpha_mu = 0; 45 | alpha_sigma = 1; 46 | alpha_beta = 1.0 / len(vocab); 47 | 48 | # parameter set 3 49 | training_iterations = 1; 50 | 51 | import variational_bayes 52 | ctm_inferencer = variational_bayes.VariationalBayes(); 53 | 54 | ctm_inferencer._initialize(train_docs, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 55 | 56 | for iteration in range(training_iterations): 57 | clock = time.time(); 58 | log_likelihood = ctm_inferencer.learning(); 59 | clock = time.time() - clock; 60 | 61 | # print 'training iteration %d finished in %f seconds: number-of-topics = %d, log-likelihood = %f' % (hdp._iteration_counter, clock, hdp._K, log_likelihood); 62 | 63 | # gamma_path = os.path.join(output_directory, 'gamma.txt'); 64 | # numpy.savetxt(gamma_path, hdp._document_topic_distribution); 65 | 66 | # topic_inactive_counts_path = os.path.join(output_directory, "topic_inactive_counts.txt"); 67 | # numpy.savetxt(topic_inactive_counts_path, hdp._topic_inactive_counts); 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /inferencer.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Ke Zhai (zhaike@cs.umd.edu) 3 | """ 4 | 5 | import time 6 | import numpy 7 | import scipy 8 | import nltk; 9 | 10 | def compute_dirichlet_expectation(dirichlet_parameter): 11 | if (len(dirichlet_parameter.shape) == 1): 12 | return scipy.special.psi(dirichlet_parameter) - scipy.special.psi(numpy.sum(dirichlet_parameter)) 13 | return scipy.special.psi(dirichlet_parameter) - scipy.special.psi(numpy.sum(dirichlet_parameter, 1))[:, numpy.newaxis] 14 | 15 | def parse_vocabulary(vocab): 16 | type_to_index = {}; 17 | index_to_type = {}; 18 | for word in set(vocab): 19 | index_to_type[len(index_to_type)] = word; 20 | type_to_index[word] = len(type_to_index); 21 | 22 | return type_to_index, index_to_type; 23 | 24 | class Inferencer(): 25 | """ 26 | """ 27 | def __init__(self, 28 | hyper_parameter_optimize_interval=10, 29 | ): 30 | 31 | self._hyper_parameter_optimize_interval = hyper_parameter_optimize_interval; 32 | # assert(self._hyper_parameter_optimize_interval>0); 33 | 34 | # self._local_parameter_iterations = local_parameter_iterations 35 | # assert(self._local_maximum_iteration>0) 36 | 37 | """ 38 | """ 39 | def _initialize(self, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta): 40 | self.parse_vocabulary(vocab); 41 | 42 | # initialize the size of the vocabulary, i.e. total number of distinct tokens. 43 | self._number_of_types = len(self._type_to_index) 44 | 45 | self._counter = 0; 46 | 47 | # initialize the total number of topics. 48 | self._number_of_topics = number_of_topics 49 | 50 | # initialize a K-dimensional vector, valued at 1/K. 
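        # The logistic normal prior N(alpha_mu, alpha_sigma) over the document-level
        # topic proportions is stored in one of two shapes, depending on whether a
        # diagonal covariance matrix is used:
        #   - diagonal: alpha_mu and alpha_sigma are K-dimensional vectors;
        #   - full: alpha_mu is a 1-by-K row vector, alpha_sigma is a K-by-K matrix,
        #     and its pseudo-inverse is cached for reuse in the per-document E-step.
        # For example, with number_of_topics=3, alpha_mu=0 and alpha_sigma=1, the full
        # branch below yields alpha_mu=[[0., 0., 0.]] and alpha_sigma=numpy.eye(3).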
51 | if self._diagonal_covariance_matrix: 52 | self._alpha_mu = numpy.zeros(self._number_of_topics) + alpha_mu; 53 | self._alpha_sigma = numpy.zeros(self._number_of_topics) + alpha_sigma; 54 | else: 55 | self._alpha_mu = numpy.zeros((1, self._number_of_topics)) + alpha_mu; 56 | self._alpha_sigma = numpy.eye(self._number_of_topics) * alpha_sigma; 57 | self._alpha_sigma_inv = numpy.linalg.pinv(self._alpha_sigma); 58 | 59 | self._alpha_beta = numpy.zeros(self._number_of_types) + alpha_beta; 60 | 61 | def parse_vocabulary(self, vocab): 62 | self._type_to_index = {}; 63 | self._index_to_type = {}; 64 | for word in set(vocab): 65 | self._index_to_type[len(self._index_to_type)] = word; 66 | self._type_to_index[word] = len(self._type_to_index); 67 | 68 | self._vocab = list(self._type_to_index.keys()); 69 | 70 | def parse_data(self): 71 | raise NotImplementedError; 72 | 73 | """ 74 | """ 75 | def learning(self): 76 | raise NotImplementedError; 77 | 78 | """ 79 | """ 80 | def inference(self): 81 | raise NotImplementedError; 82 | 83 | def export_beta(self, exp_beta_path, top_display=-1): 84 | raise NotImplementedError; 85 | 86 | if __name__ == "__main__": 87 | raise NotImplementedError; 88 | -------------------------------------------------------------------------------- /launch_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pickle, getopt, sys, time, re 3 | import datetime, os; 4 | 5 | import scipy.io; 6 | import nltk; 7 | import numpy; 8 | import optparse; 9 | 10 | def parse_args(): 11 | parser = optparse.OptionParser() 12 | parser.set_defaults(# parameter set 1 13 | input_directory=None, 14 | model_directory=None, 15 | snapshot_index=-1, 16 | ) 17 | # parameter set 1 18 | parser.add_option("--input_directory", type="string", dest="input_directory", 19 | help="input directory [None]"); 20 | parser.add_option("--model_directory", type="string", dest="model_directory", 21 | help="model directory [None]"); 22 | parser.add_option("--snapshot_index", type="int", dest="snapshot_index", 23 | help="snapshot index [-: evaluate on all available snapshots]"); 24 | 25 | (options, args) = parser.parse_args(); 26 | return options; 27 | 28 | def main(): 29 | options = parse_args(); 30 | 31 | # parameter set 1 32 | # assert(options.input_corpus_name!=None); 33 | assert(options.input_directory != None); 34 | assert(options.model_directory != None); 35 | 36 | input_directory = options.input_directory; 37 | input_directory = input_directory.rstrip("/"); 38 | input_corpus_name = os.path.basename(input_directory); 39 | 40 | model_directory = options.model_directory; 41 | model_directory = model_directory.rstrip("/"); 42 | if not os.path.exists(model_directory): 43 | sys.stderr.write("error: model directory %s does not exist...\n" % (os.path.abspath(model_directory))); 44 | return; 45 | corpus_directory = os.path.split(os.path.abspath(model_directory))[0]; 46 | model_corpus_name = os.path.split(os.path.abspath(corpus_directory))[1] 47 | if input_corpus_name != model_corpus_name: 48 | sys.stderr.write("error: corpus name does not match for input (%s) and model (%s)...\n" % (input_corpus_name, model_corpus_name)); 49 | return; 50 | 51 | snapshot_index = options.snapshot_index; 52 | 53 | print("========== ========== ========== ========== ==========") 54 | # parameter set 1 55 | print("model_directory=" + model_directory) 56 | print("input_directory=" + input_directory) 57 | print("corpus_name=" + input_corpus_name) 58 | print("snapshot_index=" + 
str(snapshot_index)); 59 | print("========== ========== ========== ========== ==========") 60 | 61 | # Document 62 | test_docs_path = os.path.join(input_directory, 'test.dat') 63 | input_doc_stream = open(test_docs_path, 'r'); 64 | test_docs = []; 65 | for line in input_doc_stream: 66 | test_docs.append(line.strip().lower()); 67 | print("successfully load all testing docs from %s..." % (os.path.abspath(test_docs_path))); 68 | 69 | if snapshot_index >= 0: 70 | input_snapshot_path = os.path.join(model_directory, ("model-%d" % (snapshot_index))) 71 | if not os.path.exists(input_snapshot_path): 72 | sys.stderr.write("error: model snapshot %s does not exist...\n" % (os.path.abspath(input_snapshot_path))); 73 | return; 74 | 75 | output_lambda_path = os.path.join(model_directory, "test-lambda-%d" % snapshot_index); 76 | output_nu_square_path = os.path.join(model_directory, "test-nu_square-%d" % snapshot_index); 77 | 78 | evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path) 79 | else: 80 | for model_snapshot in os.listdir(model_directory): 81 | if not model_snapshot.startswith("model-"): 82 | continue; 83 | 84 | snapshot_index = int(model_snapshot.split("-")[-1]); 85 | 86 | input_snapshot_path = os.path.join(model_directory, model_snapshot); 87 | output_lambda_path = os.path.join(model_directory, "test-lambda-%d" % snapshot_index); 88 | output_nu_square_path = os.path.join(model_directory, "test-nu_square-%d" % snapshot_index); 89 | 90 | evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path) 91 | 92 | def evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path): 93 | # import hybrid, monte_carlo, variational_bayes; 94 | lda_inferencer = pickle.load(open(input_snapshot_path, "rb")); 95 | # print 'successfully load model snapshot %s...' 
% (os.path.abspath(input_snapshot_path)); 96 | 97 | log_likelihood, lambda_values, nu_square_values = lda_inferencer.inference(test_docs); 98 | print("held-out likelihood of snapshot %s is %g" % (os.path.abspath(input_snapshot_path), log_likelihood)); 99 | numpy.savetxt(output_lambda_path, lambda_values); 100 | numpy.savetxt(output_nu_square_path, nu_square_values); 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /launch_resume.py: -------------------------------------------------------------------------------- 1 | import pickle; 2 | import optparse 3 | import string, numpy, getopt, sys, random, time, re, pprint 4 | import datetime, os; 5 | 6 | import numpy; 7 | import shutil 8 | 9 | # model_settings_pattern = re.compile('\d+-\d+-ctm_inferencer-I(?P\d+)-S(?P\d+)-aa(?P[\d\.]+)(-smh(?P[\d]+))?(-sp(?P[\d]+)-mp(?P[\d]+))?'); 10 | model_settings_pattern = re.compile('\d+-\d+-ctm-I(?P\d+)-S(?P\d+)-K(?P\d+)-am(?P[\d\.]+)-as(?P[\d\.]+)-ab(?P[\d\.]+)'); 11 | 12 | def parse_args(): 13 | parser = optparse.OptionParser() 14 | parser.set_defaults(# parameter set 1 15 | # input_file=None, 16 | model_directory=None, 17 | snapshot_index=-1, 18 | 19 | # parameter set 2 20 | output_directory=None, 21 | training_iterations=-1, 22 | snapshot_interval=-1, 23 | ) 24 | # parameter set 1 25 | # parser.add_option("--input_file", type="string", dest="input_file", 26 | # help="input directory [None]"); 27 | # parser.add_option("--input_directory", type="string", dest="input_directory", 28 | # help="input directory [None]"); 29 | parser.add_option("--model_directory", type="string", dest="model_directory", 30 | help="model directory [None]"); 31 | parser.add_option("--snapshot_index", type="int", dest="snapshot_index", 32 | help="snapshot index [-1]"); 33 | # parser.add_option("--training_iterations", type="int", dest="training_iterations", 34 | # help="number of training iterations [1000]"); 35 | # parser.add_option("--dataset_name", type="string", dest="dataset_name", 36 | # help="the corpus name [None]"); 37 | 38 | # parameter set 2 39 | parser.add_option("--output_directory", type="string", dest="output_directory", 40 | help="output directory [None]"); 41 | # parser.add_option("--alpha_alpha", type="float", dest="alpha_alpha", 42 | # help="hyper-parameter for Dirichlet process of cluster [1]") 43 | # parser.add_option("--alpha_kappa", type="float", dest="alpha_kappa", 44 | # help="hyper-parameter for top level Dirichlet process of distribution over topics [1]") 45 | # parser.add_option("--alpha_nu", type="float", dest="alpha_nu", 46 | # help="hyper-parameter for bottom level Dirichlet process of distribution over topics [1]") 47 | parser.add_option("--training_iterations", type="int", dest="training_iterations", 48 | help="number of training iterations [-1]"); 49 | parser.add_option("--snapshot_interval", type="int", dest="snapshot_interval", 50 | help="snapshot interval [-1 (default): remain unchanged]"); 51 | 52 | (options, args) = parser.parse_args(); 53 | return options; 54 | 55 | def main(): 56 | options = parse_args(); 57 | 58 | assert(options.model_directory != None); 59 | model_directory = options.model_directory; 60 | 61 | if not os.path.exists(model_directory): 62 | sys.stderr.write("model directory %s not exists...\n" % (model_directory)); 63 | return; 64 | model_directory = model_directory.rstrip("/"); 65 | model_settings = os.path.basename(model_directory); 66 | 67 | assert options.snapshot_index > 0 68 | 
snapshot_index = options.snapshot_index; 69 | 70 | # load the existing model 71 | model_snapshot_file_path = os.path.join(model_directory, "model-%d" % snapshot_index); 72 | if not os.path.exists(model_snapshot_file_path): 73 | sys.stderr.write("error: model snapshot file unfound %s...\n" % (model_snapshot_file_path)); 74 | return; 75 | 76 | ctm_inferencer = pickle.load(open(model_snapshot_file_path, "rb")); 77 | print('successfully load model snapshot %s...' % (os.path.join(model_directory, "model-%d" % snapshot_index))); 78 | 79 | # set the resume options 80 | matches = re.match(model_settings_pattern, model_settings); 81 | 82 | # training_iterations = int(matches.group('iteration')); 83 | training_iterations = options.training_iterations; 84 | assert training_iterations > snapshot_index; 85 | if options.snapshot_interval == -1: 86 | snapshot_interval = int(matches.group('snapshot')); 87 | else: 88 | snapshot_interval = options.snapshot_interval; 89 | number_of_topics = int(matches.group('topic')); 90 | alpha_mu = float(matches.group('alpha_mu')); 91 | alpha_sigma = float(matches.group('alpha_sigma')); 92 | alpha_beta = float(matches.group('alpha_beta')); 93 | 94 | now = datetime.datetime.now(); 95 | suffix = now.strftime("%y%m%d-%H%M%S") + ""; 96 | suffix += "-%s" % ("ctm"); 97 | suffix += "-I%d" % (training_iterations); 98 | suffix += "-S%d" % (snapshot_interval); 99 | suffix += "-K%g" % (number_of_topics); 100 | suffix += "-am%g" % (alpha_mu); 101 | suffix += "-as%g" % (alpha_sigma); 102 | suffix += "-ab%g" % (alpha_beta); 103 | 104 | assert options.output_directory != None; 105 | output_directory = options.output_directory; 106 | output_directory = output_directory.rstrip("/"); 107 | output_directory = os.path.join(output_directory, suffix); 108 | assert (not os.path.exists(os.path.abspath(output_directory))); 109 | os.mkdir(os.path.abspath(output_directory)); 110 | 111 | shutil.copy(model_snapshot_file_path, os.path.join(output_directory, "model-" + str(snapshot_index))); 112 | shutil.copy(model_snapshot_file_path, os.path.join(output_directory, "exp_beta-" + str(snapshot_index))); 113 | 114 | for iteration in range(snapshot_index, training_iterations): 115 | # clock = time.time(); 116 | log_likelihood = ctm_inferencer.learning(); 117 | # clock = time.time()-clock; 118 | # print 'training iteration %d finished in %f seconds: number-of-clusters = %d, log-likelihood = %f' % (dpgm._iteration_counter, clock, dpgm._K, log_likelihood); 119 | 120 | if ((ctm_inferencer._counter) % snapshot_interval == 0): 121 | ctm_inferencer.export_beta(os.path.join(output_directory, 'exp_beta-' + str(ctm_inferencer._counter))); 122 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 123 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 124 | 125 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 126 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /launch_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pickle, getopt, sys, time, re 3 | import datetime, os; 4 | 5 | import scipy.io; 6 | import nltk; 7 | import numpy; 8 | import optparse; 9 | 10 | def parse_args(): 11 | parser = optparse.OptionParser() 12 | parser.set_defaults(# parameter set 1 13 | input_directory=None, 14 | 
output_directory=None, 15 | # dictionary=None, 16 | 17 | # parameter set 2 18 | training_iterations=-1, 19 | snapshot_interval=10, 20 | number_of_topics=-1, 21 | 22 | # parameter set 3 23 | alpha_mu=0., 24 | alpha_sigma=1, 25 | alpha_beta=-1, 26 | 27 | # parameter set 4 28 | optimization_method=None, 29 | number_of_processes=1, 30 | diagonal_covariance_matrix=False, 31 | # inference_mode=-1, 32 | ) 33 | # parameter set 1 34 | parser.add_option("--input_directory", type="string", dest="input_directory", 35 | help="input directory [None]"); 36 | parser.add_option("--output_directory", type="string", dest="output_directory", 37 | help="output directory [None]"); 38 | # parser.add_option("--corpus_name", type="string", dest="corpus_name", 39 | # help="the corpus name [None]") 40 | # parser.add_option("--dictionary", type="string", dest="dictionary", 41 | # help="the dictionary file [None]") 42 | 43 | # parameter set 2 44 | parser.add_option("--number_of_topics", type="int", dest="number_of_topics", 45 | help="total number of topics [-1]"); 46 | parser.add_option("--training_iterations", type="int", dest="training_iterations", 47 | help="total number of iterations [-1]"); 48 | parser.add_option("--snapshot_interval", type="int", dest="snapshot_interval", 49 | help="snapshot interval [10]"); 50 | 51 | # parameter set 3 52 | parser.add_option("--alpha_mu", type="float", dest="alpha_mu", 53 | help="hyper-parameter for logistic normal distribution of topic [0.0]") 54 | parser.add_option("--alpha_sigma", type="float", dest="alpha_sigma", 55 | help="hyper-parameter for logistic normal distribution of topic [1.0]") 56 | parser.add_option("--alpha_beta", type="float", dest="alpha_beta", 57 | help="hyper-parameter for Dirichlet distribution of vocabulary [1.0/number_of_types]") 58 | 59 | # parameter set 4 60 | parser.add_option("--optimization_method", type="string", dest="optimization_method", 61 | help="optimization method for logistic normal distribution"); 62 | parser.add_option("--number_of_processes", type="int", dest="number_of_processes", 63 | help="number of processes [1]") 64 | 65 | # parser.add_option("--diagonal_covariance_matrix", action="store_true", dest="diagonal_covariance_matrix", 66 | # help="diagonal covariance matrix"); 67 | # parser.add_option("--inference_mode", type="int", dest="inference_mode", 68 | # help="inference mode [ " + 69 | # "0: hybrid inference, " + 70 | # "1: monte carlo, " + 71 | # "2: variational bayes " + 72 | # "]"); 73 | # parser.add_option("--inference_mode", action="store_true", dest="inference_mode", 74 | # help="run latent Dirichlet allocation in lda mode"); 75 | 76 | (options, args) = parser.parse_args(); 77 | return options; 78 | 79 | def main(): 80 | options = parse_args(); 81 | 82 | # parameter set 2 83 | assert(options.number_of_topics > 0); 84 | number_of_topics = options.number_of_topics; 85 | assert(options.training_iterations > 0); 86 | training_iterations = options.training_iterations; 87 | assert(options.snapshot_interval > 0); 88 | if options.snapshot_interval > 0: 89 | snapshot_interval = options.snapshot_interval; 90 | 91 | # parameter set 4 92 | optimization_method = options.optimization_method; 93 | if optimization_method == None: 94 | optimization_method = "L-BFGS-B"; 95 | number_of_processes = options.number_of_processes; 96 | if number_of_processes <= 0: 97 | sys.stderr.write("invalid setting for number_of_processes, adjust to 1...\n"); 98 | number_of_processes = 1; 99 | # diagonal_covariance_matrix = options.diagonal_covariance_matrix; 
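    # Note: optimization_method is handed to VariationalBayes and used as the
    # method= argument of scipy.optimize.minimize, so it should name a solver
    # scipy accepts, e.g. "L-BFGS-B" (the default here), "CG", or "Newton-CG";
    # an unrecognized name raises a ValueError inside scipy.
    # Example invocation with an explicit solver:
    #   python -m launch_train --input_directory=./pnas-abstract --output_directory=./ \
    #          --number_of_topics=10 --training_iterations=50 --optimization_method=L-BFGS-B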
100 | 101 | # parameter set 1 102 | # assert(options.corpus_name!=None); 103 | assert(options.input_directory != None); 104 | assert(options.output_directory != None); 105 | 106 | input_directory = options.input_directory; 107 | input_directory = input_directory.rstrip("/"); 108 | corpus_name = os.path.basename(input_directory); 109 | 110 | output_directory = options.output_directory; 111 | if not os.path.exists(output_directory): 112 | os.mkdir(output_directory); 113 | output_directory = os.path.join(output_directory, corpus_name); 114 | if not os.path.exists(output_directory): 115 | os.mkdir(output_directory); 116 | 117 | # Document 118 | train_docs_path = os.path.join(input_directory, 'train.dat') 119 | input_doc_stream = open(train_docs_path, 'r'); 120 | train_docs = []; 121 | for line in input_doc_stream: 122 | train_docs.append(line.strip().lower()); 123 | print("successfully load all training docs from %s..." % (os.path.abspath(train_docs_path))); 124 | 125 | # Vocabulary 126 | vocabulary_path = os.path.join(input_directory, 'voc.dat'); 127 | input_voc_stream = open(vocabulary_path, 'r'); 128 | vocab = []; 129 | for line in input_voc_stream: 130 | vocab.append(line.strip().lower().split()[0]); 131 | vocab = list(set(vocab)); 132 | print("successfully load all the words from %s..." % (os.path.abspath(vocabulary_path))); 133 | 134 | # parameter set 3 135 | alpha_mu = options.alpha_mu; 136 | # assert(options.alpha_sigma>0); 137 | alpha_sigma = options.alpha_sigma; 138 | if alpha_sigma <= 0: 139 | # alpha_sigma = 1.0/number_of_topics; 140 | alpha_sigma = 1.0 141 | assert(alpha_sigma > 0); 142 | alpha_beta = options.alpha_beta; 143 | if alpha_beta <= 0: 144 | alpha_beta = 1.0 / len(vocab); 145 | 146 | # create output directory 147 | now = datetime.datetime.now(); 148 | suffix = now.strftime("%y%m%d-%H%M%S") + ""; 149 | suffix += "-%s" % ("ctm"); 150 | suffix += "-I%d" % (training_iterations); 151 | suffix += "-S%d" % (snapshot_interval); 152 | suffix += "-K%d" % (number_of_topics); 153 | suffix += "-am%g" % (alpha_mu); 154 | suffix += "-as%g" % (alpha_sigma); 155 | suffix += "-ab%g" % (alpha_beta); 156 | if optimization_method != None: 157 | suffix += "-%s" % (optimization_method.replace("-", "_")); 158 | # suffix += "-DCM%s" % (diagonal_covariance_matrix); 159 | # suffix += "-%s" % (resample_topics); 160 | # suffix += "-%s" % (hash_oov_words); 161 | suffix += "/"; 162 | 163 | output_directory = os.path.join(output_directory, suffix); 164 | os.mkdir(os.path.abspath(output_directory)); 165 | 166 | # dict_file = options.dictionary; 167 | # if dict_file != None: 168 | # dict_file = dict_file.strip(); 169 | 170 | # store all the options to a file 171 | options_output_file = open(output_directory + "option.txt", 'w'); 172 | # parameter set 1 173 | options_output_file.write("input_directory=" + input_directory + "\n"); 174 | options_output_file.write("corpus_name=" + corpus_name + "\n"); 175 | # options_output_file.write("vocabulary_path=" + str(dict_file) + "\n"); 176 | # parameter set 2 177 | options_output_file.write("training_iterations=%d\n" % (training_iterations)); 178 | options_output_file.write("snapshot_interval=" + str(snapshot_interval) + "\n"); 179 | options_output_file.write("number_of_topics=" + str(number_of_topics) + "\n"); 180 | # parameter set 3 181 | options_output_file.write("alpha_mu=" + str(alpha_mu) + "\n"); 182 | options_output_file.write("alpha_sigma=" + str(alpha_sigma) + "\n"); 183 | options_output_file.write("alpha_beta=" + str(alpha_beta) + "\n"); 184 | # 
parameter set 4 185 | options_output_file.write("optimization_method=%s\n" % (optimization_method)); 186 | options_output_file.write("number_of_processes=%d\n" % (number_of_processes)); 187 | # options_output_file.write("diagonal_covariance_matrix=%s\n" % (diagonal_covariance_matrix)); 188 | options_output_file.close() 189 | 190 | print("========== ========== ========== ========== ==========") 191 | # parameter set 1 192 | print("output_directory=" + output_directory) 193 | print("input_directory=" + input_directory) 194 | print("corpus_name=" + corpus_name) 195 | # print "dictionary file=" + str(dict_file) 196 | # parameter set 2 197 | print("training_iterations=%d" % (training_iterations)); 198 | print("snapshot_interval=" + str(snapshot_interval)); 199 | print("number_of_topics=" + str(number_of_topics)) 200 | # parameter set 3 201 | print("alpha_mu=" + str(alpha_mu)) 202 | print("alpha_sigma=" + str(alpha_sigma)) 203 | print("alpha_beta=" + str(alpha_beta)) 204 | # parameter set 4 205 | print("optimization_method=%s" % (optimization_method)) 206 | print("number_of_processes=%d" % (number_of_processes)) 207 | # print "diagonal_covariance_matrix=%s" % (diagonal_covariance_matrix) 208 | print("========== ========== ========== ========== ==========") 209 | 210 | ''' 211 | if inference_mode==0: 212 | import hybrid 213 | ctm_inferencer = hybrid.Hybrid(); 214 | elif inference_mode==1: 215 | import monte_carlo 216 | ctm_inferencer = monte_carlo.MonteCarlo(); 217 | elif inference_mode==2: 218 | import variational_bayes 219 | ctm_inferencer = variational_bayes.VariationalBayes(); 220 | else: 221 | sys.stderr.write("error: unrecognized inference mode %d...\n" % (inference_mode)); 222 | return; 223 | ''' 224 | 225 | import variational_bayes 226 | ctm_inferencer = variational_bayes.VariationalBayes(optimization_method); 227 | 228 | ctm_inferencer._initialize(train_docs, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 229 | 230 | for iteration in range(training_iterations): 231 | ctm_inferencer.learning(number_of_processes); 232 | 233 | if (ctm_inferencer._counter % snapshot_interval == 0): 234 | ctm_inferencer.export_beta(os.path.join(output_directory, 'exp_beta-' + str(ctm_inferencer._counter))); 235 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 236 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 237 | 238 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 239 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 240 | 241 | if __name__ == '__main__': 242 | main() 243 | -------------------------------------------------------------------------------- /variational_bayes.py: -------------------------------------------------------------------------------- 1 | """ 2 | VariationalBayes for Correlated Topic Models 3 | @author: Ke Zhai (zhaike@cs.umd.edu) 4 | """ 5 | 6 | import multiprocessing 7 | import nltk; 8 | import numpy 9 | import queue; 10 | import scipy; 11 | import scipy.misc; 12 | import scipy.optimize; 13 | import sklearn; 14 | import sklearn.covariance; 15 | import string; 16 | import sys; 17 | import time 18 | 19 | from inferencer import compute_dirichlet_expectation 20 | from inferencer import Inferencer 21 | 22 | # numpy.random.seed(1000); 23 | 24 | ''' 25 | def parse_data(corpus, vocab): 26 | doc_count = 0 27 | 28 | word_ids = []; 29 | word_cts = []; 30 | 31 | for document_line in corpus: 32 | #words = document_line.split(); 33 | document_word_dict = [] 
34 | for token in document_line.split(): 35 | if token in vocab: 36 | if token not in document_word_dict: 37 | document_word_dict[token] = 0; 38 | document_word_dict[token] += 1; 39 | else: 40 | continue; 41 | 42 | word_ids.append(numpy.array(document_word_dict.keys())); 43 | word_cts.append(numpy.array(document_word_dict.values())); 44 | 45 | doc_count+=1 46 | if doc_count%10000==0: 47 | print "successfully import %d documents..." % doc_count; 48 | 49 | print "successfully import %d documents..." % (doc_count); 50 | 51 | return word_ids, word_cts 52 | ''' 53 | 54 | 55 | class Process_E_Step_Queue(multiprocessing.Process): 56 | def __init__(self, 57 | task_queue, 58 | 59 | model_parameters, 60 | 61 | optimize_doc_lambda, 62 | # optimize_doc_nu_square, 63 | optimize_doc_nu_square_in_log_space, 64 | 65 | result_doc_parameter_queue, 66 | result_log_likelihood_queue, 67 | result_sufficient_statistics_queue, 68 | 69 | diagonal_covariance_matrix=False, 70 | 71 | parameter_iteration=10, 72 | parameter_converge_threshold=1e-3): 73 | multiprocessing.Process.__init__(self); 74 | 75 | self._task_queue = task_queue; 76 | self._result_doc_parameter_queue = result_doc_parameter_queue; 77 | self._result_log_likelihood_queue = result_log_likelihood_queue; 78 | self._result_sufficient_statistics_queue = result_sufficient_statistics_queue; 79 | 80 | self._parameter_iteration = parameter_iteration; 81 | 82 | self._diagonal_covariance_matrix = diagonal_covariance_matrix; 83 | if self._diagonal_covariance_matrix: 84 | (self._E_log_eta, self._alpha_mu, self._alpha_sigma) = model_parameters; 85 | else: 86 | (self._E_log_eta, self._alpha_mu, self._alpha_sigma, self._alpha_sigma_inv) = model_parameters; 87 | (self._number_of_topics, self._number_of_types) = self._E_log_eta.shape; 88 | 89 | if result_sufficient_statistics_queue != None: 90 | self._E_log_prob_eta = self._E_log_eta - scipy.special.logsumexp(self._E_log_eta, axis=1)[:, numpy.newaxis] 91 | 92 | self.optimize_doc_lambda = optimize_doc_lambda; 93 | # self.optimize_doc_nu_square = optimize_doc_nu_square; 94 | self.optimize_doc_nu_square_in_log_space = optimize_doc_nu_square_in_log_space; 95 | 96 | def run(self): 97 | document_log_likelihood = 0; 98 | words_log_likelihood = 0; 99 | 100 | # initialize a V-by-K matrix phi sufficient statistics 101 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 102 | 103 | # initialize a D-by-K matrix lambda and nu_square values 104 | # lambda_values = numpy.zeros((number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 105 | # nu_square_values = numpy.ones((number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 106 | 107 | while not self._task_queue.empty(): 108 | try: 109 | (doc_id, term_ids, term_counts) = self._task_queue.get_nowait(); 110 | 111 | except queue.Empty: 112 | continue; 113 | 114 | doc_lambda = numpy.zeros(self._number_of_topics); 115 | doc_nu_square = numpy.ones(self._number_of_topics); 116 | 117 | assert term_counts.shape == (1, len(term_ids)); 118 | # compute the total number of words 119 | doc_word_count = numpy.sum(term_counts); 120 | 121 | # initialize gamma for this document 122 | # doc_lambda = lambda_values[doc_id, :] 123 | # doc_nu_square = nu_square_values[doc_id, :] 124 | ''' 125 | if self._diagonal_covariance_matrix: 126 | doc_lambda = numpy.random.multivariate_normal(self._alpha_mu, numpy.diag(self._alpha_sigma)); 127 | doc_nu_square = numpy.copy(self._alpha_sigma); 128 | else: 129 | 
#doc_lambda = numpy.random.multivariate_normal(self._alpha_mu[0, :], self._alpha_sigma); 130 | #doc_nu_square = numpy.copy(numpy.diag(self._alpha_sigma)); 131 | doc_lambda = numpy.random.multivariate_normal(numpy.zeros(self._number_of_topics), numpy.eye(self._number_of_topics)) 132 | doc_nu_square = numpy.ones(self._number_of_topics) 133 | assert doc_lambda.shape==(self._number_of_topics,) 134 | assert doc_nu_square.shape==(self._number_of_topics,) 135 | ''' 136 | 137 | # term_ids = word_ids[doc_id]; 138 | # term_counts = word_cts[doc_id]; 139 | 140 | # update zeta in close form 141 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 142 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 143 | assert doc_zeta_factor.shape == (self._number_of_topics,) 144 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 145 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 146 | 147 | for local_parameter_iteration_index in range(self._parameter_iteration): 148 | # update phi in close form 149 | assert self._E_log_eta.shape == (self._number_of_topics, self._number_of_types); 150 | log_phi = self._E_log_eta[:, term_ids] + doc_lambda[:, numpy.newaxis] 151 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 152 | log_phi -= scipy.special.logsumexp(log_phi, axis=0)[numpy.newaxis, :]; 153 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 154 | 155 | # 156 | # 157 | # 158 | # 159 | # 160 | 161 | # update lambda 162 | sum_phi = numpy.exp(scipy.special.logsumexp(log_phi + numpy.log(term_counts), axis=1)) 163 | arguments = (doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count) 164 | doc_lambda = self.optimize_doc_lambda(doc_lambda, arguments); 165 | # print "update lambda of doc %d to %s" % (doc_id, doc_lambda) 166 | 167 | # 168 | # 169 | # 170 | # 171 | # 172 | 173 | # update zeta in close form 174 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 175 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 176 | assert doc_zeta_factor.shape == (self._number_of_topics,) 177 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 178 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 179 | 180 | # 181 | # 182 | # 183 | # 184 | # 185 | 186 | # update nu_square 187 | arguments = (doc_lambda, doc_zeta_factor, doc_word_count); 188 | # doc_nu_square = self.optimize_doc_nu_square(doc_nu_square, arguments); 189 | doc_nu_square = self.optimize_doc_nu_square_in_log_space(doc_nu_square, arguments); 190 | # print "update nu of doc %d to %s" % (doc_id, doc_nu_square) 191 | 192 | # 193 | # 194 | # 195 | # 196 | # 197 | 198 | # update zeta in close form 199 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 200 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 201 | assert doc_zeta_factor.shape == (self._number_of_topics,) 202 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 203 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 204 | 205 | # mean_change = numpy.mean(abs(gamma_update - lambda_values[doc_id, :])); 206 | # lambda_values[doc_id, :] = gamma_update; 207 | # if mean_change <= local_parameter_converge_threshold: 208 | # break; 209 | 210 | # print doc_id, local_parameter_iteration_index 211 | 212 | # print "process document %d..." 
% doc_id 213 | 214 | # document_log_likelihood -= 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 215 | if self._diagonal_covariance_matrix: 216 | document_log_likelihood -= 0.5 * numpy.sum(numpy.log(self._alpha_sigma)); 217 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square / self._alpha_sigma); 218 | document_log_likelihood -= 0.5 * numpy.sum((doc_lambda - self._alpha_mu) ** 2 / self._alpha_sigma); 219 | else: 220 | # document_log_likelihood -= 0.5 * numpy.log(numpy.linalg.det(self._alpha_sigma)); 221 | document_log_likelihood -= 0.5 * numpy.log(scipy.linalg.det(self._alpha_sigma) + 1e-30); 222 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 223 | document_log_likelihood -= 0.5 * numpy.dot( 224 | numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv), 225 | (self._alpha_mu - doc_lambda[numpy.newaxis, :]).T); 226 | 227 | document_log_likelihood += numpy.sum(numpy.sum(numpy.exp(log_phi) * term_counts, axis=1) * doc_lambda); 228 | # use the fact that doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)), to cancel the factors 229 | document_log_likelihood -= scipy.special.logsumexp(doc_lambda + 0.5 * doc_nu_square) * doc_word_count; 230 | 231 | document_log_likelihood += 0.5 * self._number_of_topics; 232 | # document_log_likelihood += 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 233 | document_log_likelihood += 0.5 * numpy.sum(numpy.log(doc_nu_square)); 234 | 235 | document_log_likelihood -= numpy.sum(numpy.exp(log_phi) * log_phi * term_counts); 236 | 237 | # Note: all terms including E_q[p(\eta | \beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates in M-step 238 | if self._result_sufficient_statistics_queue == None: 239 | # compute the phi terms 240 | words_log_likelihood += numpy.sum( 241 | numpy.exp(log_phi + numpy.log(term_counts)) * self._E_log_prob_eta[:, term_ids]); 242 | 243 | # lambda_values[doc_id, :] = doc_lambda; 244 | # nu_square_values[doc_id, :] = doc_nu_square; 245 | 246 | assert numpy.all(doc_nu_square > 0); 247 | 248 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 249 | assert term_counts.shape == (1, len(term_ids)) 250 | phi_sufficient_statistics[:, term_ids] += numpy.exp(log_phi + numpy.log(term_counts)); 251 | 252 | # if (doc_id+1) % 1000==0: 253 | # print "successfully processed %d documents..." 
% (doc_id+1); 254 | 255 | self._result_doc_parameter_queue.put((doc_id, doc_lambda, doc_nu_square)); 256 | 257 | self._task_queue.task_done(); 258 | 259 | if self._result_sufficient_statistics_queue == None: 260 | self._result_log_likelihood_queue.put(words_log_likelihood); 261 | else: 262 | self._result_log_likelihood_queue.put(document_log_likelihood); 263 | self._result_sufficient_statistics_queue.put(phi_sufficient_statistics); 264 | 265 | 266 | class VariationalBayes(Inferencer): 267 | """ 268 | """ 269 | 270 | def __init__(self, 271 | scipy_optimization_method=None, 272 | hessian_free_optimization=False, 273 | diagonal_covariance_matrix=False, 274 | hyper_parameter_optimize_interval=1, 275 | 276 | hessian_direction_approximation_epsilon=1e-6 277 | # hyper_parameter_iteration=100, 278 | # hyper_parameter_decay_factor=0.9, 279 | # hyper_parameter_maximum_decay=10, 280 | # hyper_parameter_converge_threshold=1e-6, 281 | 282 | # model_converge_threshold=1e-6 283 | ): 284 | Inferencer.__init__(self, hyper_parameter_optimize_interval); 285 | self._scipy_optimization_method = scipy_optimization_method; 286 | 287 | self._hessian_free_optimization = hessian_free_optimization; 288 | self._diagonal_covariance_matrix = diagonal_covariance_matrix; 289 | 290 | self._hessian_direction_approximation_epsilon = hessian_direction_approximation_epsilon; 291 | 292 | """ 293 | @param num_topics: the number of topics 294 | @param data: a defaultdict(dict) data type, first indexed by doc id then indexed by term id 295 | take note: words are not terms, they are repeatable and thus might be not unique 296 | """ 297 | 298 | def _initialize(self, corpus, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta): 299 | Inferencer._initialize(self, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 300 | 301 | self._corpus = corpus; 302 | self._parsed_corpus = self.parse_data(); 303 | 304 | # define the total number of document 305 | self._number_of_documents = len(self._parsed_corpus[0]); 306 | 307 | # initialize a D-by-K matrix gamma 308 | self._lambda = numpy.zeros((self._number_of_documents, self._number_of_topics)) 309 | self._nu_square = numpy.ones((self._number_of_documents, self._number_of_topics)) 310 | 311 | # initialize a V-by-K matrix beta, subject to the sum over every row is 1 312 | self._eta = numpy.random.gamma(100., 1. / 100., (self._number_of_topics, self._number_of_types)); 313 | 314 | def parse_data(self, corpus=None): 315 | if corpus == None: 316 | corpus = self._corpus; 317 | 318 | doc_count = 0 319 | 320 | word_ids = []; 321 | word_cts = []; 322 | 323 | for document_line in corpus: 324 | # words = document_line.split(); 325 | document_word_dict = {} 326 | for token in document_line.split(): 327 | if token not in self._type_to_index: 328 | continue; 329 | 330 | type_id = self._type_to_index[token]; 331 | if type_id not in document_word_dict: 332 | document_word_dict[type_id] = 0; 333 | document_word_dict[type_id] += 1; 334 | 335 | if len(document_word_dict) == 0: 336 | sys.stderr.write("warning: document collapsed during parsing"); 337 | continue; 338 | 339 | word_ids.append(numpy.array(list(document_word_dict.keys()))); 340 | word_cts.append(numpy.array(list(document_word_dict.values()))[numpy.newaxis, :]); 341 | 342 | doc_count += 1 343 | if doc_count % 10000 == 0: 344 | print("successfully parse %d documents..." % doc_count); 345 | 346 | assert len(word_ids) == len(word_cts); 347 | print("successfully parse %d documents..." 
% (doc_count)); 348 | 349 | return (word_ids, word_cts) 350 | 351 | # 352 | # 353 | # 354 | # 355 | # 356 | 357 | def e_step_process_queue(self, 358 | parsed_corpus=None, 359 | number_of_processes=0, 360 | local_parameter_iteration=10, 361 | local_parameter_converge_threshold=1e-3, 362 | ): 363 | if parsed_corpus == None: 364 | word_ids = self._parsed_corpus[0]; 365 | word_cts = self._parsed_corpus[1]; 366 | else: 367 | word_ids = parsed_corpus[0] 368 | word_cts = parsed_corpus[1]; 369 | 370 | assert len(word_ids) == len(word_cts); 371 | number_of_documents = len(word_ids); 372 | 373 | E_log_eta = compute_dirichlet_expectation(self._eta); 374 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 375 | # if parsed_corpus!=None: 376 | # E_log_prob_eta = E_log_eta-scipy.special.logsumexp(E_log_eta, axis=1)[:, numpy.newaxis] 377 | 378 | task_queue = multiprocessing.JoinableQueue() 379 | for (doc_id, word_id, word_ct) in zip(list(range(number_of_documents)), word_ids, word_cts): 380 | task_queue.put((doc_id, word_id, word_ct)); 381 | 382 | result_doc_parameter_queue = multiprocessing.Queue(); 383 | result_log_likelihood_queue = multiprocessing.Queue(); 384 | if parsed_corpus == None: 385 | result_sufficient_statistics_queue = multiprocessing.Queue(); 386 | else: 387 | result_sufficient_statistics_queue = None; 388 | 389 | if self._diagonal_covariance_matrix: 390 | e_step_parameters = (E_log_eta, self._alpha_mu, self._alpha_sigma); 391 | else: 392 | e_step_parameters = (E_log_eta, self._alpha_mu, self._alpha_sigma, self._alpha_sigma_inv); 393 | 394 | # start consumers 395 | if number_of_processes <= 1: 396 | number_of_processes = multiprocessing.cpu_count(); 397 | print('creating %d processes' % number_of_processes) 398 | processes_e_step = [Process_E_Step_Queue(task_queue, 399 | 400 | e_step_parameters, 401 | 402 | self.optimize_doc_lambda, 403 | # self.optimize_doc_nu_square, 404 | self.optimize_doc_nu_square_in_log_space, 405 | 406 | result_doc_parameter_queue, 407 | result_log_likelihood_queue, 408 | result_sufficient_statistics_queue, 409 | 410 | diagonal_covariance_matrix=self._diagonal_covariance_matrix, 411 | parameter_iteration=local_parameter_iteration, 412 | ) 413 | for process_index in range(number_of_processes)]; 414 | 415 | for process_e_step in processes_e_step: 416 | process_e_step.start(); 417 | 418 | task_queue.join(); 419 | 420 | task_queue.close(); 421 | 422 | # initialize a D-by-K matrix lambda and nu_square values 423 | lambda_values = numpy.zeros( 424 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 425 | nu_square_values = numpy.zeros( 426 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 427 | 428 | # for result_queue_element_index in xrange(result_doc_parameter_queue.qsize()): 429 | # while not result_doc_parameter_queue.empty(): 430 | for result_queue_element_index in range(number_of_documents): 431 | (doc_id, doc_lambda, doc_nu_square) = result_doc_parameter_queue.get(); 432 | 433 | assert doc_id >= 0 and doc_id < number_of_documents; 434 | lambda_values[doc_id, :] = doc_lambda; 435 | nu_square_values[doc_id, :] = doc_nu_square; 436 | 437 | log_likelihood = 0; 438 | # for result_queue_element_index in result_log_likelihood_queue.qsize(): 439 | # while not result_log_likelihood_queue.empty(): 440 | for result_queue_element_index in range(number_of_processes): 441 | log_likelihood += result_log_likelihood_queue.get(); 442 | # print "log_likelihood is", log_likelihood; 
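        # Each worker process puts exactly one aggregate log-likelihood value on
        # result_log_likelihood_queue (and, during training, one K-by-V sufficient
        # statistics matrix on result_sufficient_statistics_queue) before it exits,
        # so looping over range(number_of_processes) drains these queues; the
        # per-document lambda/nu_square results were already collected above,
        # keyed by doc_id.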
443 | 444 | if parsed_corpus == None: 445 | self._lambda = lambda_values; 446 | self._nu_square = nu_square_values; 447 | 448 | # initialize a K-by-V matrix phi sufficient statistics 449 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 450 | 451 | # for result_queue_element_index in xrange(result_sufficient_statistics_queue.qsize()): 452 | # while not result_sufficient_statistics_queue.empty(): 453 | for result_queue_element_index in range(number_of_processes): 454 | phi_sufficient_statistics += result_sufficient_statistics_queue.get(); 455 | # print "phi_sufficient_statistics", phi_sufficient_statistics 456 | 457 | for process_e_step in processes_e_step: 458 | process_e_step.join(); 459 | 460 | if parsed_corpus == None: 461 | return log_likelihood, phi_sufficient_statistics 462 | else: 463 | return log_likelihood, lambda_values, nu_square_values 464 | 465 | ''' 466 | if parsed_corpus==None: 467 | document_log_likelihood, lambda_values, nu_square_values, phi_sufficient_statistics = self.format_result_queues(number_of_documents, 468 | result_doc_parameter_queue, 469 | result_log_likelihood_queue, 470 | result_sufficient_statistics_queue 471 | ); 472 | 473 | self._lambda = lambda_values; 474 | self._nu_square = nu_square_values; 475 | 476 | return document_log_likelihood, phi_sufficient_statistics 477 | else: 478 | words_log_likelihood, lambda_values, nu_square_values = self.format_result_queues(number_of_documents, 479 | result_doc_parameter_queue, 480 | result_log_likelihood_queue, 481 | ); 482 | 483 | return words_log_likelihood, lambda_values, nu_square_values 484 | ''' 485 | 486 | def format_result_queues(self, number_of_documents, result_doc_parameter_queue, result_log_likelihood_queue, 487 | result_sufficient_statistics_queue=None): 488 | # initialize a D-by-K matrix lambda and nu_square values 489 | lambda_values = numpy.zeros( 490 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 491 | nu_square_values = numpy.zeros( 492 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 493 | 494 | counter = 0 495 | # for result_queue_element_index in xrange(result_doc_parameter_queue.qsize()): 496 | while not result_doc_parameter_queue.empty(): 497 | (doc_id, doc_lambda, doc_nu_square) = result_doc_parameter_queue.get(); 498 | 499 | assert doc_id >= 0 and doc_id < number_of_documents; 500 | lambda_values[doc_id, :] = doc_lambda; 501 | nu_square_values[doc_id, :] = doc_nu_square; 502 | 503 | counter += 1; 504 | assert counter == number_of_documents, counter; 505 | 506 | log_likelihood = 0; 507 | # for result_queue_element_index in result_log_likelihood_queue.qsize(): 508 | while not result_log_likelihood_queue.empty(): 509 | log_likelihood += result_log_likelihood_queue.get(); 510 | # print "log_likelihood is", log_likelihood; 511 | 512 | if result_sufficient_statistics_queue == None: 513 | return log_likelihood, lambda_values, nu_square_values; 514 | else: 515 | # initialize a K-by-V matrix phi sufficient statistics 516 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 517 | 518 | # for result_queue_element_index in xrange(result_sufficient_statistics_queue.qsize()): 519 | while not result_sufficient_statistics_queue.empty(): 520 | phi_sufficient_statistics += result_sufficient_statistics_queue.get(); 521 | # print "phi_sufficient_statistics", phi_sufficient_statistics 522 | 523 | return log_likelihood, lambda_values, nu_square_values, 
phi_sufficient_statistics 524 | 525 | # 526 | # 527 | # 528 | # 529 | # 530 | 531 | def e_step(self, 532 | parsed_corpus=None, 533 | local_parameter_iteration=10, 534 | local_parameter_converge_threshold=1e-3, 535 | ): 536 | if parsed_corpus == None: 537 | word_ids = self._parsed_corpus[0]; 538 | word_cts = self._parsed_corpus[1]; 539 | else: 540 | word_ids = parsed_corpus[0] 541 | word_cts = parsed_corpus[1]; 542 | 543 | assert len(word_ids) == len(word_cts); 544 | number_of_documents = len(word_ids); 545 | 546 | E_log_eta = compute_dirichlet_expectation(self._eta); 547 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 548 | if parsed_corpus != None: 549 | E_log_prob_eta = E_log_eta - scipy.special.logsumexp(E_log_eta, axis=1)[:, numpy.newaxis] 550 | 551 | document_log_likelihood = 0; 552 | words_log_likelihood = 0; 553 | 554 | # initialize a V_matrix-by-K matrix phi sufficient statistics 555 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 556 | 557 | # initialize a D-by-K matrix lambda and nu_square values 558 | lambda_values = numpy.zeros( 559 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 560 | nu_square_values = numpy.ones( 561 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 562 | 563 | # iterate over all documents 564 | for doc_id in numpy.random.permutation(number_of_documents): 565 | # initialize gamma for this document 566 | doc_lambda = lambda_values[doc_id, :] 567 | doc_nu_square = nu_square_values[doc_id, :] 568 | ''' 569 | if self._diagonal_covariance_matrix: 570 | doc_lambda = numpy.random.multivariate_normal(self._alpha_mu, numpy.diag(self._alpha_sigma)); 571 | doc_nu_square = numpy.copy(self._alpha_sigma); 572 | else: 573 | #doc_lambda = numpy.random.multivariate_normal(self._alpha_mu[0, :], self._alpha_sigma); 574 | #doc_nu_square = numpy.copy(numpy.diag(self._alpha_sigma)); 575 | doc_lambda = numpy.random.multivariate_normal(numpy.zeros(self._number_of_topics), numpy.eye(self._number_of_topics)) 576 | doc_nu_square = numpy.ones(self._number_of_topics) 577 | assert doc_lambda.shape==(self._number_of_topics,) 578 | assert doc_nu_square.shape==(self._number_of_topics,) 579 | ''' 580 | 581 | term_ids = word_ids[doc_id]; 582 | term_counts = word_cts[doc_id]; 583 | assert term_counts.shape == (1, len(term_ids)); 584 | # compute the total number of words 585 | doc_word_count = numpy.sum(word_cts[doc_id]); 586 | 587 | # update zeta in close form 588 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 589 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 590 | assert doc_zeta_factor.shape == (self._number_of_topics,) 591 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 592 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 593 | 594 | for local_parameter_iteration_index in range(local_parameter_iteration): 595 | # update phi in close form 596 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 597 | log_phi = E_log_eta[:, term_ids] + doc_lambda[:, numpy.newaxis] 598 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 599 | log_phi -= scipy.special.logsumexp(log_phi, axis=0)[numpy.newaxis, :]; 600 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 601 | 602 | # 603 | # 604 | # 605 | # 606 | # 607 | 608 | # update lambda 609 | sum_phi = numpy.exp(scipy.special.logsumexp(log_phi + 
numpy.log(term_counts), axis=1)) 610 | arguments = (doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count) 611 | doc_lambda = self.optimize_doc_lambda(doc_lambda, arguments); 612 | ''' 613 | if self._hessian_free_optimization: 614 | assert not self._diagonal_covariance_matrix 615 | doc_lambda = self.hessian_free_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count); 616 | else: 617 | doc_lambda = self.newton_method_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count); 618 | ''' 619 | # print "update lambda of doc %d to %s" % (doc_id, doc_lambda) 620 | 621 | # 622 | # 623 | # 624 | # 625 | # 626 | 627 | # update zeta in close form 628 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 629 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 630 | assert doc_zeta_factor.shape == (self._number_of_topics,) 631 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 632 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 633 | 634 | # 635 | # 636 | # 637 | # 638 | # 639 | 640 | # update nu_square 641 | arguments = (doc_lambda, doc_zeta_factor, doc_word_count); 642 | # doc_nu_square = self.optimize_doc_nu_square(doc_nu_square, arguments); 643 | doc_nu_square = self.optimize_doc_nu_square_in_log_space(doc_nu_square, arguments); 644 | ''' 645 | if self._hessian_free_optimization: 646 | assert not self._diagonal_covariance_matrix 647 | #doc_nu_square = self.hessian_free_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 648 | doc_nu_square = self.hessian_free_nu_square_in_log_space(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 649 | else: 650 | #doc_nu_square = self.newton_method_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 651 | doc_nu_square = self.newton_method_nu_square_in_log_space(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 652 | ''' 653 | # print "update nu of doc %d to %s" % (doc_id, doc_nu_square) 654 | 655 | # 656 | # 657 | # 658 | # 659 | # 660 | 661 | # update zeta in close form 662 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 663 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 664 | assert doc_zeta_factor.shape == (self._number_of_topics,) 665 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 666 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 667 | 668 | # mean_change = numpy.mean(abs(gamma_update - lambda_values[doc_id, :])); 669 | # lambda_values[doc_id, :] = gamma_update; 670 | # if mean_change <= local_parameter_converge_threshold: 671 | # break; 672 | 673 | # print doc_id, local_parameter_iteration_index 674 | 675 | # print "process document %d..." 
% doc_id 676 | 677 | # document_log_likelihood -= 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 678 | if self._diagonal_covariance_matrix: 679 | document_log_likelihood -= 0.5 * numpy.sum(numpy.log(self._alpha_sigma)); 680 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square / self._alpha_sigma); 681 | document_log_likelihood -= 0.5 * numpy.sum((doc_lambda - self._alpha_mu) ** 2 / self._alpha_sigma); 682 | else: 683 | # document_log_likelihood -= 0.5 * numpy.log(numpy.linalg.det(self._alpha_sigma)); 684 | document_log_likelihood -= 0.5 * numpy.log(scipy.linalg.det(self._alpha_sigma) + 1e-30); 685 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 686 | document_log_likelihood -= 0.5 * numpy.dot( 687 | numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv), 688 | (self._alpha_mu - doc_lambda[numpy.newaxis, :]).T); 689 | 690 | document_log_likelihood += numpy.sum(numpy.sum(numpy.exp(log_phi) * term_counts, axis=1) * doc_lambda); 691 | # use the fact that doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)), to cancel the factors 692 | document_log_likelihood -= scipy.special.logsumexp(doc_lambda + 0.5 * doc_nu_square) * doc_word_count; 693 | 694 | document_log_likelihood += 0.5 * self._number_of_topics; 695 | # document_log_likelihood += 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 696 | document_log_likelihood += 0.5 * numpy.sum(numpy.log(doc_nu_square)); 697 | 698 | document_log_likelihood -= numpy.sum(numpy.exp(log_phi) * log_phi * term_counts); 699 | 700 | # Note: all terms including E_q[p(\eta | \beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates in M-step 701 | if parsed_corpus != None: 702 | # compute the phi terms 703 | words_log_likelihood += numpy.sum( 704 | numpy.exp(log_phi + numpy.log(term_counts)) * E_log_prob_eta[:, term_ids]); 705 | 706 | lambda_values[doc_id, :] = doc_lambda; 707 | nu_square_values[doc_id, :] = doc_nu_square; 708 | 709 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 710 | assert term_counts.shape == (1, len(term_ids)) 711 | phi_sufficient_statistics[:, term_ids] += numpy.exp(log_phi + numpy.log(term_counts)); 712 | 713 | if (doc_id + 1) % 1000 == 0: 714 | print("successfully processed %d documents..." 
% (doc_id + 1)); 715 | 716 | assert numpy.all(nu_square_values > 0); 717 | 718 | if parsed_corpus == None: 719 | self._lambda = lambda_values; 720 | self._nu_square = nu_square_values; 721 | return document_log_likelihood, phi_sufficient_statistics 722 | else: 723 | return words_log_likelihood, lambda_values, nu_square_values 724 | 725 | # 726 | # 727 | # 728 | # 729 | # 730 | 731 | def optimize_doc_lambda(self, 732 | doc_lambda, 733 | arguments, 734 | ): 735 | 736 | optimize_result = scipy.optimize.minimize(self.f_doc_lambda, 737 | doc_lambda, 738 | args=arguments, 739 | method=self._scipy_optimization_method, 740 | jac=self.f_prime_doc_lambda, 741 | hess=self.f_hessian_doc_lambda, 742 | # hess=None, 743 | hessp=self.f_hessian_direction_doc_lambda, 744 | bounds=None, 745 | constraints=(), 746 | tol=None, 747 | callback=None, 748 | options={'disp': False} 749 | ) 750 | 751 | return optimize_result.x 752 | 753 | def f_doc_lambda(self, doc_lambda, *args): 754 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 755 | 756 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 757 | assert sum_phi.shape == (self._number_of_topics,) 758 | # if doc_lambda.shape==(1, self._number_of_topics): 759 | # doc_lambda = doc_lambda[0, :]; 760 | assert doc_lambda.shape == (self._number_of_topics,); 761 | 762 | exp_over_doc_zeta = scipy.special.logsumexp( 763 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 764 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 765 | 766 | function_doc_lambda = numpy.sum(sum_phi * doc_lambda); 767 | 768 | if self._diagonal_covariance_matrix: 769 | mean_adjustment = doc_lambda - self._alpha_mu 770 | assert mean_adjustment.shape == (self._number_of_topics,); 771 | function_doc_lambda += -0.5 * numpy.sum((mean_adjustment ** 2) / self._alpha_sigma) 772 | else: 773 | mean_adjustment = doc_lambda[numpy.newaxis, :] - self._alpha_mu; 774 | assert mean_adjustment.shape == (1, self._number_of_topics), ( 775 | doc_lambda.shape, mean_adjustment.shape, self._alpha_mu.shape) 776 | function_doc_lambda += -0.5 * numpy.dot(numpy.dot(mean_adjustment, self._alpha_sigma_inv), 777 | mean_adjustment.T) 778 | 779 | function_doc_lambda += -total_word_count * numpy.sum(exp_over_doc_zeta); 780 | 781 | return numpy.asscalar(-function_doc_lambda); 782 | 783 | def f_prime_doc_lambda(self, doc_lambda, *args): 784 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 785 | 786 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 787 | assert sum_phi.shape == (self._number_of_topics,) 788 | 789 | exp_over_doc_zeta = scipy.special.logsumexp( 790 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 791 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 792 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 793 | 794 | if self._diagonal_covariance_matrix: 795 | function_prime_doc_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 796 | else: 797 | function_prime_doc_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), 798 | self._alpha_sigma_inv)[0, :] 799 | 800 | function_prime_doc_lambda += sum_phi; 801 | function_prime_doc_lambda -= total_word_count * exp_over_doc_zeta; 802 | 803 | assert function_prime_doc_lambda.shape == (self._number_of_topics,); 804 | 805 | return numpy.asarray(-function_prime_doc_lambda); 806 | 807 | def f_hessian_doc_lambda(self, doc_lambda, *args): 808 | 
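        # f_doc_lambda, f_prime_doc_lambda and the two Hessian callbacks below hand the
        # (negated) lambda-dependent part of the per-document ELBO to scipy.optimize.minimize:
        #     f(lambda)       =  sum_phi . lambda
        #                        - 0.5 * (lambda - mu) Sigma^{-1} (lambda - mu)^T
        #                        - N * sum_k exp(lambda_k + 0.5 * nu^2_k) / zeta,
        #     grad f(lambda)  =  sum_phi + Sigma^{-1} (mu - lambda) - N * exp(lambda + 0.5 * nu^2) / zeta,
        # where N = total_word_count and the 1/zeta factor is evaluated through doc_zeta_factor
        # with a logsumexp for numerical stability.  With zeta held fixed, the Hessian is just
        # -Sigma^{-1} minus a diagonal term, and f_hessian_direction_doc_lambda instead
        # approximates the Hessian-vector product by a finite difference of the gradient,
        #     H v  ~=  (grad f(lambda + eps * v) - grad f(lambda)) / eps,
        # with eps = self._hessian_direction_approximation_epsilon.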
(doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 809 | exp_over_doc_zeta = scipy.special.logsumexp( 810 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 811 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 812 | 813 | if self._diagonal_covariance_matrix: 814 | function_hessian_doc_lambda = -1.0 / self._alpha_sigma 815 | function_hessian_doc_lambda -= total_word_count * exp_over_doc_zeta; 816 | else: 817 | function_hessian_doc_lambda = -self._alpha_sigma_inv; 818 | assert function_hessian_doc_lambda.shape == (self._number_of_topics, self._number_of_topics); 819 | function_hessian_doc_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta); 820 | assert function_hessian_doc_lambda.shape == (self._number_of_topics, self._number_of_topics); 821 | 822 | return numpy.asarray(-function_hessian_doc_lambda); 823 | 824 | def f_hessian_direction_doc_lambda(self, doc_lambda, direction_vector, *args): 825 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 826 | 827 | assert doc_lambda.shape == (self._number_of_topics,) 828 | assert doc_nu_square.shape == (self._number_of_topics,) 829 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 830 | assert direction_vector.shape == (self._number_of_topics,) 831 | 832 | log_exp_over_doc_zeta_a = scipy.special.logsumexp( 833 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - direction_vector[:, 834 | numpy.newaxis] * self._hessian_direction_approximation_epsilon - 0.5 * doc_nu_square[ 835 | :, 836 | numpy.newaxis], 837 | axis=1) 838 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 839 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 840 | assert log_exp_over_doc_zeta_a.shape == (self._number_of_topics,) 841 | assert log_exp_over_doc_zeta_b.shape == (self._number_of_topics,) 842 | 843 | # function_hessian_direction_doc_lambda = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 844 | function_hessian_direction_doc_lambda = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 845 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 846 | 847 | if self._diagonal_covariance_matrix: 848 | function_hessian_direction_doc_lambda += -direction_vector * self._hessian_direction_approximation_epsilon / self._alpha_sigma; 849 | else: 850 | function_hessian_direction_doc_lambda += -numpy.dot( 851 | direction_vector[numpy.newaxis, :] * self._hessian_direction_approximation_epsilon, 852 | self._alpha_sigma_inv)[0, :] 853 | assert function_hessian_direction_doc_lambda.shape == (self._number_of_topics,); 854 | 855 | function_hessian_direction_doc_lambda /= self._hessian_direction_approximation_epsilon; 856 | 857 | return numpy.asarray(-function_hessian_direction_doc_lambda) 858 | 859 | # 860 | # 861 | # 862 | # 863 | # 864 | 865 | def optimize_doc_nu_square(self, 866 | doc_nu_square, 867 | arguments, 868 | ): 869 | variable_bounds = tuple([(0, None)] * self._number_of_topics); 870 | 871 | optimize_result = scipy.optimize.minimize(self.f_doc_nu_square, 872 | doc_nu_square, 873 | args=arguments, 874 | method=self._scipy_optimization_method, 875 | jac=self.f_prime_doc_nu_square, 876 | hess=self.f_hessian_doc_nu_square, 877 | # hess=None, 878 | hessp=self.f_hessian_direction_doc_nu_square, 879 | bounds=variable_bounds, 880 | constraints=(), 881 | tol=None, 882 | callback=None, 883 | options={'disp': False} 884 
| ); 885 | 886 | return optimize_result.x; 887 | 888 | def f_doc_nu_square(self, doc_nu_square, *args): 889 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 890 | 891 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 892 | 893 | exp_over_doc_zeta = scipy.special.logsumexp( 894 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 895 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 896 | 897 | function_doc_nu_square = 0.5 * numpy.sum(numpy.log(doc_nu_square)); 898 | 899 | if self._diagonal_covariance_matrix: 900 | function_doc_nu_square += -0.5 * numpy.sum(doc_nu_square / self._alpha_sigma) 901 | else: 902 | function_doc_nu_square += -0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 903 | 904 | function_doc_nu_square += -total_word_count * numpy.sum(exp_over_doc_zeta); 905 | 906 | return numpy.asscalar(-function_doc_nu_square); 907 | 908 | def f_prime_doc_nu_square(self, doc_nu_square, *args): 909 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 910 | 911 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 912 | 913 | exp_over_doc_zeta = scipy.special.logsumexp( 914 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 915 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 916 | 917 | if self._diagonal_covariance_matrix: 918 | function_prime_doc_nu_square = -0.5 / self._alpha_sigma; 919 | else: 920 | function_prime_doc_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 921 | function_prime_doc_nu_square += 0.5 / doc_nu_square; 922 | function_prime_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 923 | 924 | return numpy.asarray(-function_prime_doc_nu_square); 925 | 926 | def f_hessian_doc_nu_square(self, doc_nu_square, *args): 927 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 928 | 929 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 930 | 931 | exp_over_doc_zeta = scipy.special.logsumexp( 932 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 933 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 934 | 935 | function_hessian_doc_nu_square = -0.5 / (doc_nu_square ** 2); 936 | function_hessian_doc_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 937 | 938 | function_hessian_doc_nu_square = numpy.diag(function_hessian_doc_nu_square); 939 | 940 | assert function_hessian_doc_nu_square.shape == (self._number_of_topics, self._number_of_topics); 941 | 942 | return numpy.asarray(-function_hessian_doc_nu_square); 943 | 944 | def f_hessian_direction_doc_nu_square(self, doc_nu_square, direction_vector, *args): 945 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 946 | 947 | assert direction_vector.shape == (self._number_of_topics,) 948 | 949 | # assert doc_lambda.shape==(self._number_of_topics,) 950 | # assert doc_nu_square.shape==(self._number_of_topics,) 951 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 952 | 953 | log_exp_over_doc_zeta_a = scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * ( 954 | doc_nu_square[:, numpy.newaxis] + direction_vector[:, 955 | numpy.newaxis] * self._hessian_direction_approximation_epsilon), 956 | axis=1) 957 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 958 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 959 | 960 | # 
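        # Same finite-difference idea as for lambda: perturb nu^2 by eps * direction_vector,
        # re-evaluate the relevant gradient terms, and divide the difference by
        # eps = self._hessian_direction_approximation_epsilon, so the Hessian-vector product
        # is approximated without ever forming the full Hessian.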
function_hessian_direction_doc_nu_square = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 961 | function_hessian_direction_doc_nu_square = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 962 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 963 | 964 | function_hessian_direction_doc_nu_square += 0.5 / ( 965 | doc_nu_square + self._hessian_direction_approximation_epsilon * direction_vector); 966 | function_hessian_direction_doc_nu_square -= 0.5 / (doc_nu_square) 967 | 968 | function_hessian_direction_doc_nu_square /= self._hessian_direction_approximation_epsilon; 969 | 970 | assert function_hessian_direction_doc_nu_square.shape == (self._number_of_topics,); 971 | 972 | return numpy.asarray(-function_hessian_direction_doc_nu_square); 973 | 974 | # 975 | # 976 | # 977 | # 978 | # 979 | 980 | def optimize_doc_nu_square_in_log_space(self, 981 | doc_nu_square, 982 | arguments, 983 | method_name=None 984 | ): 985 | log_doc_nu_square = numpy.log(doc_nu_square); 986 | 987 | optimize_result = scipy.optimize.minimize(self.f_log_doc_nu_square, 988 | log_doc_nu_square, 989 | args=arguments, 990 | method=method_name, 991 | jac=self.f_prime_log_doc_nu_square, 992 | hess=self.f_hessian_log_doc_nu_square, 993 | # hess=None, 994 | hessp=self.f_hessian_direction_log_doc_nu_square, 995 | bounds=None, 996 | constraints=(), 997 | tol=None, 998 | callback=None, 999 | options={'disp': False} 1000 | ); 1001 | 1002 | log_doc_nu_square_update = optimize_result.x 1003 | 1004 | return numpy.exp(log_doc_nu_square_update); 1005 | 1006 | def f_log_doc_nu_square(self, log_doc_nu_square, *args): 1007 | return self.f_doc_nu_square(numpy.exp(log_doc_nu_square), *args); 1008 | 1009 | def f_prime_log_doc_nu_square(self, log_doc_nu_square, *args): 1010 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1011 | 1012 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1013 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1014 | 1015 | exp_log_doc_nu_square = numpy.exp(log_doc_nu_square); 1016 | 1017 | exp_over_doc_zeta = scipy.special.logsumexp( 1018 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square[:, numpy.newaxis], axis=1) 1019 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1020 | 1021 | if self._diagonal_covariance_matrix: 1022 | function_prime_log_doc_nu_square = -0.5 * exp_log_doc_nu_square / self._alpha_sigma; 1023 | else: 1024 | function_prime_log_doc_nu_square = -0.5 * exp_log_doc_nu_square * numpy.diag(self._alpha_sigma_inv); 1025 | function_prime_log_doc_nu_square += 0.5 1026 | function_prime_log_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_log_doc_nu_square; 1027 | 1028 | assert function_prime_log_doc_nu_square.shape == (self._number_of_topics,); 1029 | 1030 | return numpy.asarray(-function_prime_log_doc_nu_square); 1031 | 1032 | def f_hessian_log_doc_nu_square(self, log_doc_nu_square, *args): 1033 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1034 | 1035 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1036 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1037 | 1038 | exp_doc_log_nu_square = numpy.exp(log_doc_nu_square); 1039 | 1040 | exp_over_doc_zeta = scipy.special.logsumexp( 1041 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1) 1042 | exp_over_doc_zeta = 
numpy.exp(-exp_over_doc_zeta); 1043 | 1044 | if self._diagonal_covariance_matrix: 1045 | function_hessian_log_doc_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma; 1046 | else: 1047 | function_hessian_log_doc_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv); 1048 | function_hessian_log_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square * ( 1049 | 1 + 0.5 * exp_doc_log_nu_square); 1050 | 1051 | function_hessian_log_doc_nu_square = numpy.diag(function_hessian_log_doc_nu_square); 1052 | 1053 | assert function_hessian_log_doc_nu_square.shape == (self._number_of_topics, self._number_of_topics); 1054 | 1055 | return numpy.asarray(-function_hessian_log_doc_nu_square); 1056 | 1057 | def f_hessian_direction_log_doc_nu_square(self, log_doc_nu_square, direction_vector, *args): 1058 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1059 | 1060 | # assert doc_lambda.shape==(self._number_of_topics,) 1061 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1062 | assert direction_vector.shape == (self._number_of_topics,) 1063 | 1064 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1065 | 1066 | exp_log_doc_nu_square = numpy.exp(log_doc_nu_square); 1067 | exp_log_doc_nu_square_epsilon_direction = numpy.exp( 1068 | log_doc_nu_square + direction_vector * self._hessian_direction_approximation_epsilon); 1069 | 1070 | log_exp_over_doc_zeta_epsilon_direction = scipy.special.logsumexp( 1071 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square_epsilon_direction[:, 1072 | numpy.newaxis], axis=1) 1073 | log_exp_over_doc_zeta = scipy.special.logsumexp( 1074 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square[:, numpy.newaxis], axis=1) 1075 | 1076 | # function_hessian_direction_log_doc_nu_square = 0.5 * total_word_count * numpy.exp(log_doc_nu_square - log_exp_over_doc_zeta) 1077 | # function_hessian_direction_log_doc_nu_square += - 0.5 * total_word_count * numpy.exp(log_doc_nu_square + direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction) 1078 | 1079 | function_hessian_direction_log_doc_nu_square = 1 - numpy.exp( 1080 | direction_vector * self._hessian_direction_approximation_epsilon - log_exp_over_doc_zeta_epsilon_direction + log_exp_over_doc_zeta) 1081 | function_hessian_direction_log_doc_nu_square *= 0.5 * total_word_count * numpy.exp( 1082 | log_doc_nu_square - log_exp_over_doc_zeta) 1083 | 1084 | if self._diagonal_covariance_matrix: 1085 | function_hessian_direction_log_doc_nu_square += 0.5 * ( 1086 | exp_log_doc_nu_square - exp_log_doc_nu_square_epsilon_direction) / self._alpha_sigma; 1087 | else: 1088 | function_hessian_direction_log_doc_nu_square += 0.5 * ( 1089 | exp_log_doc_nu_square - exp_log_doc_nu_square_epsilon_direction) * numpy.diag( 1090 | self._alpha_sigma_inv); 1091 | 1092 | function_hessian_direction_log_doc_nu_square /= self._hessian_direction_approximation_epsilon 1093 | 1094 | assert function_hessian_direction_log_doc_nu_square.shape == (self._number_of_topics,); 1095 | 1096 | return numpy.asarray(-function_hessian_direction_log_doc_nu_square); 1097 | 1098 | # 1099 | # 1100 | # 1101 | # 1102 | # 1103 | 1104 | def m_step(self, phi_sufficient_statistics): 1105 | # Note: all terms including E_q[p(\eta|\beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates 1106 | 1107 | # compute the beta terms 1108 | topic_log_likelihood = self._number_of_topics * 
(scipy.special.gammaln(numpy.sum(self._alpha_beta)) - numpy.sum( 1109 | scipy.special.gammaln(self._alpha_beta))); 1110 | # compute the eta terms 1111 | topic_log_likelihood += numpy.sum( 1112 | numpy.sum(scipy.special.gammaln(self._eta), axis=1) - scipy.special.gammaln(numpy.sum(self._eta, axis=1))); 1113 | 1114 | self._eta = phi_sufficient_statistics + self._alpha_beta 1115 | assert (self._eta.shape == (self._number_of_topics, self._number_of_types)); 1116 | 1117 | return topic_log_likelihood 1118 | 1119 | """ 1120 | """ 1121 | 1122 | def learning(self, number_of_processes=1): 1123 | self._counter += 1; 1124 | 1125 | clock_e_step = time.time(); 1126 | if number_of_processes == 1: 1127 | document_log_likelihood, phi_sufficient_statistics = self.e_step(); 1128 | else: 1129 | document_log_likelihood, phi_sufficient_statistics = self.e_step_process_queue(None, number_of_processes) 1130 | clock_e_step = time.time() - clock_e_step; 1131 | 1132 | clock_m_step = time.time(); 1133 | topic_log_likelihood = self.m_step(phi_sufficient_statistics); 1134 | clock_m_step = time.time() - clock_m_step; 1135 | 1136 | print(document_log_likelihood, topic_log_likelihood); 1137 | joint_log_likelihood = document_log_likelihood + topic_log_likelihood; 1138 | 1139 | print("e_step and m_step of iteration %d finished in %g and %g seconds respectively with log likelihood %g" % ( 1140 | self._counter, clock_e_step, clock_m_step, joint_log_likelihood)) 1141 | 1142 | clock_hyper_opt = time.time(); 1143 | if self._hyper_parameter_optimize_interval > 0 and self._counter % self._hyper_parameter_optimize_interval == 0: 1144 | self.optimize_hyperparameter(); 1145 | clock_hyper_opt = time.time() - clock_hyper_opt; 1146 | print("hyper-parameter optimization of iteration %d finished in %g seconds" % (self._counter, clock_hyper_opt)); 1147 | 1148 | # if abs((joint_log_likelihood - old_likelihood) / old_likelihood) < self._model_converge_threshold: 1149 | # print "model likelihood converged..." 
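        # Sketch of how a training run typically drives this method (construction and corpus
        # parsing live in launch_train.py / inferencer.py, so the setup below is illustrative
        # rather than the package's exact API):
        #
        #     # model = ...construct the CTM and feed it the parsed training corpus...
        #     for iteration in range(training_iterations):
        #         joint_log_likelihood = model.learning()
        #     model.export_beta(os.path.join(output_directory, "exp_beta"), top_display=20)
        #
        # Held-out documents instead go through inference(corpus) below, which re-runs the
        # e-step with parsed_corpus supplied and returns the words' log likelihood together
        # with the per-document lambda and nu^2 values.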
1150 | # break 1151 | # old_likelihood = joint_log_likelihood; 1152 | 1153 | return joint_log_likelihood 1154 | 1155 | def inference(self, corpus): 1156 | parsed_corpus = self.parse_data(corpus); 1157 | number_of_documents = len(parsed_corpus[0]); 1158 | 1159 | clock_e_step = time.time(); 1160 | document_log_likelihood, lambda_values, nu_square_values = self.e_step(parsed_corpus); 1161 | clock_e_step = time.time() - clock_e_step; 1162 | 1163 | return document_log_likelihood, lambda_values, nu_square_values 1164 | 1165 | def optimize_hyperparameter(self): 1166 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1167 | self._alpha_mu = numpy.mean(self._lambda, axis=0); 1168 | print("update hyper-parameter mu to %s" % self._alpha_mu); 1169 | 1170 | assert self._nu_square.shape == (self._number_of_documents, self._number_of_topics); 1171 | if self._diagonal_covariance_matrix: 1172 | self._alpha_sigma = numpy.mean(self._nu_square + (self._lambda - self._alpha_mu[numpy.newaxis, :]) ** 2, 1173 | axis=0); 1174 | print("update hyper-parameter sigma to %s" % self._alpha_sigma); 1175 | else: 1176 | self._alpha_mu = self._alpha_mu[numpy.newaxis, :]; 1177 | 1178 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1179 | self._alpha_sigma = numpy.copy(numpy.diag(numpy.mean(self._nu_square, axis=0))); 1180 | adjusted_lambda = self._lambda - self._alpha_mu; 1181 | assert adjusted_lambda.shape == (self._number_of_documents, self._number_of_topics); 1182 | self._alpha_sigma += numpy.dot(adjusted_lambda.T, adjusted_lambda) / self._number_of_documents; 1183 | 1184 | # self._alpha_sigma_inv = scipy.linalg.pinv(self._alpha_sigma); 1185 | self._alpha_sigma_inv = scipy.linalg.inv(self._alpha_sigma); 1186 | print("update hyper-parameter sigma to") 1187 | print("%s" % self._alpha_sigma); 1188 | 1189 | return 1190 | 1191 | """ 1192 | @param alpha_vector: a dict data type represents dirichlet prior, indexed by topic_id 1193 | @param alpha_sufficient_statistics: a dict data type represents alpha sufficient statistics for alpha updating, indexed by topic_id 1194 | """ 1195 | 1196 | def optimize_hyperparameter_old(self): 1197 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1198 | self._alpha_mu = numpy.mean(self._lambda, axis=0); 1199 | print("update hyper-parameter mu to %s" % self._alpha_mu); 1200 | 1201 | assert self._nu_square.shape == (self._number_of_documents, self._number_of_topics); 1202 | if self._diagonal_covariance_matrix: 1203 | self._alpha_sigma = numpy.mean(self._nu_square + (self._lambda - self._alpha_mu[numpy.newaxis, :]) ** 2, 1204 | axis=0); 1205 | print("update hyper-parameter sigma to %s" % self._alpha_sigma); 1206 | else: 1207 | self._alpha_mu = self._alpha_mu[numpy.newaxis, :]; 1208 | 1209 | self._alpha_sigma = sklearn.covariance.empirical_covariance(self._lambda, assume_centered=True); 1210 | 1211 | # self._alpha_sigma_inv = scipy.linalg.pinv(self._alpha_sigma); 1212 | self._alpha_sigma_inv = scipy.linalg.inv(self._alpha_sigma); 1213 | print("update hyper-parameter sigma to") 1214 | print("%s" % self._alpha_sigma); 1215 | 1216 | return 1217 | 1218 | def export_beta(self, exp_beta_path, top_display=-1): 1219 | output = open(exp_beta_path, 'w'); 1220 | E_log_eta = compute_dirichlet_expectation(self._eta); 1221 | for topic_index in range(self._number_of_topics): 1222 | output.write("==========\t%d\t==========\n" % (topic_index)); 1223 | 1224 | beta_probability = numpy.exp(E_log_eta[topic_index, :] - 
scipy.special.logsumexp(E_log_eta[topic_index, :])); 1225 | 1226 | i = 0; 1227 | for type_index in reversed(numpy.argsort(beta_probability)): 1228 | i += 1; 1229 | output.write("%s\t%g\n" % (self._index_to_type[type_index], beta_probability[type_index])); 1230 | if top_display > 0 and i >= top_display: 1231 | break; 1232 | 1233 | output.close(); 1234 | 1235 | # 1236 | # 1237 | # 1238 | # 1239 | # 1240 | 1241 | def newton_method_lambda(self, 1242 | doc_lambda, 1243 | doc_nu_square, 1244 | doc_zeta_factor, 1245 | sum_phi, 1246 | total_word_count, 1247 | newton_method_iteration=10, 1248 | newton_method_decay_factor=0.9, 1249 | # newton_method_step_size=0.1, 1250 | eigen_value_tolerance=1e-9 1251 | ): 1252 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1253 | assert sum_phi.shape == (self._number_of_topics,) 1254 | 1255 | newton_method_power_index = 0; 1256 | for newton_method_iteration_index in range(newton_method_iteration): 1257 | exp_over_doc_zeta = scipy.special.logsumexp( 1258 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1259 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1260 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 1261 | 1262 | if self._diagonal_covariance_matrix: 1263 | first_derivative_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 1264 | first_derivative_lambda += sum_phi 1265 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta 1266 | else: 1267 | first_derivative_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), 1268 | self._alpha_sigma_inv) 1269 | assert first_derivative_lambda.shape == (1, self._number_of_topics); 1270 | first_derivative_lambda += sum_phi[numpy.newaxis, :] 1271 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta[numpy.newaxis, :] 1272 | assert first_derivative_lambda.shape == (1, self._number_of_topics); 1273 | 1274 | if self._diagonal_covariance_matrix: 1275 | second_derivative_lambda = -1.0 / self._alpha_sigma 1276 | second_derivative_lambda -= total_word_count * exp_over_doc_zeta 1277 | else: 1278 | second_derivative_lambda = -self._alpha_sigma_inv; 1279 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1280 | second_derivative_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta) 1281 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1282 | 1283 | if self._diagonal_covariance_matrix: 1284 | if not numpy.all(second_derivative_lambda) > 0: 1285 | sys.stderr.write("Hessian matrix is not positive definite: %s\n" % second_derivative_lambda) 1286 | break; 1287 | else: 1288 | pass 1289 | ''' 1290 | print "%s" % second_derivative_lambda; 1291 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_lambda); 1292 | while not numpy.all(E_vector>eigen_value_tolerance): 1293 | second_derivative_lambda += numpy.eye(self._number_of_topics); 1294 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_lambda); 1295 | print "%s" % E_vector 1296 | ''' 1297 | 1298 | if self._diagonal_covariance_matrix: 1299 | step_change = first_derivative_lambda / second_derivative_lambda; 1300 | else: 1301 | # step_change = numpy.dot(first_derivative_lambda, numpy.linalg.pinv(second_derivative_lambda))[0, :] 1302 | step_change = numpy.dot(first_derivative_lambda, scipy.linalg.pinv(second_derivative_lambda))[0, :] 1303 | 1304 | # step_change *= newton_method_step_size; 1305 | step_change /= numpy.sqrt(numpy.sum(step_change 
** 2)); 1306 | 1307 | # if numpy.any(numpy.isnan(step_change)) or numpy.any(numpy.isinf(step_change)): 1308 | # break; 1309 | 1310 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1311 | 1312 | doc_lambda -= step_alpha * step_change; 1313 | assert doc_lambda.shape == (self._number_of_topics,); 1314 | 1315 | # if numpy.all(numpy.abs(step_change) <= local_parameter_converge_threshold): 1316 | # break; 1317 | 1318 | # print "update lambda to %s" % (doc_lambda) 1319 | 1320 | return doc_lambda; 1321 | 1322 | def newton_method_nu_square(self, 1323 | doc_lambda, 1324 | doc_nu_square, 1325 | doc_zeta_factor, 1326 | total_word_count, 1327 | newton_method_iteration=10, 1328 | newton_method_decay_factor=0.9, 1329 | # newton_method_step_size=0.1, 1330 | eigen_value_tolerance=1e-9 1331 | ): 1332 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1333 | 1334 | newton_method_power_index = 0; 1335 | for newton_method_iteration_index in range(newton_method_iteration): 1336 | # print doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis] 1337 | # exp_over_doc_zeta = 1.0 / numpy.sum(numpy.exp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis]), axis=1); 1338 | # print scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1339 | # exp_over_doc_zeta = numpy.exp(-scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1)); 1340 | exp_over_doc_zeta = scipy.special.logsumexp( 1341 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1342 | # exp_over_doc_zeta = numpy.clip(exp_over_doc_zeta, -10, +10); 1343 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1344 | 1345 | if self._diagonal_covariance_matrix: 1346 | first_derivative_nu_square = -0.5 / self._alpha_sigma; 1347 | else: 1348 | first_derivative_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 1349 | first_derivative_nu_square += 0.5 / doc_nu_square; 1350 | # first_derivative_nu_square -= 0.5 * (total_word_count / doc_zeta) * numpy.exp(doc_lambda+0.5*doc_nu_square) 1351 | first_derivative_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 1352 | 1353 | second_derivative_nu_square = -0.5 / (doc_nu_square ** 2); 1354 | # second_derivative_nu_square += -0.25 * (total_word_count / doc_zeta) * numpy.exp(doc_lambda+0.5*doc_nu_square); 1355 | second_derivative_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 1356 | 1357 | if self._diagonal_covariance_matrix: 1358 | if not numpy.all(second_derivative_nu_square) > 0: 1359 | print("Hessian matrix is not positive definite: ", second_derivative_nu_square) 1360 | break; 1361 | else: 1362 | pass 1363 | ''' 1364 | print "%s" % second_derivative_nu_square; 1365 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_nu_square); 1366 | while not numpy.all(E_vector>eigen_value_tolerance): 1367 | second_derivative_nu_square += numpy.eye(self._number_of_topics); 1368 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_nu_square); 1369 | print "%s" % E_vector 1370 | ''' 1371 | 1372 | step_change = first_derivative_nu_square / second_derivative_nu_square; 1373 | 1374 | # step_change *= newton_method_step_size; 1375 | step_change /= numpy.sqrt(numpy.sum(step_change ** 2)); 1376 | 1377 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1378 | while 
numpy.any(doc_nu_square <= step_alpha * step_change): 1379 | newton_method_power_index += 1; 1380 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1381 | 1382 | doc_nu_square -= step_alpha * step_change; 1383 | 1384 | assert numpy.all(doc_nu_square > 0), ( 1385 | doc_nu_square, step_change, first_derivative_nu_square, second_derivative_nu_square); 1386 | 1387 | return doc_nu_square; 1388 | 1389 | def newton_method_nu_square_in_log_space(self, 1390 | doc_lambda, 1391 | doc_nu_square, 1392 | doc_zeta_factor, 1393 | total_word_count, 1394 | newton_method_iteration=10, 1395 | newton_method_decay_factor=0.9, 1396 | newton_method_step_size=0.1, 1397 | eigen_value_tolerance=1e-9 1398 | ): 1399 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1400 | 1401 | doc_log_nu_square = numpy.log(doc_nu_square); 1402 | exp_doc_log_nu_square = numpy.exp(doc_log_nu_square); 1403 | 1404 | newton_method_power_index = 0; 1405 | for newton_method_iteration_index in range(newton_method_iteration): 1406 | log_exp_over_doc_zeta_combine = scipy.special.logsumexp( 1407 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, 1408 | numpy.newaxis] - doc_log_nu_square[:, 1409 | numpy.newaxis], axis=1) 1410 | exp_over_doc_zeta_combine = numpy.exp(-log_exp_over_doc_zeta_combine); 1411 | 1412 | if self._diagonal_covariance_matrix: 1413 | first_derivative_log_nu_square = -0.5 / self._alpha_sigma * exp_doc_log_nu_square; 1414 | else: 1415 | first_derivative_log_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv) * exp_doc_log_nu_square; 1416 | first_derivative_log_nu_square += 0.5 1417 | first_derivative_log_nu_square += -0.5 * total_word_count * exp_over_doc_zeta_combine 1418 | 1419 | if self._diagonal_covariance_matrix: 1420 | second_derivative_log_nu_square = -0.5 / self._alpha_sigma * exp_doc_log_nu_square; 1421 | else: 1422 | second_derivative_log_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv) * exp_doc_log_nu_square; 1423 | second_derivative_log_nu_square += -0.5 * total_word_count * exp_over_doc_zeta_combine * ( 1424 | 1 + 0.5 * exp_doc_log_nu_square); 1425 | 1426 | step_change = first_derivative_log_nu_square / second_derivative_log_nu_square; 1427 | 1428 | # step_change *= newton_method_step_size; 1429 | step_change /= numpy.sqrt(numpy.sum(step_change ** 2)); 1430 | 1431 | # if numpy.any(numpy.isnan(step_change)) or numpy.any(numpy.isinf(step_change)): 1432 | # break; 1433 | 1434 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1435 | 1436 | doc_log_nu_square -= step_alpha * step_change; 1437 | exp_doc_log_nu_square = numpy.exp(doc_log_nu_square); 1438 | 1439 | # if numpy.all(numpy.abs(step_change) <= local_parameter_converge_threshold): 1440 | # break; 1441 | 1442 | # print "update nu to %s" % (doc_nu_square) 1443 | 1444 | doc_nu_square = numpy.exp(doc_log_nu_square); 1445 | 1446 | return doc_nu_square; 1447 | 1448 | # 1449 | # 1450 | # 1451 | # 1452 | # 1453 | 1454 | def hessian_free_lambda(self, 1455 | doc_lambda, 1456 | doc_nu_square, 1457 | doc_zeta_factor, 1458 | sum_phi, 1459 | total_word_count, 1460 | hessian_free_iteration=10, 1461 | hessian_free_threshold=1e-9, 1462 | ): 1463 | for hessian_free_iteration_index in range(hessian_free_iteration): 1464 | delta_doc_lambda = self.conjugate_gradient_delta_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1465 | total_word_count, self._number_of_topics); 1466 | 1467 | # delta_doc_lambda /= 
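            # Hessian-free (truncated Newton) update: each outer iteration approximately solves
            #     H(lambda) * delta  ~=  -grad f(lambda)
            # with (optionally Jacobi-preconditioned) conjugate gradient, touching the Hessian
            # only through Hessian-vector products.  hessian_damping_direction_approximation_lambda
            # additionally adds a damping term lambda_d * direction and adapts lambda_d with a
            # Levenberg-Marquardt style reduction-ratio test (rho < 0.25 grows it, rho > 0.75
            # shrinks it).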
numpy.sqrt(numpy.sum(delta_doc_lambda**2)); 1468 | 1469 | # print "check point 2", numpy.sum(delta_doc_lambda) 1470 | # print delta_doc_lambda 1471 | 1472 | doc_lambda += delta_doc_lambda; 1473 | 1474 | return doc_lambda; 1475 | 1476 | def conjugate_gradient_delta_lambda(self, 1477 | doc_lambda, 1478 | doc_nu_square, 1479 | doc_zeta_factor, 1480 | sum_phi, 1481 | total_word_count, 1482 | 1483 | conjugate_gradient_iteration=100, 1484 | conjugate_gradient_threshold=1e-9, 1485 | precondition_hessian_matrix=True 1486 | ): 1487 | # delta_doc_lambda = numpy.random.random(self._number_of_topics); 1488 | delta_doc_lambda = numpy.zeros(self._number_of_topics); 1489 | # delta_doc_lambda = numpy.ones(self._number_of_topics); 1490 | 1491 | if precondition_hessian_matrix: 1492 | hessian_lambda = self.second_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1493 | total_word_count); 1494 | if not numpy.all(numpy.isfinite(hessian_lambda)): 1495 | return numpy.zeros(self._number_of_topics); 1496 | M_inverse = 1.0 / numpy.diag(hessian_lambda); 1497 | # print numpy.linalg.cond(hessian_lambda), ">>>", numpy.linalg.cond(numpy.dot(numpy.diag(1.0/numpy.diag(hessian_lambda)), hessian_lambda)), ">>>", numpy.linalg.cond(numpy.dot(numpy.linalg.cholesky(hessian_lambda), hessian_lambda)); 1498 | 1499 | r_vector = -self.first_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count); 1500 | r_vector -= self.hessian_damping_direction_approximation_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1501 | sum_phi, total_word_count, delta_doc_lambda) 1502 | 1503 | if precondition_hessian_matrix: 1504 | z_vector = M_inverse * r_vector; 1505 | else: 1506 | z_vector = numpy.copy(r_vector); 1507 | 1508 | p_vector = numpy.copy(z_vector); 1509 | r_z_vector_square_old = numpy.sum(r_vector * z_vector); 1510 | 1511 | for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration): 1512 | # hessian_p_vector = self.hessian_direction_approximation_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count, p_vector); 1513 | hessian_p_vector = self.hessian_damping_direction_approximation_lambda(doc_lambda, doc_nu_square, 1514 | doc_zeta_factor, sum_phi, 1515 | total_word_count, p_vector); 1516 | 1517 | alpha_value = r_z_vector_square_old / numpy.sum(p_vector * hessian_p_vector); 1518 | 1519 | delta_doc_lambda += alpha_value * p_vector; 1520 | 1521 | r_vector -= alpha_value * hessian_p_vector; 1522 | 1523 | if numpy.sqrt(numpy.sum(r_vector ** 2)) <= conjugate_gradient_threshold: 1524 | break; 1525 | 1526 | if precondition_hessian_matrix: 1527 | z_vector = M_inverse * r_vector; 1528 | else: 1529 | z_vector = numpy.copy(r_vector); 1530 | 1531 | r_z_vector_square_new = numpy.sum(r_vector * z_vector); 1532 | 1533 | p_vector *= r_z_vector_square_new / r_z_vector_square_old; 1534 | 1535 | p_vector += z_vector 1536 | 1537 | r_z_vector_square_old = r_z_vector_square_new; 1538 | 1539 | return delta_doc_lambda; 1540 | 1541 | def function_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count): 1542 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1543 | assert sum_phi.shape == (self._number_of_topics,) 1544 | 1545 | exp_over_doc_zeta = scipy.special.logsumexp( 1546 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1547 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1548 | 1549 | function_lambda = numpy.sum(sum_phi * doc_lambda); 1550 | 1551 | if 
self._diagonal_covariance_matrix: 1552 | mean_adjustment = doc_lambda - self._alpha_mu 1553 | assert mean_adjustment.shape == (self._number_of_topics,); 1554 | function_lambda += -0.5 * numpy.sum((mean_adjustment ** 2) / self._alpha_sigma) 1555 | else: 1556 | mean_adjustment = doc_lambda[numpy.newaxis, :] - self._alpha_mu; 1557 | assert mean_adjustment.shape == (1, self._number_of_topics); 1558 | function_lambda += -0.5 * numpy.dot(numpy.dot(mean_adjustment, self._alpha_sigma_inv), mean_adjustment.T) 1559 | 1560 | function_lambda += -total_word_count * numpy.sum(exp_over_doc_zeta); 1561 | 1562 | return function_lambda; 1563 | 1564 | def first_derivative_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count): 1565 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1566 | assert sum_phi.shape == (self._number_of_topics,) 1567 | 1568 | exp_over_doc_zeta = scipy.special.logsumexp( 1569 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1570 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1571 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 1572 | 1573 | if self._diagonal_covariance_matrix: 1574 | first_derivative_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 1575 | else: 1576 | first_derivative_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv)[ 1577 | 0, :] 1578 | 1579 | first_derivative_lambda += sum_phi; 1580 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta; 1581 | assert first_derivative_lambda.shape == (self._number_of_topics,); 1582 | 1583 | return first_derivative_lambda 1584 | 1585 | def second_derivative_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1586 | exp_over_doc_zeta = scipy.special.logsumexp( 1587 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1588 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1589 | 1590 | if self._diagonal_covariance_matrix: 1591 | second_derivative_lambda = -1.0 / self._alpha_sigma 1592 | second_derivative_lambda -= total_word_count * exp_over_doc_zeta; 1593 | else: 1594 | second_derivative_lambda = -self._alpha_sigma_inv; 1595 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1596 | second_derivative_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta); 1597 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1598 | 1599 | return second_derivative_lambda 1600 | 1601 | def hessian_direction_approximation_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count, 1602 | direction_vector, epsilon=1e-6): 1603 | assert doc_lambda.shape == (self._number_of_topics,) 1604 | assert doc_nu_square.shape == (self._number_of_topics,) 1605 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1606 | assert direction_vector.shape == (self._number_of_topics,) 1607 | 1608 | log_exp_over_doc_zeta_a = scipy.special.logsumexp( 1609 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - direction_vector[:, 1610 | numpy.newaxis] * epsilon - 0.5 * doc_nu_square[:, 1611 | numpy.newaxis], axis=1) 1612 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 1613 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1614 | assert log_exp_over_doc_zeta_a.shape == (self._number_of_topics,) 1615 | assert log_exp_over_doc_zeta_b.shape 
== (self._number_of_topics,) 1616 | 1617 | # hessian_direction_lambda = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 1618 | hessian_direction_lambda = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 1619 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 1620 | 1621 | if self._diagonal_covariance_matrix: 1622 | hessian_direction_lambda = -direction_vector * epsilon / self._alpha_sigma; 1623 | else: 1624 | hessian_direction_lambda += -numpy.dot(direction_vector[numpy.newaxis, :] * epsilon, self._alpha_sigma_inv)[ 1625 | 0, :] 1626 | assert hessian_direction_lambda.shape == (self._number_of_topics,); 1627 | 1628 | hessian_direction_lambda /= epsilon; 1629 | 1630 | return hessian_direction_lambda 1631 | 1632 | def hessian_damping_direction_approximation_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1633 | total_word_count, direction_vector, 1634 | damping_factor_initialization=0.1, damping_factor_iteration=10): 1635 | damping_factor_numerator = self.function_lambda(doc_lambda + direction_vector, doc_nu_square, doc_zeta_factor, 1636 | sum_phi, total_word_count); 1637 | damping_factor_numerator -= self.function_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1638 | total_word_count); 1639 | 1640 | hessian_direction_approximation = self.hessian_direction_approximation_lambda(doc_lambda, doc_nu_square, 1641 | doc_zeta_factor, total_word_count, 1642 | direction_vector); 1643 | 1644 | damping_factor_denominator_temp = self.first_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1645 | sum_phi, total_word_count); 1646 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1647 | damping_factor_denominator_temp += 0.5 * hessian_direction_approximation; 1648 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1649 | 1650 | damping_factor_lambda = damping_factor_initialization; 1651 | for damping_factor_iteration_index in range(damping_factor_iteration): 1652 | damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector; 1653 | assert damping_factor_denominator.shape == (self._number_of_topics,); 1654 | damping_factor_denominator *= direction_vector 1655 | damping_factor_denominator = numpy.sum(damping_factor_denominator); 1656 | 1657 | damping_factor_rho = damping_factor_numerator / damping_factor_denominator 1658 | if damping_factor_rho < 0.25: 1659 | damping_factor_lambda *= 1.5 1660 | elif damping_factor_rho > 0.75: 1661 | damping_factor_lambda /= 1.5 1662 | else: 1663 | return hessian_direction_approximation + damping_factor_lambda * direction_vector; 1664 | 1665 | return hessian_direction_approximation 1666 | 1667 | # 1668 | # 1669 | # 1670 | # 1671 | # 1672 | 1673 | def hessian_free_nu_square(self, 1674 | doc_lambda, 1675 | doc_nu_square, 1676 | doc_zeta_factor, 1677 | total_word_count, 1678 | hessian_free_iteration=10, 1679 | hessian_free_decay_factor=0.9, 1680 | hessian_free_reset_interval=100 1681 | ): 1682 | for hessian_free_iteration_index in range(hessian_free_iteration): 1683 | delta_doc_nu_square = self.conjugate_gradient_delta_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1684 | total_word_count, self._number_of_topics); 1685 | 1686 | # delta_doc_nu_square /= numpy.sqrt(numpy.sum(delta_doc_nu_square**2)); 1687 | 1688 | conjugate_gradient_power_index = 0 1689 | step_alpha = numpy.power(hessian_free_decay_factor, 
conjugate_gradient_power_index); 1690 | while numpy.any(doc_nu_square + step_alpha * delta_doc_nu_square <= 0): 1691 | conjugate_gradient_power_index += 1; 1692 | step_alpha = numpy.power(hessian_free_decay_factor, conjugate_gradient_power_index); 1693 | if conjugate_gradient_power_index >= hessian_free_reset_interval: 1694 | print("power index larger than 100", delta_doc_nu_square) 1695 | step_alpha = 0; 1696 | break 1697 | 1698 | doc_nu_square += step_alpha * delta_doc_nu_square; 1699 | assert numpy.all(doc_nu_square > 0); 1700 | 1701 | return doc_nu_square; 1702 | 1703 | def conjugate_gradient_delta_nu_square(self, 1704 | doc_lambda, 1705 | doc_nu_square, 1706 | doc_zeta_factor, 1707 | total_word_count, 1708 | 1709 | conjugate_gradient_iteration=100, 1710 | conjugate_gradient_threshold=1e-6, 1711 | conjugate_gradient_decay_factor=0.9, 1712 | conjugate_gradient_reset_interval=100, 1713 | ): 1714 | doc_nu_square_copy = numpy.copy(doc_nu_square); 1715 | # delta_doc_nu_square = numpy.ones(self._number_of_topics); 1716 | delta_doc_nu_square = numpy.zeros(self._number_of_topics); 1717 | # delta_doc_nu_square = numpy.random.random(self._number_of_topics); 1718 | 1719 | r_vector = -self.first_derivative_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count); 1720 | # r_vector -= self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, delta_doc_nu_square, damping_coefficient=1); 1721 | r_vector -= self.hessian_damping_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, 1722 | doc_zeta_factor, total_word_count, 1723 | delta_doc_nu_square); 1724 | 1725 | p_vector = numpy.copy(r_vector); 1726 | 1727 | r_vector_square_old = numpy.sum(r_vector ** 2); 1728 | 1729 | for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration): 1730 | assert not numpy.any(numpy.isnan(doc_lambda)) 1731 | assert not numpy.any(numpy.isnan(doc_nu_square_copy)) 1732 | assert not numpy.any(numpy.isnan(doc_zeta_factor)) 1733 | assert not numpy.any(numpy.isnan(p_vector)); 1734 | 1735 | # hessian_p_vector = self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, p_vector, damping_coefficient=1); 1736 | hessian_p_vector = self.hessian_damping_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, 1737 | doc_zeta_factor, total_word_count, 1738 | p_vector); 1739 | assert not numpy.any(numpy.isnan(hessian_p_vector)) 1740 | 1741 | alpha_value = r_vector_square_old / numpy.sum(p_vector * hessian_p_vector); 1742 | assert not numpy.isnan(alpha_value), (r_vector_square_old, numpy.sum(p_vector * hessian_p_vector)) 1743 | 1744 | ''' 1745 | conjugate_gradient_power_index = 0 1746 | step_alpha = numpy.power(conjugate_gradient_decay_factor, conjugate_gradient_power_index); 1747 | while numpy.any(delta_doc_nu_square <= -step_alpha * alpha_value * p_vector): 1748 | conjugate_gradient_power_index += 1; 1749 | step_alpha = numpy.power(conjugate_gradient_decay_factor, conjugate_gradient_power_index); 1750 | if conjugate_gradient_power_index>=100: 1751 | print "power index larger than 100", delta_doc_nu_square, alpha_value * p_vector 1752 | break; 1753 | 1754 | delta_doc_nu_square += step_alpha * alpha_value * p_vector; 1755 | assert not numpy.any(numpy.isnan(delta_doc_nu_square)) 1756 | ''' 1757 | 1758 | # p_vector /= numpy.sqrt(numpy.sum(p_vector**2)); 1759 | 1760 | delta_doc_nu_square += alpha_value * p_vector; 1761 | assert not 
numpy.any(numpy.isnan(delta_doc_nu_square)), (alpha_value, p_vector) 1762 | 1763 | ''' 1764 | if conjugate_gradient_iteration_index % conjugate_gradient_reset_interval==0: 1765 | r_vector = -self.first_derivative_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count); 1766 | r_vector -= self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, delta_doc_nu_square); 1767 | else: 1768 | r_vector -= alpha_value * hessian_p_vector; 1769 | ''' 1770 | r_vector -= alpha_value * hessian_p_vector; 1771 | assert not numpy.any(numpy.isnan(r_vector)) 1772 | 1773 | r_vector_square_new = numpy.sum(r_vector ** 2); 1774 | assert not numpy.isnan(r_vector_square_new); 1775 | 1776 | if numpy.sqrt(r_vector_square_new) <= conjugate_gradient_threshold: 1777 | break; 1778 | 1779 | p_vector *= r_vector_square_new / r_vector_square_old; 1780 | assert not numpy.any(numpy.isnan(p_vector)) 1781 | p_vector += r_vector; 1782 | assert not numpy.any(numpy.isnan(p_vector)) 1783 | 1784 | r_vector_square_old = r_vector_square_new; 1785 | 1786 | return delta_doc_nu_square; 1787 | 1788 | def function_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1789 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1790 | 1791 | exp_over_doc_zeta = scipy.special.logsumexp( 1792 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1793 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1794 | 1795 | function_nu_square = 0.5 * numpy.sum(numpy.log(doc_nu_square)); 1796 | 1797 | if self._diagonal_covariance_matrix: 1798 | function_nu_square += -0.5 * numpy.sum(doc_nu_square / self._alpha_sigma) 1799 | else: 1800 | function_nu_square += -0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 1801 | 1802 | function_nu_square += -total_word_count * numpy.sum(exp_over_doc_zeta); 1803 | 1804 | return function_nu_square; 1805 | 1806 | def first_derivative_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1807 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1808 | 1809 | exp_over_doc_zeta = scipy.special.logsumexp( 1810 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1811 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1812 | 1813 | if self._diagonal_covariance_matrix: 1814 | first_derivative_nu_square = -0.5 / self._alpha_sigma; 1815 | else: 1816 | first_derivative_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 1817 | first_derivative_nu_square += 0.5 / doc_nu_square; 1818 | first_derivative_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 1819 | 1820 | return first_derivative_nu_square; 1821 | 1822 | def second_derivative_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1823 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1824 | 1825 | exp_over_doc_zeta = scipy.special.logsumexp( 1826 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1827 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1828 | 1829 | second_derivative_nu_square = -0.5 / (doc_nu_square ** 2); 1830 | second_derivative_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 1831 | 1832 | return second_derivative_nu_square 1833 | 1834 | def hessian_direction_approximation_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, 
total_word_count, 1835 | direction_vector, epsilon=1e-6): 1836 | assert doc_lambda.shape == (self._number_of_topics,) 1837 | assert doc_nu_square.shape == (self._number_of_topics,) 1838 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1839 | assert direction_vector.shape == (self._number_of_topics,) 1840 | 1841 | log_exp_over_doc_zeta_a = scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * ( 1842 | doc_nu_square[:, numpy.newaxis] + direction_vector[:, numpy.newaxis] * epsilon), axis=1) 1843 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 1844 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1845 | 1846 | # hessian_direction_nu_square = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 1847 | hessian_direction_nu_square = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 1848 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 1849 | 1850 | hessian_direction_nu_square += 0.5 / (doc_nu_square + epsilon * direction_vector); 1851 | hessian_direction_nu_square -= 0.5 / (doc_nu_square) 1852 | 1853 | hessian_direction_nu_square /= epsilon 1854 | 1855 | return hessian_direction_nu_square 1856 | 1857 | def hessian_damping_direction_approximation_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, 1858 | total_word_count, direction_vector, 1859 | damping_factor_initialization=0.1, 1860 | damping_factor_iteration=10): 1861 | damping_factor_numerator = self.function_nu_square(doc_lambda, doc_nu_square + direction_vector, 1862 | doc_zeta_factor, total_word_count); 1863 | damping_factor_numerator -= self.function_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1864 | total_word_count); 1865 | 1866 | hessian_direction_approximation = self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square, 1867 | doc_zeta_factor, 1868 | total_word_count, 1869 | direction_vector); 1870 | 1871 | damping_factor_denominator_temp = self.first_derivative_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1872 | total_word_count); 1873 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1874 | damping_factor_denominator_temp += 0.5 * hessian_direction_approximation; 1875 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1876 | 1877 | damping_factor_lambda = damping_factor_initialization; 1878 | for damping_factor_iteration_index in range(damping_factor_iteration): 1879 | damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector; 1880 | assert damping_factor_denominator.shape == (self._number_of_topics,); 1881 | damping_factor_denominator *= direction_vector 1882 | damping_factor_denominator = numpy.sum(damping_factor_denominator); 1883 | 1884 | damping_factor_rho = damping_factor_numerator / damping_factor_denominator 1885 | if damping_factor_rho < 0.25: 1886 | damping_factor_lambda *= 1.5 1887 | elif damping_factor_rho > 0.75: 1888 | damping_factor_lambda /= 1.5 1889 | else: 1890 | return hessian_direction_approximation + damping_factor_lambda * direction_vector; 1891 | 1892 | return hessian_direction_approximation 1893 | 1894 | # 1895 | # 1896 | # 1897 | # 1898 | # 1899 | 1900 | def hessian_free_nu_square_in_log_space(self, 1901 | doc_lambda, 1902 | doc_nu_square, 1903 | doc_zeta_factor, 1904 | total_word_count, 1905 | 1906 | hessian_free_iteration=10, 1907 | 
1894 |     #
1895 |     #
1896 |     #
1897 |     #
1898 |     #
1899 |
1900 |     def hessian_free_nu_square_in_log_space(self,
1901 |             doc_lambda,
1902 |             doc_nu_square,
1903 |             doc_zeta_factor,
1904 |             total_word_count,
1905 |
1906 |             hessian_free_iteration=10,
1907 |             conjugate_gradient_threshold=1e-9,
1908 |             conjugate_gradient_reset_interval=100,
1909 |             ):
1910 |         for hessian_free_iteration_index in range(hessian_free_iteration):
1911 |             delta_doc_log_nu_square = self.conjugate_gradient_delta_log_nu_square(doc_lambda, doc_nu_square,
1912 |                 doc_zeta_factor, total_word_count,
1913 |                 self._number_of_topics);  # positional: binds to conjugate_gradient_iteration, i.e. at most number_of_topics CG iterations
1914 |
1915 |             # print "check point 1", numpy.sum(numpy.exp(delta_doc_log_nu_square)**2), numpy.sum(delta_doc_log_nu_square**2)
1916 |             # print numpy.sum(numpy.exp(delta_doc_log_nu_square)**2), numpy.exp(delta_doc_log_nu_square);
1917 |
1918 |             # delta_doc_log_nu_square /= numpy.sqrt(numpy.sum(delta_doc_log_nu_square**2));
1919 |
1920 |             doc_nu_square *= numpy.exp(delta_doc_log_nu_square);
1921 |
1922 |         return doc_nu_square;
1923 |
1924 |     '''
1925 |     nu_square must be greater than 0; conjugate gradient does not perform well on constrained optimization problems.
1926 |     Updating nu_square in log scale converts the constrained optimization problem into an unconstrained one.
1927 |     '''
1928 |
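    # Note on the reparameterisation used below (an interpretive comment, assuming the bound f from the
    # nu^2 methods above): with u_k = log nu_k^2, the chain rule gives df/du_k = nu_k^2 * df/d(nu_k^2),
    # and the entropy term 0.5 * sum_k log nu_k^2 becomes the linear term 0.5 * sum_k u_k, which is where
    # the constant +0.5 in first_derivative_log_nu_square comes from. Because u_k is unconstrained, the
    # conjugate gradient step below can be taken freely and then applied multiplicatively to nu^2.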
1929 |     def conjugate_gradient_delta_log_nu_square(self,
1930 |             doc_lambda,
1931 |             doc_nu_square,
1932 |             doc_zeta_factor,
1933 |             total_word_count,
1934 |
1935 |             conjugate_gradient_iteration=100,
1936 |             conjugate_gradient_threshold=1e-9,
1937 |             conjugate_gradient_reset_interval=100,
1938 |             precondition_hessian_matrix=True,
1939 |             ):
1940 |         doc_log_nu_square = numpy.log(doc_nu_square);
1941 |         # delta_doc_log_nu_square = numpy.random.random(self._number_of_topics);
1942 |         delta_doc_log_nu_square = numpy.zeros(self._number_of_topics);
1943 |         # delta_doc_log_nu_square = numpy.log(doc_nu_square);
1944 |
1945 |         if precondition_hessian_matrix:
1946 |             hessian_log_nu_square = self.second_derivative_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
1947 |                 total_word_count);
1948 |             if not numpy.all(numpy.isfinite(hessian_log_nu_square)):
1949 |                 return numpy.zeros(self._number_of_topics);
1950 |             M_inverse = 1.0 / hessian_log_nu_square;
1951 |             # print numpy.linalg.cond(hessian_log_nu_square), ">>>", numpy.linalg.cond(numpy.dot(numpy.diag(1.0/numpy.diag(hessian_log_nu_square)), hessian_log_nu_square)), ">>>", numpy.linalg.cond(numpy.dot(numpy.linalg.cholesky(hessian_log_nu_square), hessian_log_nu_square));
1952 |
1953 |         r_vector = -self.first_derivative_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
1954 |             total_word_count);
1955 |         # r_vector -= self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, delta_doc_log_nu_square);
1956 |         r_vector -= self.hessian_damping_direction_approximation_log_nu_square(doc_lambda, doc_log_nu_square,
1957 |             doc_zeta_factor, total_word_count,
1958 |             delta_doc_log_nu_square);
1959 |
1960 |         if precondition_hessian_matrix:
1961 |             z_vector = M_inverse * r_vector;
1962 |         else:
1963 |             z_vector = numpy.copy(r_vector);
1964 |
1965 |         p_vector = numpy.copy(z_vector);
1966 |         r_z_vector_square_old = numpy.sum(r_vector * z_vector);
1967 |
1968 |         for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration):
1969 |             assert numpy.all(numpy.isfinite(doc_lambda)), (conjugate_gradient_iteration_index, doc_lambda)
1970 |             assert numpy.all(numpy.isfinite(doc_log_nu_square)), (conjugate_gradient_iteration_index, doc_log_nu_square)
1971 |             assert numpy.all(numpy.isfinite(doc_zeta_factor)), (conjugate_gradient_iteration_index, doc_zeta_factor)
1972 |             assert numpy.all(numpy.isfinite(r_vector)), (conjugate_gradient_iteration_index, r_vector, doc_nu_square,
1973 |                 -self.first_derivative_log_nu_square(doc_lambda,
1974 |                     doc_log_nu_square,
1975 |                     doc_zeta_factor,
1976 |                     total_word_count),
1977 |                 -self.hessian_direction_approximation_log_nu_square(doc_lambda,
1978 |                     doc_log_nu_square,
1979 |                     doc_zeta_factor,
1980 |                     total_word_count,
1981 |                     delta_doc_log_nu_square))
1982 |             assert numpy.all(numpy.isfinite(p_vector)), (conjugate_gradient_iteration_index, p_vector)
1983 |
1984 |             # hessian_p_vector = self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, p_vector);
1985 |             hessian_p_vector = self.hessian_damping_direction_approximation_log_nu_square(doc_lambda, doc_log_nu_square,
1986 |                 doc_zeta_factor,
1987 |                 total_word_count, p_vector);
1988 |
1989 |             alpha_value = r_z_vector_square_old / numpy.sum(p_vector * hessian_p_vector);
1990 |
1991 |             delta_doc_log_nu_square += alpha_value * p_vector;
1992 |             assert not numpy.any(numpy.isnan(delta_doc_log_nu_square))
1993 |
1994 |             '''
1995 |             if conjugate_gradient_iteration_index % conjugate_gradient_reset_interval==0:
1996 |                 r_vector = -self.first_derivative_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count);
1997 |                 r_vector -= self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, delta_doc_log_nu_square);
1998 |             else:
1999 |                 r_vector -= alpha_value * hessian_p_vector;
2000 |             '''
2001 |             r_vector -= alpha_value * hessian_p_vector;
2002 |             assert not numpy.any(numpy.isnan(r_vector)), (alpha_value, hessian_p_vector, r_vector)
2003 |
2004 |             if numpy.sqrt(numpy.sum(r_vector ** 2)) <= conjugate_gradient_threshold:
2005 |                 break;
2006 |
2007 |             if precondition_hessian_matrix:
2008 |                 z_vector = M_inverse * r_vector;
2009 |             else:
2010 |                 z_vector = numpy.copy(r_vector);
2011 |
2012 |             r_z_vector_square_new = numpy.sum(r_vector * z_vector);
2013 |
2014 |             p_vector *= r_z_vector_square_new / r_z_vector_square_old;
2015 |             assert not numpy.any(numpy.isnan(p_vector))
2016 |
2017 |             p_vector += z_vector;
2018 |             assert not numpy.any(numpy.isnan(p_vector))
2019 |
2020 |             r_z_vector_square_old = r_z_vector_square_new;
2021 |
2022 |         return delta_doc_log_nu_square;
2023 |
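# A compact standalone sketch of the Jacobi-preconditioned conjugate gradient recurrence that
# conjugate_gradient_delta_log_nu_square above follows (the r / z / p vectors, with alpha and beta taken
# from the r'z inner products). Here H is an explicit symmetric positive definite matrix and M is its
# diagonal; in the method above the Hessian is only available through Hessian-vector products, and the
# diagonal comes from second_derivative_log_nu_square. All names below are illustrative assumptions.
import numpy


def preconditioned_conjugate_gradient(H, b, iterations=100, threshold=1e-9):
    x = numpy.zeros(len(b))
    r = b - H.dot(x)                    # residual, analogous to r_vector
    M_inverse = 1.0 / numpy.diag(H)     # Jacobi (diagonal) preconditioner, analogous to M_inverse
    z = M_inverse * r
    p = numpy.copy(z)
    rz_old = numpy.sum(r * z)           # analogous to r_z_vector_square_old
    for _ in range(iterations):
        Hp = H.dot(p)                   # analogous to hessian_p_vector
        alpha = rz_old / numpy.sum(p * Hp)
        x += alpha * p
        r -= alpha * Hp
        if numpy.sqrt(numpy.sum(r ** 2)) <= threshold:
            break
        z = M_inverse * r
        rz_new = numpy.sum(r * z)
        p = z + (rz_new / rz_old) * p   # same update as p_vector *= beta; p_vector += z_vector
        rz_old = rz_new
    return x


H = numpy.array([[4.0, 1.0], [1.0, 3.0]])
b = numpy.array([1.0, 2.0])
print(preconditioned_conjugate_gradient(H, b))  # approximately numpy.linalg.solve(H, b)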
2024 |     def function_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2025 |         return self.function_nu_square(doc_lambda, numpy.exp(doc_log_nu_square), doc_zeta_factor, total_word_count);
2026 |
2027 |     def first_derivative_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2028 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2029 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics);
2030 |
2031 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2032 |
2033 |         exp_over_doc_zeta = scipy.special.logsumexp(
2034 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2035 |         exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta);
2036 |
2037 |         if self._diagonal_covariance_matrix:
2038 |             first_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma;
2039 |         else:
2040 |             first_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv);
2041 |         first_derivative_log_nu_square += 0.5
2042 |         first_derivative_log_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square;
2043 |
2044 |         return first_derivative_log_nu_square;
2045 |
2046 |     def second_derivative_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2047 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2048 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics);
2049 |
2050 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2051 |
2052 |         exp_over_doc_zeta = scipy.special.logsumexp(
2053 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2054 |         exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta);
2055 |
2056 |         if self._diagonal_covariance_matrix:
2057 |             second_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma;
2058 |         else:
2059 |             second_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv);
2060 |         second_derivative_log_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square * (
2061 |             1 + 0.5 * exp_doc_log_nu_square);
2062 |
2063 |         return second_derivative_log_nu_square;
2064 |
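    # Written out explicitly (assuming, as before, that exp_over_doc_zeta[k] reduces to
    # exp(lambda_k + exp(u_k) / 2) / zeta, with u_k = log nu_k^2 and N = total_word_count), the two
    # derivative methods above evaluate
    #
    #   df/du_k    =  0.5  -  0.5 * exp(u_k) * SigmaInv_kk
    #                      -  (N / 2) * exp(u_k) * exp(lambda_k + exp(u_k) / 2) / zeta
    #   d2f/du_k^2 =       -  0.5 * exp(u_k) * SigmaInv_kk
    #                      -  (N / 2) * exp(u_k) * (1 + exp(u_k) / 2) * exp(lambda_k + exp(u_k) / 2) / zeta
    #
    # The diagonal second derivative is what conjugate_gradient_delta_log_nu_square uses to build its Jacobi
    # preconditioner (M_inverse = 1.0 / hessian_log_nu_square), hence the guard against non-finite entries there.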
2065 |     def hessian_direction_approximation_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor,
2066 |             total_word_count, direction_vector, epsilon=1e-6):
2067 |         assert doc_lambda.shape == (self._number_of_topics,)
2068 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2069 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics)
2070 |         assert direction_vector.shape == (self._number_of_topics,)
2071 |
2072 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2073 |         exp_doc_log_nu_square_epsilon_direction = numpy.exp(doc_log_nu_square + direction_vector * epsilon);
2074 |
2075 |         log_exp_over_doc_zeta_epsilon_direction = scipy.special.logsumexp(
2076 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square_epsilon_direction[:,
2077 |                 numpy.newaxis], axis=1)
2078 |         log_exp_over_doc_zeta = scipy.special.logsumexp(
2079 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2080 |
2081 |         # hessian_direction_log_nu_square = 0.5 * total_word_count * numpy.exp(log_doc_nu_square - log_exp_over_doc_zeta)
2082 |         # hessian_direction_log_nu_square += - 0.5 * total_word_count * numpy.exp(log_doc_nu_square + direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction)
2083 |
2084 |         hessian_direction_log_nu_square = 1 - numpy.exp(
2085 |             direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction + log_exp_over_doc_zeta)
2086 |         hessian_direction_log_nu_square *= 0.5 * total_word_count * numpy.exp(doc_log_nu_square - log_exp_over_doc_zeta)
2087 |
2088 |         if self._diagonal_covariance_matrix:
2089 |             hessian_direction_log_nu_square += 0.5 * (
2090 |                 exp_doc_log_nu_square - exp_doc_log_nu_square_epsilon_direction) / self._alpha_sigma;
2091 |         else:
2092 |             hessian_direction_log_nu_square += 0.5 * (
2093 |                 exp_doc_log_nu_square - exp_doc_log_nu_square_epsilon_direction) * numpy.diag(
2094 |                 self._alpha_sigma_inv);
2095 |
2096 |         hessian_direction_log_nu_square /= epsilon
2097 |
2098 |         return hessian_direction_log_nu_square
2099 |
2100 |     def hessian_damping_direction_approximation_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor,
2101 |             total_word_count, direction_vector,
2102 |             damping_factor_initialization=0.1,
2103 |             damping_factor_iteration=10):
2104 |         # print "=========="
2105 |         # print log_doc_nu_square + direction_vector, numpy.exp(log_doc_nu_square + direction_vector)
2106 |         # print log_doc_nu_square, numpy.exp(log_doc_nu_square);
2107 |
2108 |         damping_factor_numerator = self.function_log_nu_square(doc_lambda, doc_log_nu_square + direction_vector,
2109 |             doc_zeta_factor, total_word_count);
2110 |         damping_factor_numerator -= self.function_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
2111 |             total_word_count);
2112 |
2113 |         hessian_direction_approximation = self.hessian_direction_approximation_log_nu_square(doc_lambda,
2114 |             doc_log_nu_square,
2115 |             doc_zeta_factor,
2116 |             total_word_count,
2117 |             direction_vector);
2118 |
2119 |         damping_factor_denominator_temp = self.first_derivative_log_nu_square(doc_lambda, doc_log_nu_square,
2120 |             doc_zeta_factor, total_word_count);
2121 |         assert damping_factor_denominator_temp.shape == (self._number_of_topics,);
2122 |         damping_factor_denominator_temp += 0.5 * hessian_direction_approximation;
2123 |         assert damping_factor_denominator_temp.shape == (self._number_of_topics,);
2124 |
2125 |         damping_factor_lambda = damping_factor_initialization;
2126 |         for damping_factor_iteration_index in range(damping_factor_iteration):
2127 |             damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector;
2128 |             assert damping_factor_denominator.shape == (self._number_of_topics,);
2129 |             damping_factor_denominator *= direction_vector
2130 |             damping_factor_denominator = numpy.sum(damping_factor_denominator);
2131 |
2132 |             # print "check point 2", damping_factor_numerator, damping_factor_denominator
2133 |             damping_factor_rho = damping_factor_numerator / damping_factor_denominator
2134 |             if damping_factor_rho < 0.25:
2135 |                 damping_factor_lambda *= 1.5
2136 |             elif damping_factor_rho > 0.75:
2137 |                 damping_factor_lambda /= 1.5
2138 |             else:
2139 |                 return hessian_direction_approximation + damping_factor_lambda * direction_vector;
2140 |
2141 |         # print damping_factor_numerator, damping_factor_denominator, damping_factor_lambda
2142 |         # print "check point 1", hessian_direction_approximation, hessian_direction_approximation + damping_factor_lambda * direction_vector
2143 |
2144 |         damping_factor_lambda = damping_factor_initialization;
2145 |
2146 |         return hessian_direction_approximation + damping_factor_lambda * direction_vector;
2147 |
2148 |     #
2149 |     #
2150 |     #
2151 |     #
2152 |     #
2153 |
2154 |
2155 | if __name__ == "__main__":
2156 |     print("not implemented...")
2157 |
--------------------------------------------------------------------------------