├── __init__.py ├── pnas-abstract.tar.gz ├── .gitignore ├── README.md ├── launch_profiler.py ├── inferencer.py ├── launch_test.py ├── launch_resume.py ├── launch_train.py └── variational_bayes.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pnas-abstract.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzhai/PyCTM/HEAD/pnas-abstract.tar.gz -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PyCTM 2 | ========== 3 | 4 | PyCTM is a Correlated Topic Modeling package, please download the latest version from our [GitHub repository](https://github.com/kzhai/PyCTM). 5 | 6 | Please send any bugs of problems to Ke Zhai (kzhai@umd.edu). 7 | 8 | Install and Build 9 | ---------- 10 | 11 | This package depends on many external python libraries, such as numpy, scipy and nltk. 
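For example, assuming a working pip installation, these dependencies can be installed with

    pip install numpy scipy nltk scikit-learn

Note that ```variational_bayes.py``` also imports ```sklearn``` (scikit-learn); exact version requirements are not pinned in this package.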
12 | 13 | Launch and Execute 14 | ---------- 15 | 16 | Assume the PyCTM package is downloaded under directory ```$PROJECT_SPACE/src/```, i.e., 17 | 18 | $PROJECT_SPACE/src/PyCTM 19 | 20 | To prepare the example dataset, 21 | 22 | tar zxvf pnas-abstract.tar.gz 23 | 24 | To launch PyCTM, first redirect to the directory of PyCTM source code, 25 | 26 | cd $PROJECT_SPACE/src/PyCTM 27 | 28 | and run the following command on example dataset, 29 | 30 | python -m launch_train --input_directory=./pnas-abstract --output_directory=./ --number_of_topics=10 --training_iterations=50 31 | 32 | The generic argument to run PyCTM is 33 | 34 | python -m launch_train --input_directory=$INPUT_DIRECTORY/$CORPUS_NAME --output_directory=$OUTPUT_DIRECTORY --number_of_topics=$NUMBER_OF_TOPICS --training_iterations=$NUMBER_OF_ITERATIONS 35 | 36 | You should be able to find the output at directory ```$OUTPUT_DIRECTORY/$CORPUS_NAME```. 37 | 38 | Under any circumstances, you may also get help information and usage hints by running the following command 39 | 40 | python -m launch_train --help 41 | -------------------------------------------------------------------------------- /launch_profiler.py: -------------------------------------------------------------------------------- 1 | import pickle, string, numpy, getopt, sys, random, time, re, pprint 2 | import datetime, os; 3 | 4 | import nltk; 5 | import numpy; 6 | import cProfile 7 | 8 | 9 | def main(): 10 | # parameter set 1 11 | input_directory = "./nips-abstract" 12 | 13 | input_directory = input_directory.rstrip("/"); 14 | # corpus_name = os.path.basename(input_directory); 15 | 16 | ''' 17 | output_directory = options.output_directory; 18 | if not os.path.exists(output_directory): 19 | os.mkdir(output_directory); 20 | output_directory = os.path.join(output_directory, corpus_name); 21 | if not os.path.exists(output_directory): 22 | os.mkdir(output_directory); 23 | ''' 24 | 25 | # Document 26 | train_docs_path = os.path.join(input_directory, 'train.dat') 27 | input_doc_stream = open(train_docs_path, 'r'); 28 | train_docs = []; 29 | for line in input_doc_stream: 30 | train_docs.append(line.strip().lower()); 31 | print("successfully load all training docs from %s..." % (os.path.abspath(train_docs_path))); 32 | 33 | # Vocabulary 34 | vocabulary_path = os.path.join(input_directory, 'voc.dat'); 35 | input_voc_stream = open(vocabulary_path, 'r'); 36 | vocab = []; 37 | for line in input_voc_stream: 38 | vocab.append(line.strip().lower().split()[0]); 39 | vocab = list(set(vocab)); 40 | print("successfully load all the words from %s..." 
% (os.path.abspath(vocabulary_path))); 41 | 42 | # parameter 2 43 | number_of_topics = 10; 44 | alpha_mu = 0; 45 | alpha_sigma = 1; 46 | alpha_beta = 1.0 / len(vocab); 47 | 48 | # parameter set 3 49 | training_iterations = 1; 50 | 51 | import variational_bayes 52 | ctm_inferencer = variational_bayes.VariationalBayes(); 53 | 54 | ctm_inferencer._initialize(train_docs, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 55 | 56 | for iteration in range(training_iterations): 57 | clock = time.time(); 58 | log_likelihood = ctm_inferencer.learning(); 59 | clock = time.time() - clock; 60 | 61 | # print 'training iteration %d finished in %f seconds: number-of-topics = %d, log-likelihood = %f' % (hdp._iteration_counter, clock, hdp._K, log_likelihood); 62 | 63 | # gamma_path = os.path.join(output_directory, 'gamma.txt'); 64 | # numpy.savetxt(gamma_path, hdp._document_topic_distribution); 65 | 66 | # topic_inactive_counts_path = os.path.join(output_directory, "topic_inactive_counts.txt"); 67 | # numpy.savetxt(topic_inactive_counts_path, hdp._topic_inactive_counts); 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /inferencer.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Ke Zhai (zhaike@cs.umd.edu) 3 | """ 4 | 5 | import time 6 | import numpy 7 | import scipy 8 | import nltk; 9 | 10 | def compute_dirichlet_expectation(dirichlet_parameter): 11 | if (len(dirichlet_parameter.shape) == 1): 12 | return scipy.special.psi(dirichlet_parameter) - scipy.special.psi(numpy.sum(dirichlet_parameter)) 13 | return scipy.special.psi(dirichlet_parameter) - scipy.special.psi(numpy.sum(dirichlet_parameter, 1))[:, numpy.newaxis] 14 | 15 | def parse_vocabulary(vocab): 16 | type_to_index = {}; 17 | index_to_type = {}; 18 | for word in set(vocab): 19 | index_to_type[len(index_to_type)] = word; 20 | type_to_index[word] = len(type_to_index); 21 | 22 | return type_to_index, index_to_type; 23 | 24 | class Inferencer(): 25 | """ 26 | """ 27 | def __init__(self, 28 | hyper_parameter_optimize_interval=10, 29 | ): 30 | 31 | self._hyper_parameter_optimize_interval = hyper_parameter_optimize_interval; 32 | # assert(self._hyper_parameter_optimize_interval>0); 33 | 34 | # self._local_parameter_iterations = local_parameter_iterations 35 | # assert(self._local_maximum_iteration>0) 36 | 37 | """ 38 | """ 39 | def _initialize(self, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta): 40 | self.parse_vocabulary(vocab); 41 | 42 | # initialize the size of the vocabulary, i.e. total number of distinct tokens. 43 | self._number_of_types = len(self._type_to_index) 44 | 45 | self._counter = 0; 46 | 47 | # initialize the total number of topics. 48 | self._number_of_topics = number_of_topics 49 | 50 | # initialize a K-dimensional vector, valued at 1/K. 
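        # The logistic normal prior N(alpha_mu, alpha_sigma) over the document-level
        # topic proportions is stored in one of two shapes, depending on whether a
        # diagonal covariance matrix is used:
        #   - diagonal: alpha_mu and alpha_sigma are K-dimensional vectors;
        #   - full: alpha_mu is a 1-by-K row vector, alpha_sigma is a K-by-K matrix,
        #     and its pseudo-inverse is cached for reuse in the per-document E-step.
        # For example, with number_of_topics=3, alpha_mu=0 and alpha_sigma=1, the full
        # branch below yields alpha_mu=[[0., 0., 0.]] and alpha_sigma=numpy.eye(3).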
51 | if self._diagonal_covariance_matrix: 52 | self._alpha_mu = numpy.zeros(self._number_of_topics) + alpha_mu; 53 | self._alpha_sigma = numpy.zeros(self._number_of_topics) + alpha_sigma; 54 | else: 55 | self._alpha_mu = numpy.zeros((1, self._number_of_topics)) + alpha_mu; 56 | self._alpha_sigma = numpy.eye(self._number_of_topics) * alpha_sigma; 57 | self._alpha_sigma_inv = numpy.linalg.pinv(self._alpha_sigma); 58 | 59 | self._alpha_beta = numpy.zeros(self._number_of_types) + alpha_beta; 60 | 61 | def parse_vocabulary(self, vocab): 62 | self._type_to_index = {}; 63 | self._index_to_type = {}; 64 | for word in set(vocab): 65 | self._index_to_type[len(self._index_to_type)] = word; 66 | self._type_to_index[word] = len(self._type_to_index); 67 | 68 | self._vocab = list(self._type_to_index.keys()); 69 | 70 | def parse_data(self): 71 | raise NotImplementedError; 72 | 73 | """ 74 | """ 75 | def learning(self): 76 | raise NotImplementedError; 77 | 78 | """ 79 | """ 80 | def inference(self): 81 | raise NotImplementedError; 82 | 83 | def export_beta(self, exp_beta_path, top_display=-1): 84 | raise NotImplementedError; 85 | 86 | if __name__ == "__main__": 87 | raise NotImplementedError; 88 | -------------------------------------------------------------------------------- /launch_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pickle, getopt, sys, time, re 3 | import datetime, os; 4 | 5 | import scipy.io; 6 | import nltk; 7 | import numpy; 8 | import optparse; 9 | 10 | def parse_args(): 11 | parser = optparse.OptionParser() 12 | parser.set_defaults(# parameter set 1 13 | input_directory=None, 14 | model_directory=None, 15 | snapshot_index=-1, 16 | ) 17 | # parameter set 1 18 | parser.add_option("--input_directory", type="string", dest="input_directory", 19 | help="input directory [None]"); 20 | parser.add_option("--model_directory", type="string", dest="model_directory", 21 | help="model directory [None]"); 22 | parser.add_option("--snapshot_index", type="int", dest="snapshot_index", 23 | help="snapshot index [-: evaluate on all available snapshots]"); 24 | 25 | (options, args) = parser.parse_args(); 26 | return options; 27 | 28 | def main(): 29 | options = parse_args(); 30 | 31 | # parameter set 1 32 | # assert(options.input_corpus_name!=None); 33 | assert(options.input_directory != None); 34 | assert(options.model_directory != None); 35 | 36 | input_directory = options.input_directory; 37 | input_directory = input_directory.rstrip("/"); 38 | input_corpus_name = os.path.basename(input_directory); 39 | 40 | model_directory = options.model_directory; 41 | model_directory = model_directory.rstrip("/"); 42 | if not os.path.exists(model_directory): 43 | sys.stderr.write("error: model directory %s does not exist...\n" % (os.path.abspath(model_directory))); 44 | return; 45 | corpus_directory = os.path.split(os.path.abspath(model_directory))[0]; 46 | model_corpus_name = os.path.split(os.path.abspath(corpus_directory))[1] 47 | if input_corpus_name != model_corpus_name: 48 | sys.stderr.write("error: corpus name does not match for input (%s) and model (%s)...\n" % (input_corpus_name, model_corpus_name)); 49 | return; 50 | 51 | snapshot_index = options.snapshot_index; 52 | 53 | print("========== ========== ========== ========== ==========") 54 | # parameter set 1 55 | print("model_directory=" + model_directory) 56 | print("input_directory=" + input_directory) 57 | print("corpus_name=" + input_corpus_name) 58 | print("snapshot_index=" + 
str(snapshot_index)); 59 | print("========== ========== ========== ========== ==========") 60 | 61 | # Document 62 | test_docs_path = os.path.join(input_directory, 'test.dat') 63 | input_doc_stream = open(test_docs_path, 'r'); 64 | test_docs = []; 65 | for line in input_doc_stream: 66 | test_docs.append(line.strip().lower()); 67 | print("successfully load all testing docs from %s..." % (os.path.abspath(test_docs_path))); 68 | 69 | if snapshot_index >= 0: 70 | input_snapshot_path = os.path.join(model_directory, ("model-%d" % (snapshot_index))) 71 | if not os.path.exists(input_snapshot_path): 72 | sys.stderr.write("error: model snapshot %s does not exist...\n" % (os.path.abspath(input_snapshot_path))); 73 | return; 74 | 75 | output_lambda_path = os.path.join(model_directory, "test-lambda-%d" % snapshot_index); 76 | output_nu_square_path = os.path.join(model_directory, "test-nu_square-%d" % snapshot_index); 77 | 78 | evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path) 79 | else: 80 | for model_snapshot in os.listdir(model_directory): 81 | if not model_snapshot.startswith("model-"): 82 | continue; 83 | 84 | snapshot_index = int(model_snapshot.split("-")[-1]); 85 | 86 | input_snapshot_path = os.path.join(model_directory, model_snapshot); 87 | output_lambda_path = os.path.join(model_directory, "test-lambda-%d" % snapshot_index); 88 | output_nu_square_path = os.path.join(model_directory, "test-nu_square-%d" % snapshot_index); 89 | 90 | evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path) 91 | 92 | def evaluate_snapshot(input_snapshot_path, test_docs, output_lambda_path, output_nu_square_path): 93 | # import hybrid, monte_carlo, variational_bayes; 94 | lda_inferencer = pickle.load(open(input_snapshot_path, "rb")); 95 | # print 'successfully load model snapshot %s...' 
% (os.path.abspath(input_snapshot_path)); 96 | 97 | log_likelihood, lambda_values, nu_square_values = lda_inferencer.inference(test_docs); 98 | print("held-out likelihood of snapshot %s is %g" % (os.path.abspath(input_snapshot_path), log_likelihood)); 99 | numpy.savetxt(output_lambda_path, lambda_values); 100 | numpy.savetxt(output_nu_square_path, nu_square_values); 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /launch_resume.py: -------------------------------------------------------------------------------- 1 | import pickle; 2 | import optparse 3 | import string, numpy, getopt, sys, random, time, re, pprint 4 | import datetime, os; 5 | 6 | import numpy; 7 | import shutil 8 | 9 | # model_settings_pattern = re.compile('\d+-\d+-ctm_inferencer-I(?P\d+)-S(?P\d+)-aa(?P[\d\.]+)(-smh(?P[\d]+))?(-sp(?P[\d]+)-mp(?P[\d]+))?'); 10 | model_settings_pattern = re.compile('\d+-\d+-ctm-I(?P\d+)-S(?P\d+)-K(?P\d+)-am(?P[\d\.]+)-as(?P[\d\.]+)-ab(?P[\d\.]+)'); 11 | 12 | def parse_args(): 13 | parser = optparse.OptionParser() 14 | parser.set_defaults(# parameter set 1 15 | # input_file=None, 16 | model_directory=None, 17 | snapshot_index=-1, 18 | 19 | # parameter set 2 20 | output_directory=None, 21 | training_iterations=-1, 22 | snapshot_interval=-1, 23 | ) 24 | # parameter set 1 25 | # parser.add_option("--input_file", type="string", dest="input_file", 26 | # help="input directory [None]"); 27 | # parser.add_option("--input_directory", type="string", dest="input_directory", 28 | # help="input directory [None]"); 29 | parser.add_option("--model_directory", type="string", dest="model_directory", 30 | help="model directory [None]"); 31 | parser.add_option("--snapshot_index", type="int", dest="snapshot_index", 32 | help="snapshot index [-1]"); 33 | # parser.add_option("--training_iterations", type="int", dest="training_iterations", 34 | # help="number of training iterations [1000]"); 35 | # parser.add_option("--dataset_name", type="string", dest="dataset_name", 36 | # help="the corpus name [None]"); 37 | 38 | # parameter set 2 39 | parser.add_option("--output_directory", type="string", dest="output_directory", 40 | help="output directory [None]"); 41 | # parser.add_option("--alpha_alpha", type="float", dest="alpha_alpha", 42 | # help="hyper-parameter for Dirichlet process of cluster [1]") 43 | # parser.add_option("--alpha_kappa", type="float", dest="alpha_kappa", 44 | # help="hyper-parameter for top level Dirichlet process of distribution over topics [1]") 45 | # parser.add_option("--alpha_nu", type="float", dest="alpha_nu", 46 | # help="hyper-parameter for bottom level Dirichlet process of distribution over topics [1]") 47 | parser.add_option("--training_iterations", type="int", dest="training_iterations", 48 | help="number of training iterations [-1]"); 49 | parser.add_option("--snapshot_interval", type="int", dest="snapshot_interval", 50 | help="snapshot interval [-1 (default): remain unchanged]"); 51 | 52 | (options, args) = parser.parse_args(); 53 | return options; 54 | 55 | def main(): 56 | options = parse_args(); 57 | 58 | assert(options.model_directory != None); 59 | model_directory = options.model_directory; 60 | 61 | if not os.path.exists(model_directory): 62 | sys.stderr.write("model directory %s not exists...\n" % (model_directory)); 63 | return; 64 | model_directory = model_directory.rstrip("/"); 65 | model_settings = os.path.basename(model_directory); 66 | 67 | assert options.snapshot_index > 0 68 | 
snapshot_index = options.snapshot_index; 69 | 70 | # load the existing model 71 | model_snapshot_file_path = os.path.join(model_directory, "model-%d" % snapshot_index); 72 | if not os.path.exists(model_snapshot_file_path): 73 | sys.stderr.write("error: model snapshot file unfound %s...\n" % (model_snapshot_file_path)); 74 | return; 75 | 76 | ctm_inferencer = pickle.load(open(model_snapshot_file_path, "rb")); 77 | print('successfully load model snapshot %s...' % (os.path.join(model_directory, "model-%d" % snapshot_index))); 78 | 79 | # set the resume options 80 | matches = re.match(model_settings_pattern, model_settings); 81 | 82 | # training_iterations = int(matches.group('iteration')); 83 | training_iterations = options.training_iterations; 84 | assert training_iterations > snapshot_index; 85 | if options.snapshot_interval == -1: 86 | snapshot_interval = int(matches.group('snapshot')); 87 | else: 88 | snapshot_interval = options.snapshot_interval; 89 | number_of_topics = int(matches.group('topic')); 90 | alpha_mu = float(matches.group('alpha_mu')); 91 | alpha_sigma = float(matches.group('alpha_sigma')); 92 | alpha_beta = float(matches.group('alpha_beta')); 93 | 94 | now = datetime.datetime.now(); 95 | suffix = now.strftime("%y%m%d-%H%M%S") + ""; 96 | suffix += "-%s" % ("ctm"); 97 | suffix += "-I%d" % (training_iterations); 98 | suffix += "-S%d" % (snapshot_interval); 99 | suffix += "-K%g" % (number_of_topics); 100 | suffix += "-am%g" % (alpha_mu); 101 | suffix += "-as%g" % (alpha_sigma); 102 | suffix += "-ab%g" % (alpha_beta); 103 | 104 | assert options.output_directory != None; 105 | output_directory = options.output_directory; 106 | output_directory = output_directory.rstrip("/"); 107 | output_directory = os.path.join(output_directory, suffix); 108 | assert (not os.path.exists(os.path.abspath(output_directory))); 109 | os.mkdir(os.path.abspath(output_directory)); 110 | 111 | shutil.copy(model_snapshot_file_path, os.path.join(output_directory, "model-" + str(snapshot_index))); 112 | shutil.copy(model_snapshot_file_path, os.path.join(output_directory, "exp_beta-" + str(snapshot_index))); 113 | 114 | for iteration in range(snapshot_index, training_iterations): 115 | # clock = time.time(); 116 | log_likelihood = ctm_inferencer.learning(); 117 | # clock = time.time()-clock; 118 | # print 'training iteration %d finished in %f seconds: number-of-clusters = %d, log-likelihood = %f' % (dpgm._iteration_counter, clock, dpgm._K, log_likelihood); 119 | 120 | if ((ctm_inferencer._counter) % snapshot_interval == 0): 121 | ctm_inferencer.export_beta(os.path.join(output_directory, 'exp_beta-' + str(ctm_inferencer._counter))); 122 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 123 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 124 | 125 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 126 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /launch_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pickle, getopt, sys, time, re 3 | import datetime, os; 4 | 5 | import scipy.io; 6 | import nltk; 7 | import numpy; 8 | import optparse; 9 | 10 | def parse_args(): 11 | parser = optparse.OptionParser() 12 | parser.set_defaults(# parameter set 1 13 | input_directory=None, 14 | 
output_directory=None, 15 | # dictionary=None, 16 | 17 | # parameter set 2 18 | training_iterations=-1, 19 | snapshot_interval=10, 20 | number_of_topics=-1, 21 | 22 | # parameter set 3 23 | alpha_mu=0., 24 | alpha_sigma=1, 25 | alpha_beta=-1, 26 | 27 | # parameter set 4 28 | optimization_method=None, 29 | number_of_processes=1, 30 | diagonal_covariance_matrix=False, 31 | # inference_mode=-1, 32 | ) 33 | # parameter set 1 34 | parser.add_option("--input_directory", type="string", dest="input_directory", 35 | help="input directory [None]"); 36 | parser.add_option("--output_directory", type="string", dest="output_directory", 37 | help="output directory [None]"); 38 | # parser.add_option("--corpus_name", type="string", dest="corpus_name", 39 | # help="the corpus name [None]") 40 | # parser.add_option("--dictionary", type="string", dest="dictionary", 41 | # help="the dictionary file [None]") 42 | 43 | # parameter set 2 44 | parser.add_option("--number_of_topics", type="int", dest="number_of_topics", 45 | help="total number of topics [-1]"); 46 | parser.add_option("--training_iterations", type="int", dest="training_iterations", 47 | help="total number of iterations [-1]"); 48 | parser.add_option("--snapshot_interval", type="int", dest="snapshot_interval", 49 | help="snapshot interval [10]"); 50 | 51 | # parameter set 3 52 | parser.add_option("--alpha_mu", type="float", dest="alpha_mu", 53 | help="hyper-parameter for logistic normal distribution of topic [0.0]") 54 | parser.add_option("--alpha_sigma", type="float", dest="alpha_sigma", 55 | help="hyper-parameter for logistic normal distribution of topic [1.0]") 56 | parser.add_option("--alpha_beta", type="float", dest="alpha_beta", 57 | help="hyper-parameter for Dirichlet distribution of vocabulary [1.0/number_of_types]") 58 | 59 | # parameter set 4 60 | parser.add_option("--optimization_method", type="string", dest="optimization_method", 61 | help="optimization method for logistic normal distribution"); 62 | parser.add_option("--number_of_processes", type="int", dest="number_of_processes", 63 | help="number of processes [1]") 64 | 65 | # parser.add_option("--diagonal_covariance_matrix", action="store_true", dest="diagonal_covariance_matrix", 66 | # help="diagonal covariance matrix"); 67 | # parser.add_option("--inference_mode", type="int", dest="inference_mode", 68 | # help="inference mode [ " + 69 | # "0: hybrid inference, " + 70 | # "1: monte carlo, " + 71 | # "2: variational bayes " + 72 | # "]"); 73 | # parser.add_option("--inference_mode", action="store_true", dest="inference_mode", 74 | # help="run latent Dirichlet allocation in lda mode"); 75 | 76 | (options, args) = parser.parse_args(); 77 | return options; 78 | 79 | def main(): 80 | options = parse_args(); 81 | 82 | # parameter set 2 83 | assert(options.number_of_topics > 0); 84 | number_of_topics = options.number_of_topics; 85 | assert(options.training_iterations > 0); 86 | training_iterations = options.training_iterations; 87 | assert(options.snapshot_interval > 0); 88 | if options.snapshot_interval > 0: 89 | snapshot_interval = options.snapshot_interval; 90 | 91 | # parameter set 4 92 | optimization_method = options.optimization_method; 93 | if optimization_method == None: 94 | optimization_method = "L-BFGS-B"; 95 | number_of_processes = options.number_of_processes; 96 | if number_of_processes <= 0: 97 | sys.stderr.write("invalid setting for number_of_processes, adjust to 1...\n"); 98 | number_of_processes = 1; 99 | # diagonal_covariance_matrix = options.diagonal_covariance_matrix; 
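    # Note: optimization_method is handed to VariationalBayes and used as the
    # method= argument of scipy.optimize.minimize, so it should name a solver
    # scipy accepts, e.g. "L-BFGS-B" (the default here), "CG", or "Newton-CG";
    # an unrecognized name raises a ValueError inside scipy.
    # Example invocation with an explicit solver:
    #   python -m launch_train --input_directory=./pnas-abstract --output_directory=./ \
    #          --number_of_topics=10 --training_iterations=50 --optimization_method=L-BFGS-B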
100 | 101 | # parameter set 1 102 | # assert(options.corpus_name!=None); 103 | assert(options.input_directory != None); 104 | assert(options.output_directory != None); 105 | 106 | input_directory = options.input_directory; 107 | input_directory = input_directory.rstrip("/"); 108 | corpus_name = os.path.basename(input_directory); 109 | 110 | output_directory = options.output_directory; 111 | if not os.path.exists(output_directory): 112 | os.mkdir(output_directory); 113 | output_directory = os.path.join(output_directory, corpus_name); 114 | if not os.path.exists(output_directory): 115 | os.mkdir(output_directory); 116 | 117 | # Document 118 | train_docs_path = os.path.join(input_directory, 'train.dat') 119 | input_doc_stream = open(train_docs_path, 'r'); 120 | train_docs = []; 121 | for line in input_doc_stream: 122 | train_docs.append(line.strip().lower()); 123 | print("successfully load all training docs from %s..." % (os.path.abspath(train_docs_path))); 124 | 125 | # Vocabulary 126 | vocabulary_path = os.path.join(input_directory, 'voc.dat'); 127 | input_voc_stream = open(vocabulary_path, 'r'); 128 | vocab = []; 129 | for line in input_voc_stream: 130 | vocab.append(line.strip().lower().split()[0]); 131 | vocab = list(set(vocab)); 132 | print("successfully load all the words from %s..." % (os.path.abspath(vocabulary_path))); 133 | 134 | # parameter set 3 135 | alpha_mu = options.alpha_mu; 136 | # assert(options.alpha_sigma>0); 137 | alpha_sigma = options.alpha_sigma; 138 | if alpha_sigma <= 0: 139 | # alpha_sigma = 1.0/number_of_topics; 140 | alpha_sigma = 1.0 141 | assert(alpha_sigma > 0); 142 | alpha_beta = options.alpha_beta; 143 | if alpha_beta <= 0: 144 | alpha_beta = 1.0 / len(vocab); 145 | 146 | # create output directory 147 | now = datetime.datetime.now(); 148 | suffix = now.strftime("%y%m%d-%H%M%S") + ""; 149 | suffix += "-%s" % ("ctm"); 150 | suffix += "-I%d" % (training_iterations); 151 | suffix += "-S%d" % (snapshot_interval); 152 | suffix += "-K%d" % (number_of_topics); 153 | suffix += "-am%g" % (alpha_mu); 154 | suffix += "-as%g" % (alpha_sigma); 155 | suffix += "-ab%g" % (alpha_beta); 156 | if optimization_method != None: 157 | suffix += "-%s" % (optimization_method.replace("-", "_")); 158 | # suffix += "-DCM%s" % (diagonal_covariance_matrix); 159 | # suffix += "-%s" % (resample_topics); 160 | # suffix += "-%s" % (hash_oov_words); 161 | suffix += "/"; 162 | 163 | output_directory = os.path.join(output_directory, suffix); 164 | os.mkdir(os.path.abspath(output_directory)); 165 | 166 | # dict_file = options.dictionary; 167 | # if dict_file != None: 168 | # dict_file = dict_file.strip(); 169 | 170 | # store all the options to a file 171 | options_output_file = open(output_directory + "option.txt", 'w'); 172 | # parameter set 1 173 | options_output_file.write("input_directory=" + input_directory + "\n"); 174 | options_output_file.write("corpus_name=" + corpus_name + "\n"); 175 | # options_output_file.write("vocabulary_path=" + str(dict_file) + "\n"); 176 | # parameter set 2 177 | options_output_file.write("training_iterations=%d\n" % (training_iterations)); 178 | options_output_file.write("snapshot_interval=" + str(snapshot_interval) + "\n"); 179 | options_output_file.write("number_of_topics=" + str(number_of_topics) + "\n"); 180 | # parameter set 3 181 | options_output_file.write("alpha_mu=" + str(alpha_mu) + "\n"); 182 | options_output_file.write("alpha_sigma=" + str(alpha_sigma) + "\n"); 183 | options_output_file.write("alpha_beta=" + str(alpha_beta) + "\n"); 184 | # 
parameter set 4 185 | options_output_file.write("optimization_method=%s\n" % (optimization_method)); 186 | options_output_file.write("number_of_processes=%d\n" % (number_of_processes)); 187 | # options_output_file.write("diagonal_covariance_matrix=%s\n" % (diagonal_covariance_matrix)); 188 | options_output_file.close() 189 | 190 | print("========== ========== ========== ========== ==========") 191 | # parameter set 1 192 | print("output_directory=" + output_directory) 193 | print("input_directory=" + input_directory) 194 | print("corpus_name=" + corpus_name) 195 | # print "dictionary file=" + str(dict_file) 196 | # parameter set 2 197 | print("training_iterations=%d" % (training_iterations)); 198 | print("snapshot_interval=" + str(snapshot_interval)); 199 | print("number_of_topics=" + str(number_of_topics)) 200 | # parameter set 3 201 | print("alpha_mu=" + str(alpha_mu)) 202 | print("alpha_sigma=" + str(alpha_sigma)) 203 | print("alpha_beta=" + str(alpha_beta)) 204 | # parameter set 4 205 | print("optimization_method=%s" % (optimization_method)) 206 | print("number_of_processes=%d" % (number_of_processes)) 207 | # print "diagonal_covariance_matrix=%s" % (diagonal_covariance_matrix) 208 | print("========== ========== ========== ========== ==========") 209 | 210 | ''' 211 | if inference_mode==0: 212 | import hybrid 213 | ctm_inferencer = hybrid.Hybrid(); 214 | elif inference_mode==1: 215 | import monte_carlo 216 | ctm_inferencer = monte_carlo.MonteCarlo(); 217 | elif inference_mode==2: 218 | import variational_bayes 219 | ctm_inferencer = variational_bayes.VariationalBayes(); 220 | else: 221 | sys.stderr.write("error: unrecognized inference mode %d...\n" % (inference_mode)); 222 | return; 223 | ''' 224 | 225 | import variational_bayes 226 | ctm_inferencer = variational_bayes.VariationalBayes(optimization_method); 227 | 228 | ctm_inferencer._initialize(train_docs, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 229 | 230 | for iteration in range(training_iterations): 231 | ctm_inferencer.learning(number_of_processes); 232 | 233 | if (ctm_inferencer._counter % snapshot_interval == 0): 234 | ctm_inferencer.export_beta(os.path.join(output_directory, 'exp_beta-' + str(ctm_inferencer._counter))); 235 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 236 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 237 | 238 | model_snapshot_path = os.path.join(output_directory, 'model-' + str(ctm_inferencer._counter)); 239 | pickle.dump(ctm_inferencer, open(model_snapshot_path, 'wb')); 240 | 241 | if __name__ == '__main__': 242 | main() 243 | -------------------------------------------------------------------------------- /variational_bayes.py: -------------------------------------------------------------------------------- 1 | """ 2 | VariationalBayes for Correlated Topic Models 3 | @author: Ke Zhai (zhaike@cs.umd.edu) 4 | """ 5 | 6 | import multiprocessing 7 | import nltk; 8 | import numpy 9 | import queue; 10 | import scipy; 11 | import scipy.misc; 12 | import scipy.optimize; 13 | import sklearn; 14 | import sklearn.covariance; 15 | import string; 16 | import sys; 17 | import time 18 | 19 | from inferencer import compute_dirichlet_expectation 20 | from inferencer import Inferencer 21 | 22 | # numpy.random.seed(1000); 23 | 24 | ''' 25 | def parse_data(corpus, vocab): 26 | doc_count = 0 27 | 28 | word_ids = []; 29 | word_cts = []; 30 | 31 | for document_line in corpus: 32 | #words = document_line.split(); 33 | document_word_dict = [] 
34 | for token in document_line.split(): 35 | if token in vocab: 36 | if token not in document_word_dict: 37 | document_word_dict[token] = 0; 38 | document_word_dict[token] += 1; 39 | else: 40 | continue; 41 | 42 | word_ids.append(numpy.array(document_word_dict.keys())); 43 | word_cts.append(numpy.array(document_word_dict.values())); 44 | 45 | doc_count+=1 46 | if doc_count%10000==0: 47 | print "successfully import %d documents..." % doc_count; 48 | 49 | print "successfully import %d documents..." % (doc_count); 50 | 51 | return word_ids, word_cts 52 | ''' 53 | 54 | 55 | class Process_E_Step_Queue(multiprocessing.Process): 56 | def __init__(self, 57 | task_queue, 58 | 59 | model_parameters, 60 | 61 | optimize_doc_lambda, 62 | # optimize_doc_nu_square, 63 | optimize_doc_nu_square_in_log_space, 64 | 65 | result_doc_parameter_queue, 66 | result_log_likelihood_queue, 67 | result_sufficient_statistics_queue, 68 | 69 | diagonal_covariance_matrix=False, 70 | 71 | parameter_iteration=10, 72 | parameter_converge_threshold=1e-3): 73 | multiprocessing.Process.__init__(self); 74 | 75 | self._task_queue = task_queue; 76 | self._result_doc_parameter_queue = result_doc_parameter_queue; 77 | self._result_log_likelihood_queue = result_log_likelihood_queue; 78 | self._result_sufficient_statistics_queue = result_sufficient_statistics_queue; 79 | 80 | self._parameter_iteration = parameter_iteration; 81 | 82 | self._diagonal_covariance_matrix = diagonal_covariance_matrix; 83 | if self._diagonal_covariance_matrix: 84 | (self._E_log_eta, self._alpha_mu, self._alpha_sigma) = model_parameters; 85 | else: 86 | (self._E_log_eta, self._alpha_mu, self._alpha_sigma, self._alpha_sigma_inv) = model_parameters; 87 | (self._number_of_topics, self._number_of_types) = self._E_log_eta.shape; 88 | 89 | if result_sufficient_statistics_queue != None: 90 | self._E_log_prob_eta = self._E_log_eta - scipy.special.logsumexp(self._E_log_eta, axis=1)[:, numpy.newaxis] 91 | 92 | self.optimize_doc_lambda = optimize_doc_lambda; 93 | # self.optimize_doc_nu_square = optimize_doc_nu_square; 94 | self.optimize_doc_nu_square_in_log_space = optimize_doc_nu_square_in_log_space; 95 | 96 | def run(self): 97 | document_log_likelihood = 0; 98 | words_log_likelihood = 0; 99 | 100 | # initialize a V-by-K matrix phi sufficient statistics 101 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 102 | 103 | # initialize a D-by-K matrix lambda and nu_square values 104 | # lambda_values = numpy.zeros((number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 105 | # nu_square_values = numpy.ones((number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 106 | 107 | while not self._task_queue.empty(): 108 | try: 109 | (doc_id, term_ids, term_counts) = self._task_queue.get_nowait(); 110 | 111 | except queue.Empty: 112 | continue; 113 | 114 | doc_lambda = numpy.zeros(self._number_of_topics); 115 | doc_nu_square = numpy.ones(self._number_of_topics); 116 | 117 | assert term_counts.shape == (1, len(term_ids)); 118 | # compute the total number of words 119 | doc_word_count = numpy.sum(term_counts); 120 | 121 | # initialize gamma for this document 122 | # doc_lambda = lambda_values[doc_id, :] 123 | # doc_nu_square = nu_square_values[doc_id, :] 124 | ''' 125 | if self._diagonal_covariance_matrix: 126 | doc_lambda = numpy.random.multivariate_normal(self._alpha_mu, numpy.diag(self._alpha_sigma)); 127 | doc_nu_square = numpy.copy(self._alpha_sigma); 128 | else: 129 | 
#doc_lambda = numpy.random.multivariate_normal(self._alpha_mu[0, :], self._alpha_sigma); 130 | #doc_nu_square = numpy.copy(numpy.diag(self._alpha_sigma)); 131 | doc_lambda = numpy.random.multivariate_normal(numpy.zeros(self._number_of_topics), numpy.eye(self._number_of_topics)) 132 | doc_nu_square = numpy.ones(self._number_of_topics) 133 | assert doc_lambda.shape==(self._number_of_topics,) 134 | assert doc_nu_square.shape==(self._number_of_topics,) 135 | ''' 136 | 137 | # term_ids = word_ids[doc_id]; 138 | # term_counts = word_cts[doc_id]; 139 | 140 | # update zeta in close form 141 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 142 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 143 | assert doc_zeta_factor.shape == (self._number_of_topics,) 144 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 145 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 146 | 147 | for local_parameter_iteration_index in range(self._parameter_iteration): 148 | # update phi in close form 149 | assert self._E_log_eta.shape == (self._number_of_topics, self._number_of_types); 150 | log_phi = self._E_log_eta[:, term_ids] + doc_lambda[:, numpy.newaxis] 151 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 152 | log_phi -= scipy.special.logsumexp(log_phi, axis=0)[numpy.newaxis, :]; 153 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 154 | 155 | # 156 | # 157 | # 158 | # 159 | # 160 | 161 | # update lambda 162 | sum_phi = numpy.exp(scipy.special.logsumexp(log_phi + numpy.log(term_counts), axis=1)) 163 | arguments = (doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count) 164 | doc_lambda = self.optimize_doc_lambda(doc_lambda, arguments); 165 | # print "update lambda of doc %d to %s" % (doc_id, doc_lambda) 166 | 167 | # 168 | # 169 | # 170 | # 171 | # 172 | 173 | # update zeta in close form 174 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 175 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 176 | assert doc_zeta_factor.shape == (self._number_of_topics,) 177 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 178 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 179 | 180 | # 181 | # 182 | # 183 | # 184 | # 185 | 186 | # update nu_square 187 | arguments = (doc_lambda, doc_zeta_factor, doc_word_count); 188 | # doc_nu_square = self.optimize_doc_nu_square(doc_nu_square, arguments); 189 | doc_nu_square = self.optimize_doc_nu_square_in_log_space(doc_nu_square, arguments); 190 | # print "update nu of doc %d to %s" % (doc_id, doc_nu_square) 191 | 192 | # 193 | # 194 | # 195 | # 196 | # 197 | 198 | # update zeta in close form 199 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 200 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 201 | assert doc_zeta_factor.shape == (self._number_of_topics,) 202 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 203 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 204 | 205 | # mean_change = numpy.mean(abs(gamma_update - lambda_values[doc_id, :])); 206 | # lambda_values[doc_id, :] = gamma_update; 207 | # if mean_change <= local_parameter_converge_threshold: 208 | # break; 209 | 210 | # print doc_id, local_parameter_iteration_index 211 | 212 | # print "process document %d..." 
% doc_id 213 | 214 | # document_log_likelihood -= 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 215 | if self._diagonal_covariance_matrix: 216 | document_log_likelihood -= 0.5 * numpy.sum(numpy.log(self._alpha_sigma)); 217 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square / self._alpha_sigma); 218 | document_log_likelihood -= 0.5 * numpy.sum((doc_lambda - self._alpha_mu) ** 2 / self._alpha_sigma); 219 | else: 220 | # document_log_likelihood -= 0.5 * numpy.log(numpy.linalg.det(self._alpha_sigma)); 221 | document_log_likelihood -= 0.5 * numpy.log(scipy.linalg.det(self._alpha_sigma) + 1e-30); 222 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 223 | document_log_likelihood -= 0.5 * numpy.dot( 224 | numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv), 225 | (self._alpha_mu - doc_lambda[numpy.newaxis, :]).T); 226 | 227 | document_log_likelihood += numpy.sum(numpy.sum(numpy.exp(log_phi) * term_counts, axis=1) * doc_lambda); 228 | # use the fact that doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)), to cancel the factors 229 | document_log_likelihood -= scipy.special.logsumexp(doc_lambda + 0.5 * doc_nu_square) * doc_word_count; 230 | 231 | document_log_likelihood += 0.5 * self._number_of_topics; 232 | # document_log_likelihood += 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 233 | document_log_likelihood += 0.5 * numpy.sum(numpy.log(doc_nu_square)); 234 | 235 | document_log_likelihood -= numpy.sum(numpy.exp(log_phi) * log_phi * term_counts); 236 | 237 | # Note: all terms including E_q[p(\eta | \beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates in M-step 238 | if self._result_sufficient_statistics_queue == None: 239 | # compute the phi terms 240 | words_log_likelihood += numpy.sum( 241 | numpy.exp(log_phi + numpy.log(term_counts)) * self._E_log_prob_eta[:, term_ids]); 242 | 243 | # lambda_values[doc_id, :] = doc_lambda; 244 | # nu_square_values[doc_id, :] = doc_nu_square; 245 | 246 | assert numpy.all(doc_nu_square > 0); 247 | 248 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 249 | assert term_counts.shape == (1, len(term_ids)) 250 | phi_sufficient_statistics[:, term_ids] += numpy.exp(log_phi + numpy.log(term_counts)); 251 | 252 | # if (doc_id+1) % 1000==0: 253 | # print "successfully processed %d documents..." 
% (doc_id+1); 254 | 255 | self._result_doc_parameter_queue.put((doc_id, doc_lambda, doc_nu_square)); 256 | 257 | self._task_queue.task_done(); 258 | 259 | if self._result_sufficient_statistics_queue == None: 260 | self._result_log_likelihood_queue.put(words_log_likelihood); 261 | else: 262 | self._result_log_likelihood_queue.put(document_log_likelihood); 263 | self._result_sufficient_statistics_queue.put(phi_sufficient_statistics); 264 | 265 | 266 | class VariationalBayes(Inferencer): 267 | """ 268 | """ 269 | 270 | def __init__(self, 271 | scipy_optimization_method=None, 272 | hessian_free_optimization=False, 273 | diagonal_covariance_matrix=False, 274 | hyper_parameter_optimize_interval=1, 275 | 276 | hessian_direction_approximation_epsilon=1e-6 277 | # hyper_parameter_iteration=100, 278 | # hyper_parameter_decay_factor=0.9, 279 | # hyper_parameter_maximum_decay=10, 280 | # hyper_parameter_converge_threshold=1e-6, 281 | 282 | # model_converge_threshold=1e-6 283 | ): 284 | Inferencer.__init__(self, hyper_parameter_optimize_interval); 285 | self._scipy_optimization_method = scipy_optimization_method; 286 | 287 | self._hessian_free_optimization = hessian_free_optimization; 288 | self._diagonal_covariance_matrix = diagonal_covariance_matrix; 289 | 290 | self._hessian_direction_approximation_epsilon = hessian_direction_approximation_epsilon; 291 | 292 | """ 293 | @param num_topics: the number of topics 294 | @param data: a defaultdict(dict) data type, first indexed by doc id then indexed by term id 295 | take note: words are not terms, they are repeatable and thus might be not unique 296 | """ 297 | 298 | def _initialize(self, corpus, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta): 299 | Inferencer._initialize(self, vocab, number_of_topics, alpha_mu, alpha_sigma, alpha_beta); 300 | 301 | self._corpus = corpus; 302 | self._parsed_corpus = self.parse_data(); 303 | 304 | # define the total number of document 305 | self._number_of_documents = len(self._parsed_corpus[0]); 306 | 307 | # initialize a D-by-K matrix gamma 308 | self._lambda = numpy.zeros((self._number_of_documents, self._number_of_topics)) 309 | self._nu_square = numpy.ones((self._number_of_documents, self._number_of_topics)) 310 | 311 | # initialize a V-by-K matrix beta, subject to the sum over every row is 1 312 | self._eta = numpy.random.gamma(100., 1. / 100., (self._number_of_topics, self._number_of_types)); 313 | 314 | def parse_data(self, corpus=None): 315 | if corpus == None: 316 | corpus = self._corpus; 317 | 318 | doc_count = 0 319 | 320 | word_ids = []; 321 | word_cts = []; 322 | 323 | for document_line in corpus: 324 | # words = document_line.split(); 325 | document_word_dict = {} 326 | for token in document_line.split(): 327 | if token not in self._type_to_index: 328 | continue; 329 | 330 | type_id = self._type_to_index[token]; 331 | if type_id not in document_word_dict: 332 | document_word_dict[type_id] = 0; 333 | document_word_dict[type_id] += 1; 334 | 335 | if len(document_word_dict) == 0: 336 | sys.stderr.write("warning: document collapsed during parsing"); 337 | continue; 338 | 339 | word_ids.append(numpy.array(list(document_word_dict.keys()))); 340 | word_cts.append(numpy.array(list(document_word_dict.values()))[numpy.newaxis, :]); 341 | 342 | doc_count += 1 343 | if doc_count % 10000 == 0: 344 | print("successfully parse %d documents..." % doc_count); 345 | 346 | assert len(word_ids) == len(word_cts); 347 | print("successfully parse %d documents..." 
% (doc_count)); 348 | 349 | return (word_ids, word_cts) 350 | 351 | # 352 | # 353 | # 354 | # 355 | # 356 | 357 | def e_step_process_queue(self, 358 | parsed_corpus=None, 359 | number_of_processes=0, 360 | local_parameter_iteration=10, 361 | local_parameter_converge_threshold=1e-3, 362 | ): 363 | if parsed_corpus == None: 364 | word_ids = self._parsed_corpus[0]; 365 | word_cts = self._parsed_corpus[1]; 366 | else: 367 | word_ids = parsed_corpus[0] 368 | word_cts = parsed_corpus[1]; 369 | 370 | assert len(word_ids) == len(word_cts); 371 | number_of_documents = len(word_ids); 372 | 373 | E_log_eta = compute_dirichlet_expectation(self._eta); 374 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 375 | # if parsed_corpus!=None: 376 | # E_log_prob_eta = E_log_eta-scipy.special.logsumexp(E_log_eta, axis=1)[:, numpy.newaxis] 377 | 378 | task_queue = multiprocessing.JoinableQueue() 379 | for (doc_id, word_id, word_ct) in zip(list(range(number_of_documents)), word_ids, word_cts): 380 | task_queue.put((doc_id, word_id, word_ct)); 381 | 382 | result_doc_parameter_queue = multiprocessing.Queue(); 383 | result_log_likelihood_queue = multiprocessing.Queue(); 384 | if parsed_corpus == None: 385 | result_sufficient_statistics_queue = multiprocessing.Queue(); 386 | else: 387 | result_sufficient_statistics_queue = None; 388 | 389 | if self._diagonal_covariance_matrix: 390 | e_step_parameters = (E_log_eta, self._alpha_mu, self._alpha_sigma); 391 | else: 392 | e_step_parameters = (E_log_eta, self._alpha_mu, self._alpha_sigma, self._alpha_sigma_inv); 393 | 394 | # start consumers 395 | if number_of_processes <= 1: 396 | number_of_processes = multiprocessing.cpu_count(); 397 | print('creating %d processes' % number_of_processes) 398 | processes_e_step = [Process_E_Step_Queue(task_queue, 399 | 400 | e_step_parameters, 401 | 402 | self.optimize_doc_lambda, 403 | # self.optimize_doc_nu_square, 404 | self.optimize_doc_nu_square_in_log_space, 405 | 406 | result_doc_parameter_queue, 407 | result_log_likelihood_queue, 408 | result_sufficient_statistics_queue, 409 | 410 | diagonal_covariance_matrix=self._diagonal_covariance_matrix, 411 | parameter_iteration=local_parameter_iteration, 412 | ) 413 | for process_index in range(number_of_processes)]; 414 | 415 | for process_e_step in processes_e_step: 416 | process_e_step.start(); 417 | 418 | task_queue.join(); 419 | 420 | task_queue.close(); 421 | 422 | # initialize a D-by-K matrix lambda and nu_square values 423 | lambda_values = numpy.zeros( 424 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 425 | nu_square_values = numpy.zeros( 426 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 427 | 428 | # for result_queue_element_index in xrange(result_doc_parameter_queue.qsize()): 429 | # while not result_doc_parameter_queue.empty(): 430 | for result_queue_element_index in range(number_of_documents): 431 | (doc_id, doc_lambda, doc_nu_square) = result_doc_parameter_queue.get(); 432 | 433 | assert doc_id >= 0 and doc_id < number_of_documents; 434 | lambda_values[doc_id, :] = doc_lambda; 435 | nu_square_values[doc_id, :] = doc_nu_square; 436 | 437 | log_likelihood = 0; 438 | # for result_queue_element_index in result_log_likelihood_queue.qsize(): 439 | # while not result_log_likelihood_queue.empty(): 440 | for result_queue_element_index in range(number_of_processes): 441 | log_likelihood += result_log_likelihood_queue.get(); 442 | # print "log_likelihood is", log_likelihood; 
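        # Each worker process puts exactly one aggregate log-likelihood value on
        # result_log_likelihood_queue (and, during training, one K-by-V sufficient
        # statistics matrix on result_sufficient_statistics_queue) before it exits,
        # so looping over range(number_of_processes) drains these queues; the
        # per-document lambda/nu_square results were already collected above,
        # keyed by doc_id.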
443 | 444 | if parsed_corpus == None: 445 | self._lambda = lambda_values; 446 | self._nu_square = nu_square_values; 447 | 448 | # initialize a K-by-V matrix phi sufficient statistics 449 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 450 | 451 | # for result_queue_element_index in xrange(result_sufficient_statistics_queue.qsize()): 452 | # while not result_sufficient_statistics_queue.empty(): 453 | for result_queue_element_index in range(number_of_processes): 454 | phi_sufficient_statistics += result_sufficient_statistics_queue.get(); 455 | # print "phi_sufficient_statistics", phi_sufficient_statistics 456 | 457 | for process_e_step in processes_e_step: 458 | process_e_step.join(); 459 | 460 | if parsed_corpus == None: 461 | return log_likelihood, phi_sufficient_statistics 462 | else: 463 | return log_likelihood, lambda_values, nu_square_values 464 | 465 | ''' 466 | if parsed_corpus==None: 467 | document_log_likelihood, lambda_values, nu_square_values, phi_sufficient_statistics = self.format_result_queues(number_of_documents, 468 | result_doc_parameter_queue, 469 | result_log_likelihood_queue, 470 | result_sufficient_statistics_queue 471 | ); 472 | 473 | self._lambda = lambda_values; 474 | self._nu_square = nu_square_values; 475 | 476 | return document_log_likelihood, phi_sufficient_statistics 477 | else: 478 | words_log_likelihood, lambda_values, nu_square_values = self.format_result_queues(number_of_documents, 479 | result_doc_parameter_queue, 480 | result_log_likelihood_queue, 481 | ); 482 | 483 | return words_log_likelihood, lambda_values, nu_square_values 484 | ''' 485 | 486 | def format_result_queues(self, number_of_documents, result_doc_parameter_queue, result_log_likelihood_queue, 487 | result_sufficient_statistics_queue=None): 488 | # initialize a D-by-K matrix lambda and nu_square values 489 | lambda_values = numpy.zeros( 490 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 491 | nu_square_values = numpy.zeros( 492 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 493 | 494 | counter = 0 495 | # for result_queue_element_index in xrange(result_doc_parameter_queue.qsize()): 496 | while not result_doc_parameter_queue.empty(): 497 | (doc_id, doc_lambda, doc_nu_square) = result_doc_parameter_queue.get(); 498 | 499 | assert doc_id >= 0 and doc_id < number_of_documents; 500 | lambda_values[doc_id, :] = doc_lambda; 501 | nu_square_values[doc_id, :] = doc_nu_square; 502 | 503 | counter += 1; 504 | assert counter == number_of_documents, counter; 505 | 506 | log_likelihood = 0; 507 | # for result_queue_element_index in result_log_likelihood_queue.qsize(): 508 | while not result_log_likelihood_queue.empty(): 509 | log_likelihood += result_log_likelihood_queue.get(); 510 | # print "log_likelihood is", log_likelihood; 511 | 512 | if result_sufficient_statistics_queue == None: 513 | return log_likelihood, lambda_values, nu_square_values; 514 | else: 515 | # initialize a K-by-V matrix phi sufficient statistics 516 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 517 | 518 | # for result_queue_element_index in xrange(result_sufficient_statistics_queue.qsize()): 519 | while not result_sufficient_statistics_queue.empty(): 520 | phi_sufficient_statistics += result_sufficient_statistics_queue.get(); 521 | # print "phi_sufficient_statistics", phi_sufficient_statistics 522 | 523 | return log_likelihood, lambda_values, nu_square_values, 
phi_sufficient_statistics 524 | 525 | # 526 | # 527 | # 528 | # 529 | # 530 | 531 | def e_step(self, 532 | parsed_corpus=None, 533 | local_parameter_iteration=10, 534 | local_parameter_converge_threshold=1e-3, 535 | ): 536 | if parsed_corpus == None: 537 | word_ids = self._parsed_corpus[0]; 538 | word_cts = self._parsed_corpus[1]; 539 | else: 540 | word_ids = parsed_corpus[0] 541 | word_cts = parsed_corpus[1]; 542 | 543 | assert len(word_ids) == len(word_cts); 544 | number_of_documents = len(word_ids); 545 | 546 | E_log_eta = compute_dirichlet_expectation(self._eta); 547 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 548 | if parsed_corpus != None: 549 | E_log_prob_eta = E_log_eta - scipy.special.logsumexp(E_log_eta, axis=1)[:, numpy.newaxis] 550 | 551 | document_log_likelihood = 0; 552 | words_log_likelihood = 0; 553 | 554 | # initialize a V_matrix-by-K matrix phi sufficient statistics 555 | phi_sufficient_statistics = numpy.zeros((self._number_of_topics, self._number_of_types)); 556 | 557 | # initialize a D-by-K matrix lambda and nu_square values 558 | lambda_values = numpy.zeros( 559 | (number_of_documents, self._number_of_topics)) # + self._alpha_mu[numpy.newaxis, :]; 560 | nu_square_values = numpy.ones( 561 | (number_of_documents, self._number_of_topics)) # + self._alpha_sigma[numpy.newaxis, :]; 562 | 563 | # iterate over all documents 564 | for doc_id in numpy.random.permutation(number_of_documents): 565 | # initialize gamma for this document 566 | doc_lambda = lambda_values[doc_id, :] 567 | doc_nu_square = nu_square_values[doc_id, :] 568 | ''' 569 | if self._diagonal_covariance_matrix: 570 | doc_lambda = numpy.random.multivariate_normal(self._alpha_mu, numpy.diag(self._alpha_sigma)); 571 | doc_nu_square = numpy.copy(self._alpha_sigma); 572 | else: 573 | #doc_lambda = numpy.random.multivariate_normal(self._alpha_mu[0, :], self._alpha_sigma); 574 | #doc_nu_square = numpy.copy(numpy.diag(self._alpha_sigma)); 575 | doc_lambda = numpy.random.multivariate_normal(numpy.zeros(self._number_of_topics), numpy.eye(self._number_of_topics)) 576 | doc_nu_square = numpy.ones(self._number_of_topics) 577 | assert doc_lambda.shape==(self._number_of_topics,) 578 | assert doc_nu_square.shape==(self._number_of_topics,) 579 | ''' 580 | 581 | term_ids = word_ids[doc_id]; 582 | term_counts = word_cts[doc_id]; 583 | assert term_counts.shape == (1, len(term_ids)); 584 | # compute the total number of words 585 | doc_word_count = numpy.sum(word_cts[doc_id]); 586 | 587 | # update zeta in close form 588 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 589 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 590 | assert doc_zeta_factor.shape == (self._number_of_topics,) 591 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 592 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 593 | 594 | for local_parameter_iteration_index in range(local_parameter_iteration): 595 | # update phi in close form 596 | assert E_log_eta.shape == (self._number_of_topics, self._number_of_types); 597 | log_phi = E_log_eta[:, term_ids] + doc_lambda[:, numpy.newaxis] 598 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 599 | log_phi -= scipy.special.logsumexp(log_phi, axis=0)[numpy.newaxis, :]; 600 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 601 | 602 | # 603 | # 604 | # 605 | # 606 | # 607 | 608 | # update lambda 609 | sum_phi = numpy.exp(scipy.special.logsumexp(log_phi + 
numpy.log(term_counts), axis=1)) 610 | arguments = (doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count) 611 | doc_lambda = self.optimize_doc_lambda(doc_lambda, arguments); 612 | ''' 613 | if self._hessian_free_optimization: 614 | assert not self._diagonal_covariance_matrix 615 | doc_lambda = self.hessian_free_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count); 616 | else: 617 | doc_lambda = self.newton_method_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, doc_word_count); 618 | ''' 619 | # print "update lambda of doc %d to %s" % (doc_id, doc_lambda) 620 | 621 | # 622 | # 623 | # 624 | # 625 | # 626 | 627 | # update zeta in close form 628 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 629 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 630 | assert doc_zeta_factor.shape == (self._number_of_topics,) 631 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 632 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 633 | 634 | # 635 | # 636 | # 637 | # 638 | # 639 | 640 | # update nu_square 641 | arguments = (doc_lambda, doc_zeta_factor, doc_word_count); 642 | # doc_nu_square = self.optimize_doc_nu_square(doc_nu_square, arguments); 643 | doc_nu_square = self.optimize_doc_nu_square_in_log_space(doc_nu_square, arguments); 644 | ''' 645 | if self._hessian_free_optimization: 646 | assert not self._diagonal_covariance_matrix 647 | #doc_nu_square = self.hessian_free_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 648 | doc_nu_square = self.hessian_free_nu_square_in_log_space(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 649 | else: 650 | #doc_nu_square = self.newton_method_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 651 | doc_nu_square = self.newton_method_nu_square_in_log_space(doc_lambda, doc_nu_square, doc_zeta_factor, doc_word_count); 652 | ''' 653 | # print "update nu of doc %d to %s" % (doc_id, doc_nu_square) 654 | 655 | # 656 | # 657 | # 658 | # 659 | # 660 | 661 | # update zeta in close form 662 | # doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)); 663 | doc_zeta_factor = doc_lambda + 0.5 * doc_nu_square 664 | assert doc_zeta_factor.shape == (self._number_of_topics,) 665 | doc_zeta_factor = numpy.tile(doc_zeta_factor, (self._number_of_topics, 1)) 666 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 667 | 668 | # mean_change = numpy.mean(abs(gamma_update - lambda_values[doc_id, :])); 669 | # lambda_values[doc_id, :] = gamma_update; 670 | # if mean_change <= local_parameter_converge_threshold: 671 | # break; 672 | 673 | # print doc_id, local_parameter_iteration_index 674 | 675 | # print "process document %d..." 
% doc_id 676 | 677 | # document_log_likelihood -= 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 678 | if self._diagonal_covariance_matrix: 679 | document_log_likelihood -= 0.5 * numpy.sum(numpy.log(self._alpha_sigma)); 680 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square / self._alpha_sigma); 681 | document_log_likelihood -= 0.5 * numpy.sum((doc_lambda - self._alpha_mu) ** 2 / self._alpha_sigma); 682 | else: 683 | # document_log_likelihood -= 0.5 * numpy.log(numpy.linalg.det(self._alpha_sigma)); 684 | document_log_likelihood -= 0.5 * numpy.log(scipy.linalg.det(self._alpha_sigma) + 1e-30); 685 | document_log_likelihood -= 0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 686 | document_log_likelihood -= 0.5 * numpy.dot( 687 | numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv), 688 | (self._alpha_mu - doc_lambda[numpy.newaxis, :]).T); 689 | 690 | document_log_likelihood += numpy.sum(numpy.sum(numpy.exp(log_phi) * term_counts, axis=1) * doc_lambda); 691 | # use the fact that doc_zeta = numpy.sum(numpy.exp(doc_lambda+0.5*doc_nu_square)), to cancel the factors 692 | document_log_likelihood -= scipy.special.logsumexp(doc_lambda + 0.5 * doc_nu_square) * doc_word_count; 693 | 694 | document_log_likelihood += 0.5 * self._number_of_topics; 695 | # document_log_likelihood += 0.5 * self._number_of_topics * numpy.log(2 * numpy.pi) 696 | document_log_likelihood += 0.5 * numpy.sum(numpy.log(doc_nu_square)); 697 | 698 | document_log_likelihood -= numpy.sum(numpy.exp(log_phi) * log_phi * term_counts); 699 | 700 | # Note: all terms including E_q[p(\eta | \beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates in M-step 701 | if parsed_corpus != None: 702 | # compute the phi terms 703 | words_log_likelihood += numpy.sum( 704 | numpy.exp(log_phi + numpy.log(term_counts)) * E_log_prob_eta[:, term_ids]); 705 | 706 | lambda_values[doc_id, :] = doc_lambda; 707 | nu_square_values[doc_id, :] = doc_nu_square; 708 | 709 | assert log_phi.shape == (self._number_of_topics, len(term_ids)); 710 | assert term_counts.shape == (1, len(term_ids)) 711 | phi_sufficient_statistics[:, term_ids] += numpy.exp(log_phi + numpy.log(term_counts)); 712 | 713 | if (doc_id + 1) % 1000 == 0: 714 | print("successfully processed %d documents..." 
% (doc_id + 1)); 715 | 716 | assert numpy.all(nu_square_values > 0); 717 | 718 | if parsed_corpus == None: 719 | self._lambda = lambda_values; 720 | self._nu_square = nu_square_values; 721 | return document_log_likelihood, phi_sufficient_statistics 722 | else: 723 | return words_log_likelihood, lambda_values, nu_square_values 724 | 725 | # 726 | # 727 | # 728 | # 729 | # 730 | 731 | def optimize_doc_lambda(self, 732 | doc_lambda, 733 | arguments, 734 | ): 735 | 736 | optimize_result = scipy.optimize.minimize(self.f_doc_lambda, 737 | doc_lambda, 738 | args=arguments, 739 | method=self._scipy_optimization_method, 740 | jac=self.f_prime_doc_lambda, 741 | hess=self.f_hessian_doc_lambda, 742 | # hess=None, 743 | hessp=self.f_hessian_direction_doc_lambda, 744 | bounds=None, 745 | constraints=(), 746 | tol=None, 747 | callback=None, 748 | options={'disp': False} 749 | ) 750 | 751 | return optimize_result.x 752 | 753 | def f_doc_lambda(self, doc_lambda, *args): 754 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 755 | 756 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 757 | assert sum_phi.shape == (self._number_of_topics,) 758 | # if doc_lambda.shape==(1, self._number_of_topics): 759 | # doc_lambda = doc_lambda[0, :]; 760 | assert doc_lambda.shape == (self._number_of_topics,); 761 | 762 | exp_over_doc_zeta = scipy.special.logsumexp( 763 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 764 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 765 | 766 | function_doc_lambda = numpy.sum(sum_phi * doc_lambda); 767 | 768 | if self._diagonal_covariance_matrix: 769 | mean_adjustment = doc_lambda - self._alpha_mu 770 | assert mean_adjustment.shape == (self._number_of_topics,); 771 | function_doc_lambda += -0.5 * numpy.sum((mean_adjustment ** 2) / self._alpha_sigma) 772 | else: 773 | mean_adjustment = doc_lambda[numpy.newaxis, :] - self._alpha_mu; 774 | assert mean_adjustment.shape == (1, self._number_of_topics), ( 775 | doc_lambda.shape, mean_adjustment.shape, self._alpha_mu.shape) 776 | function_doc_lambda += -0.5 * numpy.dot(numpy.dot(mean_adjustment, self._alpha_sigma_inv), 777 | mean_adjustment.T) 778 | 779 | function_doc_lambda += -total_word_count * numpy.sum(exp_over_doc_zeta); 780 | 781 | return numpy.asscalar(-function_doc_lambda); 782 | 783 | def f_prime_doc_lambda(self, doc_lambda, *args): 784 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 785 | 786 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 787 | assert sum_phi.shape == (self._number_of_topics,) 788 | 789 | exp_over_doc_zeta = scipy.special.logsumexp( 790 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 791 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 792 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 793 | 794 | if self._diagonal_covariance_matrix: 795 | function_prime_doc_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 796 | else: 797 | function_prime_doc_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), 798 | self._alpha_sigma_inv)[0, :] 799 | 800 | function_prime_doc_lambda += sum_phi; 801 | function_prime_doc_lambda -= total_word_count * exp_over_doc_zeta; 802 | 803 | assert function_prime_doc_lambda.shape == (self._number_of_topics,); 804 | 805 | return numpy.asarray(-function_prime_doc_lambda); 806 | 807 | def f_hessian_doc_lambda(self, doc_lambda, *args): 808 | 
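        # f_doc_lambda, f_prime_doc_lambda and the two Hessian callbacks below hand the
        # (negated) lambda-dependent part of the per-document ELBO to scipy.optimize.minimize:
        #     f(lambda)       =  sum_phi . lambda
        #                        - 0.5 * (lambda - mu) Sigma^{-1} (lambda - mu)^T
        #                        - N * sum_k exp(lambda_k + 0.5 * nu^2_k) / zeta,
        #     grad f(lambda)  =  sum_phi + Sigma^{-1} (mu - lambda) - N * exp(lambda + 0.5 * nu^2) / zeta,
        # where N = total_word_count and the 1/zeta factor is evaluated through doc_zeta_factor
        # with a logsumexp for numerical stability.  With zeta held fixed, the Hessian is just
        # -Sigma^{-1} minus a diagonal term, and f_hessian_direction_doc_lambda instead
        # approximates the Hessian-vector product by a finite difference of the gradient,
        #     H v  ~=  (grad f(lambda + eps * v) - grad f(lambda)) / eps,
        # with eps = self._hessian_direction_approximation_epsilon.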
(doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 809 | exp_over_doc_zeta = scipy.special.logsumexp( 810 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 811 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 812 | 813 | if self._diagonal_covariance_matrix: 814 | function_hessian_doc_lambda = -1.0 / self._alpha_sigma 815 | function_hessian_doc_lambda -= total_word_count * exp_over_doc_zeta; 816 | else: 817 | function_hessian_doc_lambda = -self._alpha_sigma_inv; 818 | assert function_hessian_doc_lambda.shape == (self._number_of_topics, self._number_of_topics); 819 | function_hessian_doc_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta); 820 | assert function_hessian_doc_lambda.shape == (self._number_of_topics, self._number_of_topics); 821 | 822 | return numpy.asarray(-function_hessian_doc_lambda); 823 | 824 | def f_hessian_direction_doc_lambda(self, doc_lambda, direction_vector, *args): 825 | (doc_nu_square, doc_zeta_factor, sum_phi, total_word_count) = args; 826 | 827 | assert doc_lambda.shape == (self._number_of_topics,) 828 | assert doc_nu_square.shape == (self._number_of_topics,) 829 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 830 | assert direction_vector.shape == (self._number_of_topics,) 831 | 832 | log_exp_over_doc_zeta_a = scipy.special.logsumexp( 833 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - direction_vector[:, 834 | numpy.newaxis] * self._hessian_direction_approximation_epsilon - 0.5 * doc_nu_square[ 835 | :, 836 | numpy.newaxis], 837 | axis=1) 838 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 839 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 840 | assert log_exp_over_doc_zeta_a.shape == (self._number_of_topics,) 841 | assert log_exp_over_doc_zeta_b.shape == (self._number_of_topics,) 842 | 843 | # function_hessian_direction_doc_lambda = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 844 | function_hessian_direction_doc_lambda = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 845 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 846 | 847 | if self._diagonal_covariance_matrix: 848 | function_hessian_direction_doc_lambda += -direction_vector * self._hessian_direction_approximation_epsilon / self._alpha_sigma; 849 | else: 850 | function_hessian_direction_doc_lambda += -numpy.dot( 851 | direction_vector[numpy.newaxis, :] * self._hessian_direction_approximation_epsilon, 852 | self._alpha_sigma_inv)[0, :] 853 | assert function_hessian_direction_doc_lambda.shape == (self._number_of_topics,); 854 | 855 | function_hessian_direction_doc_lambda /= self._hessian_direction_approximation_epsilon; 856 | 857 | return numpy.asarray(-function_hessian_direction_doc_lambda) 858 | 859 | # 860 | # 861 | # 862 | # 863 | # 864 | 865 | def optimize_doc_nu_square(self, 866 | doc_nu_square, 867 | arguments, 868 | ): 869 | variable_bounds = tuple([(0, None)] * self._number_of_topics); 870 | 871 | optimize_result = scipy.optimize.minimize(self.f_doc_nu_square, 872 | doc_nu_square, 873 | args=arguments, 874 | method=self._scipy_optimization_method, 875 | jac=self.f_prime_doc_nu_square, 876 | hess=self.f_hessian_doc_nu_square, 877 | # hess=None, 878 | hessp=self.f_hessian_direction_doc_nu_square, 879 | bounds=variable_bounds, 880 | constraints=(), 881 | tol=None, 882 | callback=None, 883 | options={'disp': False} 884 
| ); 885 | 886 | return optimize_result.x; 887 | 888 | def f_doc_nu_square(self, doc_nu_square, *args): 889 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 890 | 891 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 892 | 893 | exp_over_doc_zeta = scipy.special.logsumexp( 894 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 895 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 896 | 897 | function_doc_nu_square = 0.5 * numpy.sum(numpy.log(doc_nu_square)); 898 | 899 | if self._diagonal_covariance_matrix: 900 | function_doc_nu_square += -0.5 * numpy.sum(doc_nu_square / self._alpha_sigma) 901 | else: 902 | function_doc_nu_square += -0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 903 | 904 | function_doc_nu_square += -total_word_count * numpy.sum(exp_over_doc_zeta); 905 | 906 | return numpy.asscalar(-function_doc_nu_square); 907 | 908 | def f_prime_doc_nu_square(self, doc_nu_square, *args): 909 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 910 | 911 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 912 | 913 | exp_over_doc_zeta = scipy.special.logsumexp( 914 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 915 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 916 | 917 | if self._diagonal_covariance_matrix: 918 | function_prime_doc_nu_square = -0.5 / self._alpha_sigma; 919 | else: 920 | function_prime_doc_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 921 | function_prime_doc_nu_square += 0.5 / doc_nu_square; 922 | function_prime_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 923 | 924 | return numpy.asarray(-function_prime_doc_nu_square); 925 | 926 | def f_hessian_doc_nu_square(self, doc_nu_square, *args): 927 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 928 | 929 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 930 | 931 | exp_over_doc_zeta = scipy.special.logsumexp( 932 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 933 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 934 | 935 | function_hessian_doc_nu_square = -0.5 / (doc_nu_square ** 2); 936 | function_hessian_doc_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 937 | 938 | function_hessian_doc_nu_square = numpy.diag(function_hessian_doc_nu_square); 939 | 940 | assert function_hessian_doc_nu_square.shape == (self._number_of_topics, self._number_of_topics); 941 | 942 | return numpy.asarray(-function_hessian_doc_nu_square); 943 | 944 | def f_hessian_direction_doc_nu_square(self, doc_nu_square, direction_vector, *args): 945 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 946 | 947 | assert direction_vector.shape == (self._number_of_topics,) 948 | 949 | # assert doc_lambda.shape==(self._number_of_topics,) 950 | # assert doc_nu_square.shape==(self._number_of_topics,) 951 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 952 | 953 | log_exp_over_doc_zeta_a = scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * ( 954 | doc_nu_square[:, numpy.newaxis] + direction_vector[:, 955 | numpy.newaxis] * self._hessian_direction_approximation_epsilon), 956 | axis=1) 957 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 958 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 959 | 960 | # 
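        # Same finite-difference idea as for lambda: perturb nu^2 by eps * direction_vector,
        # re-evaluate the relevant gradient terms, and divide the difference by
        # eps = self._hessian_direction_approximation_epsilon, so the Hessian-vector product
        # is approximated without ever forming the full Hessian.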
function_hessian_direction_doc_nu_square = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 961 | function_hessian_direction_doc_nu_square = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 962 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 963 | 964 | function_hessian_direction_doc_nu_square += 0.5 / ( 965 | doc_nu_square + self._hessian_direction_approximation_epsilon * direction_vector); 966 | function_hessian_direction_doc_nu_square -= 0.5 / (doc_nu_square) 967 | 968 | function_hessian_direction_doc_nu_square /= self._hessian_direction_approximation_epsilon; 969 | 970 | assert function_hessian_direction_doc_nu_square.shape == (self._number_of_topics,); 971 | 972 | return numpy.asarray(-function_hessian_direction_doc_nu_square); 973 | 974 | # 975 | # 976 | # 977 | # 978 | # 979 | 980 | def optimize_doc_nu_square_in_log_space(self, 981 | doc_nu_square, 982 | arguments, 983 | method_name=None 984 | ): 985 | log_doc_nu_square = numpy.log(doc_nu_square); 986 | 987 | optimize_result = scipy.optimize.minimize(self.f_log_doc_nu_square, 988 | log_doc_nu_square, 989 | args=arguments, 990 | method=method_name, 991 | jac=self.f_prime_log_doc_nu_square, 992 | hess=self.f_hessian_log_doc_nu_square, 993 | # hess=None, 994 | hessp=self.f_hessian_direction_log_doc_nu_square, 995 | bounds=None, 996 | constraints=(), 997 | tol=None, 998 | callback=None, 999 | options={'disp': False} 1000 | ); 1001 | 1002 | log_doc_nu_square_update = optimize_result.x 1003 | 1004 | return numpy.exp(log_doc_nu_square_update); 1005 | 1006 | def f_log_doc_nu_square(self, log_doc_nu_square, *args): 1007 | return self.f_doc_nu_square(numpy.exp(log_doc_nu_square), *args); 1008 | 1009 | def f_prime_log_doc_nu_square(self, log_doc_nu_square, *args): 1010 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1011 | 1012 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1013 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1014 | 1015 | exp_log_doc_nu_square = numpy.exp(log_doc_nu_square); 1016 | 1017 | exp_over_doc_zeta = scipy.special.logsumexp( 1018 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square[:, numpy.newaxis], axis=1) 1019 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1020 | 1021 | if self._diagonal_covariance_matrix: 1022 | function_prime_log_doc_nu_square = -0.5 * exp_log_doc_nu_square / self._alpha_sigma; 1023 | else: 1024 | function_prime_log_doc_nu_square = -0.5 * exp_log_doc_nu_square * numpy.diag(self._alpha_sigma_inv); 1025 | function_prime_log_doc_nu_square += 0.5 1026 | function_prime_log_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_log_doc_nu_square; 1027 | 1028 | assert function_prime_log_doc_nu_square.shape == (self._number_of_topics,); 1029 | 1030 | return numpy.asarray(-function_prime_log_doc_nu_square); 1031 | 1032 | def f_hessian_log_doc_nu_square(self, log_doc_nu_square, *args): 1033 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1034 | 1035 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1036 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1037 | 1038 | exp_doc_log_nu_square = numpy.exp(log_doc_nu_square); 1039 | 1040 | exp_over_doc_zeta = scipy.special.logsumexp( 1041 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1) 1042 | exp_over_doc_zeta = 
numpy.exp(-exp_over_doc_zeta); 1043 | 1044 | if self._diagonal_covariance_matrix: 1045 | function_hessian_log_doc_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma; 1046 | else: 1047 | function_hessian_log_doc_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv); 1048 | function_hessian_log_doc_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square * ( 1049 | 1 + 0.5 * exp_doc_log_nu_square); 1050 | 1051 | function_hessian_log_doc_nu_square = numpy.diag(function_hessian_log_doc_nu_square); 1052 | 1053 | assert function_hessian_log_doc_nu_square.shape == (self._number_of_topics, self._number_of_topics); 1054 | 1055 | return numpy.asarray(-function_hessian_log_doc_nu_square); 1056 | 1057 | def f_hessian_direction_log_doc_nu_square(self, log_doc_nu_square, direction_vector, *args): 1058 | (doc_lambda, doc_zeta_factor, total_word_count) = args; 1059 | 1060 | # assert doc_lambda.shape==(self._number_of_topics,) 1061 | assert log_doc_nu_square.shape == (self._number_of_topics,) 1062 | assert direction_vector.shape == (self._number_of_topics,) 1063 | 1064 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1065 | 1066 | exp_log_doc_nu_square = numpy.exp(log_doc_nu_square); 1067 | exp_log_doc_nu_square_epsilon_direction = numpy.exp( 1068 | log_doc_nu_square + direction_vector * self._hessian_direction_approximation_epsilon); 1069 | 1070 | log_exp_over_doc_zeta_epsilon_direction = scipy.special.logsumexp( 1071 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square_epsilon_direction[:, 1072 | numpy.newaxis], axis=1) 1073 | log_exp_over_doc_zeta = scipy.special.logsumexp( 1074 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_log_doc_nu_square[:, numpy.newaxis], axis=1) 1075 | 1076 | # function_hessian_direction_log_doc_nu_square = 0.5 * total_word_count * numpy.exp(log_doc_nu_square - log_exp_over_doc_zeta) 1077 | # function_hessian_direction_log_doc_nu_square += - 0.5 * total_word_count * numpy.exp(log_doc_nu_square + direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction) 1078 | 1079 | function_hessian_direction_log_doc_nu_square = 1 - numpy.exp( 1080 | direction_vector * self._hessian_direction_approximation_epsilon - log_exp_over_doc_zeta_epsilon_direction + log_exp_over_doc_zeta) 1081 | function_hessian_direction_log_doc_nu_square *= 0.5 * total_word_count * numpy.exp( 1082 | log_doc_nu_square - log_exp_over_doc_zeta) 1083 | 1084 | if self._diagonal_covariance_matrix: 1085 | function_hessian_direction_log_doc_nu_square += 0.5 * ( 1086 | exp_log_doc_nu_square - exp_log_doc_nu_square_epsilon_direction) / self._alpha_sigma; 1087 | else: 1088 | function_hessian_direction_log_doc_nu_square += 0.5 * ( 1089 | exp_log_doc_nu_square - exp_log_doc_nu_square_epsilon_direction) * numpy.diag( 1090 | self._alpha_sigma_inv); 1091 | 1092 | function_hessian_direction_log_doc_nu_square /= self._hessian_direction_approximation_epsilon 1093 | 1094 | assert function_hessian_direction_log_doc_nu_square.shape == (self._number_of_topics,); 1095 | 1096 | return numpy.asarray(-function_hessian_direction_log_doc_nu_square); 1097 | 1098 | # 1099 | # 1100 | # 1101 | # 1102 | # 1103 | 1104 | def m_step(self, phi_sufficient_statistics): 1105 | # Note: all terms including E_q[p(\eta|\beta)], i.e., terms involving \Psi(\eta), are cancelled due to \eta updates 1106 | 1107 | # compute the beta terms 1108 | topic_log_likelihood = self._number_of_topics * 
(scipy.special.gammaln(numpy.sum(self._alpha_beta)) - numpy.sum( 1109 | scipy.special.gammaln(self._alpha_beta))); 1110 | # compute the eta terms 1111 | topic_log_likelihood += numpy.sum( 1112 | numpy.sum(scipy.special.gammaln(self._eta), axis=1) - scipy.special.gammaln(numpy.sum(self._eta, axis=1))); 1113 | 1114 | self._eta = phi_sufficient_statistics + self._alpha_beta 1115 | assert (self._eta.shape == (self._number_of_topics, self._number_of_types)); 1116 | 1117 | return topic_log_likelihood 1118 | 1119 | """ 1120 | """ 1121 | 1122 | def learning(self, number_of_processes=1): 1123 | self._counter += 1; 1124 | 1125 | clock_e_step = time.time(); 1126 | if number_of_processes == 1: 1127 | document_log_likelihood, phi_sufficient_statistics = self.e_step(); 1128 | else: 1129 | document_log_likelihood, phi_sufficient_statistics = self.e_step_process_queue(None, number_of_processes) 1130 | clock_e_step = time.time() - clock_e_step; 1131 | 1132 | clock_m_step = time.time(); 1133 | topic_log_likelihood = self.m_step(phi_sufficient_statistics); 1134 | clock_m_step = time.time() - clock_m_step; 1135 | 1136 | print(document_log_likelihood, topic_log_likelihood); 1137 | joint_log_likelihood = document_log_likelihood + topic_log_likelihood; 1138 | 1139 | print("e_step and m_step of iteration %d finished in %g and %g seconds respectively with log likelihood %g" % ( 1140 | self._counter, clock_e_step, clock_m_step, joint_log_likelihood)) 1141 | 1142 | clock_hyper_opt = time.time(); 1143 | if self._hyper_parameter_optimize_interval > 0 and self._counter % self._hyper_parameter_optimize_interval == 0: 1144 | self.optimize_hyperparameter(); 1145 | clock_hyper_opt = time.time() - clock_hyper_opt; 1146 | print("hyper-parameter optimization of iteration %d finished in %g seconds" % (self._counter, clock_hyper_opt)); 1147 | 1148 | # if abs((joint_log_likelihood - old_likelihood) / old_likelihood) < self._model_converge_threshold: 1149 | # print "model likelihood converged..." 
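        # Sketch of how a training run typically drives this method (construction and corpus
        # parsing live in launch_train.py / inferencer.py, so the setup below is illustrative
        # rather than the package's exact API):
        #
        #     # model = ...construct the CTM and feed it the parsed training corpus...
        #     for iteration in range(training_iterations):
        #         joint_log_likelihood = model.learning()
        #     model.export_beta(os.path.join(output_directory, "exp_beta"), top_display=20)
        #
        # Held-out documents instead go through inference(corpus) below, which re-runs the
        # e-step with parsed_corpus supplied and returns the words' log likelihood together
        # with the per-document lambda and nu^2 values.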
1150 | # break 1151 | # old_likelihood = joint_log_likelihood; 1152 | 1153 | return joint_log_likelihood 1154 | 1155 | def inference(self, corpus): 1156 | parsed_corpus = self.parse_data(corpus); 1157 | number_of_documents = len(parsed_corpus[0]); 1158 | 1159 | clock_e_step = time.time(); 1160 | document_log_likelihood, lambda_values, nu_square_values = self.e_step(parsed_corpus); 1161 | clock_e_step = time.time() - clock_e_step; 1162 | 1163 | return document_log_likelihood, lambda_values, nu_square_values 1164 | 1165 | def optimize_hyperparameter(self): 1166 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1167 | self._alpha_mu = numpy.mean(self._lambda, axis=0); 1168 | print("update hyper-parameter mu to %s" % self._alpha_mu); 1169 | 1170 | assert self._nu_square.shape == (self._number_of_documents, self._number_of_topics); 1171 | if self._diagonal_covariance_matrix: 1172 | self._alpha_sigma = numpy.mean(self._nu_square + (self._lambda - self._alpha_mu[numpy.newaxis, :]) ** 2, 1173 | axis=0); 1174 | print("update hyper-parameter sigma to %s" % self._alpha_sigma); 1175 | else: 1176 | self._alpha_mu = self._alpha_mu[numpy.newaxis, :]; 1177 | 1178 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1179 | self._alpha_sigma = numpy.copy(numpy.diag(numpy.mean(self._nu_square, axis=0))); 1180 | adjusted_lambda = self._lambda - self._alpha_mu; 1181 | assert adjusted_lambda.shape == (self._number_of_documents, self._number_of_topics); 1182 | self._alpha_sigma += numpy.dot(adjusted_lambda.T, adjusted_lambda) / self._number_of_documents; 1183 | 1184 | # self._alpha_sigma_inv = scipy.linalg.pinv(self._alpha_sigma); 1185 | self._alpha_sigma_inv = scipy.linalg.inv(self._alpha_sigma); 1186 | print("update hyper-parameter sigma to") 1187 | print("%s" % self._alpha_sigma); 1188 | 1189 | return 1190 | 1191 | """ 1192 | @param alpha_vector: a dict data type represents dirichlet prior, indexed by topic_id 1193 | @param alpha_sufficient_statistics: a dict data type represents alpha sufficient statistics for alpha updating, indexed by topic_id 1194 | """ 1195 | 1196 | def optimize_hyperparameter_old(self): 1197 | assert self._lambda.shape == (self._number_of_documents, self._number_of_topics); 1198 | self._alpha_mu = numpy.mean(self._lambda, axis=0); 1199 | print("update hyper-parameter mu to %s" % self._alpha_mu); 1200 | 1201 | assert self._nu_square.shape == (self._number_of_documents, self._number_of_topics); 1202 | if self._diagonal_covariance_matrix: 1203 | self._alpha_sigma = numpy.mean(self._nu_square + (self._lambda - self._alpha_mu[numpy.newaxis, :]) ** 2, 1204 | axis=0); 1205 | print("update hyper-parameter sigma to %s" % self._alpha_sigma); 1206 | else: 1207 | self._alpha_mu = self._alpha_mu[numpy.newaxis, :]; 1208 | 1209 | self._alpha_sigma = sklearn.covariance.empirical_covariance(self._lambda, assume_centered=True); 1210 | 1211 | # self._alpha_sigma_inv = scipy.linalg.pinv(self._alpha_sigma); 1212 | self._alpha_sigma_inv = scipy.linalg.inv(self._alpha_sigma); 1213 | print("update hyper-parameter sigma to") 1214 | print("%s" % self._alpha_sigma); 1215 | 1216 | return 1217 | 1218 | def export_beta(self, exp_beta_path, top_display=-1): 1219 | output = open(exp_beta_path, 'w'); 1220 | E_log_eta = compute_dirichlet_expectation(self._eta); 1221 | for topic_index in range(self._number_of_topics): 1222 | output.write("==========\t%d\t==========\n" % (topic_index)); 1223 | 1224 | beta_probability = numpy.exp(E_log_eta[topic_index, :] - 
scipy.special.logsumexp(E_log_eta[topic_index, :])); 1225 | 1226 | i = 0; 1227 | for type_index in reversed(numpy.argsort(beta_probability)): 1228 | i += 1; 1229 | output.write("%s\t%g\n" % (self._index_to_type[type_index], beta_probability[type_index])); 1230 | if top_display > 0 and i >= top_display: 1231 | break; 1232 | 1233 | output.close(); 1234 | 1235 | # 1236 | # 1237 | # 1238 | # 1239 | # 1240 | 1241 | def newton_method_lambda(self, 1242 | doc_lambda, 1243 | doc_nu_square, 1244 | doc_zeta_factor, 1245 | sum_phi, 1246 | total_word_count, 1247 | newton_method_iteration=10, 1248 | newton_method_decay_factor=0.9, 1249 | # newton_method_step_size=0.1, 1250 | eigen_value_tolerance=1e-9 1251 | ): 1252 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1253 | assert sum_phi.shape == (self._number_of_topics,) 1254 | 1255 | newton_method_power_index = 0; 1256 | for newton_method_iteration_index in range(newton_method_iteration): 1257 | exp_over_doc_zeta = scipy.special.logsumexp( 1258 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1259 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1260 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 1261 | 1262 | if self._diagonal_covariance_matrix: 1263 | first_derivative_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 1264 | first_derivative_lambda += sum_phi 1265 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta 1266 | else: 1267 | first_derivative_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), 1268 | self._alpha_sigma_inv) 1269 | assert first_derivative_lambda.shape == (1, self._number_of_topics); 1270 | first_derivative_lambda += sum_phi[numpy.newaxis, :] 1271 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta[numpy.newaxis, :] 1272 | assert first_derivative_lambda.shape == (1, self._number_of_topics); 1273 | 1274 | if self._diagonal_covariance_matrix: 1275 | second_derivative_lambda = -1.0 / self._alpha_sigma 1276 | second_derivative_lambda -= total_word_count * exp_over_doc_zeta 1277 | else: 1278 | second_derivative_lambda = -self._alpha_sigma_inv; 1279 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1280 | second_derivative_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta) 1281 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1282 | 1283 | if self._diagonal_covariance_matrix: 1284 | if not numpy.all(second_derivative_lambda) > 0: 1285 | sys.stderr.write("Hessian matrix is not positive definite: %s\n" % second_derivative_lambda) 1286 | break; 1287 | else: 1288 | pass 1289 | ''' 1290 | print "%s" % second_derivative_lambda; 1291 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_lambda); 1292 | while not numpy.all(E_vector>eigen_value_tolerance): 1293 | second_derivative_lambda += numpy.eye(self._number_of_topics); 1294 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_lambda); 1295 | print "%s" % E_vector 1296 | ''' 1297 | 1298 | if self._diagonal_covariance_matrix: 1299 | step_change = first_derivative_lambda / second_derivative_lambda; 1300 | else: 1301 | # step_change = numpy.dot(first_derivative_lambda, numpy.linalg.pinv(second_derivative_lambda))[0, :] 1302 | step_change = numpy.dot(first_derivative_lambda, scipy.linalg.pinv(second_derivative_lambda))[0, :] 1303 | 1304 | # step_change *= newton_method_step_size; 1305 | step_change /= numpy.sqrt(numpy.sum(step_change 
** 2)); 1306 | 1307 | # if numpy.any(numpy.isnan(step_change)) or numpy.any(numpy.isinf(step_change)): 1308 | # break; 1309 | 1310 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1311 | 1312 | doc_lambda -= step_alpha * step_change; 1313 | assert doc_lambda.shape == (self._number_of_topics,); 1314 | 1315 | # if numpy.all(numpy.abs(step_change) <= local_parameter_converge_threshold): 1316 | # break; 1317 | 1318 | # print "update lambda to %s" % (doc_lambda) 1319 | 1320 | return doc_lambda; 1321 | 1322 | def newton_method_nu_square(self, 1323 | doc_lambda, 1324 | doc_nu_square, 1325 | doc_zeta_factor, 1326 | total_word_count, 1327 | newton_method_iteration=10, 1328 | newton_method_decay_factor=0.9, 1329 | # newton_method_step_size=0.1, 1330 | eigen_value_tolerance=1e-9 1331 | ): 1332 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1333 | 1334 | newton_method_power_index = 0; 1335 | for newton_method_iteration_index in range(newton_method_iteration): 1336 | # print doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis] 1337 | # exp_over_doc_zeta = 1.0 / numpy.sum(numpy.exp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis]), axis=1); 1338 | # print scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1339 | # exp_over_doc_zeta = numpy.exp(-scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1)); 1340 | exp_over_doc_zeta = scipy.special.logsumexp( 1341 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1342 | # exp_over_doc_zeta = numpy.clip(exp_over_doc_zeta, -10, +10); 1343 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1344 | 1345 | if self._diagonal_covariance_matrix: 1346 | first_derivative_nu_square = -0.5 / self._alpha_sigma; 1347 | else: 1348 | first_derivative_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 1349 | first_derivative_nu_square += 0.5 / doc_nu_square; 1350 | # first_derivative_nu_square -= 0.5 * (total_word_count / doc_zeta) * numpy.exp(doc_lambda+0.5*doc_nu_square) 1351 | first_derivative_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 1352 | 1353 | second_derivative_nu_square = -0.5 / (doc_nu_square ** 2); 1354 | # second_derivative_nu_square += -0.25 * (total_word_count / doc_zeta) * numpy.exp(doc_lambda+0.5*doc_nu_square); 1355 | second_derivative_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 1356 | 1357 | if self._diagonal_covariance_matrix: 1358 | if not numpy.all(second_derivative_nu_square) > 0: 1359 | print("Hessian matrix is not positive definite: ", second_derivative_nu_square) 1360 | break; 1361 | else: 1362 | pass 1363 | ''' 1364 | print "%s" % second_derivative_nu_square; 1365 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_nu_square); 1366 | while not numpy.all(E_vector>eigen_value_tolerance): 1367 | second_derivative_nu_square += numpy.eye(self._number_of_topics); 1368 | E_vector, V_matrix = scipy.linalg.eigh(second_derivative_nu_square); 1369 | print "%s" % E_vector 1370 | ''' 1371 | 1372 | step_change = first_derivative_nu_square / second_derivative_nu_square; 1373 | 1374 | # step_change *= newton_method_step_size; 1375 | step_change /= numpy.sqrt(numpy.sum(step_change ** 2)); 1376 | 1377 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1378 | while 
numpy.any(doc_nu_square <= step_alpha * step_change): 1379 | newton_method_power_index += 1; 1380 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1381 | 1382 | doc_nu_square -= step_alpha * step_change; 1383 | 1384 | assert numpy.all(doc_nu_square > 0), ( 1385 | doc_nu_square, step_change, first_derivative_nu_square, second_derivative_nu_square); 1386 | 1387 | return doc_nu_square; 1388 | 1389 | def newton_method_nu_square_in_log_space(self, 1390 | doc_lambda, 1391 | doc_nu_square, 1392 | doc_zeta_factor, 1393 | total_word_count, 1394 | newton_method_iteration=10, 1395 | newton_method_decay_factor=0.9, 1396 | newton_method_step_size=0.1, 1397 | eigen_value_tolerance=1e-9 1398 | ): 1399 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1400 | 1401 | doc_log_nu_square = numpy.log(doc_nu_square); 1402 | exp_doc_log_nu_square = numpy.exp(doc_log_nu_square); 1403 | 1404 | newton_method_power_index = 0; 1405 | for newton_method_iteration_index in range(newton_method_iteration): 1406 | log_exp_over_doc_zeta_combine = scipy.special.logsumexp( 1407 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, 1408 | numpy.newaxis] - doc_log_nu_square[:, 1409 | numpy.newaxis], axis=1) 1410 | exp_over_doc_zeta_combine = numpy.exp(-log_exp_over_doc_zeta_combine); 1411 | 1412 | if self._diagonal_covariance_matrix: 1413 | first_derivative_log_nu_square = -0.5 / self._alpha_sigma * exp_doc_log_nu_square; 1414 | else: 1415 | first_derivative_log_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv) * exp_doc_log_nu_square; 1416 | first_derivative_log_nu_square += 0.5 1417 | first_derivative_log_nu_square += -0.5 * total_word_count * exp_over_doc_zeta_combine 1418 | 1419 | if self._diagonal_covariance_matrix: 1420 | second_derivative_log_nu_square = -0.5 / self._alpha_sigma * exp_doc_log_nu_square; 1421 | else: 1422 | second_derivative_log_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv) * exp_doc_log_nu_square; 1423 | second_derivative_log_nu_square += -0.5 * total_word_count * exp_over_doc_zeta_combine * ( 1424 | 1 + 0.5 * exp_doc_log_nu_square); 1425 | 1426 | step_change = first_derivative_log_nu_square / second_derivative_log_nu_square; 1427 | 1428 | # step_change *= newton_method_step_size; 1429 | step_change /= numpy.sqrt(numpy.sum(step_change ** 2)); 1430 | 1431 | # if numpy.any(numpy.isnan(step_change)) or numpy.any(numpy.isinf(step_change)): 1432 | # break; 1433 | 1434 | step_alpha = numpy.power(newton_method_decay_factor, newton_method_power_index); 1435 | 1436 | doc_log_nu_square -= step_alpha * step_change; 1437 | exp_doc_log_nu_square = numpy.exp(doc_log_nu_square); 1438 | 1439 | # if numpy.all(numpy.abs(step_change) <= local_parameter_converge_threshold): 1440 | # break; 1441 | 1442 | # print "update nu to %s" % (doc_nu_square) 1443 | 1444 | doc_nu_square = numpy.exp(doc_log_nu_square); 1445 | 1446 | return doc_nu_square; 1447 | 1448 | # 1449 | # 1450 | # 1451 | # 1452 | # 1453 | 1454 | def hessian_free_lambda(self, 1455 | doc_lambda, 1456 | doc_nu_square, 1457 | doc_zeta_factor, 1458 | sum_phi, 1459 | total_word_count, 1460 | hessian_free_iteration=10, 1461 | hessian_free_threshold=1e-9, 1462 | ): 1463 | for hessian_free_iteration_index in range(hessian_free_iteration): 1464 | delta_doc_lambda = self.conjugate_gradient_delta_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1465 | total_word_count, self._number_of_topics); 1466 | 1467 | # delta_doc_lambda /= 
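            # Hessian-free (truncated Newton) update: each outer iteration approximately solves
            #     H(lambda) * delta  ~=  -grad f(lambda)
            # with (optionally Jacobi-preconditioned) conjugate gradient, touching the Hessian
            # only through Hessian-vector products.  hessian_damping_direction_approximation_lambda
            # additionally adds a damping term lambda_d * direction and adapts lambda_d with a
            # Levenberg-Marquardt style reduction-ratio test (rho < 0.25 grows it, rho > 0.75
            # shrinks it).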
numpy.sqrt(numpy.sum(delta_doc_lambda**2)); 1468 | 1469 | # print "check point 2", numpy.sum(delta_doc_lambda) 1470 | # print delta_doc_lambda 1471 | 1472 | doc_lambda += delta_doc_lambda; 1473 | 1474 | return doc_lambda; 1475 | 1476 | def conjugate_gradient_delta_lambda(self, 1477 | doc_lambda, 1478 | doc_nu_square, 1479 | doc_zeta_factor, 1480 | sum_phi, 1481 | total_word_count, 1482 | 1483 | conjugate_gradient_iteration=100, 1484 | conjugate_gradient_threshold=1e-9, 1485 | precondition_hessian_matrix=True 1486 | ): 1487 | # delta_doc_lambda = numpy.random.random(self._number_of_topics); 1488 | delta_doc_lambda = numpy.zeros(self._number_of_topics); 1489 | # delta_doc_lambda = numpy.ones(self._number_of_topics); 1490 | 1491 | if precondition_hessian_matrix: 1492 | hessian_lambda = self.second_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1493 | total_word_count); 1494 | if not numpy.all(numpy.isfinite(hessian_lambda)): 1495 | return numpy.zeros(self._number_of_topics); 1496 | M_inverse = 1.0 / numpy.diag(hessian_lambda); 1497 | # print numpy.linalg.cond(hessian_lambda), ">>>", numpy.linalg.cond(numpy.dot(numpy.diag(1.0/numpy.diag(hessian_lambda)), hessian_lambda)), ">>>", numpy.linalg.cond(numpy.dot(numpy.linalg.cholesky(hessian_lambda), hessian_lambda)); 1498 | 1499 | r_vector = -self.first_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count); 1500 | r_vector -= self.hessian_damping_direction_approximation_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1501 | sum_phi, total_word_count, delta_doc_lambda) 1502 | 1503 | if precondition_hessian_matrix: 1504 | z_vector = M_inverse * r_vector; 1505 | else: 1506 | z_vector = numpy.copy(r_vector); 1507 | 1508 | p_vector = numpy.copy(z_vector); 1509 | r_z_vector_square_old = numpy.sum(r_vector * z_vector); 1510 | 1511 | for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration): 1512 | # hessian_p_vector = self.hessian_direction_approximation_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count, p_vector); 1513 | hessian_p_vector = self.hessian_damping_direction_approximation_lambda(doc_lambda, doc_nu_square, 1514 | doc_zeta_factor, sum_phi, 1515 | total_word_count, p_vector); 1516 | 1517 | alpha_value = r_z_vector_square_old / numpy.sum(p_vector * hessian_p_vector); 1518 | 1519 | delta_doc_lambda += alpha_value * p_vector; 1520 | 1521 | r_vector -= alpha_value * hessian_p_vector; 1522 | 1523 | if numpy.sqrt(numpy.sum(r_vector ** 2)) <= conjugate_gradient_threshold: 1524 | break; 1525 | 1526 | if precondition_hessian_matrix: 1527 | z_vector = M_inverse * r_vector; 1528 | else: 1529 | z_vector = numpy.copy(r_vector); 1530 | 1531 | r_z_vector_square_new = numpy.sum(r_vector * z_vector); 1532 | 1533 | p_vector *= r_z_vector_square_new / r_z_vector_square_old; 1534 | 1535 | p_vector += z_vector 1536 | 1537 | r_z_vector_square_old = r_z_vector_square_new; 1538 | 1539 | return delta_doc_lambda; 1540 | 1541 | def function_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count): 1542 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1543 | assert sum_phi.shape == (self._number_of_topics,) 1544 | 1545 | exp_over_doc_zeta = scipy.special.logsumexp( 1546 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1547 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1548 | 1549 | function_lambda = numpy.sum(sum_phi * doc_lambda); 1550 | 1551 | if 
self._diagonal_covariance_matrix: 1552 | mean_adjustment = doc_lambda - self._alpha_mu 1553 | assert mean_adjustment.shape == (self._number_of_topics,); 1554 | function_lambda += -0.5 * numpy.sum((mean_adjustment ** 2) / self._alpha_sigma) 1555 | else: 1556 | mean_adjustment = doc_lambda[numpy.newaxis, :] - self._alpha_mu; 1557 | assert mean_adjustment.shape == (1, self._number_of_topics); 1558 | function_lambda += -0.5 * numpy.dot(numpy.dot(mean_adjustment, self._alpha_sigma_inv), mean_adjustment.T) 1559 | 1560 | function_lambda += -total_word_count * numpy.sum(exp_over_doc_zeta); 1561 | 1562 | return function_lambda; 1563 | 1564 | def first_derivative_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, total_word_count): 1565 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1566 | assert sum_phi.shape == (self._number_of_topics,) 1567 | 1568 | exp_over_doc_zeta = scipy.special.logsumexp( 1569 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1570 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1571 | assert exp_over_doc_zeta.shape == (self._number_of_topics,); 1572 | 1573 | if self._diagonal_covariance_matrix: 1574 | first_derivative_lambda = (self._alpha_mu - doc_lambda) / self._alpha_sigma; 1575 | else: 1576 | first_derivative_lambda = numpy.dot((self._alpha_mu - doc_lambda[numpy.newaxis, :]), self._alpha_sigma_inv)[ 1577 | 0, :] 1578 | 1579 | first_derivative_lambda += sum_phi; 1580 | first_derivative_lambda -= total_word_count * exp_over_doc_zeta; 1581 | assert first_derivative_lambda.shape == (self._number_of_topics,); 1582 | 1583 | return first_derivative_lambda 1584 | 1585 | def second_derivative_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1586 | exp_over_doc_zeta = scipy.special.logsumexp( 1587 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1588 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1589 | 1590 | if self._diagonal_covariance_matrix: 1591 | second_derivative_lambda = -1.0 / self._alpha_sigma 1592 | second_derivative_lambda -= total_word_count * exp_over_doc_zeta; 1593 | else: 1594 | second_derivative_lambda = -self._alpha_sigma_inv; 1595 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1596 | second_derivative_lambda -= total_word_count * numpy.diag(exp_over_doc_zeta); 1597 | assert second_derivative_lambda.shape == (self._number_of_topics, self._number_of_topics); 1598 | 1599 | return second_derivative_lambda 1600 | 1601 | def hessian_direction_approximation_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count, 1602 | direction_vector, epsilon=1e-6): 1603 | assert doc_lambda.shape == (self._number_of_topics,) 1604 | assert doc_nu_square.shape == (self._number_of_topics,) 1605 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1606 | assert direction_vector.shape == (self._number_of_topics,) 1607 | 1608 | log_exp_over_doc_zeta_a = scipy.special.logsumexp( 1609 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - direction_vector[:, 1610 | numpy.newaxis] * epsilon - 0.5 * doc_nu_square[:, 1611 | numpy.newaxis], axis=1) 1612 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 1613 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1614 | assert log_exp_over_doc_zeta_a.shape == (self._number_of_topics,) 1615 | assert log_exp_over_doc_zeta_b.shape 
== (self._number_of_topics,) 1616 | 1617 | # hessian_direction_lambda = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 1618 | hessian_direction_lambda = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 1619 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 1620 | 1621 | if self._diagonal_covariance_matrix: 1622 | hessian_direction_lambda = -direction_vector * epsilon / self._alpha_sigma; 1623 | else: 1624 | hessian_direction_lambda += -numpy.dot(direction_vector[numpy.newaxis, :] * epsilon, self._alpha_sigma_inv)[ 1625 | 0, :] 1626 | assert hessian_direction_lambda.shape == (self._number_of_topics,); 1627 | 1628 | hessian_direction_lambda /= epsilon; 1629 | 1630 | return hessian_direction_lambda 1631 | 1632 | def hessian_damping_direction_approximation_lambda(self, doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1633 | total_word_count, direction_vector, 1634 | damping_factor_initialization=0.1, damping_factor_iteration=10): 1635 | damping_factor_numerator = self.function_lambda(doc_lambda + direction_vector, doc_nu_square, doc_zeta_factor, 1636 | sum_phi, total_word_count); 1637 | damping_factor_numerator -= self.function_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, sum_phi, 1638 | total_word_count); 1639 | 1640 | hessian_direction_approximation = self.hessian_direction_approximation_lambda(doc_lambda, doc_nu_square, 1641 | doc_zeta_factor, total_word_count, 1642 | direction_vector); 1643 | 1644 | damping_factor_denominator_temp = self.first_derivative_lambda(doc_lambda, doc_nu_square, doc_zeta_factor, 1645 | sum_phi, total_word_count); 1646 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1647 | damping_factor_denominator_temp += 0.5 * hessian_direction_approximation; 1648 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1649 | 1650 | damping_factor_lambda = damping_factor_initialization; 1651 | for damping_factor_iteration_index in range(damping_factor_iteration): 1652 | damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector; 1653 | assert damping_factor_denominator.shape == (self._number_of_topics,); 1654 | damping_factor_denominator *= direction_vector 1655 | damping_factor_denominator = numpy.sum(damping_factor_denominator); 1656 | 1657 | damping_factor_rho = damping_factor_numerator / damping_factor_denominator 1658 | if damping_factor_rho < 0.25: 1659 | damping_factor_lambda *= 1.5 1660 | elif damping_factor_rho > 0.75: 1661 | damping_factor_lambda /= 1.5 1662 | else: 1663 | return hessian_direction_approximation + damping_factor_lambda * direction_vector; 1664 | 1665 | return hessian_direction_approximation 1666 | 1667 | # 1668 | # 1669 | # 1670 | # 1671 | # 1672 | 1673 | def hessian_free_nu_square(self, 1674 | doc_lambda, 1675 | doc_nu_square, 1676 | doc_zeta_factor, 1677 | total_word_count, 1678 | hessian_free_iteration=10, 1679 | hessian_free_decay_factor=0.9, 1680 | hessian_free_reset_interval=100 1681 | ): 1682 | for hessian_free_iteration_index in range(hessian_free_iteration): 1683 | delta_doc_nu_square = self.conjugate_gradient_delta_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1684 | total_word_count, self._number_of_topics); 1685 | 1686 | # delta_doc_nu_square /= numpy.sqrt(numpy.sum(delta_doc_nu_square**2)); 1687 | 1688 | conjugate_gradient_power_index = 0 1689 | step_alpha = numpy.power(hessian_free_decay_factor, 
conjugate_gradient_power_index); 1690 | while numpy.any(doc_nu_square + step_alpha * delta_doc_nu_square <= 0): 1691 | conjugate_gradient_power_index += 1; 1692 | step_alpha = numpy.power(hessian_free_decay_factor, conjugate_gradient_power_index); 1693 | if conjugate_gradient_power_index >= hessian_free_reset_interval: 1694 | print("power index larger than 100", delta_doc_nu_square) 1695 | step_alpha = 0; 1696 | break 1697 | 1698 | doc_nu_square += step_alpha * delta_doc_nu_square; 1699 | assert numpy.all(doc_nu_square > 0); 1700 | 1701 | return doc_nu_square; 1702 | 1703 | def conjugate_gradient_delta_nu_square(self, 1704 | doc_lambda, 1705 | doc_nu_square, 1706 | doc_zeta_factor, 1707 | total_word_count, 1708 | 1709 | conjugate_gradient_iteration=100, 1710 | conjugate_gradient_threshold=1e-6, 1711 | conjugate_gradient_decay_factor=0.9, 1712 | conjugate_gradient_reset_interval=100, 1713 | ): 1714 | doc_nu_square_copy = numpy.copy(doc_nu_square); 1715 | # delta_doc_nu_square = numpy.ones(self._number_of_topics); 1716 | delta_doc_nu_square = numpy.zeros(self._number_of_topics); 1717 | # delta_doc_nu_square = numpy.random.random(self._number_of_topics); 1718 | 1719 | r_vector = -self.first_derivative_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count); 1720 | # r_vector -= self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, delta_doc_nu_square, damping_coefficient=1); 1721 | r_vector -= self.hessian_damping_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, 1722 | doc_zeta_factor, total_word_count, 1723 | delta_doc_nu_square); 1724 | 1725 | p_vector = numpy.copy(r_vector); 1726 | 1727 | r_vector_square_old = numpy.sum(r_vector ** 2); 1728 | 1729 | for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration): 1730 | assert not numpy.any(numpy.isnan(doc_lambda)) 1731 | assert not numpy.any(numpy.isnan(doc_nu_square_copy)) 1732 | assert not numpy.any(numpy.isnan(doc_zeta_factor)) 1733 | assert not numpy.any(numpy.isnan(p_vector)); 1734 | 1735 | # hessian_p_vector = self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, p_vector, damping_coefficient=1); 1736 | hessian_p_vector = self.hessian_damping_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, 1737 | doc_zeta_factor, total_word_count, 1738 | p_vector); 1739 | assert not numpy.any(numpy.isnan(hessian_p_vector)) 1740 | 1741 | alpha_value = r_vector_square_old / numpy.sum(p_vector * hessian_p_vector); 1742 | assert not numpy.isnan(alpha_value), (r_vector_square_old, numpy.sum(p_vector * hessian_p_vector)) 1743 | 1744 | ''' 1745 | conjugate_gradient_power_index = 0 1746 | step_alpha = numpy.power(conjugate_gradient_decay_factor, conjugate_gradient_power_index); 1747 | while numpy.any(delta_doc_nu_square <= -step_alpha * alpha_value * p_vector): 1748 | conjugate_gradient_power_index += 1; 1749 | step_alpha = numpy.power(conjugate_gradient_decay_factor, conjugate_gradient_power_index); 1750 | if conjugate_gradient_power_index>=100: 1751 | print "power index larger than 100", delta_doc_nu_square, alpha_value * p_vector 1752 | break; 1753 | 1754 | delta_doc_nu_square += step_alpha * alpha_value * p_vector; 1755 | assert not numpy.any(numpy.isnan(delta_doc_nu_square)) 1756 | ''' 1757 | 1758 | # p_vector /= numpy.sqrt(numpy.sum(p_vector**2)); 1759 | 1760 | delta_doc_nu_square += alpha_value * p_vector; 1761 | assert not 
numpy.any(numpy.isnan(delta_doc_nu_square)), (alpha_value, p_vector) 1762 | 1763 | ''' 1764 | if conjugate_gradient_iteration_index % conjugate_gradient_reset_interval==0: 1765 | r_vector = -self.first_derivative_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count); 1766 | r_vector -= self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square_copy, doc_zeta_factor, total_word_count, delta_doc_nu_square); 1767 | else: 1768 | r_vector -= alpha_value * hessian_p_vector; 1769 | ''' 1770 | r_vector -= alpha_value * hessian_p_vector; 1771 | assert not numpy.any(numpy.isnan(r_vector)) 1772 | 1773 | r_vector_square_new = numpy.sum(r_vector ** 2); 1774 | assert not numpy.isnan(r_vector_square_new); 1775 | 1776 | if numpy.sqrt(r_vector_square_new) <= conjugate_gradient_threshold: 1777 | break; 1778 | 1779 | p_vector *= r_vector_square_new / r_vector_square_old; 1780 | assert not numpy.any(numpy.isnan(p_vector)) 1781 | p_vector += r_vector; 1782 | assert not numpy.any(numpy.isnan(p_vector)) 1783 | 1784 | r_vector_square_old = r_vector_square_new; 1785 | 1786 | return delta_doc_nu_square; 1787 | 1788 | def function_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1789 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1790 | 1791 | exp_over_doc_zeta = scipy.special.logsumexp( 1792 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1793 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1794 | 1795 | function_nu_square = 0.5 * numpy.sum(numpy.log(doc_nu_square)); 1796 | 1797 | if self._diagonal_covariance_matrix: 1798 | function_nu_square += -0.5 * numpy.sum(doc_nu_square / self._alpha_sigma) 1799 | else: 1800 | function_nu_square += -0.5 * numpy.sum(doc_nu_square * numpy.diag(self._alpha_sigma_inv)); 1801 | 1802 | function_nu_square += -total_word_count * numpy.sum(exp_over_doc_zeta); 1803 | 1804 | return function_nu_square; 1805 | 1806 | def first_derivative_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1807 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1808 | 1809 | exp_over_doc_zeta = scipy.special.logsumexp( 1810 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1811 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1812 | 1813 | if self._diagonal_covariance_matrix: 1814 | first_derivative_nu_square = -0.5 / self._alpha_sigma; 1815 | else: 1816 | first_derivative_nu_square = -0.5 * numpy.diag(self._alpha_sigma_inv); 1817 | first_derivative_nu_square += 0.5 / doc_nu_square; 1818 | first_derivative_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta; 1819 | 1820 | return first_derivative_nu_square; 1821 | 1822 | def second_derivative_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, total_word_count): 1823 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics); 1824 | 1825 | exp_over_doc_zeta = scipy.special.logsumexp( 1826 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1827 | exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta); 1828 | 1829 | second_derivative_nu_square = -0.5 / (doc_nu_square ** 2); 1830 | second_derivative_nu_square += -0.25 * total_word_count * exp_over_doc_zeta; 1831 | 1832 | return second_derivative_nu_square 1833 | 1834 | def hessian_direction_approximation_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, 
total_word_count, 1835 | direction_vector, epsilon=1e-6): 1836 | assert doc_lambda.shape == (self._number_of_topics,) 1837 | assert doc_nu_square.shape == (self._number_of_topics,) 1838 | assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics) 1839 | assert direction_vector.shape == (self._number_of_topics,) 1840 | 1841 | log_exp_over_doc_zeta_a = scipy.special.logsumexp(doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * ( 1842 | doc_nu_square[:, numpy.newaxis] + direction_vector[:, numpy.newaxis] * epsilon), axis=1) 1843 | log_exp_over_doc_zeta_b = scipy.special.logsumexp( 1844 | doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * doc_nu_square[:, numpy.newaxis], axis=1) 1845 | 1846 | # hessian_direction_nu_square = total_word_count * numpy.exp(numpy.log(1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) - log_exp_over_doc_zeta_b) 1847 | hessian_direction_nu_square = total_word_count * numpy.exp(-log_exp_over_doc_zeta_b) * ( 1848 | 1 - numpy.exp(log_exp_over_doc_zeta_b - log_exp_over_doc_zeta_a)) 1849 | 1850 | hessian_direction_nu_square += 0.5 / (doc_nu_square + epsilon * direction_vector); 1851 | hessian_direction_nu_square -= 0.5 / (doc_nu_square) 1852 | 1853 | hessian_direction_nu_square /= epsilon 1854 | 1855 | return hessian_direction_nu_square 1856 | 1857 | def hessian_damping_direction_approximation_nu_square(self, doc_lambda, doc_nu_square, doc_zeta_factor, 1858 | total_word_count, direction_vector, 1859 | damping_factor_initialization=0.1, 1860 | damping_factor_iteration=10): 1861 | damping_factor_numerator = self.function_nu_square(doc_lambda, doc_nu_square + direction_vector, 1862 | doc_zeta_factor, total_word_count); 1863 | damping_factor_numerator -= self.function_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1864 | total_word_count); 1865 | 1866 | hessian_direction_approximation = self.hessian_direction_approximation_nu_square(doc_lambda, doc_nu_square, 1867 | doc_zeta_factor, 1868 | total_word_count, 1869 | direction_vector); 1870 | 1871 | damping_factor_denominator_temp = self.first_derivative_nu_square(doc_lambda, doc_nu_square, doc_zeta_factor, 1872 | total_word_count); 1873 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1874 | damping_factor_denominator_temp += 0.5 * hessian_direction_approximation; 1875 | assert damping_factor_denominator_temp.shape == (self._number_of_topics,); 1876 | 1877 | damping_factor_lambda = damping_factor_initialization; 1878 | for damping_factor_iteration_index in range(damping_factor_iteration): 1879 | damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector; 1880 | assert damping_factor_denominator.shape == (self._number_of_topics,); 1881 | damping_factor_denominator *= direction_vector 1882 | damping_factor_denominator = numpy.sum(damping_factor_denominator); 1883 | 1884 | damping_factor_rho = damping_factor_numerator / damping_factor_denominator 1885 | if damping_factor_rho < 0.25: 1886 | damping_factor_lambda *= 1.5 1887 | elif damping_factor_rho > 0.75: 1888 | damping_factor_lambda /= 1.5 1889 | else: 1890 | return hessian_direction_approximation + damping_factor_lambda * direction_vector; 1891 | 1892 | return hessian_direction_approximation 1893 | 1894 | # 1895 | # 1896 | # 1897 | # 1898 | # 1899 | 1900 | def hessian_free_nu_square_in_log_space(self, 1901 | doc_lambda, 1902 | doc_nu_square, 1903 | doc_zeta_factor, 1904 | total_word_count, 1905 | 1906 | hessian_free_iteration=10, 1907 | 
1894 |     #
1895 |     #
1896 |     #
1897 |     #
1898 |     #
1899 |
1900 |     def hessian_free_nu_square_in_log_space(self,
1901 |             doc_lambda,
1902 |             doc_nu_square,
1903 |             doc_zeta_factor,
1904 |             total_word_count,
1905 |
1906 |             hessian_free_iteration=10,
1907 |             conjugate_gradient_threshold=1e-9,
1908 |             conjugate_gradient_reset_interval=100,
1909 |             ):
1910 |         for hessian_free_iteration_index in range(hessian_free_iteration):
1911 |             delta_doc_log_nu_square = self.conjugate_gradient_delta_log_nu_square(doc_lambda, doc_nu_square,
1912 |                 doc_zeta_factor, total_word_count,
1913 |                 self._number_of_topics);  # positional: binds to conjugate_gradient_iteration, i.e. at most number_of_topics CG iterations
1914 |
1915 |             # print "check point 1", numpy.sum(numpy.exp(delta_doc_log_nu_square)**2), numpy.sum(delta_doc_log_nu_square**2)
1916 |             # print numpy.sum(numpy.exp(delta_doc_log_nu_square)**2), numpy.exp(delta_doc_log_nu_square);
1917 |
1918 |             # delta_doc_log_nu_square /= numpy.sqrt(numpy.sum(delta_doc_log_nu_square**2));
1919 |
1920 |             doc_nu_square *= numpy.exp(delta_doc_log_nu_square);
1921 |
1922 |         return doc_nu_square;
1923 |
1924 |     '''
1925 |     nu_square must be greater than 0; conjugate gradient does not perform well on constrained optimization problems.
1926 |     Updating nu_square in log scale converts the constrained optimization problem into an unconstrained one.
1927 |     '''
1928 |
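    # Note on the reparameterisation used below (an interpretive comment, assuming the bound f from the
    # nu^2 methods above): with u_k = log nu_k^2, the chain rule gives df/du_k = nu_k^2 * df/d(nu_k^2),
    # and the entropy term 0.5 * sum_k log nu_k^2 becomes the linear term 0.5 * sum_k u_k, which is where
    # the constant +0.5 in first_derivative_log_nu_square comes from. Because u_k is unconstrained, the
    # conjugate gradient step below can be taken freely and then applied multiplicatively to nu^2.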
1929 |     def conjugate_gradient_delta_log_nu_square(self,
1930 |             doc_lambda,
1931 |             doc_nu_square,
1932 |             doc_zeta_factor,
1933 |             total_word_count,
1934 |
1935 |             conjugate_gradient_iteration=100,
1936 |             conjugate_gradient_threshold=1e-9,
1937 |             conjugate_gradient_reset_interval=100,
1938 |             precondition_hessian_matrix=True,
1939 |             ):
1940 |         doc_log_nu_square = numpy.log(doc_nu_square);
1941 |         # delta_doc_log_nu_square = numpy.random.random(self._number_of_topics);
1942 |         delta_doc_log_nu_square = numpy.zeros(self._number_of_topics);
1943 |         # delta_doc_log_nu_square = numpy.log(doc_nu_square);
1944 |
1945 |         if precondition_hessian_matrix:
1946 |             hessian_log_nu_square = self.second_derivative_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
1947 |                 total_word_count);
1948 |             if not numpy.all(numpy.isfinite(hessian_log_nu_square)):
1949 |                 return numpy.zeros(self._number_of_topics);
1950 |             M_inverse = 1.0 / hessian_log_nu_square;
1951 |             # print numpy.linalg.cond(hessian_log_nu_square), ">>>", numpy.linalg.cond(numpy.dot(numpy.diag(1.0/numpy.diag(hessian_log_nu_square)), hessian_log_nu_square)), ">>>", numpy.linalg.cond(numpy.dot(numpy.linalg.cholesky(hessian_log_nu_square), hessian_log_nu_square));
1952 |
1953 |         r_vector = -self.first_derivative_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
1954 |             total_word_count);
1955 |         # r_vector -= self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, delta_doc_log_nu_square);
1956 |         r_vector -= self.hessian_damping_direction_approximation_log_nu_square(doc_lambda, doc_log_nu_square,
1957 |             doc_zeta_factor, total_word_count,
1958 |             delta_doc_log_nu_square);
1959 |
1960 |         if precondition_hessian_matrix:
1961 |             z_vector = M_inverse * r_vector;
1962 |         else:
1963 |             z_vector = numpy.copy(r_vector);
1964 |
1965 |         p_vector = numpy.copy(z_vector);
1966 |         r_z_vector_square_old = numpy.sum(r_vector * z_vector);
1967 |
1968 |         for conjugate_gradient_iteration_index in range(conjugate_gradient_iteration):
1969 |             assert numpy.all(numpy.isfinite(doc_lambda)), (conjugate_gradient_iteration_index, doc_lambda)
1970 |             assert numpy.all(numpy.isfinite(doc_log_nu_square)), (conjugate_gradient_iteration_index, doc_log_nu_square)
1971 |             assert numpy.all(numpy.isfinite(doc_zeta_factor)), (conjugate_gradient_iteration_index, doc_zeta_factor)
1972 |             assert numpy.all(numpy.isfinite(r_vector)), (conjugate_gradient_iteration_index, r_vector, doc_nu_square,
1973 |                 -self.first_derivative_log_nu_square(doc_lambda,
1974 |                     doc_log_nu_square,
1975 |                     doc_zeta_factor,
1976 |                     total_word_count),
1977 |                 -self.hessian_direction_approximation_log_nu_square(doc_lambda,
1978 |                     doc_log_nu_square,
1979 |                     doc_zeta_factor,
1980 |                     total_word_count,
1981 |                     delta_doc_log_nu_square))
1982 |             assert numpy.all(numpy.isfinite(p_vector)), (conjugate_gradient_iteration_index, p_vector)
1983 |
1984 |             # hessian_p_vector = self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, p_vector);
1985 |             hessian_p_vector = self.hessian_damping_direction_approximation_log_nu_square(doc_lambda, doc_log_nu_square,
1986 |                 doc_zeta_factor,
1987 |                 total_word_count, p_vector);
1988 |
1989 |             alpha_value = r_z_vector_square_old / numpy.sum(p_vector * hessian_p_vector);
1990 |
1991 |             delta_doc_log_nu_square += alpha_value * p_vector;
1992 |             assert not numpy.any(numpy.isnan(delta_doc_log_nu_square))
1993 |
1994 |             '''
1995 |             if conjugate_gradient_iteration_index % conjugate_gradient_reset_interval==0:
1996 |                 r_vector = -self.first_derivative_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count);
1997 |                 r_vector -= self.hessian_direction_approximation_log_nu_square(doc_lambda, log_doc_nu_square, doc_zeta_factor, total_word_count, delta_doc_log_nu_square);
1998 |             else:
1999 |                 r_vector -= alpha_value * hessian_p_vector;
2000 |             '''
2001 |             r_vector -= alpha_value * hessian_p_vector;
2002 |             assert not numpy.any(numpy.isnan(r_vector)), (alpha_value, hessian_p_vector, r_vector)
2003 |
2004 |             if numpy.sqrt(numpy.sum(r_vector ** 2)) <= conjugate_gradient_threshold:
2005 |                 break;
2006 |
2007 |             if precondition_hessian_matrix:
2008 |                 z_vector = M_inverse * r_vector;
2009 |             else:
2010 |                 z_vector = numpy.copy(r_vector);
2011 |
2012 |             r_z_vector_square_new = numpy.sum(r_vector * z_vector);
2013 |
2014 |             p_vector *= r_z_vector_square_new / r_z_vector_square_old;
2015 |             assert not numpy.any(numpy.isnan(p_vector))
2016 |
2017 |             p_vector += z_vector;
2018 |             assert not numpy.any(numpy.isnan(p_vector))
2019 |
2020 |             r_z_vector_square_old = r_z_vector_square_new;
2021 |
2022 |         return delta_doc_log_nu_square;
2023 |
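# A compact standalone sketch of the Jacobi-preconditioned conjugate gradient recurrence that
# conjugate_gradient_delta_log_nu_square above follows (the r / z / p vectors, with alpha and beta taken
# from the r'z inner products). Here H is an explicit symmetric positive definite matrix and M is its
# diagonal; in the method above the Hessian is only available through Hessian-vector products, and the
# diagonal comes from second_derivative_log_nu_square. All names below are illustrative assumptions.
import numpy


def preconditioned_conjugate_gradient(H, b, iterations=100, threshold=1e-9):
    x = numpy.zeros(len(b))
    r = b - H.dot(x)                    # residual, analogous to r_vector
    M_inverse = 1.0 / numpy.diag(H)     # Jacobi (diagonal) preconditioner, analogous to M_inverse
    z = M_inverse * r
    p = numpy.copy(z)
    rz_old = numpy.sum(r * z)           # analogous to r_z_vector_square_old
    for _ in range(iterations):
        Hp = H.dot(p)                   # analogous to hessian_p_vector
        alpha = rz_old / numpy.sum(p * Hp)
        x += alpha * p
        r -= alpha * Hp
        if numpy.sqrt(numpy.sum(r ** 2)) <= threshold:
            break
        z = M_inverse * r
        rz_new = numpy.sum(r * z)
        p = z + (rz_new / rz_old) * p   # same update as p_vector *= beta; p_vector += z_vector
        rz_old = rz_new
    return x


H = numpy.array([[4.0, 1.0], [1.0, 3.0]])
b = numpy.array([1.0, 2.0])
print(preconditioned_conjugate_gradient(H, b))  # approximately numpy.linalg.solve(H, b)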
2024 |     def function_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2025 |         return self.function_nu_square(doc_lambda, numpy.exp(doc_log_nu_square), doc_zeta_factor, total_word_count);
2026 |
2027 |     def first_derivative_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2028 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2029 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics);
2030 |
2031 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2032 |
2033 |         exp_over_doc_zeta = scipy.special.logsumexp(
2034 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2035 |         exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta);
2036 |
2037 |         if self._diagonal_covariance_matrix:
2038 |             first_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma;
2039 |         else:
2040 |             first_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv);
2041 |         first_derivative_log_nu_square += 0.5
2042 |         first_derivative_log_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square;
2043 |
2044 |         return first_derivative_log_nu_square;
2045 |
2046 |     def second_derivative_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor, total_word_count):
2047 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2048 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics);
2049 |
2050 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2051 |
2052 |         exp_over_doc_zeta = scipy.special.logsumexp(
2053 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2054 |         exp_over_doc_zeta = numpy.exp(-exp_over_doc_zeta);
2055 |
2056 |         if self._diagonal_covariance_matrix:
2057 |             second_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square / self._alpha_sigma;
2058 |         else:
2059 |             second_derivative_log_nu_square = -0.5 * exp_doc_log_nu_square * numpy.diag(self._alpha_sigma_inv);
2060 |         second_derivative_log_nu_square -= 0.5 * total_word_count * exp_over_doc_zeta * exp_doc_log_nu_square * (
2061 |             1 + 0.5 * exp_doc_log_nu_square);
2062 |
2063 |         return second_derivative_log_nu_square;
2064 |
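    # Written out explicitly (assuming, as before, that exp_over_doc_zeta[k] reduces to
    # exp(lambda_k + exp(u_k) / 2) / zeta, with u_k = log nu_k^2 and N = total_word_count), the two
    # derivative methods above evaluate
    #
    #   df/du_k    =  0.5  -  0.5 * exp(u_k) * SigmaInv_kk
    #                      -  (N / 2) * exp(u_k) * exp(lambda_k + exp(u_k) / 2) / zeta
    #   d2f/du_k^2 =       -  0.5 * exp(u_k) * SigmaInv_kk
    #                      -  (N / 2) * exp(u_k) * (1 + exp(u_k) / 2) * exp(lambda_k + exp(u_k) / 2) / zeta
    #
    # The diagonal second derivative is what conjugate_gradient_delta_log_nu_square uses to build its Jacobi
    # preconditioner (M_inverse = 1.0 / hessian_log_nu_square), hence the guard against non-finite entries there.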
2065 |     def hessian_direction_approximation_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor,
2066 |             total_word_count, direction_vector, epsilon=1e-6):
2067 |         assert doc_lambda.shape == (self._number_of_topics,)
2068 |         assert doc_log_nu_square.shape == (self._number_of_topics,)
2069 |         assert doc_zeta_factor.shape == (self._number_of_topics, self._number_of_topics)
2070 |         assert direction_vector.shape == (self._number_of_topics,)
2071 |
2072 |         exp_doc_log_nu_square = numpy.exp(doc_log_nu_square);
2073 |         exp_doc_log_nu_square_epsilon_direction = numpy.exp(doc_log_nu_square + direction_vector * epsilon);
2074 |
2075 |         log_exp_over_doc_zeta_epsilon_direction = scipy.special.logsumexp(
2076 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square_epsilon_direction[:,
2077 |                 numpy.newaxis], axis=1)
2078 |         log_exp_over_doc_zeta = scipy.special.logsumexp(
2079 |             doc_zeta_factor - doc_lambda[:, numpy.newaxis] - 0.5 * exp_doc_log_nu_square[:, numpy.newaxis], axis=1)
2080 |
2081 |         # hessian_direction_log_nu_square = 0.5 * total_word_count * numpy.exp(log_doc_nu_square - log_exp_over_doc_zeta)
2082 |         # hessian_direction_log_nu_square += - 0.5 * total_word_count * numpy.exp(log_doc_nu_square + direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction)
2083 |
2084 |         hessian_direction_log_nu_square = 1 - numpy.exp(
2085 |             direction_vector * epsilon - log_exp_over_doc_zeta_epsilon_direction + log_exp_over_doc_zeta)
2086 |         hessian_direction_log_nu_square *= 0.5 * total_word_count * numpy.exp(doc_log_nu_square - log_exp_over_doc_zeta)
2087 |
2088 |         if self._diagonal_covariance_matrix:
2089 |             hessian_direction_log_nu_square += 0.5 * (
2090 |                 exp_doc_log_nu_square - exp_doc_log_nu_square_epsilon_direction) / self._alpha_sigma;
2091 |         else:
2092 |             hessian_direction_log_nu_square += 0.5 * (
2093 |                 exp_doc_log_nu_square - exp_doc_log_nu_square_epsilon_direction) * numpy.diag(
2094 |                 self._alpha_sigma_inv);
2095 |
2096 |         hessian_direction_log_nu_square /= epsilon
2097 |
2098 |         return hessian_direction_log_nu_square
2099 |
2100 |     def hessian_damping_direction_approximation_log_nu_square(self, doc_lambda, doc_log_nu_square, doc_zeta_factor,
2101 |             total_word_count, direction_vector,
2102 |             damping_factor_initialization=0.1,
2103 |             damping_factor_iteration=10):
2104 |         # print "=========="
2105 |         # print log_doc_nu_square + direction_vector, numpy.exp(log_doc_nu_square + direction_vector)
2106 |         # print log_doc_nu_square, numpy.exp(log_doc_nu_square);
2107 |
2108 |         damping_factor_numerator = self.function_log_nu_square(doc_lambda, doc_log_nu_square + direction_vector,
2109 |             doc_zeta_factor, total_word_count);
2110 |         damping_factor_numerator -= self.function_log_nu_square(doc_lambda, doc_log_nu_square, doc_zeta_factor,
2111 |             total_word_count);
2112 |
2113 |         hessian_direction_approximation = self.hessian_direction_approximation_log_nu_square(doc_lambda,
2114 |             doc_log_nu_square,
2115 |             doc_zeta_factor,
2116 |             total_word_count,
2117 |             direction_vector);
2118 |
2119 |         damping_factor_denominator_temp = self.first_derivative_log_nu_square(doc_lambda, doc_log_nu_square,
2120 |             doc_zeta_factor, total_word_count);
2121 |         assert damping_factor_denominator_temp.shape == (self._number_of_topics,);
2122 |         damping_factor_denominator_temp += 0.5 * hessian_direction_approximation;
2123 |         assert damping_factor_denominator_temp.shape == (self._number_of_topics,);
2124 |
2125 |         damping_factor_lambda = damping_factor_initialization;
2126 |         for damping_factor_iteration_index in range(damping_factor_iteration):
2127 |             damping_factor_denominator = damping_factor_denominator_temp + 0.5 * damping_factor_lambda * direction_vector;
2128 |             assert damping_factor_denominator.shape == (self._number_of_topics,);
2129 |             damping_factor_denominator *= direction_vector
2130 |             damping_factor_denominator = numpy.sum(damping_factor_denominator);
2131 |
2132 |             # print "check point 2", damping_factor_numerator, damping_factor_denominator
2133 |             damping_factor_rho = damping_factor_numerator / damping_factor_denominator
2134 |             if damping_factor_rho < 0.25:
2135 |                 damping_factor_lambda *= 1.5
2136 |             elif damping_factor_rho > 0.75:
2137 |                 damping_factor_lambda /= 1.5
2138 |             else:
2139 |                 return hessian_direction_approximation + damping_factor_lambda * direction_vector;
2140 |
2141 |         # print damping_factor_numerator, damping_factor_denominator, damping_factor_lambda
2142 |         # print "check point 1", hessian_direction_approximation, hessian_direction_approximation + damping_factor_lambda * direction_vector
2143 |
2144 |         damping_factor_lambda = damping_factor_initialization;
2145 |
2146 |         return hessian_direction_approximation + damping_factor_lambda * direction_vector;
2147 |
2148 |     #
2149 |     #
2150 |     #
2151 |     #
2152 |     #
2153 |
2154 |
2155 | if __name__ == "__main__":
2156 |     print("not implemented...")
2157 |
--------------------------------------------------------------------------------