├── BayesNet.cpp
├── Factor.cpp
├── FactorGraph.cpp
├── MarkovNet.cpp
├── README.md
└── ocr.cpp

/BayesNet.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <utility>
#include <algorithm>
#include <numeric>
#include <functional>
#include <cmath>

#include "Factor.cpp"
#include "MarkovNet.cpp"

using namespace std;

class BayesNet
{
    public:
        BayesNet(int num_nodes, vector<string> node_var_names, vector<vector<string> > node_vals, vector<vector<int> > adj_list, vector<Factor> CPT);
        BayesNet(string bif_file);
        int num_nodes;
        vector<string> node_var_names;
        map<string,int> node_name_to_index;
        vector<map<string,int> > node_val_to_index;
        vector<vector<string> > node_vals;
        vector<vector<int> > adj_list;
        vector<vector<int> > rev_adj_list;
        vector<Factor> CPT;

        vector<vector<int> > read_data_file(string filename);
        void inference_on_file(string test_filename, string gnd_filename, string out_filename, MarkovNet mn);
        void write_params_to_file(string filename, bool write_as_markov_net);
        void print();

        MarkovNet moralize_bn();

        // parameter learning
        void learn_parameters(vector<vector<int> > train_data);

    private:
        void init_bn(int num_nodes, vector<string> node_var_names, vector<vector<string> > node_vals, vector<vector<int> > adj_list, vector<Factor> CPT);

        // for parameter learning
        void normalize_CPT_from_counts();
};

BayesNet::BayesNet(int nn, vector<string> nvn, vector<vector<string> > nv, vector<vector<int> > al, vector<Factor> cpt)
{
    // CPTs are stored as Factors such that the last variable is the child of the remaining variables in the BayesNet
    init_bn(nn, nvn, nv, al, cpt);
}

void BayesNet::init_bn(int nn, vector<string> nvn, vector<vector<string> > nv, vector<vector<int> > al, vector<Factor> cpt)
{
    num_nodes = nn;
    node_var_names = nvn;
    node_vals = nv;
    adj_list = al;
    CPT = cpt;
    rev_adj_list = vector<vector<int> >(nn, vector<int>());

    for (int i = 0 ; i < node_var_names.size() ; i++)
        node_name_to_index.insert(pair<string,int>(node_var_names[i], i));

    for (int i = 0 ; i < node_vals.size() ; i++)
    {
        node_val_to_index.push_back(map<string,int>());
        for (int j = 0 ; j < node_vals[i].size() ; j++)
            node_val_to_index[i].insert(pair<string,int>(node_vals[i][j], j));
    }

    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < adj_list[i].size() ; j++)
            rev_adj_list[adj_list[i][j]].push_back(i);
}

void BayesNet::learn_parameters(vector<vector<int> > train_data)
{
    // assumes all CPT values are set to 1.0 initially (for smoothing)
    for (int i = 0 ; i < CPT.size() ; i++)
    {
        vector<int> assignment_map;
        for (int j = 0 ; j < CPT[i].num_vars ; j++)
            assignment_map.push_back(node_name_to_index[CPT[i].vars_name[j]]);

        for (int j = 0 ; j < train_data.size() ; j++)
        {
            vector<int> assignment;
            for (int k = 0 ; k < assignment_map.size() ; k++)
                assignment.push_back(train_data[j][assignment_map[k]]);

            CPT[i].potentials[CPT[i].flat_index_from_assignment(assignment)] += 1.0;
        }
    }

    normalize_CPT_from_counts();
}
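// A note on the CPT layout assumed above (an illustrative sketch; the variable
// names and numbers below are made up, not from the assignment data): a CPT
// P(C | A, B) is stored as a Factor whose variable list ends with the child C,
// and whose potentials are the table flattened in row-major order, so the
// child's value varies fastest:
//
//   Factor cpt(3, vector<string>{"A", "B", "C"},
//              vector<int>{2, 2, 2},
//              vector<double>{0.9, 0.1,     // P(C | A=0, B=0)
//                             0.4, 0.6,     // P(C | A=0, B=1)
//                             0.7, 0.3,     // P(C | A=1, B=0)
//                             0.2, 0.8});   // P(C | A=1, B=1)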
void BayesNet::normalize_CPT_from_counts()
{
    // each CPT is the distribution of its last variable given the remaining variables;
    // to normalize each CPT after counting from data, normalize within each CPT
    // for every assignment to the remaining variables

    for (int i = 0 ; i < CPT.size() ; i++)
    {
        vector<int> assignment_to_remaining(CPT[i].num_vars - 1, 0);
        int tot_assn_to_rem = accumulate(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end() - 1, 1, multiplies<int>()); // total assignments to the remaining variables
        vector<int> full_assignment;

        for (int j = 0 ; j < tot_assn_to_rem ; j++)
        {
            double denominator = 0;
            for (int k = 0 ; k < CPT[i].num_vals_vars[CPT[i].num_vars-1] ; k++) // cycle through all values
            {
                full_assignment = assignment_to_remaining;
                full_assignment.push_back(k);
                denominator += CPT[i].pot_at(full_assignment);
            }

            // normalize
            for (int k = 0 ; k < CPT[i].num_vals_vars[CPT[i].num_vars-1] ; k++)
            {
                full_assignment = assignment_to_remaining;
                full_assignment.push_back(k);
                CPT[i].potentials[CPT[i].flat_index_from_assignment(full_assignment)] /= denominator;
            }

            CPT[i].increment(assignment_to_remaining, vector<int>(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end() - 1));
        }
    }
}

MarkovNet BayesNet::moralize_bn()
{
    vector<vector<int> > mn_adj_list = adj_list;

    // making edges undirected
    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < rev_adj_list[i].size() ; j++)
            mn_adj_list[i].push_back(rev_adj_list[i][j]);

    // add edges between parents ("marrying" the parents of each node)
    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < rev_adj_list[i].size() ; j++)
            for (int k = j + 1 ; k < rev_adj_list[i].size() ; k++)
                if (find(mn_adj_list[rev_adj_list[i][j]].begin(), mn_adj_list[rev_adj_list[i][j]].end(), rev_adj_list[i][k]) == mn_adj_list[rev_adj_list[i][j]].end())
                {
                    mn_adj_list[rev_adj_list[i][j]].push_back(rev_adj_list[i][k]);
                    mn_adj_list[rev_adj_list[i][k]].push_back(rev_adj_list[i][j]);
                }

    vector<int> mn_node_num_vals;
    for (int i = 0 ; i < num_nodes ; i++)
        mn_node_num_vals.push_back(node_vals[i].size());

    return MarkovNet(num_nodes, node_var_names, mn_node_num_vals, mn_adj_list, CPT);
}
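// A hedged illustration of what moralize_bn does on a v-structure A -> C <- B
// (hypothetical three-node net):
//   BN adjacency : A: {C}, B: {C}, C: {}
// making the edges undirected and marrying the co-parents A and B gives
//   MN adjacency : A: {C, B}, B: {C, A}, C: {A, B}
// The CPTs are carried over unchanged as the initial factors of the MarkovNet.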
BayesNet::BayesNet(string bif_file)
{
    ifstream infile(bif_file);
    string line;
    int num_nodes = 0;
    vector<string> node_var_names;
    map<string,int> node_name_to_index;
    vector<vector<string> > node_vals;
    vector<vector<int> > adj_list;
    vector<Factor> cpt;

    if (infile.is_open())
    {
        string temp = "";

        while (true)
        {
            getline(infile, line);
            stringstream ss(line);
            ss >> temp;
            if (temp == "probability") break;
            else if (temp != "variable") continue;
            else // variable block
            {
                ss >> temp;
                node_var_names.push_back(temp);
                node_name_to_index.insert(pair<string,int>(temp, num_nodes));
                node_vals.push_back(vector<string>());
                num_nodes++;
                getline(infile, line);
                ss = stringstream(line);
                while (temp != "{") ss >> temp;
                while (true)
                {
                    ss >> temp;
                    if (temp == "};") break;
                    if (temp[temp.size()-1] == ',') node_vals[num_nodes-1].push_back(temp.substr(0, temp.size()-1));
                    else node_vals[num_nodes-1].push_back(temp);
                }
            }
        }

        adj_list = vector<vector<int> >(num_nodes, vector<int>());

        while (true)
        {
            string prob_of;
            stringstream ss(line);
            ss >> temp;
            if (temp == "probability")
            {
                vector<string> cur_factor_vars_name;
                ss >> temp; ss >> temp; // skip "(" and read the child variable
                prob_of = temp;

                while (true)
                {
                    ss >> temp;
                    if (temp == ")") break;
                    if (temp == "|") continue;
                    if (temp[temp.size()-1] == ',') cur_factor_vars_name.push_back(temp.substr(0, temp.size()-1));
                    else cur_factor_vars_name.push_back(temp);
                }

                cur_factor_vars_name.push_back(prob_of); // child is the last variable

                vector<int> cur_factor_vars_id;
                for (int i = 0 ; i < cur_factor_vars_name.size() ; i++)
                    cur_factor_vars_id.push_back(node_name_to_index[cur_factor_vars_name[i]]);
                vector<int> cur_factor_num_vals_vars;
                int tot_pots = 1;
                for (int i = 0 ; i < cur_factor_vars_id.size() ; i++)
                {
                    tot_pots *= node_vals[cur_factor_vars_id[i]].size();
                    cur_factor_num_vals_vars.push_back(node_vals[cur_factor_vars_id[i]].size());
                }

                // add cpt
                cpt.push_back(Factor(cur_factor_vars_name.size(), cur_factor_vars_name, cur_factor_num_vals_vars, vector<double>(tot_pots, 1.0))); // 1.0 for smoothing

                // modify adjacency list : an edge from each parent to the child (the last variable)
                for (int i = 0 ; i < (int)cur_factor_vars_id.size() - 1 ; i++)
                    adj_list[cur_factor_vars_id[i]].push_back(cur_factor_vars_id[cur_factor_vars_id.size()-1]);
            }

            if (not getline(infile, line)) break;
        }
    }
    infile.close();
    init_bn(num_nodes, node_var_names, node_vals, adj_list, cpt);
}

void BayesNet::inference_on_file(string test_filename, string gnd_filename, string out_filename, MarkovNet mn)
{
    ifstream testfile(test_filename);
    ifstream gndfile(gnd_filename);
    ofstream ofile;
    ofile.open(out_filename);
    string test_line;
    string gnd_line;
    string tempt;
    string tempg;
    double total_missing_vars = 0;
    double total_correct_preds = 0;
    double total_ll = 0;
    int num_queries = 0;

    getline(testfile, test_line);
    getline(gndfile, gnd_line);

    stringstream ss(test_line);

    vector<int> node_id_order;
    for (int i = 0 ; i < num_nodes ; i++)
    {
        ss >> tempt;
        node_id_order.push_back(node_name_to_index[tempt]);
    }

    while (getline(testfile, test_line))
    {
        num_queries++;
        getline(gndfile, gnd_line);
        stringstream sst(test_line);
        stringstream ssg(gnd_line);
        vector<int> query(num_nodes, -1);
        vector<int> gnd_assignment(num_nodes, -1);

        for (int i = 0 ; i < num_nodes ; i++)
        {
            sst >> tempt;
            ssg >> tempg;

            if (tempt != "?") query[node_id_order[i]] = node_val_to_index[node_id_order[i]][tempt];

            gnd_assignment[node_id_order[i]] = node_val_to_index[node_id_order[i]][tempg];
        }

        pair<vector<double>, vector<vector<double> > > inference_results = mn.inference_by_sampling(query, gnd_assignment);
        total_missing_vars += inference_results.first[0];
        total_correct_preds += inference_results.first[1];
        total_ll += inference_results.first[2];

        // write marginal probabilities of missing assignments to file
        for (int i = 0 ; i < query.size() ; i++)
            if (query[i] == -1)
            {
                ofile << node_var_names[i] << " ";
                for (int j = 0 ; j < node_vals[i].size() ; j++)
                    ofile << node_vals[i][j] << ":" << inference_results.second[i][j] << " ";
                ofile << endl;
            }
        ofile << endl;
    }

    testfile.close();
    gndfile.close();
    ofile.close();

    cout << "Number of Queries : " << num_queries << endl;
    cout << "Total Missing Vars : " << (int) total_missing_vars << endl;
    cout << "Total Correct Preds : " << (int) total_correct_preds << endl;
    cout << "Percentage Correct : " << total_correct_preds*100.0/total_missing_vars << endl;
    cout << "Avg Loglikelihood per query : " << total_ll/num_queries << endl;
    cout << "Avg Loglikelihood per query var : " << total_ll/total_missing_vars << endl;
}
vector<vector<int> > BayesNet::read_data_file(string filename)
{
    ifstream infile(filename);
    string line;
    string temp;
    vector<vector<int> > data;
    int dat_points = 0;

    getline(infile, line);
    stringstream ss(line);

    vector<int> node_id_order;
    for (int i = 0 ; i < num_nodes ; i++)
    {
        ss >> temp;
        node_id_order.push_back(node_name_to_index[temp]);
    }

    while (getline(infile, line))
    {
        ss = stringstream(line);
        data.push_back(vector<int>(num_nodes, -1));
        for (int i = 0 ; i < num_nodes ; i++)
        {
            ss >> temp;
            data[dat_points][node_id_order[i]] = node_val_to_index[node_id_order[i]][temp];
        }

        dat_points++;
    }
    infile.close();

    return data;
}

void BayesNet::write_params_to_file(string filename, bool write_as_markov_net)
{
    ofstream ofile;
    ofile.open(filename);

    ofile << "network unknown {" << endl << "}" << endl;

    for (int i = 0 ; i < num_nodes ; i++)
    {
        ofile << "variable " << node_var_names[i] << " {" << endl << "  type discrete [ " << node_vals[i].size() << " ] { ";
        for (int j = 0 ; j < node_vals[i].size() ; j++)
        {
            ofile << node_vals[i][j];
            if (j != node_vals[i].size()-1) ofile << ",";
            ofile << " ";
        }

        ofile << "};" << endl << "}" << endl;
    }

    for (int i = 0 ; i < CPT.size() ; i++)
    {
        ofile << "probability ( ";

        if (not write_as_markov_net) // write as BN
        {
            ofile << CPT[i].vars_name[CPT[i].num_vars-1];
            if (CPT[i].num_vars > 1)
            {
                ofile << " | ";

                for (int j = 0 ; j < CPT[i].num_vars-1 ; j++)
                {
                    ofile << CPT[i].vars_name[j];
                    if (j != CPT[i].num_vars-2) ofile << ",";
                    ofile << " ";
                }

                ofile << ") {" << endl;
            }
            else ofile << " ) {" << endl;

            if (CPT[i].num_vars == 1)
            {
                ofile << "  table ";
                for (int j = 0 ; j < CPT[i].potentials.size() ; j++)
                {
                    ofile << CPT[i].potentials[j];
                    if (j != CPT[i].potentials.size() - 1) ofile << ", ";
                    else ofile << ";" << endl;
                }
            }

            else
            {
                vector<int> assignment_to_remaining(CPT[i].num_vars-1, 0);
                int tot_assn_to_rem = accumulate(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end()-1, 1, multiplies<int>()); // total assignments to the remaining variables
                vector<int> full_assignment;

                for (int j = 0 ; j < tot_assn_to_rem ; j++)
                {
                    ofile << "  (";
                    for (int k = 0 ; k < assignment_to_remaining.size() ; k++)
                    {
                        ofile << node_vals[node_name_to_index[CPT[i].vars_name[k]]][assignment_to_remaining[k]];
                        if (k != assignment_to_remaining.size()-1) ofile << ", ";
                        else ofile << ") ";
                    }

                    for (int k = 0 ; k < CPT[i].num_vals_vars[CPT[i].num_vars-1] ; k++) // cycle through all values
                    {
                        full_assignment = assignment_to_remaining;
                        full_assignment.push_back(k);
                        ofile << CPT[i].pot_at(full_assignment);
                        if (k != CPT[i].num_vals_vars[CPT[i].num_vars-1]-1) ofile << ", ";
                        else ofile << ";" << endl;
                    }

                    CPT[i].increment(assignment_to_remaining, vector<int>(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end()-1));
                }
            }
        }
        else // write as markov net
        {
            for (int j = 0 ; j < CPT[i].num_vars ; j++)
            {
                ofile << CPT[i].vars_name[j];
                if (j != CPT[i].num_vars-1) ofile << ",";
                ofile << " ";
            }
            ofile << ") {" << endl;

            vector<int> assignment(CPT[i].num_vars, 0);
            int tot_assn = accumulate(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end(), 1, multiplies<int>());
            for (int j = 0 ; j < tot_assn ; j++)
            {
                ofile << "  (";
                for (int k = 0 ; k < assignment.size() ; k++)
                {
                    ofile << node_vals[node_name_to_index[CPT[i].vars_name[k]]][assignment[k]];
                    if (k != assignment.size()-1) ofile << ", ";
                    else ofile << ") ";
                }

                ofile << CPT[i].pot_at(assignment) << ";" << endl;

                CPT[i].increment(assignment, vector<int>(CPT[i].num_vals_vars.begin(), CPT[i].num_vals_vars.end()));
            }
        }
        ofile << "}" << endl;
    }

    ofile.close();
}

void BayesNet::print()
{
    // printing details
    cout << "Num Nodes : " << num_nodes << endl;
    for (int i = 0 ; i < num_nodes ; i++)
    {
        cout << "Node " << i << " : " << node_var_names[i] << endl;
        cout << "Node " << i << " vals : ";
        for (int j = 0 ; j < node_vals[i].size() ; j++)
            cout << node_vals[i][j] << " ";
        cout << endl;
    }
    cout << endl;

    // adj list
    cout << "Adjacency List" << endl;
    for (int i = 0 ; i < num_nodes ; i++)
    {
        cout << i << " : ";
        for (int j = 0 ; j < adj_list[i].size() ; j++)
            cout << adj_list[i][j] << " ";
        cout << endl;
    }
    cout << endl;

    // reverse adj list
    cout << "Reverse Adjacency List" << endl;
    for (int i = 0 ; i < num_nodes ; i++)
    {
        cout << i << " : ";
        for (int j = 0 ; j < rev_adj_list[i].size() ; j++)
            cout << rev_adj_list[i][j] << " ";
        cout << endl;
    }
    cout << endl;

    cout << "CPTs (" << CPT.size() << " in all)" << endl;
    cout << "==================" << endl << endl;
    for (int i = 0 ; i < CPT.size() ; i++)
        CPT[i].print();
}

int main()
{
    BayesNet bn = BayesNet("../A3-data/insurance.bif");
    vector<vector<int> > train_data = bn.read_data_file("../A3-data/insurance.dat");

    bn.learn_parameters(train_data);

    // bn.write_params_to_file("bn_params.bif", false);
    MarkovNet mn = bn.moralize_bn();

    // mn.learn_parameters(train_data, 0.1, 1.0, 0.005, 100);
    bn.CPT = mn.get_factors(); // transfer potentials back for writing to file
    bn.write_params_to_file("../A3-data/mn_params.txt", true);

    bn.inference_on_file("../A3-data/insurance_test.dat", "../A3-data/insurance_TrueValues.dat", "../A3-data/bn_mp.mn.out", mn);

    // MarkovNet mn = bn.moralize_bn();
    // vector<vector<int> > samples = mn.gibbs_sampler(vector<int>(mn.num_nodes,-1), 3000, 100, 2000, 20000, 0.01);

    // vector<vector<int> > train_data = bn.read_data_file("../A3-data/insurance_small.dat");
    // bn.learn_parameters(train_data);
    // bn.print();
}

--------------------------------------------------------------------------------
/Factor.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>
#include <numeric>
#include <functional>
#include <cstdlib>

using namespace std;

#ifndef FACTOR_CPP
#define FACTOR_CPP

class Factor
{
    // Factor table, stored as a flattened vector
    public:
        Factor(int num_vars, vector<string> vars_name, vector<int> num_vals_vars, vector<double> potentials);
        Factor(); // unity factor
        double pot_at(vector<int> indices) const;
        int flat_index_from_assignment(vector<int>& assignment) const;
        void print();

        int num_vars;
        vector<string> vars_name;
        vector<int> num_vals_vars;
        vector<double> potentials;

        // operations
        Factor operator*(const Factor& f1);
        Factor operator/(const Factor& f1);
        Factor sum_out(string var_name);
        Factor max_out(string var_name);
        void normalize();

        // for sampling
        int var_sample(string var_to_sample, map<string,int> assignment_map);

        // auxiliary function for incrementing an assignment given the number of values taken by each variable
        void increment(vector<int>& assignment, const vector<int>& num_vals);
};

Factor::Factor(int nv, vector<string> vn, vector<int> nvv, vector<double> pot)
{
    num_vars = nv;
    vars_name = vn;
    num_vals_vars = nvv;
    potentials = pot;
}

Factor::Factor()
{
    // creates a unity factor
    num_vars = 0;
    vars_name = vector<string>();
    num_vals_vars = vector<int>();
    potentials = vector<double>{1};
}

int Factor::flat_index_from_assignment(vector<int>& assignment) const
{
    int flat_index = 0;
    int mult = 1;
    for (int i = assignment.size() - 1 ; i >= 0 ; i--)
    {
        flat_index += mult * assignment[i];
        mult *= num_vals_vars[i];
    }

    return flat_index;
}

double Factor::pot_at(vector<int> assignment) const
{
    int flat_index = flat_index_from_assignment(assignment);

    return potentials[flat_index];
}
void Factor::print()
{
    for (int i = 0 ; i < num_vars ; i++)
        cout << vars_name[i] << '\t';
    cout << endl << endl;

    vector<int> index(num_vars, 0);
    for (int i = 0 ; i < potentials.size() ; i++)
    {
        for (int j = 0 ; j < index.size() ; j++)
            cout << index[j] << '\t';
        cout << pot_at(index) << endl;
        increment(index, this->num_vals_vars);
    }

    cout << endl;
}

void Factor::increment(vector<int>& assignment, const vector<int>& num_vals)
{
    bool next;
    for (int i = assignment.size() - 1 ; i >= 0 ; i--)
    {
        next = false;
        if ((assignment[i] + 1) == num_vals[i]) next = true;
        assignment[i] = (assignment[i] + 1) % num_vals[i];
        if (not next) break;
    }
}
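// Worked example of the row-major indexing used by flat_index_from_assignment
// (illustrative): for num_vals_vars = {2, 3}, assignment {a, b} maps to the
// flat index a*3 + b, i.e. the last variable varies fastest:
//   {0,0}->0  {0,1}->1  {0,2}->2  {1,0}->3  {1,1}->4  {1,2}->5
// increment() above walks assignments in exactly this order, which is what
// makes the pot_at / increment iteration pattern used throughout consistent.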
Factor Factor::operator*(const Factor& f1)
{
    set<string> all_vars;
    all_vars.insert(this->vars_name.begin(), this->vars_name.end());
    all_vars.insert(f1.vars_name.begin(), f1.vars_name.end());
    vector<string> vars_union(all_vars.begin(), all_vars.end());
    vector<int> new_num_vals_vars;
    vector<double> new_pots;

    // constructing new_num_vals_vars (inefficient)
    vector<string>::iterator it;
    for (int i = 0 ; i < vars_union.size() ; i++)
    {
        it = find(this->vars_name.begin(), this->vars_name.end(), vars_union[i]);
        if (it != this->vars_name.end())
        {
            new_num_vals_vars.push_back(this->num_vals_vars[it - this->vars_name.begin()]);
            continue;
        }
        else
        {
            new_num_vals_vars.push_back(f1.num_vals_vars[find(f1.vars_name.begin(), f1.vars_name.end(), vars_union[i]) - f1.vars_name.begin()]);
        }
    }

    map<string,int> var_name_to_pos;
    for (int i = 0 ; i < vars_union.size() ; i++)
        var_name_to_pos.insert(pair<string,int>(vars_union[i], i));

    // element wise multiplication
    vector<int> index(vars_union.size(), 0);
    for (int i = 0 ; i < accumulate(new_num_vals_vars.begin(), new_num_vals_vars.end(), 1, multiplies<int>()) ; i++)
    {
        vector<int> self_ind;
        for (int j = 0 ; j < this->vars_name.size() ; j++)
            self_ind.push_back(index[var_name_to_pos[this->vars_name[j]]]);
        vector<int> f1_ind;
        for (int j = 0 ; j < f1.vars_name.size() ; j++)
            f1_ind.push_back(index[var_name_to_pos[f1.vars_name[j]]]);

        new_pots.push_back(this->pot_at(self_ind) * f1.pot_at(f1_ind));

        increment(index, new_num_vals_vars);
    }

    return Factor(vars_union.size(), vars_union, new_num_vals_vars, new_pots);
}

Factor Factor::operator/(const Factor& f1)
{
    // scope(this) is >= scope(f1)
    vector<double> new_pots;

    vector<int> vars_name_intersect_indices;
    for (int i = 0 ; i < f1.vars_name.size() ; i++)
        for (int j = 0 ; j < this->vars_name.size() ; j++)
            if (f1.vars_name[i] == this->vars_name[j])
            {
                vars_name_intersect_indices.push_back(j);
                break;
            }

    vector<int> index(this->vars_name.size(), 0);
    for (int i = 0 ; i < this->potentials.size() ; i++)
    {
        vector<int> f1_index;
        for (int j = 0 ; j < vars_name_intersect_indices.size() ; j++)
            f1_index.push_back(index[vars_name_intersect_indices[j]]);

        if (f1.pot_at(f1_index) == 0)
        {
            new_pots.push_back(0);
        }

        else
        {
            new_pots.push_back(potentials[i] / f1.pot_at(f1_index));
        }

        increment(index, this->num_vals_vars);
    }

    return Factor(this->num_vars, this->vars_name, this->num_vals_vars, new_pots);
}

Factor Factor::sum_out(string var_name)
{
    // sum out only if var_name is present in the scope!
    // during BP on clique trees the initial node scopes can be incomplete,
    // so a missing variable is not treated as an error
    int pos = -1;
    vector<string> new_vars_name;
    vector<int> new_num_vals_vars;
    for (int i = 0 ; i < num_vars ; i++)
    {
        if (vars_name[i] == var_name)
        {
            pos = i;
            continue;
        }
        new_vars_name.push_back(vars_name[i]);
        new_num_vals_vars.push_back(num_vals_vars[i]);
    }

    if (pos == -1) return *this; // var_name not in scope, return the factor unchanged

    vector<double> new_pots;

    vector<int> index(new_vars_name.size(), 0);
    vector<int> mod_index;
    for (int i = 0 ; i < accumulate(new_num_vals_vars.begin(), new_num_vals_vars.end(), 1, multiplies<int>()) ; i++)
    {
        double sum = 0;
        for (int j = 0 ; j < num_vals_vars[pos] ; j++)
        {
            mod_index = index;
            mod_index.insert(mod_index.begin() + pos, 1, j);
            sum += pot_at(mod_index);
        }
        new_pots.push_back(sum);

        increment(index, new_num_vals_vars);
    }

    return Factor(new_vars_name.size(), new_vars_name, new_num_vals_vars, new_pots);
}

Factor Factor::max_out(string var_name)
{
    int pos = -1;
    vector<string> new_vars_name;
    vector<int> new_num_vals_vars;
    for (int i = 0 ; i < num_vars ; i++)
    {
        if (vars_name[i] == var_name)
        {
            pos = i;
            continue;
        }
        new_vars_name.push_back(vars_name[i]);
        new_num_vals_vars.push_back(num_vals_vars[i]);
    }

    if (pos == -1) return *this;

    vector<double> new_pots;

    vector<int> index(new_vars_name.size(), 0);
    vector<int> mod_index;
    for (int i = 0 ; i < accumulate(new_num_vals_vars.begin(), new_num_vals_vars.end(), 1, multiplies<int>()) ; i++)
    {
        double cur_max = -1;
        for (int j = 0 ; j < num_vals_vars[pos] ; j++)
        {
            mod_index = index;
            mod_index.insert(mod_index.begin() + pos, 1, j);
            if (pot_at(mod_index) > cur_max) cur_max = pot_at(mod_index);
        }
        new_pots.push_back(cur_max);

        increment(index, new_num_vals_vars);
    }

    return Factor(new_vars_name.size(), new_vars_name, new_num_vals_vars, new_pots);
}

void Factor::normalize()
{
    double sum = accumulate(potentials.begin(), potentials.end(), 0.0);

    for (int i = 0 ; i < potentials.size() ; i++)
        potentials[i] /= sum;
}
int Factor::var_sample(string var_to_sample, map<string,int> assignment_map)
{
    // in the current factor, given assignments to all variables other than var_to_sample,
    // sample var_to_sample from the conditional distribution

    int var_to_sample_id = -1;
    for (int i = 0 ; i < num_vars ; i++)
        if (vars_name[i] == var_to_sample)
        {
            var_to_sample_id = i;
            break;
        }

    vector<double> prob(num_vals_vars[var_to_sample_id], 0.0); // unnormalized initially
    vector<int> assignment(num_vars, -1);
    for (int i = 0 ; i < num_vars ; i++)
        if (i != var_to_sample_id)
        {
            assignment[i] = assignment_map[vars_name[i]];
        }

    double sum = 0.0;
    for (int i = 0 ; i < num_vals_vars[var_to_sample_id] ; i++)
    {
        assignment[var_to_sample_id] = i;
        prob[i] = pot_at(assignment);
        sum += prob[i];
    }

    double r = ((double) rand() / (RAND_MAX));
    // normalize and make cumulative prob
    for (int i = 0 ; i < num_vals_vars[var_to_sample_id] ; i++)
    {
        prob[i] /= sum;
        if (i > 0) prob[i] += prob[i-1]; // cumulative
        if (r <= prob[i]) return i;
    }

    cout << "Sampling Error" << endl;
    this->print();
    // for safety
    return -1;
}

#endif
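A minimal usage sketch of the Factor API above (the variable names and potentials are illustrative, not from any data file):

    Factor f1(2, vector<string>{"A", "B"}, vector<int>{2, 2}, vector<double>{0.3, 0.7, 0.9, 0.1});
    Factor f2(2, vector<string>{"B", "C"}, vector<int>{2, 2}, vector<double>{0.5, 0.5, 0.2, 0.8});
    Factor prod = f1 * f2;             // scope {A, B, C}
    Factor marg = prod.sum_out("B");   // scope {A, C}
    marg.normalize();                  // entries now sum to 1
    marg.print();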
--------------------------------------------------------------------------------
/FactorGraph.cpp:
--------------------------------------------------------------------------------
#include <bits/stdc++.h>

using namespace std;

#ifndef FACTORGRAPH_CPP
#define FACTORGRAPH_CPP

// TODO: test base case (one clique node, 2 nodes)

class FactorGraph
{
    public:
        FactorGraph(int num_nodes, vector<string> var_names, vector<set<string> > node_scopes, vector<vector<int> > adj_list, vector<Factor> factors, vector<vector<int> > node_factors);
        FactorGraph();
        void MessagePassing(int root, Factor (Factor::*margin_op)(string)); // populates node_marginals
        void BeliefProp(double epsilon, int max_iter, Factor (Factor::*margin_op)(string), bool silent); // populates node_marginals
        pair<vector<int>, vector<double> > max_marginal_assignment(Factor (Factor::*margin_op)(string)); // after node_marginals is populated
        vector<double> marginal_likelihood(bool run_bp, vector<int> assignments);

        void print(bool marginals);

        int num_nodes;
        vector<Factor> node_marginals;

        //private:
        vector<set<string> > node_scopes;
        vector<string> var_names;
        vector<vector<int> > adj_list;
        vector<Factor> factors;
        vector<vector<int> > node_factors;

        // methods for MessagePassing
        void up_pass(int start_node, int parent, vector<map<int, Factor> >& messages, vector<bool>& visited, Factor (Factor::*margin_op)(string));
        void down_pass(int start_node, int parent, vector<map<int, Factor> >& messages, Factor (Factor::*margin_op)(string));
};

FactorGraph::FactorGraph(int nn, vector<string> vn, vector<set<string> > ns, vector<vector<int> > al, vector<Factor> f, vector<vector<int> > nf)
{
    num_nodes = nn;
    var_names = vn;
    node_scopes = ns;
    adj_list = al;
    factors = f;
    node_factors = nf;
    node_marginals = vector<Factor>(num_nodes);
}

FactorGraph::FactorGraph() {} // empty constructor

void FactorGraph::MessagePassing(int root, Factor (Factor::*margin_op)(string))
{
    // works only if the cluster graph is a clique tree
    vector<map<int, Factor> > messages(num_nodes, map<int, Factor>()); // ith element stores messages coming to i, ie delta_(j->i)

    // node_marginals (beliefs) are set in down_pass
    // loop over all nodes in case of multiple connected components (after calling from root)
    vector<bool> visited(num_nodes, false);

    up_pass(root, -1, messages, visited, margin_op);
    down_pass(root, -1, messages, margin_op);

    for (int i = 0 ; i < num_nodes ; i++)
        if (not visited[i])
        {
            up_pass(i, -1, messages, visited, margin_op);
            down_pass(i, -1, messages, margin_op);
        }
}

void FactorGraph::up_pass(int start_node, int parent, vector<map<int, Factor> >& messages, vector<bool>& visited, Factor (Factor::*margin_op)(string))
{
    visited[start_node] = true;
    vector<int> down_ngbs = adj_list[start_node];
    down_ngbs.erase(remove(down_ngbs.begin(), down_ngbs.end(), parent), down_ngbs.end());

    Factor up_message = Factor();
    for (int j = 0 ; j < node_factors[start_node].size() ; j++)
        up_message = up_message * factors[node_factors[start_node][j]];

    for (int i = 0 ; i < down_ngbs.size() ; i++) // down_ngbs.size() == 0 for a leaf node
        up_pass(down_ngbs[i], start_node, messages, visited, margin_op);

    map<int, Factor>::iterator it;
    for (it = messages[start_node].begin() ; it != messages[start_node].end() ; it++)
        up_message = up_message * it->second;

    if (parent != -1) // not the root node => marginalize and send up
    {
        // sum out the variables not shared with the parent
        set<string> scope_diff;
        set_difference(node_scopes[start_node].begin(), node_scopes[start_node].end(), node_scopes[parent].begin(), node_scopes[parent].end(), inserter(scope_diff, scope_diff.end()));

        set<string>::iterator it;
        for (it = scope_diff.begin() ; it != scope_diff.end() ; it++)
            up_message = (up_message.*margin_op)(*it);

        messages[parent].insert(pair<int, Factor>(start_node, up_message));
    }
}
void FactorGraph::down_pass(int start_node, int parent, vector<map<int, Factor> >& messages, Factor (Factor::*margin_op)(string))
{
    vector<int> down_ngbs = adj_list[start_node];
    down_ngbs.erase(remove(down_ngbs.begin(), down_ngbs.end(), parent), down_ngbs.end());

    Factor belief = Factor();
    for (int i = 0 ; i < node_factors[start_node].size() ; i++)
        belief = belief * factors[node_factors[start_node][i]];

    map<int, Factor>::iterator it;
    for (it = messages[start_node].begin() ; it != messages[start_node].end() ; it++)
        belief = belief * it->second;

    // setting node marginal
    node_marginals[start_node] = belief;

    for (int i = 0 ; i < down_ngbs.size() ; i++)
    {
        Factor down_message = belief;
        down_message = down_message / messages[start_node][down_ngbs[i]];

        // sum out the variables not shared with the child
        set<string> scope_diff;
        set_difference(node_scopes[start_node].begin(), node_scopes[start_node].end(), node_scopes[down_ngbs[i]].begin(), node_scopes[down_ngbs[i]].end(), inserter(scope_diff, scope_diff.end()));
        set<string>::iterator it;
        for (it = scope_diff.begin() ; it != scope_diff.end() ; it++)
            down_message = (down_message.*margin_op)(*it);

        messages[down_ngbs[i]].insert(pair<int, Factor>(start_node, down_message));

        down_pass(down_ngbs[i], start_node, messages, margin_op);
    }
}

void FactorGraph::BeliefProp(double epsilon, int max_iter, Factor (Factor::*margin_op)(string), bool silent)
{
    // initialise
    vector<map<int, Factor> > cur_messages(num_nodes, map<int, Factor>()); // ith element stores messages coming to i, ie delta_(j->i)
    vector<map<int, Factor> > new_messages(num_nodes, map<int, Factor>());

    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < adj_list[i].size() ; j++)
        {
            // delta_(i->adj_list[i][j]) starts out as a unity factor
            cur_messages[adj_list[i][j]].insert(pair<int, Factor>(i, Factor()));
            new_messages[adj_list[i][j]].insert(pair<int, Factor>(i, Factor()));
        }

    vector<Factor> cur_beliefs(num_nodes, Factor());
    vector<Factor> new_beliefs(num_nodes, Factor());

    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < node_factors[i].size() ; j++)
            cur_beliefs[i] = cur_beliefs[i] * factors[node_factors[i][j]];

    bool converged = false;
    int iter = 0;

    while (!converged and iter < max_iter)
    {
        converged = true;
        // cout << "===============\nITERATION " << iter << "\n===============\n";

        // update messages
        for (int i = 0 ; i < num_nodes ; i++)
            for (int j = 0 ; j < adj_list[i].size() ; j++)
            {
                // delta_(i->adj_list[i][j]) : current belief at i with the incoming message from adj_list[i][j] divided out
                Factor new_message = cur_beliefs[i] / cur_messages[i][adj_list[i][j]];

                set<string> scope_diff;
                set_difference(node_scopes[i].begin(), node_scopes[i].end(), node_scopes[adj_list[i][j]].begin(), node_scopes[adj_list[i][j]].end(), inserter(scope_diff, scope_diff.end()));
                set<string>::iterator it;

                for (it = scope_diff.begin() ; it != scope_diff.end() ; it++)
                    new_message = (new_message.*margin_op)(*it);

                new_messages[adj_list[i][j]][i] = new_message;
                new_messages[adj_list[i][j]][i].normalize();
                // cout << "delta_(" << i << "->" << adj_list[i][j] << ")\n";
                // new_messages[adj_list[i][j]][i].print();
            }

        // update beliefs
        for (int i = 0 ; i < num_nodes ; i++)
        {
            Factor new_belief = Factor();
            for (int j = 0 ; j < node_factors[i].size() ; j++)
                new_belief = new_belief * factors[node_factors[i][j]];

            map<int, Factor>::iterator it;
            for (it = new_messages[i].begin() ; it != new_messages[i].end() ; it++)
                new_belief = new_belief * it->second;

            new_beliefs[i] = new_belief;
            // cout << "belief_" << i << "\n";
            // new_belief.print();

            if (cur_beliefs[i].potentials.size() != new_beliefs[i].potentials.size())
                converged = false; // scope grew this iteration, so not converged yet
            else
                for (int j = 0 ; j < new_beliefs[i].potentials.size() ; j++)
                    if (abs(cur_beliefs[i].potentials[j] - new_beliefs[i].potentials[j]) > epsilon)
                        converged = false;
        }

        cur_beliefs = new_beliefs;
        cur_messages = new_messages;
        iter++;
    }

    for (int i = 0 ; i < num_nodes ; i++)
        node_marginals[i] = cur_beliefs[i];

    if (not silent)
    {
        if (converged) cout << "BP converged after " << iter << " iterations" << endl;
        else cout << "BP did not converge after " << iter << " iterations" << endl;
    }
}
pair<vector<int>, vector<double> > FactorGraph::max_marginal_assignment(Factor (Factor::*margin_op)(string))
{
    // can speed up
    vector<int> assignments(var_names.size(), -1);
    vector<double> loglikelihood; // as defined in the problem statement for max-marginals; makes sense only for margin_op == sum_out
    for (int i = 0 ; i < assignments.size() ; i++)
    {
        int j;
        for (j = 0 ; j < node_marginals.size() ; j++)
            if (find(node_marginals[j].vars_name.begin(), node_marginals[j].vars_name.end(), var_names[i]) != node_marginals[j].vars_name.end())
                break;

        Factor var_marginal = node_marginals[j];
        var_marginal.normalize();
        for (int k = 0 ; k < node_marginals[j].vars_name.size() ; k++)
            if (node_marginals[j].vars_name[k] != var_names[i])
                var_marginal = (var_marginal.*margin_op)(node_marginals[j].vars_name[k]);

        vector<double>::iterator max_it = max_element(var_marginal.potentials.begin(), var_marginal.potentials.end());
        loglikelihood.push_back(log(*max_it));
        assignments[i] = distance(var_marginal.potentials.begin(), max_it);
    }

    return pair<vector<int>, vector<double> >(assignments, loglikelihood);
}

vector<double> FactorGraph::marginal_likelihood(bool run_bp, vector<int> assignments)
{
    // run a quick BP pass to find the marginal at each node (not MP, since the graph could be a Bethe cluster graph)
    if (run_bp) this->BeliefProp(0.01, 1000, &Factor::sum_out, true);
    vector<double> loglikelihood;

    for (int i = 0 ; i < assignments.size() ; i++)
    {
        int j;
        for (j = 0 ; j < node_marginals.size() ; j++)
            if (find(node_marginals[j].vars_name.begin(), node_marginals[j].vars_name.end(), var_names[i]) != node_marginals[j].vars_name.end())
                break;

        Factor var_marginal = node_marginals[j];
        var_marginal.normalize();
        for (int k = 0 ; k < node_marginals[j].vars_name.size() ; k++)
            if (node_marginals[j].vars_name[k] != var_names[i])
                var_marginal = var_marginal.sum_out(node_marginals[j].vars_name[k]);

        loglikelihood.push_back(log(var_marginal.potentials[assignments[i]]));
    }

    return loglikelihood;
}

void FactorGraph::print(bool marginals)
{
    cout << "------- FACTOR GRAPH -------" << endl << endl;
    cout << "Num Nodes : " << num_nodes << endl << endl;

    cout << "Node Scopes\n===========\n";
    for (int i = 0 ; i < node_scopes.size() ; i++)
    {
        set<string>::iterator it;
        cout << i << " : ";
        for (it = node_scopes[i].begin() ; it != node_scopes[i].end() ; it++)
            cout << *it << " ";
        cout << endl;
    }

    cout << "\nFactor Scopes\n=============\n";
    for (int i = 0 ; i < factors.size() ; i++)
    {
        cout << i << " : ";
        for (int j = 0 ; j < factors[i].vars_name.size() ; j++)
            cout << factors[i].vars_name[j] << " ";
        cout << endl;
    }

    cout << "\nNode Factors\n============\n";
    for (int i = 0 ; i < node_factors.size() ; i++)
    {
        vector<int>::iterator it;
        cout << i << " : ";
        for (it = node_factors[i].begin() ; it != node_factors[i].end() ; it++)
            cout << *it << " ";
        cout << endl;
    }

    cout << "\nAdjacency List\n==============\n";
    for (int i = 0 ; i < adj_list.size() ; i++)
    {
        vector<int>::iterator it;
        cout << i << " : ";
        for (it = adj_list[i].begin() ; it != adj_list[i].end() ; it++)
            cout << *it << " ";
        cout << endl;
    }

    if (marginals)
    {
        cout << "\nMarginals\n=========\n";
        for (int i = 0 ; i < num_nodes ; i++)
        {
            node_marginals[i].normalize();
            node_marginals[i].print();
        }
    }
    cout << "----------------------------" << endl << endl;
}

#endif
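A minimal sketch of how this class is driven from the rest of the repo (compare ocr.cpp and BayesNet.cpp; `mn` is assumed to be an already-constructed MarkovNet):

    FactorGraph ct = mn.gen_clique_tree(mn.min_fill_ve_order());
    ct.MessagePassing(0, &Factor::sum_out);                  // exact sum-product
    FactorGraph bg = mn.gen_bethe_cluster_graph();
    bg.BeliefProp(0.001, 1000, &Factor::max_out, true);      // loopy max-product
    pair<vector<int>, vector<double> > mpa = ct.max_marginal_assignment(&Factor::sum_out);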
--------------------------------------------------------------------------------
/MarkovNet.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iterator>
#include <algorithm>
#include <numeric>
#include <functional>
#include <cmath>

#include "Factor.cpp"
#include "FactorGraph.cpp"

#ifndef MARKOVNET_CPP
#define MARKOVNET_CPP

class MarkovNet
{
    public:
        MarkovNet(int num_nodes, vector<string> node_var_names, vector<int> node_num_vals, vector<vector<int> > adj_list, vector<Factor> factors);
        int num_nodes;
        vector<int> min_fill_ve_order();
        FactorGraph gen_clique_tree(vector<int> elim_ordering);
        FactorGraph gen_bethe_cluster_graph();

        void print(bool print_factors);
        vector<Factor> get_factors();

        // sampling based methods
        vector<vector<int> > gibbs_sampler(vector<int> initial_assignments, int burn_in_samples, int check_convergence_every_n_transitions, int check_convergence_versus_last_samples, int max_samples, double epsilon);
        pair<vector<int>, vector<vector<double> > > marginal_prob_dist_from_samples(vector<vector<int> > samples);
        vector<double> marginal_likelihood(vector<int> assignment, vector<vector<double> >& prob_dist);

        // parameter learning
        void learn_parameters(vector<vector<int> >& train_data, double learning_rate, double reg_const, double epsilon, int max_iters);

        // assignment 3 specific
        pair<vector<double>, vector<vector<double> > > inference_by_sampling(vector<int> query, vector<int> gnd_assignment);

    private:
        vector<string> node_var_names;
        vector<int> node_num_vals;
        vector<vector<int> > adj_list;
        vector<Factor> factors;

        // for Gibbs Sampling
        Factor reduced_factor(string var_to_reduce, vector<int> variable_factors, map<string,int> cur_state_map);

        // for parameter learning
        vector<Factor> avg_feature_counts_from_samples(vector<vector<int> >& samples);
        bool update_parameters(vector<Factor>& avg_feature_counts_data, vector<Factor>& avg_feature_counts_param, int num_data_samples, double learning_rate, double reg_const, double epsilon);
};

MarkovNet::MarkovNet(int nn, vector<string> nvn, vector<int> nnv, vector<vector<int> > al, vector<Factor> f)
{
    num_nodes = nn;
    node_var_names = nvn;
    node_num_vals = nnv;
    adj_list = al;
    factors = f;
}

vector<int> MarkovNet::min_fill_ve_order()
{
    vector<bool> marked(num_nodes, false);
    vector<vector<bool> > am(num_nodes, vector<bool>(num_nodes, false)); // adjacency matrix
    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < adj_list[i].size() ; j++)
            am[i][adj_list[i][j]] = true;

    vector<int> order;

    // each iteration eliminates the node that adds the fewest fill edges
    for (int i = 0 ; i < num_nodes ; i++)
    {
        int min_fill = 99999; // effectively infinity
        int min_node = -1;

        for (int j = 0 ; j < num_nodes ; j++)
            if (!marked[j])
            {
                int cur_min_fill = 0;
                vector<int> ngbs;
                for (int k = 0 ; k < num_nodes ; k++)
                    if (am[j][k] == true)
                        ngbs.push_back(k);

                for (int p = 0 ; p < ngbs.size() ; p++)
                {
                    for (int q = p+1 ; q < ngbs.size() ; q++)
                    {
                        if (not am[ngbs[p]][ngbs[q]]) cur_min_fill++;
                        if (cur_min_fill > min_fill) break;
                    }
                    if (cur_min_fill > min_fill) break;
                }

                if (cur_min_fill < min_fill)
                {
                    min_fill = cur_min_fill;
                    min_node = j;
                }
            }

        // add fill edges
        vector<int> ngbs;
        for (int k = 0 ; k < num_nodes ; k++)
            if (am[min_node][k] == true)
                ngbs.push_back(k);

        for (int p = 0 ; p < ngbs.size() ; p++)
            for (int q = p+1 ; q < ngbs.size() ; q++)
            {
                am[ngbs[p]][ngbs[q]] = true;
                am[ngbs[q]][ngbs[p]] = true;
            }

        // remove min_node
        marked[min_node] = true;
        for (int j = 0 ; j < ngbs.size() ; j++)
        {
            am[min_node][ngbs[j]] = false;
            am[ngbs[j]][min_node] = false;
        }
        order.push_back(min_node);
    }

    return order;
}
vector<Factor> MarkovNet::get_factors() { return factors; }

FactorGraph MarkovNet::gen_clique_tree(vector<int> elim_ordering)
{
    vector<vector<int> > fg_adj_list;
    vector<set<string> > fg_node_scopes;
    vector<vector<int> > fg_node_factors;
    vector<bool> factor_added(factors.size(), false);
    vector<set<string> > tau; // intermediate factors
    vector<bool> tau_added; // added to a node

    // elim_ordering.size() == num_nodes
    for (int i = 0 ; i < num_nodes ; i++)
    {
        vector<int> cur_node_factors;
        vector<int> cur_tau_factors;
        set<string> cur_node_scope;

        for (int j = 0 ; j < factors.size() ; j++)
            if (!factor_added[j] and find(factors[j].vars_name.begin(), factors[j].vars_name.end(), node_var_names[elim_ordering[i]]) != factors[j].vars_name.end())
            {
                cur_node_factors.push_back(j);
                factor_added[j] = true;
            }

        for (int j = 0 ; j < tau.size() ; j++)
            if (!tau_added[j] and find(tau[j].begin(), tau[j].end(), node_var_names[elim_ordering[i]]) != tau[j].end())
            {
                cur_tau_factors.push_back(j);
                tau_added[j] = true;
            }

        fg_node_factors.push_back(cur_node_factors);

        // determine scope of current node from factors and taus
        for (int j = 0 ; j < cur_node_factors.size() ; j++)
            set_difference(factors[cur_node_factors[j]].vars_name.begin(), factors[cur_node_factors[j]].vars_name.end(), cur_node_scope.begin(), cur_node_scope.end(), inserter(cur_node_scope, cur_node_scope.end()));
        // ^ adds the difference factor_scope - cur_node_scope to cur_node_scope => effectively a union

        for (int j = 0 ; j < cur_tau_factors.size() ; j++)
            set_difference(tau[cur_tau_factors[j]].begin(), tau[cur_tau_factors[j]].end(), cur_node_scope.begin(), cur_node_scope.end(), inserter(cur_node_scope, cur_node_scope.end()));

        fg_node_scopes.push_back(cur_node_scope);

        // add edges
        fg_adj_list.push_back(vector<int>());
        for (int j = 0 ; j < cur_tau_factors.size() ; j++)
        {
            fg_adj_list[cur_tau_factors[j]].push_back(i);
            fg_adj_list[i].push_back(cur_tau_factors[j]);
        }

        // add new intermediate tau
        cur_node_scope.erase(node_var_names[elim_ordering[i]]); // eliminating from scope
        tau.push_back(cur_node_scope);
        tau_added.push_back(false);
    }

    return FactorGraph(fg_node_scopes.size(), node_var_names, fg_node_scopes, fg_adj_list, factors, fg_node_factors);
}

bool compareFactorScopeSize(Factor a, Factor b) { return (a.vars_name.size() > b.vars_name.size()); }

FactorGraph MarkovNet::gen_bethe_cluster_graph()
{
    vector<vector<int> > fg_adj_list;
    vector<set<string> > fg_node_scopes;
    vector<vector<int> > fg_node_factors;
    int fg_num_nodes = 0;

    // insert variable nodes into the Bethe cluster graph
    map<string,int> var_name_to_pos;
    for (int i = 0 ; i < node_var_names.size() ; i++)
    {
        var_name_to_pos.insert(pair<string,int>(node_var_names[i], i));
        fg_adj_list.push_back(vector<int>());
        fg_node_factors.push_back(vector<int>());
        fg_node_scopes.push_back(set<string>{node_var_names[i]});
        fg_num_nodes++;
    }

    // sort in decreasing order of scope size
    sort(factors.begin(), factors.end(), compareFactorScopeSize);

    // absorb smaller factors into bigger ones
    vector<vector<int> > reduced_factors;
    for (int i = 0 ; i < factors.size() ; i++)
    {
        bool absorbed = false;
        for (int j = 0 ; j < reduced_factors.size() ; j++)
            if (includes(factors[reduced_factors[j][0]].vars_name.begin(), factors[reduced_factors[j][0]].vars_name.end(), factors[i].vars_name.begin(), factors[i].vars_name.end()))
            {
                absorbed = true;
                reduced_factors[j].push_back(i);
                break;
            }

        if (not absorbed) reduced_factors.push_back(vector<int>{i});
    }

    // add factor nodes to the Bethe cluster graph
    for (int i = 0 ; i < reduced_factors.size() ; i++)
    {
        fg_adj_list.push_back(vector<int>());
        fg_node_scopes.push_back(set<string>(factors[reduced_factors[i][0]].vars_name.begin(), factors[reduced_factors[i][0]].vars_name.end()));
        fg_node_factors.push_back(reduced_factors[i]);

        for (int k = 0 ; k < factors[reduced_factors[i][0]].vars_name.size() ; k++)
        {
            fg_adj_list[var_name_to_pos[factors[reduced_factors[i][0]].vars_name[k]]].push_back(fg_num_nodes);
            fg_adj_list[fg_num_nodes].push_back(var_name_to_pos[factors[reduced_factors[i][0]].vars_name[k]]);
        }

        fg_num_nodes++;
    }

    return FactorGraph(fg_num_nodes, node_var_names, fg_node_scopes, fg_adj_list, factors, fg_node_factors);
}
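// Shape of the graph produced by gen_bethe_cluster_graph (an illustrative
// sketch): for variables A, B, C with factors phi1(A,B) and phi2(B,C),
//   variable nodes : {A}, {B}, {C}
//   factor nodes   : {A,B}, {B,C}
//   edges          : {A}-{A,B}, {B}-{A,B}, {B}-{B,C}, {C}-{B,C}
// A factor whose scope is contained in another's (say phi3(B)) is absorbed
// into the larger factor node instead of getting a node of its own.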
pair<vector<int>, vector<vector<double> > > MarkovNet::marginal_prob_dist_from_samples(vector<vector<int> > samples)
{
    // returns the max-marginal assignment for each node, and also a distribution over all its values
    vector<vector<double> > prob_dist(num_nodes);
    for (int i = 0 ; i < num_nodes ; i++)
        prob_dist[i] = vector<double>(node_num_vals[i], 1.0/(samples.size()+node_num_vals[i])); // smoothing!

    for (int i = 0 ; i < samples.size() ; i++)
        for (int j = 0 ; j < num_nodes ; j++)
            prob_dist[j][samples[i][j]] += 1.0/(samples.size()+node_num_vals[j]);

    vector<int> max_marginal_assignment(num_nodes);

    for (int i = 0 ; i < num_nodes ; i++)
    {
        vector<double>::iterator max_it = max_element(prob_dist[i].begin(), prob_dist[i].end());
        max_marginal_assignment[i] = distance(prob_dist[i].begin(), max_it);
    }

    return pair<vector<int>, vector<vector<double> > >(max_marginal_assignment, prob_dist);
}

vector<double> MarkovNet::marginal_likelihood(vector<int> assignment, vector<vector<double> >& prob_dist)
{
    vector<double> loglikelihood;

    for (int i = 0 ; i < num_nodes ; i++)
        loglikelihood.push_back(log(prob_dist[i][assignment[i]]));

    return loglikelihood;
}
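// The two functions above consume the output of gibbs_sampler below; a
// minimal sketch of the pipeline (the query here is hypothetical, with the
// constants used in inference_by_sampling):
//   vector<int> query(num_nodes, -1);   // -1 everywhere: nothing observed
//   query[0] = 1;                       // observe node 0 at value index 1
//   vector<vector<int> > samples = gibbs_sampler(query, 5000, 100, 4000, 50000, 0.01);
//   pair<vector<int>, vector<vector<double> > > dist = marginal_prob_dist_from_samples(samples);
//   // dist.first[i]  : max-marginal value index for node i
//   // dist.second[i] : smoothed marginal distribution over the values of node i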
vector<vector<int> > MarkovNet::gibbs_sampler(vector<int> initial_assignments, int burn_in_samples, int check_convergence_every_n_transitions, int check_convergence_versus_last_samples, int max_samples, double epsilon)
{
    // initial_assignments[i] is -1 if variable i has no assignment (unobserved), else the index of its assignment (observed)
    // returns the samples collected after burn_in_samples, until convergence

    vector<vector<int> > samples;
    vector<int> cur_state(num_nodes, 0);
    map<string,int> cur_state_map;

    // initialise state
    for (int i = 0 ; i < num_nodes ; i++)
    {
        if (initial_assignments[i] != -1)
            cur_state[i] = initial_assignments[i];

        cur_state_map.insert(pair<string,int>(node_var_names[i], cur_state[i]));
    }

    // initialise variable factors
    vector<vector<int> > variable_factors_indices(num_nodes, vector<int>());
    for (int i = 0 ; i < num_nodes ; i++)
        for (int j = 0 ; j < factors.size() ; j++)
            if (find(factors[j].vars_name.begin(), factors[j].vars_name.end(), node_var_names[i]) != factors[j].vars_name.end())
                variable_factors_indices[i].push_back(j);

    bool converged = false;
    int num_samples = 0;
    int num_n_step_transitions = 0;

    // start sampling
    while (not converged and num_samples - burn_in_samples < max_samples)
    {
        for (int i = 0 ; i < num_nodes ; i++)
            if (initial_assignments[i] == -1)
            {
                // sampling the ith variable given the others, if the ith variable is not observed
                int var_sample = reduced_factor(node_var_names[i], variable_factors_indices[i], cur_state_map).var_sample(node_var_names[i], cur_state_map);

                // updates
                cur_state[i] = var_sample;
                cur_state_map[node_var_names[i]] = var_sample;
                num_samples++;
                if (num_samples > burn_in_samples)
                {
                    samples.push_back(cur_state);
                }
            }

        num_n_step_transitions++;

        if ((num_samples > burn_in_samples) and (num_n_step_transitions % check_convergence_every_n_transitions == 0) and (num_samples - burn_in_samples > 2 * check_convergence_versus_last_samples))
        {
            converged = true;
            // convergence check: for each variable, compare the sample mean excluding the
            // last check_convergence_versus_last_samples samples against the mean over all samples
            for (int i = 0 ; i < num_nodes - 1 ; i++)
            {
                double first_sample_avg = 0.0;
                double all_sample_avg = 0.0;

                for (int j = 0 ; j < samples.size() - check_convergence_versus_last_samples ; j++)
                    first_sample_avg += samples[j][i];

                all_sample_avg = first_sample_avg;
                for (int j = samples.size() - check_convergence_versus_last_samples ; j < samples.size() ; j++)
                    all_sample_avg += samples[j][i];

                first_sample_avg /= samples.size() - check_convergence_versus_last_samples;
                all_sample_avg /= samples.size();

                if (abs(first_sample_avg - all_sample_avg) > epsilon)
                {
                    converged = false;
                    break;
                }
            }
        }
    }

    if (converged) cout << "Gibbs Sampling converged after : " << num_samples << " samples\n";
    else cout << "Gibbs Sampling did not converge after : " << num_samples << " samples\n";
    return samples;
}

Factor MarkovNet::reduced_factor(string var_to_reduce, vector<int> variable_factors_indices, map<string,int> cur_state_map)
{
    Factor reduced;

    for (int i = 0 ; i < variable_factors_indices.size() ; i++)
    {
        int var_to_red_ind_in_cur_factor;
        vector<int> assignment(factors[variable_factors_indices[i]].num_vars, -1);
        for (int j = 0 ; j < factors[variable_factors_indices[i]].num_vars ; j++)
        {
            if (factors[variable_factors_indices[i]].vars_name[j] == var_to_reduce)
                var_to_red_ind_in_cur_factor = j;
            assignment[j] = cur_state_map[factors[variable_factors_indices[i]].vars_name[j]];
        }

        vector<double> pots;
        for (int j = 0 ; j < factors[variable_factors_indices[i]].num_vals_vars[var_to_red_ind_in_cur_factor] ; j++)
        {
            assignment[var_to_red_ind_in_cur_factor] = j;
            pots.push_back(factors[variable_factors_indices[i]].pot_at(assignment));
        }

        reduced = reduced * Factor(1, vector<string>(1, var_to_reduce), vector<int>(1, factors[variable_factors_indices[i]].num_vals_vars[var_to_red_ind_in_cur_factor]), pots);
    }

    return reduced;
}

pair<vector<double>, vector<vector<double> > > MarkovNet::inference_by_sampling(vector<int> query, vector<int> gnd_assignment)
{
    // query contains -1 if a variable is not observed, else the value index of the observation
    // gnd_assignment holds the ground-truth assignment
    // the returned vector is of the form {num_missing_vars, num_correct_preds, LL_of_gnd_assignment}
    // also returns the marginal probability distribution over each variable
    // prediction and LL are at the marginal-probability level

    vector<vector<int> > samples = gibbs_sampler(query, 5000, 100, 4000, 50000, 0.01);
    pair<vector<int>, vector<vector<double> > > dist = marginal_prob_dist_from_samples(samples);
    vector<double> gnd_ll_vars = marginal_likelihood(gnd_assignment, dist.second);
    double gnd_ll = accumulate(gnd_ll_vars.begin(), gnd_ll_vars.end(), 0.0); // note that LL will be ~0 for observed vars (because of how sampling is done)
    double correct_vars_pred = 0;
    double missing_vars_total = 0;

    for (int i = 0 ; i < query.size() ; i++)
        if (query[i] == -1)
        {
            missing_vars_total++;
            if (gnd_assignment[i] == dist.first[i]) correct_vars_pred++;
        }

    return pair<vector<double>, vector<vector<double> > >(vector<double>{missing_vars_total, correct_vars_pred, gnd_ll}, dist.second);
}
void MarkovNet::learn_parameters(vector<vector<int> >& train_data, double learning_rate, double reg_const, double epsilon, int max_iters)
{
    // assumes structure and factors have been initialised

    // each factor value phi in a factor table is exp(lambda) for the corresponding feature f_k in the equivalent log-linear model;
    // to learn the phis, we update the lambdas using the gradient-ascent update
    //   dLL/dl_k = m * [ (avg value of f_k from data) - (avg value of f_k from parameters) ] - 2*C*l_k
    // where l_k is the lambda corresponding to feature f_k; refer to class notes for more details

    // calculate expected feature counts from data, stored as factor tables, one entry per feature,
    // ie each row of a factor table (but they don't act as factors!)
    vector<Factor> avg_feature_counts_data = avg_feature_counts_from_samples(train_data);

    bool converged = false;
    vector<Factor> avg_feature_counts_param;
    vector<vector<int> > samples;
    int iters = 0;

    while (not converged and iters < max_iters)
    {
        samples = gibbs_sampler(vector<int>(num_nodes, -1), 3000, 100, 1000, 20000, 0.001);
        avg_feature_counts_param = avg_feature_counts_from_samples(samples);

        converged = update_parameters(avg_feature_counts_data, avg_feature_counts_param, train_data.size(), learning_rate, reg_const, epsilon);
        factors[0].print();
        iters++;

        cout << "Num iterations : " << iters << endl;
    }
}

vector<Factor> MarkovNet::avg_feature_counts_from_samples(vector<vector<int> >& samples)
{
    vector<Factor> avg_feat_counts;
    map<string,int> var_name_to_pos;
    for (int i = 0 ; i < node_var_names.size() ; i++)
        var_name_to_pos.insert(pair<string,int>(node_var_names[i], i));

    for (int i = 0 ; i < factors.size() ; i++)
    {
        // build a table corresponding to this factor table that stores the average counts of each assignment in the samples
        avg_feat_counts.push_back(Factor(factors[i].num_vars, factors[i].vars_name, factors[i].num_vals_vars, vector<double>(factors[i].potentials.size(), 0.0)));

        vector<int> assignment_map;
        for (int j = 0 ; j < factors[i].num_vars ; j++)
            assignment_map.push_back(var_name_to_pos[factors[i].vars_name[j]]);

        for (int j = 0 ; j < samples.size() ; j++)
        {
            vector<int> assignment;
            for (int k = 0 ; k < assignment_map.size() ; k++)
                assignment.push_back(samples[j][assignment_map[k]]);

            avg_feat_counts[i].potentials[factors[i].flat_index_from_assignment(assignment)] += 1.0/samples.size();
        }
    }

    return avg_feat_counts;
}

bool MarkovNet::update_parameters(vector<Factor>& avg_feature_counts_data, vector<Factor>& avg_feature_counts_param, int num_data_samples, double learning_rate, double reg_const, double epsilon)
{
    bool converged = true;
    int cur_num_features;
    double cur_lambda;
    double new_lambda;
    double delta_lambda;
    double cur_reg;
    for (int i = 0 ; i < factors.size() ; i++)
    {
        cur_num_features = accumulate(factors[i].num_vals_vars.begin(), factors[i].num_vals_vars.end(), 1, multiplies<int>());
        for (int j = 0 ; j < cur_num_features ; j++)
        {
            cur_lambda = log(factors[i].potentials[j]);
            delta_lambda = avg_feature_counts_data[i].potentials[j] - avg_feature_counts_param[i].potentials[j];
            cur_reg = - (2 * reg_const * cur_lambda)/num_data_samples; // L2 regularization
            new_lambda = cur_lambda + learning_rate * (delta_lambda + cur_reg);

            factors[i].potentials[j] = exp(new_lambda);
            if (abs(exp(new_lambda) - exp(cur_lambda)) > epsilon) converged = false;
        }
    }

    return converged;
}
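// A worked single-feature illustration of the update implemented above (the
// numbers are made up): with phi = 1.5, i.e. lambda = log(1.5) ~ 0.4055,
//   avg feature count in data    = 0.30
//   avg feature count in samples = 0.25
//   cur_reg = -(2 * reg_const * lambda) / m = -(2 * 1.0 * 0.4055) / 1000 ~ -0.0008
//   lambda' = 0.4055 + learning_rate * (0.30 - 0.25 - 0.0008)
// so with learning_rate = 0.1, lambda' ~ 0.4104 and phi' = exp(lambda') ~ 1.507.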
cout << "------- MARKOV NET -------" << endl << endl; 509 | cout << "Num Nodes : " << num_nodes << endl << endl; 510 | 511 | cout << "Node Names\n==========\n"; 512 | for (int i = 0 ; i < num_nodes ; i++) 513 | cout << i << " : " << node_var_names[i] << endl; 514 | 515 | cout << "\nFactor Scopes\n=============\n"; 516 | for (int i = 0 ; i < factors.size() ; i++) 517 | { 518 | cout << i << " : " ; 519 | for (int j = 0 ; j < factors[i].vars_name.size() ; j++) 520 | cout << factors[i].vars_name[j] << " "; 521 | cout << endl; 522 | } 523 | 524 | cout << "\nAdjaceny List\n=============\n"; 525 | for (int i = 0 ; i < adj_list.size() ; i++) 526 | { 527 | vector::iterator it; 528 | cout << i << " : " ; 529 | for (it = adj_list[i].begin() ; it != adj_list[i].end(); it++) 530 | cout << *it << " "; 531 | cout << endl; 532 | } 533 | 534 | if (print_factors) 535 | { 536 | cout << "\nFactors\n=======\n"; 537 | for (int i = 0 ; i < factors.size() ; i++) 538 | factors[i].print(); 539 | } 540 | 541 | cout << "----------------------------" << endl << endl; 542 | } 543 | 544 | #endif 545 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ProbabilisticGraphicalModels 2 | General Purpose C++ Implementation for Inference and Learning in Bayesian and Markov Networks. 3 | 4 | This code contains: 5 | - an implementation for Bayesian and Markov Networks 6 | - exact inference using Clique Tree Message Passing and Belief Propogation 7 | - approximate inference using Gibbs Sampling and Loopy Belief Propagation over the Bethe Cluster Graph 8 | - parameter learning for Bayesian Network from observed data samples (counting and smoothing) 9 | - gradient ascent to learn parameters of a Markov Network from observed data samples 10 | 11 | If you think you could use this code, let me know and I'll speed up the documentation! 12 | -------------------------------------------------------------------------------- /ocr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "MarkovNet.cpp" 13 | #include "FactorGraph.cpp" 14 | #include "Factor.cpp" 15 | 16 | class OCR 17 | { 18 | public: 19 | OCR(int num_images, vector chars, double skip_factor, double pair_skip_factor, string ocr_factors_filename, string trans_factors_filename); 20 | void classify_file(string input_filename, string output_filename, string gnd_filename, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip); 21 | pair, pair > classify_img_pair(vector imgs1, vector imgs2, vector gnd_assignment1, vector gnd_assignment2, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip); 22 | MarkovNet gen_pair_mn(vector imgs1, vector imgs2, bool trans, bool skip, bool pair_skip); // make private later? 
--------------------------------------------------------------------------------
/ocr.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>   // standard headers reconstructed from usage in this file
2 | #include <fstream>
3 | #include <sstream>
4 | #include <string>
5 | #include <vector>
6 | #include <map>
7 | #include <set>
8 | #include <utility>
9 | #include <algorithm>
10 | #include <cmath>
11 | 
12 | #include "MarkovNet.cpp"
13 | #include "FactorGraph.cpp"
14 | #include "Factor.cpp"
15 | 
16 | class OCR
17 | {
18 | public:
19 |     OCR(int num_images, vector<string> chars, double skip_factor, double pair_skip_factor, string ocr_factors_filename, string trans_factors_filename);
20 |     void classify_file(string input_filename, string output_filename, string gnd_filename, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip);
21 |     pair<pair<string,double>, pair<string,double> > classify_img_pair(vector<int> imgs1, vector<int> imgs2, vector<int> gnd_assignment1, vector<int> gnd_assignment2, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip);
22 |     MarkovNet gen_pair_mn(vector<int> imgs1, vector<int> imgs2, bool trans, bool skip, bool pair_skip); // make private later?
23 | private:
24 |     void load_phi_o(string filename);
25 |     void load_phi_t(string filename);
26 | 
27 |     Factor gen_ocr_factor(string var_name, int img);
28 |     Factor gen_trans_factor(string var_name1, string var_name2);
29 |     Factor gen_pair_factor(string var_name1, string var_name2, bool pair_skip);
30 | 
31 | 
32 | 
33 |     int num_images;
34 |     int dict_size;
35 |     map<string,int> char_int_map;
36 |     map<int,string> int_char_map;
37 |     vector<vector<double> > ocr_factors;
38 |     vector<vector<double> > trans_factors;
39 |     double skip_factor;
40 |     double pair_skip_factor;
41 | };
42 | 
43 | OCR::OCR(int ni, vector<string> chars, double sf, double psf, string ocr_factors_filename, string trans_factors_filename)
44 | {
45 |     num_images = ni;
46 |     dict_size = chars.size();
47 |     skip_factor = sf;
48 |     pair_skip_factor = psf;
49 |     ocr_factors = vector<vector<double> >(num_images, vector<double>(dict_size, 0));
50 |     trans_factors = vector<vector<double> >(dict_size, vector<double>(dict_size, 0));
51 | 
52 |     for (int i = 0 ; i < chars.size() ; i++)
53 |     {
54 |         char_int_map.insert(pair<string,int>(chars[i], i));
55 |         int_char_map.insert(pair<int,string>(i, chars[i]));
56 |     }
57 | 
58 |     load_phi_o(ocr_factors_filename);
59 |     load_phi_t(trans_factors_filename);
60 | }
61 | 
62 | void OCR::load_phi_o(string filename)
63 | {
64 |     string line;
65 |     ifstream infile(filename);
66 |     if (infile.is_open())
67 |     {
68 |         while (getline(infile,line))
69 |         {
70 |             stringstream ss(line);
71 |             int i; ss >> i; ss.ignore(); // ignore tab
72 |             string s; ss >> s; ss.ignore();
73 |             double f; ss >> f;
74 | 
75 |             ocr_factors[i][char_int_map[s]] = f;
76 |         }
77 |         infile.close();
78 |     }
79 | }
80 | 
81 | void OCR::load_phi_t(string filename)
82 | {
83 |     string line;
84 |     ifstream infile(filename);
85 |     if (infile.is_open())
86 |     {
87 |         while (getline(infile,line))
88 |         {
89 |             stringstream ss(line);
90 |             string s1; ss >> s1; ss.ignore(); // ignore tab
91 |             string s2; ss >> s2; ss.ignore();
92 |             double f; ss >> f;
93 | 
94 |             trans_factors[char_int_map[s1]][char_int_map[s2]] = f;
95 |         }
96 |         infile.close();
97 |     }
98 | }
99 | 
100 | Factor OCR::gen_ocr_factor(string var_name, int img)
101 | {
102 |     // var_name.size() == 1
103 |     return Factor(1, vector<string>{var_name}, vector<int>{dict_size}, ocr_factors[img]);
104 | }
105 | 
106 | Factor OCR::gen_trans_factor(string var_name1, string var_name2)
107 | {
108 |     // var_names.size() == 2
109 |     vector<double> potentials;
110 | 
111 |     for (int i = 0 ; i < dict_size ; i++)
112 |         potentials.insert(potentials.end(), trans_factors[i].begin(), trans_factors[i].end());
113 | 
114 |     return Factor(2, vector<string>{var_name1, var_name2}, vector<int>{dict_size, dict_size}, potentials);
115 | }
116 | 
117 | Factor OCR::gen_pair_factor(string var_name1, string var_name2, bool pair_skip)
118 | {
119 |     // var_names.size() == 2
120 |     // if pair_skip => pair-skip factor, else normal skip factor
121 | 
122 |     vector<double> potentials;
123 | 
124 |     for (int i = 0 ; i < dict_size ; i++)
125 |         for (int j = 0 ; j < dict_size ; j++)
126 |         {
127 |             if (i==j and not pair_skip) potentials.push_back(skip_factor);
128 |             else if (i==j and pair_skip) potentials.push_back(pair_skip_factor);
129 |             else potentials.push_back(1.0);
130 |         }
131 | 
132 |     return Factor(2, vector<string>{var_name1, var_name2}, vector<int>{dict_size, dict_size}, potentials);
133 | }
134 | 
135 | 
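// Sketch of the table gen_pair_factor produces, written out for dict_size = 3 and
// skip_factor = 5.0 (row-major over the joint assignments of the two variables):
//
//     5 1 1
//     1 5 1
//     1 1 5
//
// i.e. extra weight on assignments that decode two occurrences of the same image
// (or matching images across the two words, when pair_skip is set) to the same character.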
node_var_names.push_back(string("w2_")+to_string(i)); 146 | 147 | // ocr factors 148 | for (int i = 0 ; i < imgs1.size() ; i++) 149 | factors.push_back(gen_ocr_factor(node_var_names[i], imgs1[i])); 150 | for (int i = 0 ; i < imgs2.size() ; i++) 151 | factors.push_back(gen_ocr_factor(node_var_names[i+imgs1.size()], imgs2[i])); 152 | 153 | // trans factors 154 | if (trans) 155 | { 156 | for (int i = 0 ; i < imgs1.size() - 1 ; i++) 157 | { 158 | factors.push_back(gen_trans_factor(node_var_names[i], node_var_names[i+1])); 159 | adj_list[i].push_back(i+1); 160 | adj_list[i+1].push_back(i); 161 | } 162 | for (int i = 0 ; i < imgs2.size() - 1 ; i++) 163 | { 164 | factors.push_back(gen_trans_factor(node_var_names[i+imgs1.size()], node_var_names[i+1+imgs1.size()])); 165 | adj_list[i+imgs1.size()].push_back(i+1+imgs1.size()); 166 | adj_list[i+imgs1.size()+1].push_back(i+imgs1.size()); 167 | } 168 | } 169 | 170 | // skip factors 171 | if (skip) 172 | { 173 | for (int i = 0 ; i < imgs1.size() ; i++) 174 | for (int j = i+1 ; j < imgs1.size() ; j++) 175 | if (imgs1[i]==imgs1[j]) 176 | { 177 | factors.push_back(gen_pair_factor(node_var_names[i], node_var_names[j], false)); 178 | adj_list[i].push_back(j); 179 | adj_list[j].push_back(i); 180 | } 181 | 182 | for (int i = 0 ; i < imgs2.size() ; i++) 183 | for (int j = i+1 ; j < imgs2.size() ; j++) 184 | if (imgs2[i]==imgs2[j]) 185 | { 186 | factors.push_back(gen_pair_factor(node_var_names[i + imgs1.size()], node_var_names[j + imgs1.size()], false)); 187 | adj_list[i + imgs1.size()].push_back(j + imgs1.size()); 188 | adj_list[j + imgs1.size()].push_back(i + imgs1.size()); 189 | } 190 | } 191 | 192 | // pair skip factors 193 | if (pair_skip) 194 | { 195 | for (int i = 0 ; i < imgs1.size() ; i++) 196 | for (int j = 0 ; j < imgs2.size() ; j++) 197 | if (imgs1[i] == imgs2[j]) 198 | { 199 | factors.push_back(gen_pair_factor(node_var_names[i], node_var_names[j + imgs1.size()], true)); 200 | adj_list[i].push_back(j + imgs1.size()); 201 | adj_list[j + imgs1.size()].push_back(i); 202 | } 203 | } 204 | 205 | return MarkovNet(node_var_names.size(), node_var_names, vector(node_var_names.size(), dict_size), adj_list, factors); 206 | } 207 | 208 | pair, pair > OCR::classify_img_pair(vector imgs1, vector imgs2, vector gnd_assignment1, vector gnd_assignment2, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip) 209 | { 210 | // modes 211 | // 1: Message Passing 212 | // 2: Loopy BP 213 | // 3: Gibbs Sampling 214 | 215 | MarkovNet mn = gen_pair_mn(imgs1, imgs2, trans, skip, pair_skip); 216 | 217 | FactorGraph fg; 218 | vector pred_assignment; 219 | vector gnd_assignment = gnd_assignment1; 220 | gnd_assignment.insert(gnd_assignment.end(), gnd_assignment2.begin(), gnd_assignment2.end()); 221 | vector gnd_loglikelihood(imgs1.size() + imgs2.size(), 0.0); 222 | 223 | if (mode==1) 224 | { 225 | fg = mn.gen_clique_tree(mn.min_fill_ve_order()); 226 | 227 | if (margin_sum) fg.MessagePassing(0, &Factor::sum_out); 228 | else fg.MessagePassing(0, &Factor::max_out); 229 | } 230 | 231 | else if (mode==2) // Loopy BeliefProp 232 | { 233 | fg = mn.gen_bethe_cluster_graph(); 234 | if (margin_sum) fg.BeliefProp(0.001, 1000, &Factor::sum_out, true); 235 | else fg.BeliefProp(0.001, 1000, &Factor::max_out, true); 236 | } 237 | 238 | else if (mode==3) // Gibbs Sampling 239 | { 240 | if (margin_sum) 241 | { 242 | vector > samples = mn.gibbs_sampler(vector(mn.num_nodes, -1), 5000, 100, 5000, 15000, 0.1); 243 | pair, vector > > dist = mn.marginal_prob_dist_from_samples(samples); 244 | 
208 | pair<pair<string,double>, pair<string,double> > OCR::classify_img_pair(vector<int> imgs1, vector<int> imgs2, vector<int> gnd_assignment1, vector<int> gnd_assignment2, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip)
209 | {
210 |     // modes
211 |     // 1: Message Passing
212 |     // 2: Loopy BP
213 |     // 3: Gibbs Sampling
214 | 
215 |     MarkovNet mn = gen_pair_mn(imgs1, imgs2, trans, skip, pair_skip);
216 | 
217 |     FactorGraph fg;
218 |     vector<int> pred_assignment;
219 |     vector<int> gnd_assignment = gnd_assignment1;
220 |     gnd_assignment.insert(gnd_assignment.end(), gnd_assignment2.begin(), gnd_assignment2.end());
221 |     vector<double> gnd_loglikelihood(imgs1.size() + imgs2.size(), 0.0);
222 | 
223 |     if (mode==1) // Clique Tree Message Passing
224 |     {
225 |         fg = mn.gen_clique_tree(mn.min_fill_ve_order());
226 | 
227 |         if (margin_sum) fg.MessagePassing(0, &Factor::sum_out);
228 |         else fg.MessagePassing(0, &Factor::max_out);
229 |     }
230 | 
231 |     else if (mode==2) // Loopy BeliefProp
232 |     {
233 |         fg = mn.gen_bethe_cluster_graph();
234 |         if (margin_sum) fg.BeliefProp(0.001, 1000, &Factor::sum_out, true);
235 |         else fg.BeliefProp(0.001, 1000, &Factor::max_out, true);
236 |     }
237 | 
238 |     else if (mode==3) // Gibbs Sampling
239 |     {
240 |         if (margin_sum)
241 |         {
242 |             vector<vector<int> > samples = mn.gibbs_sampler(vector<int>(mn.num_nodes, -1), 5000, 100, 5000, 15000, 0.1);
243 |             pair<vector<int>, vector<vector<double> > > dist = mn.marginal_prob_dist_from_samples(samples);
244 |             pred_assignment = dist.first;
245 |             gnd_loglikelihood = mn.marginal_likelihood(gnd_assignment, dist.second);
246 |         }
247 |         else cout << "Gibbs Sampling not implemented for MAP" << endl;
248 |     }
249 | 
250 |     if (mode==1 or mode==2)
251 |     {
252 |         if (margin_sum)
253 |         {
254 |             pred_assignment = fg.max_marginal_assignment(&Factor::sum_out).first;
255 |             gnd_loglikelihood = fg.marginal_likelihood(false, gnd_assignment);
256 |         }
257 |         else
258 |         {
259 |             pred_assignment = fg.max_marginal_assignment(&Factor::max_out).first;
260 |             gnd_loglikelihood = fg.marginal_likelihood(true, gnd_assignment);
261 |         }
262 |     }
263 | 
264 |     string w1 = "";
265 |     string w2 = "";
266 |     double ll1 = 0.0;
267 |     double ll2 = 0.0;
268 | 
269 |     for (int i = 0 ; i < imgs1.size() ; i++)
270 |     {
271 |         w1 += int_char_map[pred_assignment[i]];
272 |         ll1 += gnd_loglikelihood[i];
273 |     }
274 |     for (int j = 0 ; j < imgs2.size() ; j++)
275 |     {
276 |         w2 += int_char_map[pred_assignment[j + imgs1.size()]];
277 |         ll2 += gnd_loglikelihood[j + imgs1.size()];
278 |     }
279 | 
280 |     return pair<pair<string,double>, pair<string,double> >(pair<string,double>(w1, ll1), pair<string,double>(w2, ll2));
281 | }
282 | 
283 | void OCR::classify_file(string input_filename, string output_filename, string gnd_filename, int mode, bool margin_sum, bool trans, bool skip, bool pair_skip)
284 | {
285 |     // modes
286 |     // 1: Message Passing
287 |     // 2: Loopy BP
288 |     // 3: Gibbs Sampling
289 | 
290 |     string inp_line;
291 |     string gnd_line;
292 |     ifstream infile(input_filename);
293 |     ifstream gndfile(gnd_filename);
294 |     ofstream ofile;
295 |     ofile.open(output_filename);
296 |     double avgLogProb = 0.0;
297 |     int i = 0;
298 | 
299 |     if (infile.is_open())
300 |     {
301 |         while (getline(infile,inp_line))
302 |         {
303 |             getline(gndfile,gnd_line);
304 |             if (not inp_line.empty())
305 |             {
306 |                 // cout << i << endl;
307 |                 vector<int> cur_imgs1;
308 |                 vector<int> cur_imgs2;
309 |                 vector<int> gnd_assngmt1;
310 |                 vector<int> gnd_assngmt2;
311 | 
312 |                 stringstream ss1(inp_line);
313 |                 int n;
314 | 
315 |                 while (ss1 >> n)
316 |                 {
317 |                     cur_imgs1.push_back(n);
318 |                     if (ss1.peek() == '\t') ss1.ignore();
319 |                 }
320 | 
321 |                 for (int j = 0 ; j < gnd_line.size() ; j++) // reconstructed: read first ground-truth word
322 |                     gnd_assngmt1.push_back(char_int_map[string(1, gnd_line[j])]);
323 | 
324 |                 getline(infile, inp_line); // reconstructed: read the second word's image ids
325 |                 stringstream ss2(inp_line);
326 |                 while (ss2 >> n)
327 |                 {
328 |                     cur_imgs2.push_back(n);
329 |                     if (ss2.peek() == '\t') ss2.ignore();
330 |                 }
331 | 
332 |                 getline(gndfile,gnd_line);
333 |                 for (int j = 0 ; j < gnd_line.size() ; j++) // reconstructed: read second ground-truth word
334 |                     gnd_assngmt2.push_back(char_int_map[string(1, gnd_line[j])]);
335 |                 pair<pair<string,double>, pair<string,double> > pred = classify_img_pair(cur_imgs1, cur_imgs2, gnd_assngmt1, gnd_assngmt2, mode, margin_sum, trans, skip, pair_skip);
336 |                 ofile << pred.first.first << endl << pred.second.first << endl << endl;
337 | 
338 |                 avgLogProb += pred.first.second + pred.second.second;
339 |                 i += 2;
340 |             }
341 |         }
342 |         infile.close();
343 |     }
344 |     ofile.close();
345 |     avgLogProb = avgLogProb/i;
346 |     cout << "Average Log Likelihood (as defined for both cases in the assignment) : " << avgLogProb << endl;
347 | }
348 | 
349 | void print_stats(string ref_file, string pred_file)
350 | {
351 |     string line;
352 |     ifstream f1(ref_file);
353 |     ifstream f2(pred_file);
354 | 
355 |     vector<string> words1;
356 |     vector<string> words2;
357 | 
358 |     int total_chars = 0;
359 |     int match_chars = 0;
360 |     int total_words = 0;
361 |     int match_words = 0;
362 | 
363 |     if (f1.is_open())
364 |     {
365 |         while (getline(f1, line))
366 |             if (not line.empty())
367 |                 words1.push_back(line);
368 |         f1.close();
369 |     }
370 | 
371 |     if (f2.is_open())
372 |     {
373 |         while (getline(f2, line))
374 |             if (not line.empty())
375 |                 words2.push_back(line);
376 | 
377 |         f2.close();
378 |     }
379 | 
380 |     for (int i = 0 ; i < words1.size() ; i++) // reconstructed: tally char- and word-level matches
381 |     {
382 |         total_words++;
383 |         if (words1[i] == words2[i]) match_words++;
384 | 
385 |         for (int j = 0 ; j < words1[i].size() ; j++)
386 |         {
387 |             total_chars++;
388 |             if (words1[i][j] == words2[i][j]) match_chars++;
389 |         }
390 |     }
391 | 
392 |     cout << "Char accuracy : " << (double)match_chars/total_chars << endl;
393 |     cout << "Word accuracy : " << (double)match_words/total_words << endl;
394 | }
395 | 
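// Data layout assumed by classify_file and print_stats above (inferred from the
// parsing code; the dataset files themselves are not part of this repo):
//   data file  : two tab-separated lines of image ids per example, separated from
//                the next example by a blank line
//   truth file : the corresponding ground-truth words on matching lines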
396 | int main()
397 | {
398 |     // reconstructed: main() header and the first OCR argument below were lost to
399 |     // formatting; 1000 is a guessed num_images, the rest survives verbatim
400 |     OCR ocr = OCR(1000, vector<string>{"d","o","i","r","a","h","t","n","s","e"}, 5.0, 5.0, "../OCRdataset-2/potentials/ocr.dat", "../OCRdataset-2/potentials/trans.dat");
401 | 
402 | 
403 |     //pair<pair<string,double>, pair<string,double> > pred = ocr.classify_img_pair(vector<int>{592,688,240,592}, vector<int>{999,773,575,592,721,960}, vector<int>{0,0,0,0}, vector<int>{0,0,0,0,0,0}, 3, true, true, true, true);
404 |     //cout << pred.first.first << " " << pred.second.first << endl;
405 |     //MarkovNet mn = ocr.gen_pair_mn(vector<int>{592,688,240,592}, vector<int>{999,773,575,592,721,960}, true, true, true);
406 | 
407 |     //ocr.classify_file("../OCRdataset-2/data/data-tree.dat", "../OCRdataset-2/data/pred.dat", "../OCRdataset-2/data/truth-tree.dat", 2, true, true, true, true);
408 |     //print_stats("../OCRdataset-2/data/truth-tree.dat", "../OCRdataset-2/data/pred.dat");
409 | 
410 |     //ocr.classify_file("../OCRdataset-2/data/data-treeWS.dat", "../OCRdataset-2/data/pred.dat", "../OCRdataset-2/data/truth-treeWS.dat", 2, true, true, true, true);
411 |     //print_stats("../OCRdataset-2/data/truth-treeWS.dat", "../OCRdataset-2/data/pred.dat");
412 | 
413 |     //ocr.classify_file("../OCRdataset-2/data/data-loops.dat", "../OCRdataset-2/data/pred.dat", "../OCRdataset-2/data/truth-loops.dat", 2, true, true, true, true);
414 |     //print_stats("../OCRdataset-2/data/truth-loops.dat", "../OCRdataset-2/data/pred.dat");
415 | 
416 |     ocr.classify_file("../OCRdataset-2/data/data-loopsWS.dat", "../OCRdataset-2/data/pred.dat", "../OCRdataset-2/data/truth-loopsWS.dat", 3, true, true, true, true);
417 |     print_stats("../OCRdataset-2/data/truth-loopsWS.dat", "../OCRdataset-2/data/pred.dat");
418 | 
419 |     /*
420 |     string sA = string("A");
421 |     string sB = string("B");
422 |     string sC = string("C");
423 |     string sD = string("D");
424 |     string sE = string("E");
425 |     string sF = string("F");
426 |     string sG = string("G");
427 |     string sH = string("H");
428 | 
429 | 
430 |     Factor phi_A(1, vector<string>{sA}, vector<int>{2}, vector<double>{1,1});
431 |     Factor phi_B(1, vector<string>{sB}, vector<int>{2}, vector<double>{2,1});
432 |     Factor sob = phi_B.sum_out(sB);
433 |     sob.print();
434 |     Factor phi_C(1, vector<string>{sC}, vector<int>{2}, vector<double>{1,1});
435 |     Factor phi_D(1, vector<string>{sD}, vector<int>{2}, vector<double>{1,2});
436 |     Factor phi_AB(2, vector<string>{sA, sB}, vector<int>{2,2}, vector<double>{3,2,1,3});
437 |     Factor phi_BC(2, vector<string>{sB, sC}, vector<int>{2,2}, vector<double>{1,1,1,1});
438 |     Factor phi_CD(2, vector<string>{sC, sD}, vector<int>{2,2}, vector<double>{5,1,2,5});
439 |     Factor phi_DA(2, vector<string>{sD, sA}, vector<int>{2,2}, vector<double>{1,3,3,1});
440 | 
441 |     Factor phi_ab(2, vector<string>{sA, sB}, vector<int>{2,2}, vector<double>{1,1,1,1});
442 |     Factor phi_bc(2, vector<string>{sB, sC}, vector<int>{2,2}, vector<double>{1,1,1,1});
443 |     Factor phi_cd(2, vector<string>{sC, sD}, vector<int>{2,2}, vector<double>{1,1,1,1});
444 |     Factor phi_da(2, vector<string>{sD, sA}, vector<int>{2,2}, vector<double>{1,1,1,1});
445 | 
446 |     MarkovNet mn = MarkovNet(4, vector<string>{sA,sB,sC,sD}, vector<int>{2,2,2,2}, vector<vector<int> >{vector<int>{1,3}, vector<int>{0,2}, vector<int>{1,3}, vector<int>{0,2}}, vector<Factor>{phi_AB, phi_BC, phi_CD, phi_DA});
447 |     mn.print(true);
448 | 
449 |     vector<vector<int> > samples = mn.gibbs_sampler(vector<int>(mn.num_nodes, -1), 3000, 100, 1000, 15000, 0.01);
450 |     cout << samples.size() << endl;
451 | 
452 |     MarkovNet mn2 = MarkovNet(4, vector<string>{sA,sB,sC,sD}, vector<int>{2,2,2,2}, vector<vector<int> >{vector<int>{1,3}, vector<int>{0,2}, vector<int>{1,3}, vector<int>{0,2}}, vector<Factor>{phi_ab, phi_bc, phi_cd, phi_da});
453 |     mn2.learn_parameters(samples, 0.01, 0, 0.001);
454 |     mn2.print(true);
455 | 
456 | 
457 |     Factor phi_ABC(3, vector<string>{sA, sB, sC}, vector<int>{2,2,2}, vector<double>{});
458 |     Factor phi_CDE(3, vector<string>{sC, sD, sE}, vector<int>{2,2,2}, vector<double>{});
459 |     Factor phi_BCE(3, vector<string>{sB, sC, sE}, vector<int>{2,2,2}, vector<double>{});
460 |     Factor phi_BEG(3, vector<string>{sE, sB, sG}, vector<int>{2,2,2}, vector<double>{});
461 |     Factor phi_BFG(3, vector<string>{sF, sB, sG}, vector<int>{2,2,2}, vector<double>{});
462 |     Factor phi_GEH(3, vector<string>{sG, sE, sH}, vector<int>{2,2,2}, vector<double>{});
463 | 
464 |     FactorGraph fgr(4, vector<set<string> >{set<string>{sA, sB, sD}, set<string>{sB, sC, sD}, set<string>{sC, sD}, set<string>{sD}}, vector<vector<int> >{vector<int>{1}, vector<int>{0,2}, vector<int>{1,3}, vector<int>{2}}, vector<Factor>{phi_A, phi_B, phi_C, phi_D, phi_AB, phi_BC, phi_CD, phi_DA}, vector<vector<int> >{vector<int>{0,4,7}, vector<int>{1,5}, vector<int>{2,6}, vector<int>{3}});
465 |     fgr.print();
466 |     fgr.MessagePassing(3);
467 | 
468 |     for (int i = 0 ; i < fgr.num_nodes ; i++)
469 |     {fgr.node_marginals[i].normalize(); fgr.node_marginals[i].print();}
470 | 
471 |     fg.BeliefProp(0.05, 1000);
472 | 
473 |     for (int i = 0 ; i < fg.num_nodes ; i++)
474 |     {
475 |         fg.node_marginals[i].normalize();
476 |         fg.node_marginals[i].print();
477 |     }
478 | 
479 |     FactorGraph bcgr(8, vector<set<string> >{set<string>{sA, sB}, set<string>{sB, sC}, set<string>{sC, sD}, set<string>{sD, sA}, set<string>{sA}, set<string>{sB}, set<string>{sC}, set<string>{sD}}, vector<vector<int> >{vector<int>{4,5}, vector<int>{5,6}, vector<int>{6,7}, vector<int>{7,4}, vector<int>{0,3}, vector<int>{0,1}, vector<int>{1,2}, vector<int>{2,3}}, vector<Factor>{phi_A, phi_B, phi_C, phi_D, phi_AB, phi_BC, phi_CD, phi_DA}, vector<vector<int> >{vector<int>{0,4}, vector<int>{1,5}, vector<int>{2,6}, vector<int>{3,7}, vector<int>(), vector<int>(), vector<int>(), vector<int>()});
480 |     bcgr.print();
481 |     bcgr.BeliefProp(0.001, 1000);
482 |     for (int i = 0 ; i < bcgr.num_nodes ; i++)
483 |     {
484 |         bcgr.node_marginals[i].normalize();
485 |         bcgr.node_marginals[i].print();
486 |     }
487 | 
488 |     MarkovNet mn = MarkovNet(4, vector<string>{sA,sB,sC,sD}, vector<vector<int> >{vector<int>{1,3}, vector<int>{0,2}, vector<int>{1,3}, vector<int>{0,2}}, vector<Factor>{phi_A, phi_B, phi_C, phi_D, phi_AB, phi_BC, phi_CD, phi_DA});
489 |     mn.print(true);
490 |     // MarkovNet mn = MarkovNet(8, vector<string>{sA,sB,sC,sD,sE,sF,sG,sH}, vector<vector<int> >{vector<int>{1,2}, vector<int>{0,2,4,5,6}, vector<int>{0,1,3,4}, vector<int>{2,4}, vector<int>{1,2,3,6,7}, vector<int>{1,6}, vector<int>{1,4,5,7}, vector<int>{4,6}}, vector<Factor>{phi_ABC, phi_CDE, phi_BCE, phi_BEG, phi_BFG, phi_GEH});
491 | 
492 |     FactorGraph bcg = mn.gen_bethe_cluster_graph();
493 |     bcg.print();
494 |     bcg.BeliefProp(0.001, 1000);
495 |     for (int i = 0 ; i < bcg.num_nodes ; i++)
496 |     {
497 |         bcg.node_marginals[i].normalize();
498 |         bcg.node_marginals[i].print();
499 |     }
500 | 
501 | 
502 |     vector<int> order = mn.min_fill_ve_order();
503 |     cout << "Min Fill Order : ";
504 |     for (int i = 0 ; i