├── .gitattributes ├── .gitignore ├── BNs-for-Genetic-Inheritance ├── helper.py ├── quiz.ipynb ├── sol.py ├── spinalMuscularAtrophyBayesNet.net └── submit.py ├── CRF-Learning-For-OCR ├── check.ipynb ├── generate_all_features.py ├── helper.py ├── instance_neg_log_likelyhood.py ├── solution.py └── submit.py ├── Decision-Making ├── README.md ├── helper.py ├── quiz.ipynb ├── sol.py └── submit.py ├── Exact-Inference ├── check.ipynb ├── data │ └── README.md ├── helper.py ├── solution.py └── submit.py ├── Learning-Tree-Structured-Networks ├── check.ipynb ├── helper.py ├── solution.py └── submit.py ├── Learning-With-Incomplete-Data ├── check.ipynb ├── helper.py ├── sol.py ├── submit.py └── vis_helper.py ├── Markov-Networks-for-OCR ├── .gitignore ├── README.md ├── check.ipynb ├── convert_mats.py ├── helper.py ├── inference │ ├── doinference-linux │ ├── doinference-mac │ ├── doinference.exe │ └── inference-src.zip ├── sol.py └── submit.py ├── README.md ├── Sampling-Methods ├── check.ipynb ├── crandom.py ├── data │ ├── seed1.txt │ ├── seed2.txt │ └── seed26288942.txt ├── drandom.py ├── helper.py ├── solution.py ├── submit.py ├── visualization.ipynb └── visualization.py ├── Simple-BN-Knowledge-Engineering ├── Credit_net.net ├── README.md ├── check.ipynb ├── factor_tutorial.ipynb ├── helper.py ├── sol.py └── submit.py ├── clap.gif └── commons ├── __init__.py ├── factor.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py linguist-language=python 2 | *.ipynb linguist-documentation 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | token.pkl 3 | __pycache__/ 4 | .ipynb_checkpoints/ 5 | !data/README.md 6 | -------------------------------------------------------------------------------- /BNs-for-Genetic-Inheritance/helper.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | import numpy as np 5 | 6 | sys.path.insert(0, '..') 7 | import sol 8 | from commons.factor import Factor 9 | 10 | 11 | def generate_allele_genotype_mappers(num_alleles): 12 | alleles_to_genotypes = {} 13 | genotypes_to_alleles = [] 14 | 15 | for i in range(num_alleles): 16 | for j in range(i, num_alleles): 17 | alleles_to_genotypes[i, j] = alleles_to_genotypes[j, i] = len(genotypes_to_alleles) 18 | genotypes_to_alleles.append((i, j)) 19 | 20 | return alleles_to_genotypes, genotypes_to_alleles 21 | 22 | 23 | def child_copy_given_freqs_factor(allele_freqs, gene_copy_var): 24 | F = Factor([gene_copy_var], [len(allele_freqs)], init=0.0) 25 | for i, freq in enumerate(allele_freqs): 26 | F[i] = freq 27 | return F 28 | 29 | 30 | def child_copy_given_parentals_factor(num_alleles, gene_copy_var_child, gene_copy_var1, gene_copy_var2): 31 | n = num_alleles 32 | F = Factor([gene_copy_var_child, gene_copy_var1, gene_copy_var2], [n, n, n], init=0.0) 33 | for i in range(n): # Father 34 | for j in range(n): # Mother 35 | for k in range(n): # child 36 | if i == j == k: 37 | F[k, i, j] = 1.0 38 | elif i == k or j == k: 39 | F[k, i, j] = 0.5 40 | return F 41 | 42 | 43 | 44 | #### 45 | 46 | net_template = """net 47 | { 48 | node_size = (90 36); 49 | } 50 | """ 51 | 52 | node_template = """ 53 | node {label} 54 | {{ 55 | label = "{label}"; 56 | position = {position}; 57 | states = {states}; 58 | }} 59 | """ 60 | 61 | potential_template = """ 62 | potential ({var} |{given}) 
63 | {{ 64 | data = {data}; 65 | }} 66 | """ 67 | 68 | def iter_vals(f): 69 | domains = reversed(f.domains) 70 | for assignment in itertools.product(*domains): 71 | yield f[tuple(reversed(assignment))] 72 | 73 | def send_to_samiam(pedigree, factor_list, allele_list, phenotype_list, positions, output_file): 74 | names = pedigree['names'] 75 | parents = pedigree['parents'] 76 | 77 | num_peoples = len(names) 78 | 79 | with open(output_file, 'w') as f: 80 | f.write(net_template) 81 | 82 | genotypes = ' '.join('"%s%s"' % x for x in itertools.combinations_with_replacement(allele_list, 2)) 83 | genotypes = "(%s)" % genotypes 84 | 85 | phenotypes = ' '.join('"%s"' % p for p in phenotype_list) 86 | phenotypes = "(%s)" % phenotypes 87 | 88 | for i in range(num_peoples): 89 | name = "%sGenotype" % names[i] 90 | position = "(%d %d)" % (positions[i][0][0], positions[i][0][1]) 91 | f.write(node_template.format(label=name, position=position, states=genotypes)) 92 | 93 | name = "%sPhenotype" % names[i] 94 | position = "(%d %d)" % (positions[i][1][0], positions[i][1][1]) 95 | f.write(node_template.format(label=name, position=position, states=phenotypes)) 96 | 97 | for i in range(num_peoples): 98 | F = factor_list[i] 99 | var = "%sGenotype" % names[i] 100 | if len(F.vars) == 1: 101 | given = "" 102 | else: 103 | j, k = parents[i] 104 | given = " %sGenotype %sGenotype" % (names[j], names[k]) 105 | data = '(' + ' '.join('%f' % x for x in iter_vals(F)) + ')' 106 | f.write(potential_template.format(var=var, given=given, data=data)) 107 | 108 | for i in range(num_peoples): 109 | F = factor_list[num_peoples+i] 110 | var = "%sPhenotype" % names[i] 111 | given = " %sGenotype" % names[i] 112 | 113 | data = '(' 114 | it = iter_vals(F) 115 | max_len = len(F.val)//len(F.domains[0]) 116 | for j in F.domains[0]: 117 | data += '(' + ' '.join('%f' % next(it) for _ in range(max_len)) + ')\n' + ' '*16 118 | data = data[:-17] + ')' 119 | f.write(potential_template.format(var=var, given=given, data=data)) 120 | 121 | 122 | def send_to_samiam_copy(pedigree, factor_list, allele_list, phenotype_list, positions, output_file): 123 | names = pedigree['names'] 124 | parents = pedigree['parents'] 125 | 126 | num_peoples = len(names) 127 | 128 | genes = '(' + ' '.join('"%s"' % x for x in allele_list) + ')' 129 | phenotypes = '(' + ' '.join('"%s"' % p for p in phenotype_list) + ')' 130 | 131 | with open(output_file, 'w') as f: 132 | f.write(net_template) 133 | 134 | for i, per_name in enumerate(names): 135 | name = per_name + 'Parent1GeneCopy' 136 | position = "(%d %d)" % (positions[i][0][0], positions[i][0][1]) 137 | f.write(node_template.format(label=name, position=position, states=genes)) 138 | 139 | name = per_name + 'Parent2GeneCopy' 140 | position = "(%d %d)" % (positions[i][1][0], positions[i][1][1]) 141 | f.write(node_template.format(label=name, position=position, states=genes)) 142 | 143 | name = "%sPhenotype" % names[i] 144 | position = "(%d %d)" % (positions[i][2][0], positions[i][2][1]) 145 | f.write(node_template.format(label=name, position=position, states=phenotypes)) 146 | 147 | for i, per_name in enumerate(names): 148 | F = factor_list[i] 149 | if len(F.vars) == 1: 150 | given = "" 151 | else: 152 | p1 = names[parents[i][0]] 153 | given = " %sParent1GeneCopy %sParent2GeneCopy" % (p1, p1) 154 | data = '(' + ' '.join('%f' % x for x in iter_vals(F)) + ')' 155 | f.write(potential_template.format(var=per_name+'Parent1GeneCopy', given=given, data=data)) 156 | 157 | for i, per_name in enumerate(names): 158 | F = 
factor_list[i+num_peoples] 159 | if len(F.vars) == 1: 160 | given = "" 161 | else: 162 | p1 = names[parents[i][1]] 163 | given = " %sParent1GeneCopy %sParent2GeneCopy" % (p1, p1) 164 | data = '(' + ' '.join('%f' % x for x in iter_vals(F)) + ')' 165 | f.write(potential_template.format(var=per_name+'Parent2GeneCopy', given=given, data=data)) 166 | 167 | for i, per_name in enumerate(names): 168 | F = factor_list[i+2*num_peoples] 169 | given = " %sParent1GeneCopy %sParent2GeneCopy" % (per_name, per_name) 170 | data = '(' + ' '.join('%f' % x for x in iter_vals(F)) + ')' 171 | f.write(potential_template.format(var=per_name+'Phenotype', given=given, data=data)) 172 | 173 | -------------------------------------------------------------------------------- /BNs-for-Genetic-Inheritance/quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "\n", 11 | "import helper\n", 12 | "import sol" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Quiz 1 and 2\n", 20 | "\n", 21 | "Just run below cell once you have implemented `sol.construct_genetic_network`. \n", 22 | "\n", 23 | "This will save `cysticFibrosisBayesNet.net` in this directory. You can open that file with samiam and run the inference. Same instruction as PDF applies." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "pedigree = {\n", 33 | " 'parents': [None, (0, 2), None, (0, 2), (1, 5), None, (1, 5), (3, 8), None],\n", 34 | " 'names': ['Ira','James','Robin','Eva','Jason','Rene','Benjamin','Sandra','Aaron']\n", 35 | "}\n", 36 | "allele_freqs = [0.1, 0.9]\n", 37 | "allele_list = ['F', 'f']\n", 38 | "alpha_list = [0.8, 0.6, 0.1]\n", 39 | "phenotype_list = ['CysticFibrosis', 'NoCysticFibrosis']\n", 40 | "positions = [520, 600, 520, 500, 650, 400, 650, 300, 390, 600, 390, 500, 260, 400, 260, 300, 780, 200, 780, 100, 1040, 400, 1040, 300, 910, 200, 910, 100, 130, 200, 130, 100, 0, 400, 0, 300]\n", 41 | "positions = np.array(positions).reshape(-1, 2, 2).tolist()\n", 42 | "\n", 43 | "factor_list = sol.construct_genetic_network(pedigree, allele_freqs, alpha_list)\n", 44 | "\n", 45 | "helper.send_to_samiam(pedigree, factor_list, allele_list, phenotype_list, positions, 'cysticFibrosisBayesNet.net')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "# Quiz 6" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "pedigree = {\n", 62 | " 'parents': [None, (0, 2), None, (0, 2), (1, 5), None, (1, 5), (3, 8), None],\n", 63 | " 'names': ['Ira','James','Robin','Eva','Jason','Rene','Benjamin','Sandra','Aaron']\n", 64 | "}\n", 65 | "allele_freqs = [0.1, 0.7, .2]\n", 66 | "allele_list = ['F', 'f', 'n']\n", 67 | "alpha_list = [0.8, 0.6, 0.1, .5, .05, .01]\n", 68 | "phenotype_list = ['CysticFibrosis', 'NoCysticFibrosis']\n", 69 | "positions = [1040, 600, 1170, 600, 1105, 500, 1300, 400, 1430, 400, 1365, 300, 780, 600, 910, 600, 845, 500, 520, 400, 650, 400, 585, 300, 1560, 200, 1690, 200, 1625, 100, 2080, 400, 2210, 400, 2145, 300, 1820, 200, 1950, 200, 1885, 100, 260, 200, 390, 200, 325, 100, 0, 400, 130, 400, 65, 300]\n", 70 | "positions = np.array(positions).reshape(-1, 3, 2).tolist()\n", 71 | "\n", 72 | 
"factor_list = sol.construct_decoupled_genetic_network(pedigree, allele_freqs, alpha_list)\n", 73 | "\n", 74 | "helper.send_to_samiam_copy(pedigree, factor_list, allele_list, phenotype_list, positions, 'cysticFibrosisBayesNetCopy.net')" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.9.7" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 4 106 | } 107 | -------------------------------------------------------------------------------- /BNs-for-Genetic-Inheritance/spinalMuscularAtrophyBayesNet.net: -------------------------------------------------------------------------------- 1 | net 2 | { 3 | node_size = (90 36); 4 | } 5 | 6 | node IraParent1Gene1Copy 7 | { 8 | lablel = "IraParent1Gene1Copy"; 9 | position = (1040 400); 10 | states = ("M" "m"); 11 | } 12 | 13 | node IraParent1Gene2Copy 14 | { 15 | lablel = "IraParent1Gene2Copy"; 16 | position = (1170 400); 17 | states = ("B" "b"); 18 | } 19 | 20 | node IraParent2Gene1Copy 21 | { 22 | lablel = "IraParent2Gene1Copy"; 23 | position = (1300 400); 24 | states = ("M" "m"); 25 | } 26 | 27 | node IraParent2Gene2Copy 28 | { 29 | lablel = "IraParent2Gene2Copy"; 30 | position = (1430 400); 31 | states = ("B" "b"); 32 | } 33 | 34 | node IraPhenotype 35 | { 36 | lablel = "IraPhenotype"; 37 | position = (1235 300); 38 | states = ("SpinalMuscularAtrophy" "NoSpinalMuscularAtrophy"); 39 | } 40 | 41 | node JamesParent1Gene1Copy 42 | { 43 | lablel = "JamesParent1Gene1Copy"; 44 | position = (520 200); 45 | states = ("M" "m"); 46 | } 47 | 48 | node JamesParent1Gene2Copy 49 | { 50 | lablel = "JamesParent1Gene2Copy"; 51 | position = (650 200); 52 | states = ("B" "b"); 53 | } 54 | 55 | node JamesParent2Gene1Copy 56 | { 57 | lablel = "JamesParent2Gene1Copy"; 58 | position = (780 200); 59 | states = ("M" "m"); 60 | } 61 | 62 | node JamesParent2Gene2Copy 63 | { 64 | lablel = "JamesParent2Gene2Copy"; 65 | position = (910 200); 66 | states = ("B" "b"); 67 | } 68 | 69 | node JamesPhenotype 70 | { 71 | lablel = "JamesPhenotype"; 72 | position = (705 100); 73 | states = ("SpinalMuscularAtrophy" "NoSpinalMuscularAtrophy"); 74 | } 75 | 76 | node RobinParent1Gene1Copy 77 | { 78 | lablel = "RobinParent1Gene1Copy"; 79 | position = (0 400); 80 | states = ("M" "m"); 81 | } 82 | 83 | node RobinParent1Gene2Copy 84 | { 85 | lablel = "RobinParent1Gene2Copy"; 86 | position = (130 400); 87 | states = ("B" "b"); 88 | } 89 | 90 | node RobinParent2Gene1Copy 91 | { 92 | lablel = "RobinParent2Gene1Copy"; 93 | position = (260 400); 94 | states = ("M" "m"); 95 | } 96 | 97 | node RobinParent2Gene2Copy 98 | { 99 | lablel = "RobinParent2Gene2Copy"; 100 | position = (390 400); 101 | states = ("B" "b"); 102 | } 103 | 104 | node RobinPhenotype 105 | { 106 | lablel = "RobinPhenotype"; 107 | position = (195 300); 108 | states = ("SpinalMuscularAtrophy" "NoSpinalMuscularAtrophy"); 109 | } 110 | 111 | potential (IraParent1Gene1Copy |) 112 | { 113 | data = (0.200000 0.800000); 114 | } 115 | 116 | potential (IraParent1Gene2Copy |) 117 | { 118 | data = (0.300000 0.700000); 119 | } 120 | 
121 | potential (IraParent2Gene1Copy |) 122 | { 123 | data = (0.200000 0.800000); 124 | } 125 | 126 | potential (IraParent2Gene2Copy |) 127 | { 128 | data = (0.300000 0.700000); 129 | } 130 | 131 | potential (IraPhenotype | IraParent1Gene1Copy IraParent1Gene2Copy IraParent2Gene1Copy IraParent2Gene2Copy) 132 | { 133 | data = (0.999590 0.000410 0.858149 0.141851 0.997762 0.002238 0.524979 0.475021 0.858149 0.141851 0.014774 0.985226 0.524979 0.475021 0.002732 0.997268 0.997762 0.002238 0.524979 0.475021 0.987872 0.012128 0.167982 0.832018 0.524979 0.475021 0.002732 0.997268 0.167982 0.832018 0.000500 0.999500); 134 | } 135 | 136 | potential (JamesParent1Gene1Copy | IraParent1Gene1Copy IraParent2Gene1Copy) 137 | { 138 | data = (1.000000 0.000000 0.500000 0.500000 0.500000 0.500000 0.000000 1.000000); 139 | } 140 | 141 | potential (JamesParent1Gene2Copy | IraParent1Gene2Copy IraParent2Gene2Copy) 142 | { 143 | data = (1.000000 0.000000 0.500000 0.500000 0.500000 0.500000 0.000000 1.000000); 144 | } 145 | 146 | potential (JamesParent2Gene1Copy | RobinParent1Gene1Copy RobinParent2Gene1Copy) 147 | { 148 | data = (1.000000 0.000000 0.500000 0.500000 0.500000 0.500000 0.000000 1.000000); 149 | } 150 | 151 | potential (JamesParent2Gene2Copy | RobinParent1Gene2Copy RobinParent2Gene2Copy) 152 | { 153 | data = (1.000000 0.000000 0.500000 0.500000 0.500000 0.500000 0.000000 1.000000); 154 | } 155 | 156 | potential (JamesPhenotype | JamesParent1Gene1Copy JamesParent1Gene2Copy JamesParent2Gene1Copy JamesParent2Gene2Copy) 157 | { 158 | data = (0.999590 0.000410 0.858149 0.141851 0.997762 0.002238 0.524979 0.475021 0.858149 0.141851 0.014774 0.985226 0.524979 0.475021 0.002732 0.997268 0.997762 0.002238 0.524979 0.475021 0.987872 0.012128 0.167982 0.832018 0.524979 0.475021 0.002732 0.997268 0.167982 0.832018 0.000500 0.999500); 159 | } 160 | 161 | potential (RobinParent1Gene1Copy |) 162 | { 163 | data = (0.200000 0.800000); 164 | } 165 | 166 | potential (RobinParent1Gene2Copy |) 167 | { 168 | data = (0.300000 0.700000); 169 | } 170 | 171 | potential (RobinParent2Gene1Copy |) 172 | { 173 | data = (0.200000 0.800000); 174 | } 175 | 176 | potential (RobinParent2Gene2Copy |) 177 | { 178 | data = (0.300000 0.700000); 179 | } 180 | 181 | potential (RobinPhenotype | RobinParent1Gene1Copy RobinParent1Gene2Copy RobinParent2Gene1Copy RobinParent2Gene2Copy) 182 | { 183 | data = (0.999590 0.000410 0.858149 0.141851 0.997762 0.002238 0.524979 0.475021 0.858149 0.141851 0.014774 0.985226 0.524979 0.475021 0.002732 0.997268 0.997762 0.002238 0.524979 0.475021 0.987872 0.012128 0.167982 0.832018 0.524979 0.475021 0.002732 0.997268 0.167982 0.832018 0.000500 0.999500); 184 | } 185 | 186 | -------------------------------------------------------------------------------- /BNs-for-Genetic-Inheritance/submit.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | import helper 7 | import sol 8 | 9 | sys.path.insert(0, '..') 10 | 11 | import commons 12 | from commons.factor import Factor 13 | 14 | 15 | class Grader(commons.SubmissionBase): 16 | def __init__(self): 17 | part_names = [None, 18 | '6qJ5F', 'YW7hB', '4SCHR', 'dMfG8', 19 | 'sVijF', '7ccHc', 'YTHBY', 'mFn3B', 20 | 'wCUvg', 'wyMoY', 'flSZ2', 'GfmuH', 21 | 'JmkmK', 'J0R6g', 'dpFFP', 'nBdS8'] 22 | super().__init__('BNs for Genetic Inheritance', 'haijvkP8EeaOwRI5GO98Xw', part_names) 23 | 24 | def __iter__(self): 25 | for part_id in range(1, 
len(self.part_names)): 26 | try: 27 | if part_id == 1: 28 | F = sol.phenotype_given_genotype_mendelian_factor(1, 0, 2) 29 | res = serialize_factors_fg_grading([F]) 30 | elif part_id == 2: 31 | F = sol.phenotype_given_genotype_mendelian_factor(0, 0, 2) 32 | res = serialize_factors_fg_grading([F]) 33 | elif part_id == 3: 34 | F = sol.phenotype_given_genotype([.8, .6, .1], 0, 2) 35 | res = serialize_factors_fg_grading([F]) 36 | elif part_id == 4: 37 | F = sol.phenotype_given_genotype([.2, .5, .9], 0, 2) 38 | res = serialize_factors_fg_grading([F]) 39 | elif part_id == 5: 40 | F = sol.genotype_given_allele_freqs_factor([.1, .9], 0) 41 | res = serialize_factors_fg_grading([F]) 42 | elif part_id == 6: 43 | F = sol.genotype_given_allele_freqs_factor([.98, .02], 0) 44 | res = serialize_factors_fg_grading([F]) 45 | elif part_id == 7: 46 | F = sol.genotype_given_parents_genotypes_factor(2, 2, 0, 1) 47 | F = sort_factor_later_vars(F) 48 | res = serialize_factors_fg_grading([F]) 49 | elif part_id == 8: 50 | F = sol.genotype_given_parents_genotypes_factor(3, 2, 0, 1) 51 | F = sort_factor_later_vars(F) 52 | res = serialize_factors_fg_grading([F]) 53 | elif part_id == 9: 54 | allele_freqs = [.1, .9] 55 | alpha_list = [.8, .6, .1] 56 | pedigree = { 57 | 'parents': [None, (0, 2), None, (0, 2), (1, 5), None, (1, 5), (3, 8), None], 58 | 'names': ['Ira','James','Robin','Eva','Jason','Rene','Benjamin','Sandra','Aaron'] 59 | } 60 | 61 | cgn = sol.construct_genetic_network(pedigree, allele_freqs, alpha_list) 62 | cgn = [sort_factor_later_vars(F) for F in cgn] 63 | cgn = sort_struct(cgn) 64 | res = serialize_factors_fg_grading(cgn) 65 | elif part_id == 10: 66 | allele_freqs = [.1, .9] 67 | alpha_list = [.8, .6, .1] 68 | pedigree = { 69 | 'parents': [None, None, (1, 0), None, (1, 0), None, None, (2, 3), (4, 6), (4, 5)], 70 | 'names': ['Alan','Vivian','Alice','Larry','Beth','Henry','Leon','Frank','Amy', 'Martin'] 71 | } 72 | 73 | cgn = sol.construct_genetic_network(pedigree, allele_freqs, alpha_list) 74 | cgn = [sort_factor_later_vars(F) for F in cgn] 75 | cgn = sort_struct(cgn) 76 | res = serialize_factors_fg_grading(cgn) 77 | elif part_id == 11: 78 | alpha_list = [0.8, 0.6, 0.1, 0.5, 0.05, 0.01] 79 | F = sol.phenotype_given_copies_factor(alpha_list, 3, 0, 1, 2) 80 | F = sort_factor_later_vars(F) 81 | res = serialize_factors_fg_grading([F]) 82 | elif part_id == 12: 83 | alpha_list = [0.001, 0.009, 0.3, 0.2, 0.75, 0.95] 84 | F = sol.phenotype_given_copies_factor(alpha_list, 3, 0, 1, 2) 85 | F = sort_factor_later_vars(F) 86 | res = serialize_factors_fg_grading([F]) 87 | elif part_id == 13: 88 | pedigree = { 89 | 'parents': [None, (0, 2), None, (0, 2), (1, 5), None, (1, 5), (3, 8), None], 90 | 'names': ['Ira','James','Robin','Eva','Jason','Rene','Benjamin','Sandra','Aaron'] 91 | } 92 | alpha_list = [0.8, 0.6, 0.1, 0.5, 0.05, 0.01] 93 | allele_freqs = [.1, .7, .2] 94 | 95 | cgn = sol.construct_decoupled_genetic_network(pedigree, allele_freqs, alpha_list) 96 | cgn = [sort_factor_later_vars(F) for F in cgn] 97 | cgn = sort_struct(cgn) 98 | res = serialize_factors_fg_grading(cgn) 99 | elif part_id == 14: 100 | pedigree = { 101 | 'parents': [None, None, (1, 0), None, (1, 0), None, None, (2, 3), (4, 6), (4, 5)], 102 | 'names': ['Alan','Vivian','Alice','Larry','Beth','Henry','Leon','Frank','Amy', 'Martin'] 103 | } 104 | alpha_list = [0.8, 0.6, 0.1, 0.5, 0.05, 0.01] 105 | allele_freqs = [.1, .7, .2] 106 | 107 | cgn = sol.construct_decoupled_genetic_network(pedigree, allele_freqs, alpha_list) 108 | cgn = 
[sort_factor_later_vars(F) for F in cgn] 109 | cgn = sort_struct(cgn) 110 | res = serialize_factors_fg_grading(cgn) 111 | elif part_id == 15: 112 | allele_weights = [[3, -3], [0.9, -0.8]] 113 | phenotype_var = 2; 114 | gene_copy_var_parent1_list = [0, 1] 115 | gene_copy_var_parent2_list = [3, 4] 116 | F = sol.construct_sigmoid_phenotype_factor(allele_weights, gene_copy_var_parent1_list, 117 | gene_copy_var_parent2_list, phenotype_var) 118 | F = sort_factor_later_vars(F) 119 | res = serialize_factors_fg_grading([F]) 120 | elif part_id == 16: 121 | allele_weights = [[0.01, -.2], [1, -.5]] 122 | phenotype_var = 2; 123 | gene_copy_var_parent1_list = [0, 1] 124 | gene_copy_var_parent2_list = [3, 4] 125 | F = sol.construct_sigmoid_phenotype_factor(allele_weights, gene_copy_var_parent1_list, 126 | gene_copy_var_parent2_list, phenotype_var) 127 | F = sort_factor_later_vars(F) 128 | res = serialize_factors_fg_grading([F]) 129 | else: 130 | raise KeyError 131 | 132 | yield self.part_names[part_id], res 133 | except KeyError: 134 | yield self.part_names[part_id], 0 135 | 136 | 137 | def serialize_factors_fg_grading(factors, skip=1) -> str: 138 | lines = ["%d\n" % len(factors)] 139 | 140 | for f in factors: 141 | var = [v+1 for v in f.vars] 142 | lines.append("%d" % (len(var), )) 143 | lines.append(" ".join(map(str, var))) 144 | lines.append(" ".join(str(len(d)) for d in f.domains)) 145 | placeholder_idx = len(lines) 146 | lines.append(None) # will be replace by nonzero count once we know 147 | 148 | # libDAI expects first variable to change fastest 149 | # but itertools.product changes the last element fastest 150 | # hence reversed list 151 | domains = reversed(f.domains) 152 | num_lines = 0 153 | new_lines = [] 154 | for i, assignment in enumerate(itertools.product(*domains)): 155 | num_lines += 1 156 | val = f[tuple(reversed(assignment))] 157 | new_lines.append("%d %0.8g" % (i, val, )) 158 | new_lines = new_lines[::skip] 159 | lines[placeholder_idx] = "%d" % (num_lines, ) 160 | lines.extend(new_lines) 161 | lines.append("") 162 | 163 | return "\n".join(lines) 164 | 165 | def sort_factor(F): 166 | domains_d = dict(zip(F.vars, F.domains)) 167 | var = sorted(F.vars) 168 | domains = [domains_d[v] for v in var] 169 | newF = Factor(var, domains) 170 | for k in F: 171 | assignment = dict(zip(F.vars, k)) 172 | newF[assignment] = F[k] 173 | return newF 174 | 175 | def sort_factor_later_vars(F): 176 | if not F.vars: 177 | return F 178 | domains_d = dict(zip(F.vars, F.domains)) 179 | var = [F.vars[0]] + sorted(F.vars[1:]) 180 | domains = [domains_d[v] for v in var] 181 | newF = Factor(var, domains) 182 | for k in F: 183 | assignment = dict(zip(F.vars, k)) 184 | newF[assignment] = F[k] 185 | return newF 186 | 187 | def sort_struct(S): 188 | def key(F): 189 | s = [] 190 | s.extend([x+1 for x in F.vars]) 191 | s.extend([len(x) for x in F.domains]) 192 | domains = reversed(F.domains) 193 | s.extend([F[tuple(reversed(assignment))] for assignment in itertools.product(*domains)]) 194 | 195 | fmt = itertools.cycle(['%d', '%d', '%f']) 196 | s = ''.join(f%x for f, x in zip(fmt, s)) 197 | return s 198 | return sorted(S, key=key) 199 | 200 | 201 | if __name__ == '__main__': 202 | grader = Grader() 203 | grader.grade() -------------------------------------------------------------------------------- /CRF-Learning-For-OCR/check.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | 
"outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import sys\n", 20 | "sys.path.insert(0, '..')\n", 21 | "import numpy as np\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "from scipy.io import loadmat\n", 24 | "\n", 25 | "from instance_neg_log_likelyhood import *\n", 26 | "import helper\n", 27 | "import solution\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Part 1\n", 35 | "## Stochastic Gradient Descent" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "X = loadmat(\"./data/Train1X.mat\")['Train1X']\n", 45 | "y = loadmat(\"./data/Train1Y.mat\")['Train1Y'].squeeze()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "theta = helper.lr_train(X, y, 0)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 5, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "pred = helper.lr_predict(X, theta)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "0.96" 75 | ] 76 | }, 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "helper.lr_accuracy(y, pred)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Test set" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "X_test = loadmat(\"./data/Test1X.mat\")['Test1X']\n", 100 | "y_test = loadmat(\"./data/Test1Y.mat\")['Test1Y'].squeeze()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "0.9125" 112 | ] 113 | }, 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "pred_test = helper.lr_predict(X_test, theta)\n", 121 | "helper.lr_accuracy(y_test, pred_test)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "### LR Search Lambdas" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 9, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "X_val = loadmat(\"./data/Validation1X.mat\")['Validation1X']\n", 138 | "y_val = loadmat(\"./data/Validation1Y.mat\")['Validation1Y'].squeeze()\n", 139 | "lambdas = loadmat(\"./data/Part1Lambdas.mat\")['Part1Lambdas'].squeeze()\n", 140 | "expected_out = loadmat(\"./data/ValidationAccuracy.mat\")['ValidationAccuracy'].squeeze()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 10, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "acc = solution.lr_search_lambda_sgd(X, y, X_val, y_val, lambdas)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 11, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "True" 161 | ] 162 | }, 163 | "execution_count": 11, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "np.allclose(acc, expected_out)" 
170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "# Part 2" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 12, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "data = loadmat(\"./data/Part2Sample.mat\", simplify_cells=True)\n", 186 | "\n", 187 | "sample_calibrated = [Factor.from_matlab(clique) for clique in data['sampleCalibratedTree']['cliqueList']]\n", 188 | "sample_calibrated_tree = {'clique_list': sample_calibrated, 'adj_list': {}}\n", 189 | "sample_uncalibrated = [Factor.from_matlab(clique) for clique in data['sampleUncalibratedTree']['cliqueList']]\n", 190 | "sample_uncalibrated_tree = {'clique_list': sample_uncalibrated, 'adj_list': {}}\n", 191 | "sample_logZ = data['sampleLogZ']\n", 192 | "\n", 193 | "edges = data['sampleUncalibratedTree']['edges']\n", 194 | "for i in range(len(edges)):\n", 195 | " sample_uncalibrated_tree['adj_list'][i] = set()\n", 196 | " sample_calibrated_tree['adj_list'][i] = set()\n", 197 | " for j in range(len(edges)):\n", 198 | " if edges[i, j] == 1:\n", 199 | " sample_uncalibrated_tree['adj_list'][i].add(j)\n", 200 | " sample_calibrated_tree['adj_list'][i].add(j)\n", 201 | " \n", 202 | "#all_data = loadmat(\"./data/Part2FullDataset.mat\", simplify_cells=True)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## Check LogZ (Clique Tree Calibrate: Implement it in solution.py)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 13, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "out_calib, out_logZ = solution.clique_tree_calibrate(sample_uncalibrated_tree, True)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 14, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "(True, True)" 230 | ] 231 | }, 232 | "execution_count": 14, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "out_calib == sample_calibrated, abs(out_logZ - sample_logZ ) < 1e-10" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Instance Neg Log Likelyhood (Implement it in instance_neg_log_likelyhood.py)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 15, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "{'num_hidden_states': 26, 'num_observed_states': 2, 'lambda': 0.003}" 257 | ] 258 | }, 259 | "execution_count": 15, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "sample_x = data['sampleX']-1\n", 266 | "sample_y = data['sampleY']-1\n", 267 | "sample_theta = data['sampleTheta']\n", 268 | "sample_params = data['sampleModelParams']\n", 269 | "model_param = {'num_hidden_states': sample_params['numHiddenStates'], \n", 270 | " 'num_observed_states': sample_params['numObservedStates'],\n", 271 | " 'lambda': sample_params['lambda']}\n", 272 | "model_param" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 16, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "nll, grad = instance_neg_log_likelyhood(sample_x, sample_y, sample_theta, model_param)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 17, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "(14.126845092942453, 14.126845092942455, 
True)" 293 | ] 294 | }, 295 | "execution_count": 17, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "nll, data['sampleNLL'], abs(nll - data['sampleNLL']) < 1e-8" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 18, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "True" 313 | ] 314 | }, 315 | "execution_count": 18, 316 | "metadata": {}, 317 | "output_type": "execute_result" 318 | } 319 | ], 320 | "source": [ 321 | "np.allclose(grad, data['sampleGrad'])" 322 | ] 323 | } 324 | ], 325 | "metadata": { 326 | "kernelspec": { 327 | "display_name": "Python 3", 328 | "language": "python", 329 | "name": "python3" 330 | }, 331 | "language_info": { 332 | "codemirror_mode": { 333 | "name": "ipython", 334 | "version": 3 335 | }, 336 | "file_extension": ".py", 337 | "mimetype": "text/x-python", 338 | "name": "python", 339 | "nbconvert_exporter": "python", 340 | "pygments_lexer": "ipython3", 341 | "version": "3.9.5" 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 4 346 | } 347 | -------------------------------------------------------------------------------- /CRF-Learning-For-OCR/generate_all_features.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import numpy as np 4 | 5 | Feature = namedtuple("Feature", 'var assignment param_idx') 6 | 7 | 8 | def compute_conditioned_singleton_features(mat, model_params, param_idx_base=0): 9 | height, width = mat.shape 10 | n_hidden_states = model_params['num_hidden_states'] 11 | n_observed_states = model_params['num_observed_states'] 12 | 13 | features = [] 14 | for hidden_state in range(n_hidden_states): 15 | for feature_num in range(width): 16 | for v in range(height): 17 | param_idx = np.ravel_multi_index([mat[v, feature_num], feature_num, hidden_state], 18 | dims=[n_observed_states, width, n_hidden_states], 19 | order='F') 20 | features.append(Feature(var=(v, ), assignment=(hidden_state, ), param_idx=param_idx_base+param_idx)) 21 | 22 | return features 23 | 24 | 25 | def compute_unconditioned_singleton_features(length, model_params, param_idx_base=0): 26 | features = [] 27 | for state in range(model_params['num_hidden_states']): 28 | for v in range(length): 29 | features.append(Feature(var=(v, ), assignment=(state, ), param_idx=param_idx_base+state)) 30 | return features 31 | 32 | 33 | def compute_unconditioned_pair_features(length, model_params, param_idx_base=0): 34 | features = [] 35 | if length < 2: 36 | return features 37 | K = model_params['num_hidden_states'] 38 | for state1 in range(K): 39 | for state2 in range(K): 40 | param_idx = param_idx_base + np.ravel_multi_index([state2, state1], [K, K], order='F') 41 | for v in range(length-1): 42 | features.append(Feature(var=(v, v+1), assignment=(state1, state2), param_idx=param_idx)) 43 | return features 44 | 45 | 46 | def generate_all_features(mat, model_params): 47 | param_idx_base = 0 48 | 49 | all_features = [] 50 | 51 | features = compute_conditioned_singleton_features(mat, model_params, param_idx_base) 52 | if features: 53 | all_features.extend(features) 54 | # we can not look into max(f.param_idx for f in features) as some combination might not have been observed 55 | # so I'm computing this using below formula. 56 | # In case there are more than one conditioned features, one will have to adjust it accordingly. 
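# For instance (an illustrative, made-up shape, not data from the assignment):
# with mat.shape == (2, 3), 2 observed states and 26 hidden states, the largest
# index produced above is
#     np.ravel_multi_index([1, 2, 25], dims=[2, 3, 26], order='F')  # == 155
# which is exactly 2 * 3 * 26 - 1, so the product below reserves the whole
# range of conditioned-singleton parameter indices.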
57 | # Code by Daphne Koller handles it differently, but I didn't understand it, so I used below formula. 58 | param_idx_base = mat.shape[1] * model_params['num_hidden_states'] * model_params['num_observed_states'] 59 | 60 | for fn in [compute_unconditioned_singleton_features, compute_unconditioned_pair_features]: 61 | features = fn(mat.shape[0], model_params, param_idx_base) 62 | if features: 63 | param_idx_base = max(f.param_idx for f in features) + 1 64 | all_features.extend(features) 65 | 66 | return {'num_params': param_idx_base, 'features': all_features} 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /CRF-Learning-For-OCR/helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | import numpy as np 5 | 6 | sys.path.insert(0, '..') 7 | from commons.factor import Factor 8 | import solution as sol 9 | 10 | 11 | def create_clique_tree(factors, evidence=[]): 12 | V, domains = set(), dict() 13 | for f in factors: 14 | V.update(f.vars) 15 | for v, d in zip(f.vars, f.domains): 16 | if v in domains: 17 | assert domains[v] == d, "Domain mismatch between factors" 18 | else: 19 | domains[v] = d 20 | 21 | adj_list = {v: {v, } for v in V} 22 | for f in factors: 23 | for u, v in itertools.permutations(f.vars, 2): 24 | adj_list[u].add(v) 25 | 26 | cliques_considered = 0 27 | F = factors 28 | skeleton = {'nodes': [], 'factor_idxs': [], 'edges': [], 'factor_list': factors} 29 | while cliques_considered < len(V): 30 | next_var = min(adj_list, key=lambda x: len(adj_list[x])) 31 | F = eliminate_var(F, adj_list, next_var, skeleton) 32 | cliques_considered += 1 33 | if not adj_list: 34 | break 35 | 36 | prune_tree(skeleton) 37 | return sol.compute_initial_potentials(skeleton) 38 | 39 | 40 | def eliminate_var(F, adj_list, next_var, skeleton): 41 | use_factors, non_use_factors = [], [] 42 | scope = set() 43 | for i, f in enumerate(F): 44 | if next_var in f.vars: 45 | use_factors.append(i) 46 | scope.update(f.vars) 47 | else: 48 | non_use_factors.append(i) 49 | scope = sorted(scope) 50 | 51 | for i, j in itertools.permutations(scope, 2): 52 | if i not in adj_list: 53 | adj_list[i] = {j, } 54 | else: 55 | adj_list[i].add(j) 56 | 57 | # next steps removes the next_var from adj_list 58 | for k in adj_list: 59 | if next_var in adj_list[k]: 60 | adj_list[k].remove(next_var) 61 | del adj_list[next_var] 62 | 63 | newF, newmap = [], {} 64 | for i in non_use_factors: 65 | newmap[i] = len(newF) 66 | newF.append(F[i]) 67 | 68 | new_factor = Factor([], []) 69 | for i in use_factors: 70 | new_factor = new_factor @ F[i] # Since this just a simulation, we don't really need to compute values. 
So @ 71 | new_factor = new_factor.dummy_marginalise({next_var, }) 72 | newF.append(new_factor) 73 | 74 | for i in range(len(skeleton['nodes'])): 75 | if skeleton['factor_idxs'][i] in use_factors: 76 | skeleton['edges'].append((skeleton['nodes'][i], set(scope))) 77 | skeleton['factor_idxs'][i] = None 78 | elif skeleton['factor_idxs'][i] is not None: 79 | skeleton['factor_idxs'][i] = newmap[skeleton['factor_idxs'][i]] 80 | skeleton['nodes'].append(set(scope)) 81 | skeleton['factor_idxs'].append(len(newF) - 1) 82 | 83 | return newF 84 | 85 | 86 | def prune_tree(skeleton): 87 | found = True 88 | while found: 89 | found = False 90 | 91 | for u, v in skeleton['edges']: 92 | if u.issuperset(v): 93 | found = True 94 | parent = u 95 | child = v 96 | break 97 | elif v.issuperset(u): 98 | found = True 99 | parent = v 100 | child = u 101 | break 102 | 103 | if not found: 104 | break 105 | 106 | new_edges = [] 107 | for u, v in skeleton['edges']: 108 | if (u, v) == (child, parent) or (v, u) == (child, parent): 109 | continue 110 | elif u == child: 111 | new_edges.append((parent, v)) 112 | elif v == child: 113 | new_edges.append((u, parent)) 114 | else: 115 | new_edges.append((u, v)) 116 | skeleton['edges'] = new_edges 117 | skeleton['nodes'] = [node for node in skeleton['nodes'] if node != child] 118 | 119 | 120 | def sigmoid(x): 121 | return 1/(1+np.exp(-x)) 122 | 123 | 124 | def lr_predict(X, theta): 125 | return ((X@theta) > 0).astype(np.uint8) 126 | 127 | 128 | def lr_accuracy(ground_truth, prediction): 129 | return (ground_truth == prediction).mean() 130 | 131 | 132 | def grad_fn_generator(X, y, lambda_): 133 | N = X.shape[0] 134 | 135 | def lr_grad(theta, i): 136 | i = ((i+1) % N) 137 | x = X[i] 138 | h = sigmoid(x@theta) 139 | cost = -y[i]*np.log(h) - (1-y[i])*np.log(1 - h) + 0.5*lambda_*(theta[1:]**2).sum() 140 | grad = x * (h-y[i]) 141 | grad[1:] += lambda_*theta[1:] 142 | return cost, grad 143 | 144 | return lr_grad 145 | 146 | 147 | def lr_train(X, y, lambda_): 148 | grad_fn = grad_fn_generator(X, y, lambda_) 149 | theta_opt = sol.stochastic_gradient_descent(grad_fn, np.zeros(X.shape[1]), 5000) 150 | return theta_opt 151 | 152 | 153 | def from_mat_to_tree(data): 154 | factors = [Factor.from_matlab(clique) for clique in data['cliqueList']] 155 | edges = data['edges'] 156 | tree = {'clique_list': factors, 'adj_list': {}} 157 | 158 | for i in range(len(edges)): 159 | tree['adj_list'][i] = set() 160 | for j in range(len(edges)): 161 | if edges[i, j] == 1: 162 | tree['adj_list'][i].add(j) 163 | tree['adj_list'][i].add(j) 164 | return tree -------------------------------------------------------------------------------- /CRF-Learning-For-OCR/instance_neg_log_likelyhood.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | 5 | import helper 6 | import solution as sol 7 | 8 | sys.path.insert(0, '..') 9 | from commons.factor import Factor 10 | import generate_all_features 11 | 12 | 13 | def instance_neg_log_likelyhood(X, y, theta, model_params): 14 | """ 15 | Args: 16 | X: Data, (numCharacters, numImageFeatures matrix) shaped numpy array. 17 | X[:,0] is all ones, i.e., it encodes the intercept/bias term. 18 | y: Data labels. (numCharacters, ) shaped numpy array 19 | theta: CRF weights/parameters. (numParams, ) shaped np array. 20 | These are shared among the various singleton / pairwise features. 
21 |         model_params: A dict with three keys:
22 |             num_hidden_states: in our case, set to 26 (26 possible characters)
23 |             num_observed_states: in our case, set to 2 (each pixel is either on or off)
24 |             lambda: the regularization parameter lambda
25 | 
26 |     Returns:
27 |         nll: Negative log-likelihood of the data. (scalar)
28 |         grad: Gradient of nll with respect to theta (numParams, ) shaped np array
29 | 
30 |     Copyright (C) Daphne Koller, Stanford University, 2012
31 |     """
32 | 
33 |     feature_set = generate_all_features.generate_all_features(X, model_params)
34 |     """
35 |     feature_set is a dict with two keys:
36 |     num_params - the number of parameters in the CRF (this is not numImageFeatures
37 |         nor numFeatures, because of parameter sharing)
38 |     features - a list comprising the features in the CRF.
39 | 
40 |     Each feature is a binary indicator variable, represented by a named tuple with three fields.
41 |     .var - a tuple containing the variables in the scope of this feature
42 |     .assignment - the assignment (tuple) that this indicator variable corresponds to
43 |     .param_idx - the index in theta that this feature corresponds to
44 | 
45 |     For example, if we have:
46 |         feature = Feature(var=(0, 1), assignment=(4, 5), param_idx=8)
47 | 
48 |     then feature is an indicator function over X_0 and X_1,
49 |     which takes on a value of 1 if X_0 = 4 and X_1 = 5 (which would be 'e' and 'f'), and 0 otherwise.
50 |     Its contribution to the log-likelihood would be theta[8] if it's 1, and 0 otherwise.
51 | 
52 |     If you're interested in the implementation details of CRFs,
53 |     feel free to read through generate_all_features.py and the functions it calls!
54 |     For the purposes of this assignment, though, you don't
55 |     have to understand how this code works. (It's complicated.)
56 |     """
57 | 
58 | 
59 |     length = len(y)
60 |     K = model_params['num_hidden_states']
61 |     nll = 0.
62 |     grad = np.zeros_like(theta)
63 | 
64 |     # Use the feature_set to calculate nll and grad.
65 |     # This is the main part of the assignment, and it is very tricky - be careful!
66 |     # You might want to code up your own numerical gradient checker to make sure
67 |     # your answers are correct.
68 | 
69 |     # Hint: you can use `helper.clique_tree_calibrate` to calculate logZ effectively.
70 |     # We have halfway-modified clique_tree_calibrate;
71 |     # complete our implementation if you want to use it to compute logZ.
72 | 
73 |     # Solution Start
74 | 
75 |     # Solution End
76 | 
77 |     return nll, grad
78 | 
--------------------------------------------------------------------------------
/CRF-Learning-For-OCR/solution.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import numpy as np
4 | 
5 | import helper
6 | sys.path.insert(0, '..')
7 | from commons.factor import Factor
8 | 
9 | 
10 | def stochastic_gradient_descent(grad_func, theta0, max_iter):
11 |     """
12 |     Args:
13 |         grad_func: A function f: (theta, i) -> (cost, grad),
14 |             returns cost and gradient for instance i wrt current theta.
15 |         theta0: initial theta
16 |         max_iter: run the loop for this many iterations.
17 | 
18 |     Returns:
19 |         theta_opt: Theta after max_iter iterations of SGD.
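    A minimal sketch of one possible loop (illustrative only; the learning-rate
    schedule below is an assumption, not necessarily the one the grader expects):

        theta_opt = theta0.copy()
        for t in range(max_iter):
            cost, grad = grad_func(theta_opt, t)
            theta_opt = theta_opt - 0.1 / (1 + t / 1000) * grad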
20 |     """
21 |     theta_opt = theta0.copy()
22 | 
23 |     # Solution Start
24 | 
25 |     # Solution End
26 |     return theta_opt
27 | 
28 | 
29 | def lr_search_lambda_sgd(x_train, y_train, x_validation, y_validation, lambdas):
30 |     """
31 |     Args:
32 |         x_train: training numpy array of shape (N, 129)
33 |         y_train: training numpy array of shape (N, )
34 |         x_validation: validation numpy array of shape (M, 129)
35 |         y_validation: validation numpy array of shape (M, )
36 |         lambdas: a numpy array of candidate regularization parameters.
37 | 
38 |     Returns: a numpy array containing accuracy for each lambda.
39 |         See helper.lr_train, helper.lr_predict, and helper.lr_accuracy
40 |     """
41 |     acc = np.zeros(len(lambdas))
42 | 
43 |     # Solution Start
44 | 
45 |     # Solution End
46 | 
47 |     return acc
48 | 
49 | def compute_initial_potentials(skeleton):
50 |     """
51 |     Args:
52 |         skeleton: a dictionary with the following keys.
53 |             'nodes': a list of sets. Each set is a set of constituent variables. e.g. {1,2,3}
54 |             'edges': a list of edges. A single element would look like ({1,2,3}, {2,3,4})
55 |                 which means there is an edge between node {1,2,3} and node {2,3,4}. If (a, b) is
56 |                 in the list, (b, a) will not be in the list.
57 |             'factor_list': a list of initialized Factors.
58 | 
59 |     Returns:
60 |         a dict with ['clique_list', 'adj_list'] keys.
61 |             'clique_list': a list of factors associated with each clique
62 |             'adj_list': adjacency list with integer nodes. adj_list[0] = {1,2}
63 |                 implies that there are edges clique_list[0]-clique_list[1]
64 |                 and clique_list[0]-clique_list[2]
65 |     """
66 |     n = len(skeleton['nodes'])
67 |     clique_list = [Factor([], []) for i in range(n)]
68 |     adj_list = {i: set() for i in range(n)}
69 | 
70 |     nodes = skeleton['nodes']
71 |     edges = skeleton['edges']
72 | 
73 |     for (u, v) in edges:
74 |         u_idx = nodes.index(u)
75 |         v_idx = nodes.index(v)
76 |         adj_list[u_idx].add(v_idx)
77 |         adj_list[v_idx].add(u_idx)
78 | 
79 |     for factor in skeleton['factor_list']:
80 |         for candidate in np.random.permutation(n):  # A heuristic, not sure if it is better
81 |             if nodes[candidate].issuperset(set(factor.vars)):
82 |                 clique_list[candidate] = clique_list[candidate] * factor
83 |                 break
84 |     # This approach is not efficient as it computes intermediate factors
85 |     # even though we could know all constituent factors and multiply them
86 |     # in one go.
87 | 
88 |     return {'clique_list': clique_list, 'adj_list': adj_list}
89 | 
90 | 
91 | def get_next_clique(clique_tree):
92 |     adj = clique_tree['adj_list']
93 |     msgs = clique_tree['messages']
94 | 
95 |     for u in adj:
96 |         n_neighbours = len(adj[u])
97 |         for v in adj[u]:
98 |             if u not in msgs[v] and sum(1 for w in msgs[u] if v != w) == n_neighbours - 1:
99 |                 return u, v
100 |     return None
101 | 
102 | 
103 | def clique_tree_calibrate(clique_tree, do_logZ=False):
104 |     """
105 |     Args:
106 |         clique_tree: A dict with ['clique_list', 'adj_list'] keys.
107 |             'clique_list': a list of factors associated with each clique
108 |             'adj_list': adjacency list with integer nodes. adj_list[0] = {1,2}
109 |                 implies that there are edges clique_list[0]-clique_list[1]
110 |                 and clique_list[0]-clique_list[2]
111 |         do_logZ: If True, also returns logZ
112 |     """
113 |     clique_tree['messages'] = {i: {} for i in range(len(clique_tree['clique_list']))}
114 |     msgs = clique_tree['messages']
115 |     if do_logZ:
116 |         msgs_unnorm = {i: {} for i in range(len(clique_tree['clique_list']))}
117 |     adj = clique_tree['adj_list']
118 |     ## clique_tree['messages'][u] = {v: msg_from_v_to_u}
119 | 
120 |     cliques = clique_tree['clique_list']
121 | 
122 |     while True:
123 |         ready_edge = get_next_clique(clique_tree)
124 |         if ready_edge is None:
125 |             break
126 |         u, v = ready_edge
127 |         msg_norm = cliques[u]
128 |         msg_unnormalized = cliques[u]
129 |         for w in adj[u]:
130 |             if w == v:
131 |                 continue
132 |             msg_norm = msg_norm * msgs[u][w]
133 |             if do_logZ:
134 |                 msg_unnormalized = msg_unnormalized * msgs_unnorm[u][w]
135 | 
136 |         diff_set = set(msg_norm.vars) - (set(msg_norm.vars) & set(cliques[v].vars))
137 | 
138 |         msg_norm = msg_norm.marginalise(diff_set)
139 |         normalizer = sum(msg_norm.val.values())
140 |         for assignment in msg_norm:
141 |             msg_norm[assignment] /= normalizer
142 |         msgs[v][u] = msg_norm
143 | 
144 |         if do_logZ:
145 |             msgs_unnorm[v][u] = msg_unnormalized.marginalise(diff_set)
146 | 
147 | 
148 |     if do_logZ:
149 |         logZ = 0
150 |         # Solution Start
151 | 
152 |         # Solution End
153 | 
154 |     # return clique_tree
155 |     calibrated_potentials = []
156 |     for i in range(len(cliques)):
157 |         factor = cliques[i]
158 |         for msg in msgs[i].values():
159 |             factor = factor * msg
160 |         calibrated_potentials.append(factor)
161 | 
162 |     if do_logZ:
163 |         return calibrated_potentials, logZ
164 |     else:
165 |         return calibrated_potentials
166 | 
--------------------------------------------------------------------------------
/CRF-Learning-For-OCR/submit.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from scipy.io import loadmat
4 | 
5 | import helper
6 | import solution as sol
7 | 
8 | sys.path.insert(0, '..')
9 | 
10 | import commons
11 | from instance_neg_log_likelyhood import instance_neg_log_likelyhood
12 | 
13 | 
14 | class Grader(commons.SubmissionBase):
15 |     def __init__(self):
16 |         part_names = [None,
17 |                       'QD9Fo', 'apl5y', 'n3klU', 'I45dY',
18 |                       'jrVA2', 'HIPL6', '0ctrF', 'cM7Gc',
19 |                       'zl35t', 'ZtLBP']
20 |         super().__init__('CRF Learning for OCR', 'hdcdUQNgEealXw52htHS4Q', part_names)
21 | 
22 |     def __iter__(self):
23 |         for part_id in range(1, len(self.part_names)):
24 |             try:
25 |                 if part_id == 1:
26 |                     X = loadmat("./data/Train1X.mat")['Train1X']
27 |                     y = loadmat("./data/Train1Y.mat")['Train1Y']
28 |                     res = helper.lr_train(X, y, 1)
29 |                 elif part_id == 2:
30 |                     X = loadmat("./data/Train2X.mat")['Train2X']
31 |                     y = loadmat("./data/Train2Y.mat")['Train2Y']
32 |                     res = helper.lr_train(X, y, 1)
33 |                 elif part_id == 3:
34 |                     x_train = loadmat('./data/Train1X.mat')['Train1X']
35 |                     y_train = loadmat('./data/Train1Y.mat')['Train1Y'].squeeze()
36 |                     x_validation = loadmat('./data/Validation1X.mat')['Validation1X']
37 |                     y_validation = loadmat('./data/Validation1Y.mat')['Validation1Y'].squeeze()
38 |                     lambdas = [2, 8]
39 |                     res = sol.lr_search_lambda_sgd(x_train, y_train, x_validation, y_validation, lambdas)
40 |                 elif part_id == 4:
41 |                     x_train = loadmat('./data/Train2X.mat')['Train2X']
42 |                     y_train = loadmat('./data/Train2Y.mat')['Train2Y'].squeeze()
43 |                     x_validation = loadmat('./data/Validation2X.mat')['Validation2X']
44 |                     y_validation = loadmat('./data/Validation2Y.mat')['Validation2Y'].squeeze()
45 |                     lambdas = [2, 8]
46 |                     res = sol.lr_search_lambda_sgd(x_train, y_train, x_validation, y_validation, lambdas)
47 |                 elif part_id == 5:
48 |                     mat = loadmat("./data/Part2Sample.mat", simplify_cells=True)
49 |                     tree = helper.from_mat_to_tree(mat['sampleUncalibratedTree'])
50 |                     _, logz = sol.clique_tree_calibrate(tree, True)
51 |                     res = logz
52 |                 elif part_id == 6:
53 |                     mat = loadmat("./data/Part2LogZTest.mat", simplify_cells=True)
54 |                     tree = helper.from_mat_to_tree(mat['logZTestCliqueTree'])
55 |                     _, logz = sol.clique_tree_calibrate(tree, True)
56 |                     res = logz
57 |                 elif part_id == 7 or part_id == 9:
58 |                     mat = loadmat("./data/Part2Sample.mat", simplify_cells=True)
59 |                     sample_params = mat['sampleModelParams']
60 |                     model_param = {'num_hidden_states': sample_params['numHiddenStates'],
61 |                                    'num_observed_states': sample_params['numObservedStates'],
62 |                                    'lambda': sample_params['lambda']}
63 |                     nll, grad = instance_neg_log_likelyhood(mat['sampleX']-1, mat['sampleY']-1, mat['sampleTheta'],
64 |                                                             model_param)
65 |                     res = nll if part_id == 7 else grad
66 |                 elif part_id == 8 or part_id == 10:
67 |                     mat = loadmat("./data/Part2Test.mat", simplify_cells=True)
68 |                     model_param = mat['testModelParams']
69 |                     model_param = {'num_hidden_states': model_param['numHiddenStates'],
70 |                                    'num_observed_states': model_param['numObservedStates'],
71 |                                    'lambda': model_param['lambda']}
72 |                     nll, grad = instance_neg_log_likelyhood(mat['testX'] - 1, mat['testY'] - 1, mat['testTheta'],
73 |                                                             model_param)
74 |                     res = nll if part_id == 8 else grad
75 |                 else:
76 |                     raise KeyError
77 |                 yield self.part_names[part_id], res
78 |             except KeyError:
79 |                 yield self.part_names[part_id], 0
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     grader = Grader()
84 |     grader.grade()
--------------------------------------------------------------------------------
/Decision-Making/README.md:
--------------------------------------------------------------------------------
1 | # Notes:
2 | 
3 | - `I` will be a dict with three keys.
4 |     - random_factors
5 |     - decision_factors
6 |     - utility_factors
7 | 
8 |   Each of these values **MUST** be a list, even if the length of the list is one.
9 | 
10 | - Values for each of these keys will be a list of `Factor`s.
11 | 
12 | - Unlike the original MATLAB code, `print_factor` is not implemented separately.
13 |   But `repr` is implemented on `Factor`, so you can print factors directly.
14 | 
15 | - `variable_elimination` is provided in `helper.py`
16 | 
17 | 
--------------------------------------------------------------------------------
/Decision-Making/helper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import numpy as np
4 | 
5 | import helper
6 | sys.path.append('..')
7 | from commons.factor import Factor
8 | 
9 | 
10 | def variable_elimination(Fs, Zs):
11 |     """
12 |     Args:
13 |         Fs: a list of Factors
14 |         Zs: a list of variables to marginalize.
15 | 
16 |     Returns:
17 |         A single factor, with Zs marginalized out.
18 |     """
19 | 
20 |     F = Factor([], [])
21 |     for f in Fs:
22 |         F = F * f
23 | 
24 |     return F.marginalise(Zs)
25 | 
26 | 
27 | def observe_evidence(Fs, E, normalize=False):
28 |     """
29 |     Args:
30 |         Fs: List of Factors.
31 |         E: Dictionary of evidence in the form {var: observed_value, ...}.
32 |         normalize: Should this function normalize the CPD after observing?
33 |             Assumes that the first variable (i.e. F.vars[0]) is the child and
34 |             all remaining ones (i.e. F.vars[1:]) are parents.
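            Illustrative example (a hypothetical factor): for a CPD over
            vars (0, 2), i.e. P(v0 | v2), and evidence E = {2: 1},
            F.evidence(E) keeps only the entries consistent with v2 = 1,
            and normalize=True then renormalizes the surviving entries
            over v0 via conditional_normalize.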
35 | 36 | Returns: 37 | A list of factors after observing the evidence. 38 | If the intersection of F.vars and E is empty, factor is returned 39 | unchanged. 40 | """ 41 | new_Fs = [] 42 | for F in Fs: 43 | new_F = F.evidence(E) 44 | if normalize and new_F != F: 45 | new_F.conditional_normalize(F.vars[1:]) 46 | new_Fs.append(new_F) 47 | return new_Fs -------------------------------------------------------------------------------- /Decision-Making/quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from scipy.io import loadmat\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import numpy as np\n", 22 | "\n", 23 | "from IPython.display import HTML\n", 24 | "\n", 25 | "import sys\n", 26 | "sys.path.append('..')\n", 27 | "import commons\n", 28 | "from commons.factor import Factor\n", 29 | "import helper\n", 30 | "import sol" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def are_equal(a, b, precision=1e-4):\n", 40 | " if isinstance(a, (int, float)):\n", 41 | " return (abs(a-b) < precision)\n", 42 | " elif isinstance(a, np.ndarray):\n", 43 | " return (np.allclose(a, b, atol=precision, rtol=0))\n", 44 | " elif isinstance(a, dict):\n", 45 | " common = set(a) & set(b)\n", 46 | " return all([are_equal(a[k], b[k]) for k in common])\n", 47 | " else:\n", 48 | " if len(a) != len(b):\n", 49 | " return False\n", 50 | " return all([are_equal(a[i], b[i]) for i in range(len(a))])\n", 51 | " " 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def load_I(which='TestI0'):\n", 61 | " mat = loadmat(f'./data/{which}.mat', simplify_cells=True)[which]\n", 62 | " I = {}\n", 63 | " \n", 64 | " for key in ['random', 'decision', 'utility']:\n", 65 | " Fs = mat[f'{key.title()}Factors']\n", 66 | " if not isinstance(Fs, list):\n", 67 | " Fs = [Fs]\n", 68 | " I[f'{key}_factors'] = [Factor.from_matlab(f) for f in Fs]\n", 69 | " return I" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Quiz 1" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "FullI = load_I('FullI')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "sol.simple_calc_expected_utility(FullI)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## Quiz 2" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# Note:\n", 111 | "# In the assignment description, variable name is `3` and value is `2`. 
\n",
    "# However, for uniformity, in this Python implementation these are 2 and 1 respectively,\n",
    "# because Python indices start from 0, unlike MATLAB's.\n",
    "\n",
    "E = {\n",
    "    2: 1 \n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "FullI = load_I('FullI')\n",
    "FullI['random_factors'] = helper.observe_evidence(FullI['random_factors'], E, normalize=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sol.simple_calc_expected_utility(FullI)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
--------------------------------------------------------------------------------
/Decision-Making/sol.py:
--------------------------------------------------------------------------------
import sys
import itertools

import numpy as np

import helper
sys.path.append('..')
from commons.factor import Factor


def simple_calc_expected_utility(I):
    """
    Given a fully instantiated influence diagram with a single utility node and decision node,
    calculate and return the expected utility. Note - assumes that the decision rule for the
    decision node is fully assigned.

    Args:
        I: See README.md for details.

    Returns: A scalar, the expected utility of I.
    """


    """
    Hints:

    You can use F.Z to get the sum of all values for the factor F.
    """
    EU = 0.0

    # Solution Start



    # Solution End

    return EU


def calculate_expected_utility_factor(I):
    """
    Args:
        I: An influence diagram I with a single decision node and a single utility node.
            See README.md for details.

    Returns:
        A factor over the scope of the decision rule D from I that
        gives the conditional utility given each assignment for D.vars
    """

    DF = I['decision_factors'][0]
    EUF = Factor(DF.vars, DF.domains, init=0)

    # Solution Start


    # Solution End

    return EUF


def optimize_meu(I):
    """
    Args:
        I: see README.md for the details.

    Returns:
        MEU, opt_decision_rule:
            MEU: a scalar. Maximum Expected Utility
            opt_decision_rule: A factor.
    """
    MEU = 0
    D = I['decision_factors'][0]
    opt_decision_rule = Factor(D.vars, D.domains, init=0)

    """
    Python related hints:
    - You can use itertools.product(domain1, domain2, domain3 ...) to enumerate through
      the cartesian product of the domains.
    - You can index a factor using a dictionary giving a full assignment, e.g.
      F[{'a': 0, 'b': 1}] for a factor with two variables 'a' and 'b'.
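    - A minimal enumeration sketch (illustrative only, assuming
      calculate_expected_utility_factor above is implemented; not the only
      valid approach):
          euf = calculate_expected_utility_factor(I)
          for each joint assignment to the parents of the decision variable,
          pick the decision value with the largest euf entry, set that entry
          of opt_decision_rule to 1, and add the corresponding utility to MEU.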
82 | """ 83 | 84 | # Solution Start 85 | 86 | 87 | 88 | # Solution End 89 | 90 | return MEU, opt_decision_rule 91 | 92 | 93 | def optimize_with_joint_utility(I): 94 | """ 95 | Same signature as optimize_meu. Now len(I['utility_factors']) > 1. 96 | """ 97 | 98 | """ 99 | Tip: You should try to implement factor sum on your own, if you haven't so far. 100 | Eventhough, Factor implementation overloads `+` operator. 101 | """ 102 | 103 | MEU = 0.0 104 | opt_decision_rule = Factor([], []) 105 | 106 | # Solution Start 107 | 108 | 109 | 110 | # Solution End 111 | 112 | return MEU, opt_decision_rule 113 | 114 | 115 | def optimize_linear_expectations(I): 116 | """ 117 | Same signature as optimize_meu. Now len(I['utility_factors']) > 1. 118 | """ 119 | 120 | D = I['decision_factors'][0] 121 | MEU = 0.0 122 | opt_decision_rule = Factor(D.vars, D.domains, init=0) 123 | 124 | # Solution Start 125 | 126 | 127 | 128 | # Solution End 129 | 130 | return MEU, opt_decision_rule 131 | -------------------------------------------------------------------------------- /Decision-Making/submit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | 7 | # import convert_mats 8 | import helper 9 | import sol 10 | 11 | sys.path.insert(0, '..') 12 | import commons 13 | from commons.factor import Factor 14 | 15 | 16 | def load_I(which='FullI'): 17 | mat = loadmat(f'./data/{which}.mat', simplify_cells=True)[which] 18 | I = {} 19 | 20 | for key in ['random', 'decision', 'utility']: 21 | Fs = mat[f'{key.title()}Factors'] 22 | if not isinstance(Fs, list): 23 | Fs = [Fs] 24 | I[f'{key}_factors'] = [Factor.from_matlab(f) for f in Fs] 25 | return I 26 | 27 | 28 | class Grader(commons.SubmissionBase): 29 | def __init__(self): 30 | part_names = [None, 'ABz0W', 'B5ynW', 31 | 'LYITS', 'fJqCV', 'I0Bvr', 32 | '37kCm', 'D2EEw', '2AHBQ', 33 | 'aFNuE', 'Th2m9'] 34 | super().__init__('Decision Making', 'ZPEHfwNgEealXw52htHS4Q', part_names) 35 | 36 | def __iter__(self): 37 | for part_id in range(1, len(self.part_names)): 38 | try: 39 | if part_id == 1: 40 | I = load_I('FullI') 41 | F = sol.calculate_expected_utility_factor(I) 42 | res = serialize_factors_fg_grading([F]) 43 | elif part_id == 2: 44 | I = load_I('FullI') 45 | I['random_factors'] = helper.observe_evidence(I['random_factors'], {2: 1}, True) 46 | F = sol.calculate_expected_utility_factor(I) 47 | res = serialize_factors_fg_grading([F]) 48 | elif part_id == 3: 49 | I = load_I('FullI') 50 | meu, optdr = sol.optimize_meu(I) 51 | res = serialize_meu_optimization_fg(meu, optdr) 52 | elif part_id == 4: 53 | I = load_I('FullI') 54 | I['random_factors'] = helper.observe_evidence(I['random_factors'], {2: 1}, True) 55 | meu, optdr = sol.optimize_meu(I) 56 | res = serialize_meu_optimization_fg(meu, optdr) 57 | elif part_id == 5: 58 | I = load_I('MultipleUtilityI') 59 | meu, optdr = sol.optimize_with_joint_utility(I) 60 | res = serialize_meu_optimization_fg(meu, optdr) 61 | elif part_id == 6: 62 | I = load_I('MultipleUtilityI') 63 | I['random_factors'] = helper.observe_evidence(I['random_factors'], {2: 0}, True) 64 | meu, optdr = sol.optimize_with_joint_utility(I) 65 | res = serialize_meu_optimization_fg(meu, optdr) 66 | elif part_id == 7: 67 | I = load_I('MultipleUtilityI') 68 | meu, optdr = sol.optimize_linear_expectations(I) 69 | res = serialize_meu_optimization_fg(meu, optdr) 70 | elif part_id == 8: 71 | I = load_I('MultipleUtilityI') 72 | I['random_factors'] 
= helper.observe_evidence(I['random_factors'], {2: 0}, True) 73 | meu, optdr = sol.optimize_linear_expectations(I) 74 | res = serialize_meu_optimization_fg(meu, optdr) 75 | elif part_id == 9: 76 | I = load_I('FullI') 77 | res = "%.4f" % sol.simple_calc_expected_utility(I) 78 | elif part_id == 10: 79 | I = load_I('FullI') 80 | I['random_factors'] = helper.observe_evidence(I['random_factors'], {2: 1}, True) 81 | res = "%.4f" % sol.simple_calc_expected_utility(I) 82 | else: 83 | raise KeyError 84 | yield self.part_names[part_id], res 85 | except KeyError: 86 | yield self.part_names[part_id], 0 87 | 88 | 89 | def serialize_factors_fg_grading(factors) -> str: 90 | lines = ["%d\n" % len(factors)] 91 | 92 | for f in factors: 93 | var = [v+1 for v in f.vars] 94 | lines.append("%d" % (len(var), )) 95 | lines.append(" ".join(map(str, var))) 96 | lines.append(" ".join(str(len(d)) for d in f.domains)) 97 | lines.append(str(len(f.val))) 98 | 99 | # libDAI expects first variable to change fastest 100 | # but itertools.product changes the last element fastest 101 | # hence reversed list 102 | domains = reversed(f.domains) 103 | num_lines = 0 104 | new_lines = [] 105 | for i, assignment in enumerate(itertools.product(*domains)): 106 | num_lines += 1 107 | val = f[tuple(reversed(assignment))] 108 | new_lines.append("%d %0.8g" % (i, val)) 109 | lines.extend(new_lines) 110 | 111 | return "\n".join(lines) 112 | 113 | 114 | def sort_factor(F): 115 | domains_d = dict(zip(F.vars, F.domains)) 116 | var = sorted(F.vars) 117 | domains = [domains_d[v] for v in var] 118 | newF = Factor(var, domains) 119 | for k in F: 120 | assignment = dict(zip(F.vars, k)) 121 | newF[assignment] = F[k] 122 | return newF 123 | 124 | 125 | def serialize_meu_optimization_fg(meu, opt_dr): 126 | opt_dr = sort_factor(opt_dr) 127 | res = serialize_factors_fg_grading([opt_dr]) 128 | return '%s\n\n%.4f\n' % (res, meu) 129 | 130 | 131 | if __name__ == '__main__': 132 | grader = Grader() 133 | grader.grade() -------------------------------------------------------------------------------- /Exact-Inference/data/README.md: -------------------------------------------------------------------------------- 1 | Copy all `*.mat` files for this assignment here. 
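For instance, `submit.py` loads `./data/PA4Sample.mat` and `./data/PA4Test.mat` from this directory.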
2 | -------------------------------------------------------------------------------- /Exact-Inference/helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | import pprint 4 | 5 | sys.path.insert(0, '..') 6 | from commons.factor import Factor 7 | import solution as sol 8 | 9 | def create_clique_tree(factors, evidence=None): 10 | V, domains = set(), dict() 11 | for factor in factors: 12 | V.update(factor.vars) 13 | for v, d in zip(factor.vars, factor.domains): 14 | if v in domains: 15 | assert domains[v] == d, "Domain mismatch between factors" 16 | else: 17 | domains[v] = d 18 | 19 | adj_list = {v: {v, } for v in V} 20 | for factor in factors: 21 | for u, v in itertools.permutations(factor.vars, 2): 22 | adj_list[u].add(v) 23 | 24 | cliques_considered = 0 25 | F = factors 26 | skeleton = {'nodes': [], 'factor_idxs': [], 'edges': [], 'factor_list': factors} 27 | while cliques_considered < len(V): 28 | next_var = min(adj_list, key=lambda x: len(adj_list[x])) 29 | F = eliminate_var(F, adj_list, next_var, skeleton) 30 | cliques_considered += 1 31 | if not adj_list: 32 | break 33 | 34 | prune_tree(skeleton) 35 | tree = sol.compute_initial_potentials(skeleton) 36 | if evidence: 37 | for i, f in enumerate(tree['clique_list']): 38 | tree['clique_list'][i] = f.evidence(evidence) 39 | 40 | return tree 41 | 42 | 43 | def eliminate_var(F, adj_list, next_var, skeleton): 44 | use_factors, non_use_factors = [], [] 45 | scope = set() 46 | for i, f in enumerate(F): 47 | if next_var in f.vars: 48 | use_factors.append(i) 49 | scope.update(f.vars) 50 | else: 51 | non_use_factors.append(i) 52 | scope = sorted(scope) 53 | 54 | for i, j in itertools.permutations(scope, 2): 55 | if i not in adj_list: 56 | adj_list[i] = {j, } 57 | else: 58 | adj_list[i].add(j) 59 | 60 | # next steps removes the next_var from adj_list 61 | for k in adj_list: 62 | if next_var in adj_list[k]: 63 | adj_list[k].remove(next_var) 64 | del adj_list[next_var] 65 | 66 | newF, newmap = [], {} 67 | for i in non_use_factors: 68 | newmap[i] = len(newF) 69 | newF.append(F[i]) 70 | 71 | new_factor = Factor([], []) 72 | for i in use_factors: 73 | new_factor = new_factor @ F[i] # Since this just a simulation, we don't really need to compute values. 
So @ is used here: it tracks scopes without computing values.
    new_factor = new_factor.dummy_marginalise({next_var, })
    newF.append(new_factor)

    for i in range(len(skeleton['nodes'])):
        if skeleton['factor_idxs'][i] in use_factors:
            skeleton['edges'].append((skeleton['nodes'][i], set(scope)))
            skeleton['factor_idxs'][i] = None
        elif skeleton['factor_idxs'][i] is not None:
            skeleton['factor_idxs'][i] = newmap[skeleton['factor_idxs'][i]]
    skeleton['nodes'].append(set(scope))
    skeleton['factor_idxs'].append(len(newF) - 1)

    return newF


def prune_tree(skeleton):
    found = True
    while found:
        found = False

        for u, v in skeleton['edges']:
            if u.issuperset(v):
                found = True
                parent = u
                child = v
                break
            elif v.issuperset(u):
                found = True
                parent = v
                child = u
                break

        if not found:
            break

        new_edges = []
        for u, v in skeleton['edges']:
            if (u, v) == (child, parent) or (v, u) == (child, parent):
                continue
            elif u == child:
                new_edges.append((parent, v))
            elif v == child:
                new_edges.append((u, parent))
            else:
                new_edges.append((u, v))
        skeleton['edges'] = new_edges
        skeleton['nodes'] = [node for node in skeleton['nodes'] if node != child]


def adj_matrix_to_adj_list(matrix):
    edges = {}
    for i in range(len(matrix)):
        nbs = set()
        for j in range(len(matrix)):
            if matrix[i, j] == 1:
                nbs.add(j)
        edges[i] = nbs
    return edges


if __name__ == '__main__':
    fs = []
    fs.append(Factor(['C'], [2]))
    fs.append(Factor(['C', 'D'], [2, 2]))
    fs.append(Factor(['I'], [2]))
    fs.append(Factor(['G', 'I', 'D'], [2, 2, 2]))
    fs.append(Factor(['I', 'S'], [2, 2]))
    fs.append(Factor(['J', 'S', 'L'], [2, 2, 2]))
    fs.append(Factor(['G', 'L'], [2, 2]))
    fs.append(Factor(['G', 'H'], [2, 2]))
    for f in fs:
        for a in itertools.product(*f.domains):
            f[a] = 42
    pprint.pprint(create_clique_tree(fs, []))
--------------------------------------------------------------------------------
/Exact-Inference/solution.py:
--------------------------------------------------------------------------------
import itertools
import sys

import numpy as np

sys.path.insert(0, '..')
from commons.factor import Factor
import helper


def compute_initial_potentials(skeleton):
    """
    Args:
        skeleton: a dictionary with the following keys.
            'nodes': a list of sets. Each set is a set of constituent variables, e.g. {1,2,3}
            'edges': a list of edges. A single element would look like ({1,2,3}, {2,3,4})
                which means there is an edge between node {1,2,3} and node {2,3,4}. If (a, b) is
                in the list, (b, a) will not be in the list.
            'factor_list': a list of initialized Factors.

    Returns:
        a dict with ['clique_list', 'adj_list'] keys.
            'clique_list': a list of factors associated with each clique
            'adj_list': adjacency list with integer nodes.
            adj_list[0] = {1,2}
            implies that there are edges clique_list[0]-clique_list[1]
            and clique_list[0]-clique_list[2]
    """
    n = len(skeleton['nodes'])
    var_domain = {}
    for factor in skeleton['factor_list']:
        for var, domain in zip(factor.vars, factor.domains):
            var_domain[var] = domain

    clique_list = []
    for clique in skeleton['nodes']:
        clique = sorted(clique)
        domains = [var_domain[v] for v in clique]
        clique_list.append(Factor(clique, domains, init=1))
    adj_list = {i: set() for i in range(n)}

    # Solution Start

    # Solution End

    return {'clique_list': clique_list, 'adj_list': adj_list}


def get_next_clique(clique_tree, msgs):
    """
    Args:
        clique_tree: a structure returned by `compute_initial_potentials`
        msgs: A dictionary of dictionaries.
            If u has sent a message to v, that msg will be msgs[v][u].

    Returns:
        a tuple (i, j) if i is ready to send the message to j.
        If all the messages have been passed, return None.

        If more than one message is ready to be transmitted, return
        the pair (i,j) that is numerically smallest. If you use an outer
        for loop over i and an inner for loop over j, breaking when you find a
        ready pair of cliques, you will get the right answer.
    """
    adj = clique_tree['adj_list']

    # Solution Start

    # Solution End

    return None


def clique_tree_calibrate(clique_tree, is_max=0):
    # msgs[u] = {v: msg_from_v_to_u}
    msgs = {i: {} for i in range(len(clique_tree['clique_list']))}
    adj = clique_tree['adj_list']

    # Solution Start

    # Following is a dummy line to make the grader happy when this is unimplemented.
    # Delete it or create a new list `calibrated_potentials`
    calibrated_potentials = [f for f in clique_tree['clique_list']]

    # Solution End

    return {'clique_list': calibrated_potentials, 'adj_list': adj}


def compute_exact_marginals_bp(factors, evidence=None, is_max=0):
    """
    This function takes a list of factors, evidence, and a flag is_max,
    runs exact inference and returns the final marginals for the
    variables in the network. If is_max is 1, then it runs exact MAP inference,
    otherwise exact inference (sum-prod).
    It returns a list of size equal to the number of variables in the
    network, where M[i] represents the factor for the ith variable.

    Args:
        factors: list[Factor]
        evidence: dict[variable] -> observation
        is_max: whether to use the max-product algorithm

    Returns:
        list of factors. Each factor should have only one variable.
    """

    marginals = []
    if evidence is None:
        evidence = {}

    # Solution Start

    # Solution End
    return marginals


def factor_max_marginalization(factor, variables=None):
    if not variables or not factor.vars:
        return factor

    new_vars = sorted(set(factor.vars) - set(variables))
    if not new_vars:
        raise ValueError("Resultant factor has empty scope.")
    new_map = [factor.vars.index(v) for v in new_vars]

    new_factor = Factor(new_vars, [factor.domains[i] for i in new_map], init=float('-inf'))

    # Solution Start

    # Solution End

    return new_factor


def max_decoding(marginals):
    """
    Finds the best assignment for each variable from the marginals passed in.
138 | Returns A such that A[i] returns the index of the best instantiation for variable i. 139 | 140 | For instance: Let's say we have two variables 0 and 1. 141 | Marginals for 0 = [0.1, 0.3, 0.6] 142 | Marginals for 1 = [0.92, 0.08] 143 | max_decoding(marginals) == [2, 0] 144 | 145 | M is a list of factors, where each factor is only over one variable. 146 | """ 147 | 148 | A = np.zeros(len(marginals), dtype=np.int32) 149 | 150 | # Solution Start 151 | 152 | # Solution End 153 | return A 154 | -------------------------------------------------------------------------------- /Exact-Inference/submit.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | 7 | import helper 8 | import solution as sol 9 | 10 | sys.path.insert(0, '..') 11 | 12 | import commons 13 | from commons.factor import Factor 14 | 15 | 16 | class Grader(commons.SubmissionBase): 17 | def __init__(self): 18 | part_names = [None, 19 | 'u4XdY', 'SFny2', 'vJtk1', 'oxFvg', 20 | 'mdAFl', '4nqJB', 'lXwFM', 'rKODa', 21 | 'IxUeH', 'cKBqa', 'PxFaH', 'i3nTw', 22 | 'H3HVG', '47MjN', 'x0vXX', 'XGmvB'] 23 | super().__init__('Exact Inference', 'ITvOkANgEea1SAr5vIqVXQ', part_names) 24 | 25 | def __iter__(self): 26 | for part_id in range(1, len(self.part_names)): 27 | if part_id % 2 == 0: 28 | mat = loadmat('./data/PA4Test.mat', simplify_cells=True) 29 | else: 30 | mat = loadmat('./data/PA4Sample.mat', simplify_cells=True) 31 | 32 | try: 33 | if part_id == 1 or part_id == 2: 34 | inp = mat['InitPotential']['INPUT'] 35 | nodes = [set(x - 1) for x in inp['nodes']] 36 | 37 | edges = [] 38 | edge_mat = inp['edges'] 39 | for i in range(len(edge_mat)): 40 | for j in range(i + 1, len(edge_mat)): 41 | if edge_mat[i, j] == 1: 42 | edges.append((nodes[i], nodes[j])) 43 | 44 | factors = [Factor.from_matlab(f) for f in inp['factorList']] 45 | skeleton = {'nodes': nodes, 'edges': edges, 'factor_list': factors} 46 | clique_tree = sol.compute_initial_potentials(skeleton) 47 | res = serialize_compact_tree(clique_tree, part_id) 48 | elif part_id == 3 or part_id == 4: 49 | arg1, arg2 = mat['GetNextC']['INPUT1'], mat['GetNextC']['INPUT2'] 50 | clique_tree = {'clique_list': [Factor.from_matlab(f) for f in arg1['cliqueList']], 51 | 'adj_list': helper.adj_matrix_to_adj_list(arg1['edges'])} 52 | N = arg2.shape[0] 53 | msgs = {i: {} for i in range(N)} 54 | for i in range(N): 55 | for j in range(N): 56 | if isinstance(arg2[i, j].var, int) or len(arg2[i, j].var) > 0: 57 | msgs[j][i] = Factor.from_mat_struct(arg2[i, j]) 58 | 59 | res = sol.get_next_clique(clique_tree, msgs) 60 | if res is None: 61 | i, j = 0, 0 62 | else: 63 | i, j = res 64 | res = "%d %d" % (i+1, j+1) 65 | elif part_id == 5 or part_id == 6: 66 | inp = mat['SumProdCalibrate']['INPUT'] 67 | clique_tree = {'clique_list': [Factor.from_matlab(f) for f in inp['cliqueList']], 68 | 'adj_list': helper.adj_matrix_to_adj_list(inp['edges'])} 69 | calibrated_tree = sol.clique_tree_calibrate(clique_tree) 70 | res = serialize_compact_tree(calibrated_tree) 71 | elif part_id == 7 or part_id == 8: 72 | inp = mat['ExactMarginal']['INPUT'] 73 | inp = [Factor.from_matlab(f) for f in inp] 74 | out = sol.compute_exact_marginals_bp(inp, evidence=None, is_max=0) 75 | for f in out: 76 | f.vars = [v+1 for v in f.vars] 77 | res = serialize_factors_fg_grading(out) 78 | elif part_id == 9 or part_id == 10: 79 | arg1 = Factor.from_matlab(mat['FactorMax']['INPUT1'], start_from_zero=False) 80 | arg2 = 
mat['FactorMax']['INPUT2'] 81 | out = sol.factor_max_marginalization(arg1, [arg2, ]) 82 | res = serialize_factors_fg_grading([out]) 83 | elif part_id == 11: 84 | res = '' # Coursera seems to have a bug for this case 85 | elif part_id == 12: 86 | arg1 = mat['MaxSumCalibrate']['INPUT'] 87 | clique_tree = {'clique_list': [Factor.from_matlab(clique) for clique in arg1['cliqueList']], 88 | 'adj_list': helper.adj_matrix_to_adj_list(arg1['edges'])} 89 | calibrated_tree = sol.clique_tree_calibrate(clique_tree, is_max=1) 90 | res = serialize_compact_tree(calibrated_tree) 91 | elif part_id == 13 or part_id == 14: 92 | arg1 = [Factor.from_matlab(f) for f in mat['MaxMarginals']['INPUT']] 93 | out = sol.compute_exact_marginals_bp(arg1, evidence=None, is_max=1) 94 | for f in out: 95 | f.vars = [v + 1 for v in f.vars] 96 | res = serialize_factors_fg_grading(out) 97 | elif part_id == 15 or part_id == 16: 98 | arg1 = [Factor.from_matlab(f) for f in mat['MaxDecoded']['INPUT']] 99 | res = " ".join(map(str, sol.max_decoding(arg1)+1)) 100 | yield self.part_names[part_id], res 101 | except KeyError: 102 | raise 103 | 104 | 105 | def serialize_factors_fg_grading(factors, skip=1) -> str: 106 | lines = ["%d\n" % len(factors)] 107 | 108 | for f in factors: 109 | lines.append("%d" % (len(f.vars, ))) 110 | lines.append(" ".join(map(str, f.vars))) 111 | lines.append(" ".join(str(len(d)) for d in f.domains)) 112 | placeholder_idx = len(lines) 113 | lines.append(None) # will be replace by nonzero count once we know 114 | 115 | # libDAI expects first variable to change fastest 116 | # but itertools.product changes the last element fastest 117 | # hence reversed list 118 | domains = reversed(f.domains) 119 | num_lines = 0 120 | new_lines = [] 121 | for i, assignment in enumerate(itertools.product(*domains)): 122 | num_lines += 1 123 | val = f[tuple(reversed(assignment))] 124 | if abs(val) <= 1e-40 or abs(val - 1) <= 1e-40 or np.isinf(val) or np.isnan(val): 125 | continue 126 | new_lines.append("%0.8g" % (val, )) 127 | new_lines = new_lines[::skip] 128 | lines[placeholder_idx] = "%d" % (num_lines, ) 129 | lines.extend(new_lines) 130 | lines.append("") 131 | 132 | return "\n".join(lines) 133 | 134 | 135 | def serialize_compact_tree(tree, skip=1) -> str: 136 | adj_list = tree['adj_list'] 137 | N = len(adj_list) 138 | lines = ['%d' % N] 139 | 140 | for i in range(N): 141 | nbs = adj_list[i] 142 | lines.append(" ".join("1" if i in nbs else "0" for i in range(N))) 143 | 144 | # convert 0 based index into 1 based index!! Uuhhh!! 145 | for factor in tree['clique_list']: 146 | factor.vars = [v+1 for v in factor.vars] 147 | 148 | factor_graph = serialize_factors_fg_grading(tree['clique_list'], skip) 149 | lines.append(factor_graph) 150 | 151 | # convert 1 based index into 0 based index!! Uuhhh!! 
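    # (serialize_factors_fg_grading above mutated factor.vars in place through the
    # same factor objects, so shift the vars back before returning)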
152 | for factor in tree['clique_list']: 153 | factor.vars = [v - 1 for v in factor.vars] 154 | 155 | return '\n'.join(lines) 156 | 157 | 158 | if __name__ == '__main__': 159 | grader = Grader() 160 | grader.grade() 161 | -------------------------------------------------------------------------------- /Learning-Tree-Structured-Networks/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | from matplotlib.animation import FuncAnimation 5 | import matplotlib as mpl 6 | 7 | 8 | def show_pose(pose): 9 | """ 10 | Original Authors: Huayan Wang, Andrew Duchi 11 | """ 12 | pose[:, 0] += 100 13 | pose[:, 1] += 150 14 | 15 | part_length = np.array([60, 20, 32, 33, 32, 33, 46, 49, 46, 49]) 16 | part_width = np.array([18, 10, 7, 5, 7, 5, 10, 7, 10, 7]) 17 | 18 | img = np.zeros((300, 300), dtype=np.uint8) 19 | 20 | for i in range(10): 21 | startpt = pose[i, :2].round().astype(int) 22 | axis = np.r_[np.sin(pose[i, 2] - np.pi / 2), np.cos(pose[i, 2] - np.pi / 2)] 23 | xaxis = np.r_[np.cos(pose[i, 2] - np.pi / 2), -np.sin(pose[i, 2] - np.pi / 2)] 24 | endpt = (startpt + part_length[i] * axis).round().astype(int) 25 | 26 | corner1 = (startpt + xaxis * part_width[i]).round().astype(int) 27 | corner2 = (startpt - xaxis * part_width[i]).round().astype(int) 28 | corner3 = (endpt + xaxis * part_width[i]).round().astype(int) 29 | corner4 = (endpt - xaxis * part_width[i]).round().astype(int) 30 | 31 | img = cv2.line(img, tuple(corner1[::-1]), tuple(corner2[::-1]), 255, 2) 32 | img = cv2.line(img, tuple(corner3[::-1]), tuple(corner4[::-1]), 255, 2) 33 | img = cv2.line(img, tuple(corner1[::-1]), tuple(corner3[::-1]), 255, 2) 34 | img = cv2.line(img, tuple(corner2[::-1]), tuple(corner4[::-1]), 255, 2) 35 | 36 | img = cv2.rectangle(img, tuple(startpt[::-1] - 4), tuple(startpt[::-1] + 4), 255, -1) 37 | return img 38 | 39 | 40 | def sample_pose(P, G, k): 41 | """ 42 | Args: 43 | P: 44 | G: 45 | k: None for unknown class, else label $\in$ 0, 1, 2, ... 
, k-1 46 | """ 47 | sample = np.zeros((10, 3)) 48 | if k is None: 49 | k = np.random.choice(len(P['c']), p=P['c']) 50 | 51 | remaining = set(range(10)) 52 | while remaining: 53 | i = remaining.pop() 54 | clg = P['clg'][i] 55 | par = G[i, 1] 56 | 57 | if G[i, 0] == 0: 58 | sample[i, 0] = clg['mu_y'][k] + clg['sigma_y'][k]*np.random.randn() 59 | sample[i, 1] = clg['mu_x'][k] + clg['sigma_x'][k]*np.random.randn() 60 | sample[i, 2] = clg['mu_angle'][k] + clg['sigma_angle'][k]*np.random.randn() 61 | elif G[i, 0] == 1: 62 | if par in remaining: 63 | remaining.add(i) 64 | continue 65 | 66 | muy = (clg['theta'][k,0] + 67 | clg['theta'][k,1] * sample[par,0] + 68 | clg['theta'][k,2] * sample[par,1] + 69 | clg['theta'][k,3] * sample[par,2]) 70 | mux = (clg['theta'][k,4] + 71 | clg['theta'][k,5] * sample[par,0] + 72 | clg['theta'][k,6] * sample[par,1] + 73 | clg['theta'][k,7] * sample[par,2]) 74 | muangle = (clg['theta'][k,8] + 75 | clg['theta'][k,9] * sample[par,0] + 76 | clg['theta'][k,10] * sample[par,1] + 77 | clg['theta'][k,11] * sample[par,2]) 78 | 79 | sample[i, 0] = muy + clg['sigma_y'][k]*np.random.randn() 80 | sample[i, 1] = mux + clg['sigma_x'][k]*np.random.randn() 81 | sample[i, 2] = muangle + clg['sigma_angle'][k]*np.random.randn() 82 | elif G[i, 0] == 2: 83 | if par in remaining: 84 | remaining.add(i) 85 | continue 86 | 87 | muy = (clg['gamma'][k,0] + 88 | clg['gamma'][k,1] * sample[par,0] + 89 | clg['gamma'][k,2] * sample[par,1] + 90 | clg['gamma'][k,3] * sample[par,2]) 91 | mux = (clg['gamma'][k,4] + 92 | clg['gamma'][k,5] * sample[par,0] + 93 | clg['gamma'][k,6] * sample[par,1] + 94 | clg['gamma'][k,7] * sample[par,2]) 95 | muangle = (clg['gamma'][k,8] + 96 | clg['gamma'][k,9] * sample[par,0] + 97 | clg['gamma'][k,10] * sample[par,1] + 98 | clg['gamma'][k,11] * sample[par,2]) 99 | 100 | sample[i, 0] = muy + clg['sigma_y'][k]*np.random.randn() 101 | sample[i, 1] = mux + clg['sigma_x'][k]*np.random.randn() 102 | sample[i, 2] = muangle + clg['sigma_angle'][k]*np.random.randn() 103 | return sample 104 | 105 | 106 | def visualize_models(P, G): 107 | K = len(P['c']) 108 | figs = [] 109 | for k in range(K): 110 | if G.ndim == 2: 111 | pose = sample_pose(P, G, k) 112 | else: 113 | pose = sample_pose(P, G[:, :, k], k) 114 | pose = show_pose(pose) 115 | figs.append(pose) 116 | return figs 117 | 118 | 119 | def visualize_dataset(dataset): 120 | images = [] 121 | for pose in dataset: 122 | images.append(show_pose(pose)) 123 | return images 124 | 125 | 126 | def create_html5_animation(*images, labels=None, nframes=10000, interval=500): 127 | nrows = len(images) 128 | nframes = min(nframes, min(map(len, images))) 129 | 130 | width = 3 131 | height = nrows*3 132 | 133 | fig, axs = plt.subplots(nrows, 1, figsize=(width, height)) 134 | ims = [] 135 | for i in range(nrows): 136 | ims.append(axs[i].imshow(images[i][0], cmap='binary')) 137 | axs[i].set_axis_off() 138 | if labels is not None: 139 | axs[i].set_title(labels[i]) 140 | plt.tight_layout() 141 | 142 | def init(): 143 | for i in range(nrows): 144 | ims[i].set_data(images[i][0]) 145 | 146 | def animate(j, *args, **kwargs): 147 | for i in range(nrows): 148 | ims[i].set_data(images[i][j]) 149 | 150 | ani = FuncAnimation(fig, animate, frames=nframes, init_func=init, interval=interval) 151 | ani_html = ani.to_html5_video(); 152 | plt.close() 153 | return ani_html 154 | 155 | 156 | def maximum_spanning_tree(G): 157 | N = G.shape[0] 158 | parent = [None]*N 159 | value = np.ones(N)*-1 160 | 161 | Q = set(range(N)) 162 | 163 | def next_v(): 164 | best_v 
= None 165 | best_val = -10 166 | for v in Q: 167 | if value[v] > best_val: 168 | best_val = value[v] 169 | best_v = v 170 | return best_v 171 | 172 | while Q: 173 | u = next_v() 174 | Q.remove(u) 175 | 176 | for v in range(N): 177 | if v in Q and G[u, v] > value[v]: 178 | value[v] = G[u, v] 179 | parent[v] = u 180 | 181 | adj = np.zeros((N, N), dtype=np.int64) 182 | for i in range(N): 183 | if parent[i] is not None: 184 | adj[parent[i], i] = 1 185 | 186 | return adj 187 | 188 | 189 | def gaussian_mutual_information(X, Y): 190 | Z = np.concatenate([X, Y], axis=-1) 191 | Sxx = np.cov(X, rowvar=False) 192 | Syy = np.cov(Y, rowvar=False) 193 | S = np.cov(Z, rowvar=False) 194 | 195 | return 0.5 * np.log(np.linalg.det(Sxx)*np.linalg.det(Syy) 196 | / np.linalg.det(S)) 197 | 198 | 199 | def convert_A2G(A): 200 | G = np.zeros((10, 2), dtype=np.int64) 201 | A = A + A.T 202 | assert A.sum() == 18 203 | 204 | visited = {0} 205 | while len(visited) < 10: 206 | for i in range(1, 10): 207 | for j in range(10): 208 | if A[i, j] == 1 and j in visited: 209 | G[i, 0] = 1 210 | G[i, 1] = j 211 | visited.add(i) 212 | break 213 | return G -------------------------------------------------------------------------------- /Learning-Tree-Structured-Networks/solution.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from scipy.stats import norm 4 | 5 | import helper 6 | 7 | 8 | def fit_gaussian_parameters(x): 9 | """ 10 | Args: 11 | x: (N, ) shaped numpy array 12 | 13 | Returns: 14 | (mu, std) 15 | """ 16 | mu = 0 17 | sigma = 1 18 | 19 | # Solution Start 20 | 21 | # Solution End 22 | 23 | return mu, np.sqrt(sigma) 24 | 25 | 26 | def fit_linear_gaussian_parameters(x, u): 27 | """Estimate parameters of the linear Gaussian model: 28 | 29 | X|U ~ N(Beta(0)*U(0) + ... + Beta(n-1)*U(n-1) + Beta(n), sigma^2); 30 | 31 | 32 | X: (M, ), the child variable, M examples 33 | U: (M x N), N parent variables, M examples 34 | 35 | Copyright (C) Daphne Koller, Stanford Univerity, 2012 36 | """ 37 | M = len(x) 38 | N = u.shape[1] 39 | 40 | beta = np.zeros(N+1) 41 | sigma = 1 42 | 43 | # collect expectations and solve the linear system 44 | # A = [ E[U(1)], E[U(2)], ... , E[U(n)], 1 ; 45 | # E[U(1)*U(1)], E[U(2)*U(1)], ... , E[U(n)*U(1)], E[U(1)]; 46 | # ... , ... , ... , ... , ... ; 47 | # E[U(1)*U(n)], E[U(2)*U(n)], ... , E[U(n)*U(n)], E[U(n)] ] 48 | 49 | # construct A 50 | 51 | # Solution Start 52 | 53 | # Solution End 54 | 55 | 56 | 57 | # B = [ E[X]; E[X*U(1)]; ... ; E[X*U(n)] ] 58 | # construct B 59 | # Solution Start 60 | 61 | # Solution End 62 | 63 | # solve A*Beta = B 64 | # Solution Start 65 | 66 | # Solution End 67 | 68 | # then compute sigma according to eq. (11) in PA description 69 | # Solution Start 70 | 71 | # Solution End 72 | 73 | return beta, sigma 74 | 75 | 76 | def compute_log_likelihood(P, G, dataset): 77 | """ 78 | returns the (natural) log-likelihood of data given the model and graph structure 79 | 80 | Args: 81 | P: dict of parameters (explained in PA description) 82 | G: graph structure and parameterization (explained in PA description) 83 | 84 | NOTICE that G could be either (10, 2) (same graph shared by all classes) 85 | or (10, 2, 2) (each class has its own graph). Your code should compute 86 | the log-likelihood using the right graph. 
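        For example, when G.ndim == 3, index it as G[:, :, k] while scoring class k
        (helper.visualize_models selects the per-class graph the same way).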

        dataset: N x 10 x 3, N poses represented by 10 parts in (y, x, alpha)

    Returns:
        log_likelihood: log-likelihood of the data (scalar)

    Copyright (C) Daphne Koller, Stanford University, 2012
    """
    log_likelihood = 0
    N = dataset.shape[0]

    # You should compute the log likelihood of data as in eq. (12) and (13)
    # in the PA description
    # Hint: Use scipy.stats.norm.logpdf instead of log(normpdf) to prevent underflow.
    #       You may use log(sum(exp(logProb))) to do addition in the original
    #       space, sum(Prob).
    # Solution Start


    # Solution End
    return log_likelihood


def learn_cpd_given_graph(dataset, G, labels):
    """
    Args:
        dataset: (N, 10, 3), N poses represented by 10 parts in (y, x, alpha)
        G: graph parameterization as explained in PA description.
        labels: (N, 2) true class labels for the examples. labels[i,j]=1 if
            the ith example belongs to class j and 0 elsewhere

    Returns:
        P: dict (explained in PA description, and in README)
        loglikelihood: log-likelihood of the data (scalar)

    Copyright (C) Daphne Koller, Stanford University, 2012
    """

    N = dataset.shape[0]
    K = labels.shape[1]
    log_likelihood = 0
    P = {'c': np.zeros(K)}

    # estimate parameters
    # fill in P['c'], MLE for class probabilities
    # fill in P['clg'] for each body part and each class
    # choose the right parameterization based on G[i,0]
    # compute the likelihood - you may want to use compute_log_likelihood
    # you just implemented.

    # Solution Start

    # Solution End

    # Following dummy line is added so that submit.py works even without implementing this
    # function. Kindly comment/remove it once solution is implemented.
    P['clg'] = [{'sigma_x': np.array([]), 'sigma_y': np.array([]), 'sigma_angle': np.array([])}]

    return P, log_likelihood


def classify_dataset(dataset, labels, P, G):
    """Returns the accuracy of the model P and graph G on the dataset.

    Args:
        dataset: N x 10 x 3, N test instances represented by 10 parts
        labels: N x 2 true class labels for the instances.
            labels(i,j)=1 if the ith instance belongs to class j
        P: struct array model parameters (explained in PA description, and in README)
        G: graph structure and parameterization (explained in PA description)

    Returns:
        accuracy: fraction of correctly classified instances (scalar)

    Copyright (C) Daphne Koller, Stanford University, 2012
    """
    accuracy = 0.
    N = dataset.shape[0]
    K = labels.shape[1]

    # Solution Start

    # Solution End

    return accuracy


def learn_graph_structure(dataset):
    """
    Args:
        dataset: a (N, 10, 3) numpy array.

    Returns:
        A: maximum spanning tree computed from the weight matrix W
        W: 10 x 10 weight matrix, where W(i,j) is the mutual information between
            node i and j.
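            One natural construction (a sketch, not the only option), using the
            provided helper:
                for each pair i < j:
                    W[i, j] = W[j, i] = helper.gaussian_mutual_information(
                        dataset[:, i, :], dataset[:, j, :])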
183 | """ 184 | 185 | N = dataset.shape[0] 186 | W = np.zeros((10, 10)) 187 | 188 | # Solution Start 189 | 190 | # Solution End 191 | 192 | return helper.maximum_spanning_tree(W), W 193 | 194 | 195 | def learn_graph_and_cpds(dataset, labels): 196 | """ 197 | Args: 198 | dataset: An (N, 10, 3) dim numpy array 199 | labels: (N, 2) array for class 200 | 201 | Returns: 202 | P: Learned parameters 203 | G: Learned graph structure 204 | likelyhood: likelyhood 205 | """ 206 | N = len(dataset) 207 | K = labels.shape[1] 208 | 209 | G = np.zeros((10, 2, K), dtype=np.int64) 210 | G[1:, :, :] = 1 211 | 212 | # estimate graph structure for each class 213 | for k in range(K): 214 | pass 215 | # fill in G[:,:,k] 216 | # use helper.convert_A2G to convert a maximum spanning tree to a graph G 217 | # Solution Start 218 | 219 | # Solution End 220 | 221 | log_likelyhood = 0 222 | P = {'c': np.zeros(K)} 223 | 224 | P['clg'] = [] 225 | for i in range(10): 226 | create_mu = False 227 | create_theta = False 228 | for k in range(K): 229 | if G[i, 0, k] == 0: 230 | create_mu = True 231 | else: 232 | create_theta = True 233 | d = {'sigma_y': np.zeros(K), 234 | 'sigma_x': np.zeros(K), 235 | 'sigma_angle': np.zeros(K)} 236 | if create_mu: 237 | d['mu_y'], d['mu_x'] = np.zeros(K), np.zeros(K) 238 | d['mu_angle'] = np.zeros(K) 239 | else: 240 | d['mu_x'] = d['mu_y'] = d['mu_angle'] = np.array([]) 241 | 242 | if create_theta: 243 | d['theta'] = np.zeros((K, 12)) 244 | else: 245 | d['theta'] = np.array([]) 246 | P['clg'].append(d) 247 | 248 | # Solution Start 249 | 250 | # Solution End 251 | return P, G, log_likelyhood -------------------------------------------------------------------------------- /Learning-Tree-Structured-Networks/submit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from scipy.io import loadmat 4 | import numpy as np 5 | 6 | import helper 7 | import solution as sol 8 | 9 | sys.path.insert(0, '..') 10 | 11 | import commons 12 | 13 | 14 | class Grader(commons.SubmissionBase): 15 | def __init__(self): 16 | part_names = [None, 17 | 'ZWTe6', 'MHKM2', 'ersuQ', 'bzqOa', 18 | '7vCzT', 'N3abb', 'YMrzB', 'loxgM', 19 | 'izsvG', 'xWOCw', 'pBr7I', 'a7bhx', 20 | '22gxv', 'Bd2ZE'] 21 | super().__init__('Learning Tree-structured Networks', 'ZtTFjgNhEeahzAr11F1cUw', part_names) 22 | 23 | def __iter__(self): 24 | for part_id in range(1, len(self.part_names)): 25 | try: 26 | if part_id == 1: 27 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 28 | mu, sigma = sol.fit_gaussian_parameters(mat['exampleINPUT']['t1a1']) 29 | res = np.array([mu, sigma]) 30 | elif part_id == 2: 31 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 32 | mu, sigma = sol.fit_gaussian_parameters(mat['INPUT']['t1a1']) 33 | res = np.array([mu, sigma]) 34 | elif part_id == 3: 35 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 36 | beta, sigma = sol.fit_linear_gaussian_parameters(mat['exampleINPUT']['t2a1'], 37 | mat['exampleINPUT']['t2a2']) 38 | res = np.r_[beta, sigma] 39 | elif part_id == 4: 40 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 41 | beta, sigma = sol.fit_linear_gaussian_parameters(mat['INPUT']['t2a1'], 42 | mat['INPUT']['t2a2']) 43 | res = np.r_[beta, sigma] 44 | elif part_id == 5: 45 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 46 | G = mat['exampleINPUT']['t3a2'] 47 | G[:, 1] -= 1 48 | res = sol.compute_log_likelihood(mat['exampleINPUT']['t3a1'], G, mat['exampleINPUT']['t3a3']) 49 | elif 
part_id == 6: 50 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 51 | G = mat['INPUT']['t3a2'] 52 | G[:, 1] -= 1 53 | res = sol.compute_log_likelihood(mat['INPUT']['t3a1'], G, mat['INPUT']['t3a3']) 54 | elif part_id == 7: 55 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 56 | G = mat['exampleINPUT']['t4a2'] 57 | G[:, 1] -= 1 58 | P, L = sol.learn_cpd_given_graph(mat['exampleINPUT']['t4a1'], G, mat['exampleINPUT']['t4a3']) 59 | res = np.r_[P['c'], 60 | np.concatenate([clg['sigma_x'] for clg in P['clg']]), 61 | np.concatenate([clg['sigma_y'] for clg in P['clg']]), 62 | np.concatenate([clg['sigma_angle'] for clg in P['clg']]), L] 63 | 64 | elif part_id == 8: 65 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 66 | G = mat['INPUT']['t4a2'] 67 | G[:, 1] -= 1 68 | P, L = sol.learn_cpd_given_graph(mat['INPUT']['t4a1'], G, mat['INPUT']['t4a3']) 69 | res = np.r_[P['c'], 70 | np.concatenate([clg['sigma_x'] for clg in P['clg']]), 71 | np.concatenate([clg['sigma_y'] for clg in P['clg']]), 72 | np.concatenate([clg['sigma_angle'] for clg in P['clg']]), L] 73 | elif part_id == 9: 74 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 75 | G = mat['exampleINPUT']['t5a4'] 76 | G[:, 1] -= 1 77 | res = sol.classify_dataset(mat['exampleINPUT']['t5a1'], mat['exampleINPUT']['t5a2'], 78 | mat['exampleINPUT']['t5a3'], G) 79 | elif part_id == 10: 80 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 81 | G = mat['INPUT']['t5a4'] 82 | G[:, 1] -= 1 83 | res = sol.classify_dataset(mat['INPUT']['t5a1'], mat['INPUT']['t5a2'], 84 | mat['INPUT']['t5a3'], G) 85 | elif part_id == 11: 86 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True) 87 | A, W = sol.learn_graph_structure(mat['exampleINPUT']['t6a1']) 88 | res = ' '.join(map(str, A.ravel(order='F'))) 89 | elif part_id == 12: 90 | mat = loadmat('./data/submit_input.mat', simplify_cells=True) 91 | A, W = sol.learn_graph_structure(mat['INPUT']['t6a1']) 92 | res = ' '.join(map(str, A.ravel(order='F'))) 93 | elif part_id == 13 or part_id == 14: 94 | if part_id == 13: 95 | mat = loadmat('./data/PA8SampleCases.mat', simplify_cells=True)['exampleINPUT'] 96 | else: 97 | mat = loadmat('./data/submit_input.mat', simplify_cells=True)['INPUT'] 98 | P, G, L = sol.learn_graph_and_cpds(mat['t7a1'], mat['t7a2']) 99 | G[G[:, 0, 0]!=0, 1, 0] += 1 100 | G[G[:, 0, 1]!=0, 1, 1] += 1 101 | tmp = [P['c']] 102 | tmp.extend([P['clg'][i]['sigma_x'] for i in range(10)]) 103 | tmp.extend([P['clg'][i]['sigma_y'] for i in range(10)]) 104 | tmp.extend([P['clg'][i]['sigma_angle'] for i in range(10)]) 105 | tmp.append(G[:, 0, 0]); tmp.append(G[:, 1, 0]); 106 | tmp.append(G[:, 0, 1]); tmp.append(G[:, 1, 1]); 107 | tmp.append(np.array([L])) 108 | tmp = np.concatenate(tmp) 109 | res = commons.sprintf("%.4f", tmp) 110 | else: 111 | raise KeyError 112 | yield self.part_names[part_id], res 113 | except KeyError: 114 | yield self.part_names[part_id], 0 115 | 116 | 117 | if __name__ == '__main__': 118 | grader = Grader() 119 | grader.grade() -------------------------------------------------------------------------------- /Learning-With-Incomplete-Data/helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | import numpy as np 5 | from scipy.special import logsumexp 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | from matplotlib.animation import FuncAnimation 9 | import matplotlib as mpl 10 | 11 | sys.path.append('..') 12 | from 
commons.factor import Factor 13 | 14 | from vis_helper import * 15 | 16 | 17 | def wavg(x, w, axis=None): 18 | if axis is None: 19 | return np.average(x, weights=w) 20 | return np.average(x, weights=w, axis=axis) 21 | 22 | 23 | def fit_g(x, w): 24 | """ 25 | Args: 26 | x: numpy array with shape (N, ) 27 | w: weights of shape (N, ) 28 | 29 | Returns: 30 | mu, sigma 31 | """ 32 | 33 | mu = wavg(x, w) 34 | sigma = np.sqrt(wavg(x*x, w) - mu**2) 35 | 36 | return mu, sigma 37 | 38 | 39 | 40 | 41 | def fit_lg(x, u, w): 42 | M, N = len(x), u.shape[1] 43 | beta = np.zeros(N+1) 44 | sigma = 1 45 | 46 | A = np.zeros((N+1, N+1)) 47 | A[0, :-1] = np.average(u, weights=w, axis=0) 48 | A[0, -1] = 1 49 | 50 | A[1:, -1] = A[0, :-1] 51 | for i in range(N): 52 | A[i+1, :-1] = np.average(u * u[:, i][:, None], weights=w, axis=0) 53 | 54 | B = np.zeros(N+1) 55 | B[0] = np.average(x, weights=w) 56 | B[1:] = np.average(u * x[:, None], weights=w, axis=0) 57 | 58 | beta = np.linalg.solve(A, B) 59 | 60 | var = wavg(x*x, w) - wavg(x, w)**2 61 | 62 | for i in range(N): 63 | for j in range(N): 64 | cov = wavg(u[:, i]*u[:, j], w) - wavg(u[:, i], w)*wavg(u[:, j], w) 65 | var -= beta[i] * beta[j] * cov 66 | if var < 0: 67 | var = 0. 68 | sigma = np.sqrt(var) 69 | 70 | if sigma == 0: 71 | sigma = .01; 72 | else: 73 | sigma = sigma + .01 74 | 75 | return beta, sigma 76 | 77 | 78 | # Clique Tree related 79 | 80 | def create_clique_tree_hmm(factors): 81 | max_var = max(max(f.vars) for f in factors) 82 | card = len(factors[0].domains[0]) 83 | tree = { 84 | 'clique_list': [Factor([i, i+1], [card, card], init=1) for i in range(max_var)], 85 | 'adj_list': {i: set() for i in range(max_var)} 86 | } 87 | 88 | for i in range(max_var): 89 | if i > 0: 90 | tree['adj_list'][i].add(i-1) 91 | tree['adj_list'][i-1].add(i) 92 | if i < max_var-1: 93 | tree['adj_list'][i].add(i+1) 94 | tree['adj_list'][i+1].add(i) 95 | 96 | for f in factors: 97 | if len(f.vars) == 1: 98 | if f.vars[0] == 0: 99 | clique_idx = 0 100 | else: 101 | clique_idx = f.vars[0]-1 102 | else: 103 | clique_idx = min(f.vars) 104 | 105 | tree['clique_list'][clique_idx] = tree['clique_list'][clique_idx] + f 106 | return tree 107 | 108 | 109 | def get_next_clique(clique_tree, msgs): 110 | adj = clique_tree['adj_list'] 111 | 112 | for u in adj: 113 | n_neighbours = len(adj[u]) 114 | for v in adj[u]: 115 | if u not in msgs[v] and sum(1 for w in msgs[u] if v != w) == n_neighbours - 1: 116 | return u, v 117 | return None 118 | 119 | 120 | def log_marginalise(factor, vars_to_marginalise): 121 | new_factor = factor.dummy_marginalise(vars_to_marginalise) 122 | new_vars_idx = [i for i, v in enumerate(factor.vars) if v not in vars_to_marginalise] 123 | 124 | tmp_dict = {} 125 | for assignment in itertools.product(*new_factor.domains): 126 | tmp_dict[assignment] = [] 127 | 128 | for assignment in itertools.product(*factor.domains): 129 | new_assignment = tuple(assignment[i] for i in new_vars_idx) 130 | tmp_dict[new_assignment].append(factor.val[assignment]) 131 | 132 | for assignment, values in tmp_dict.items(): 133 | new_factor[assignment] = logsumexp(values) 134 | 135 | return new_factor 136 | 137 | 138 | def clique_tree_calibrate(clique_tree, is_max=0): 139 | # Note: msgs[u] = {v: msg_from_v_to_u} 140 | msgs = {i: {} for i in range(len(clique_tree['clique_list']))} 141 | adj = clique_tree['adj_list'] 142 | cliques = clique_tree['clique_list'] 143 | 144 | 145 | while True: 146 | ready_edge = get_next_clique(clique_tree, msgs) 147 | if ready_edge is None: 148 | break 149 | u, v = 
ready_edge 150 | msg = cliques[u] 151 | for w in adj[u]: 152 | if w == v: 153 | continue 154 | msg = msg + msgs[u][w] 155 | 156 | diff_set = set(msg.vars) - (set(msg.vars) & set(cliques[v].vars)) 157 | 158 | msg = log_marginalise(msg, diff_set) 159 | # z = logsumexp(list(msg.val.values())) 160 | # for assignment in msg: 161 | # msg[assignment] -= z 162 | msgs[v][u] = msg 163 | 164 | calibrated_potentials = [] 165 | for i in range(len(cliques)): 166 | factor = cliques[i] 167 | for msg in msgs[i].values(): 168 | factor = factor + msg 169 | calibrated_potentials.append(factor) 170 | 171 | return {'clique_list': calibrated_potentials, 'adj_list': adj} 172 | 173 | 174 | def compute_exact_marginals_hmm(factors): 175 | clique_tree = create_clique_tree_hmm(factors) 176 | calibrated_tree = clique_tree_calibrate(clique_tree) 177 | calibrated_cliques = calibrated_tree['clique_list'] 178 | 179 | variables = set() 180 | for f in factors: 181 | variables.update(f.vars) 182 | variables = sorted(list(variables)) 183 | 184 | marginals = [None]*len(variables) 185 | for var in variables: 186 | if var == 0: 187 | clique = calibrated_cliques[0] 188 | clique = log_marginalise(clique, {1, }) 189 | else: 190 | clique = calibrated_cliques[var-1] 191 | clique = log_marginalise(clique, {var-1, }) 192 | 193 | z = logsumexp(list(clique.val.values())) 194 | for k in clique.val: 195 | clique.val[k] -= z 196 | 197 | marginals[var] = clique 198 | 199 | return marginals, calibrated_cliques 200 | 201 | 202 | -------------------------------------------------------------------------------- /Learning-With-Incomplete-Data/sol.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | from scipy.stats import norm 5 | from scipy.special import logsumexp 6 | 7 | import helper 8 | sys.path.append('..') 9 | from commons.factor import Factor 10 | 11 | def em_cluster(pose_data, G, initial_class_prob, max_iter): 12 | """ 13 | Args: 14 | pose_data: (N, 10, 3) array, where N is number of poses; 15 | pose_data[i,:,:] yields the (10, 3) matrix for pose i. 16 | G: graph parameterization as explained in PA8 17 | initial_class_prob: (N, K), initial allocation of the N poses to the K 18 | classes. initial_class_prob[i, j] is the probability that example i belongs 19 | to class j 20 | max_iter: max number of iterations to run EM 21 | 22 | Returns: 23 | (P, log_likelihood, class_prob): 24 | P: dict holding the learned parameters as described in previous Python PA 25 | log_likelihood: (#(iterations run), 1) vector of loglikelihoods stored for 26 | each iteration 27 | class_prob: (N, K) conditional class probability of the N examples to the 28 | K classes in the final iteration. class_prob[i, j] is the probability that 29 | example i belongs to class j 30 | 31 | 32 | Copyright (C) Daphne Koller, Stanford Univerity, 2012 33 | """ 34 | 35 | N = pose_data.shape[0] 36 | K = initial_class_prob.shape[1] 37 | 38 | class_prob = initial_class_prob.copy() 39 | log_likelihood = np.zeros(max_iter) 40 | P = {'c': np.zeros(K)} 41 | 42 | # Following four lines are added to make grader work. 
43 | # Remove once you have your implementation 44 | clg = [] 45 | for i in range(10): 46 | clg.append({'sigma_x': [], 'sigma_y': [], 'sigma_angle': []}) 47 | P['clg'] = clg 48 | 49 | for iter in range(max_iter): 50 | pass 51 | # M-STEP to estimate parameters for Gaussians 52 | # 53 | # Fill in P['c'] with the estimates for prior class probabilities 54 | # Fill in P['clg'] for each body part and each class 55 | # Make sure to choose the right parameterization based on G[i, 0] 56 | # 57 | # Hint: This part should be similar to your work from PA8 58 | 59 | ################ 60 | # Your Code Here 61 | ################ 62 | 63 | # E-STEP to re-estimate class_prob using the new parameters 64 | # 65 | # Update class_prob with the new conditional class probabilities. 66 | # Recall that class_prob[i, j] is the probability that example i belongs to 67 | # class j. 68 | # 69 | # You should compute everything in log space, and only convert to 70 | # probability space at the end. 71 | # 72 | # Tip: Consider scipy.stats.norm for log pdf computation. 73 | # 74 | # Hint: You should use the scipy.special.logsumexp(already imported) 75 | # function here to do probability normalization in log space 76 | # to avoid numerical issues. 77 | class_prob = np.zeros_like(class_prob) 78 | 79 | ################ 80 | # Your Code Here 81 | ################ 82 | 83 | # Compute log likelihood of dataset for this iteration 84 | print("EM Iteration %d: log likelihood %f" % (iter, log_likelihood[iter])) 85 | 86 | if iter > 0 and log_likelihood[iter] < log_likelihood[iter-1]: 87 | break 88 | 89 | log_likelihood = log_likelihood[:iter+1] 90 | return P, log_likelihood, class_prob 91 | 92 | 93 | def em_hmm(action_data, pose_data, G, initial_class_prob, initial_pair_prob, max_iter): 94 | """ 95 | Args: 96 | action_data: list holding the actions as described in the PA 97 | pose_data: (N,10,3) numpy array, where N is number of poses in all actions 98 | G: graph parameterization as explained in PA description 99 | initial_class_prob: (N, K) numpy array, initial allocation of the N poses to the K 100 | states. initial_class_prob[i,j] is the probability that example i belongs 101 | to state j. 102 | This is described in more detail in the PA. 103 | initial_pair_prob: (V, K^2) numpy array, where V is the total number of pose 104 | transitions in all HMM action models, and K is the number of states. 105 | This is described in more detail in the PA. 106 | max_iter: max number of iterations to run EM 107 | 108 | Returns: 109 | P: dict holding the learned parameters as described in the PA 110 | log_likelihood: #(iterations run) x 1 vector of loglikelihoods stored for 111 | each iteration 112 | class_prob: (N, K) numpy array of the conditional class probability of the N examples to the 113 | K states in the final iteration. class_prob[i,j] is the probability that 114 | example i belongs to state j. This is described in more detail in the PA. 115 | pair_prob: (V, K^2) numpy array, where V is the total number of pose transitions 116 | in all HMM action models, and K is the number of states. This is 117 | described in more detail in the PA. 
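        Hint (an assumed approach based on the provided helper, not a requirement):
        helper.compute_exact_marginals_hmm returns normalized singleton log-marginals
        together with the calibrated pairwise cliques; exponentiating the former gives
        rows of class_prob, and normalizing the latter with logsumexp before
        exponentiating gives rows of pair_prob.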
118 | 119 | Copyright (C) Daphne Koller, Stanford University, 2012 120 | """ 121 | 122 | N = pose_data.shape[0] 123 | K = initial_class_prob.shape[1] 124 | L = len(action_data) 125 | V = initial_pair_prob.shape[0] 126 | 127 | 128 | 129 | first_pose_idxs = np.array([a['marg_ind'][0] for a in action_data]) 130 | 131 | class_prob = initial_class_prob 132 | pair_prob = initial_pair_prob 133 | log_likelihood = np.zeros(max_iter) 134 | 135 | P = {'c': np.zeros(K)} 136 | 137 | # The following four lines were added to make the grader work. 138 | # Remove them once you have your own implementation. 139 | clg = [] 140 | for i in range(10): 141 | clg.append({'sigma_x': [], 'sigma_y': [], 'sigma_angle': []}) 142 | P['clg'] = clg 143 | 144 | for iteration in range(max_iter): 145 | P['c'] = np.zeros(K) 146 | # M-STEP to estimate parameters for Gaussians 147 | # Fill in P['c'], the initial state prior probability 148 | # (NOT the class probability as in PA8 and em_cluster) 149 | # Fill in P['clg'] for each body part and each class 150 | # Make sure to choose the right parameterization based on G[i, 1] 151 | # Hint: This part should be similar to your work from PA8 and em_cluster 152 | 153 | ################ 154 | # Your Code Here 155 | ################ 156 | 157 | # M-STEP to estimate parameters for the transition matrix 158 | # Fill in P['transMatrix'], the transition matrix for states 159 | # P['transMatrix'][i,j] is the probability of transitioning from state i to state j 160 | 161 | # Add a Dirichlet prior based on the size of the dataset to avoid 0 probabilities 162 | P['transMatrix'] = np.zeros((K,K)) + pair_prob.shape[0] * .05 163 | 164 | ################ 165 | # Your Code Here 166 | ################ 167 | 168 | # E-STEP preparation: compute the emission model factors 169 | # (emission probabilities) in log space for each 170 | # of the poses in all actions = log( P(Pose | State) ) 171 | # Hint: This part should be similar to (but NOT the same as) your code in em_cluster 172 | 173 | log_emission_prob = np.zeros((N,K)) 174 | 175 | ################ 176 | # Your Code Here 177 | ################ 178 | 179 | # E-STEP to compute expected sufficient statistics 180 | # class_prob contains the conditional class probabilities 181 | # for each pose in all actions 182 | # pair_prob contains the expected sufficient statistics 183 | # for the transition CPDs (pairwise transition probabilities) 184 | # Also compute the log likelihood of the dataset for this iteration 185 | # You should do inference and compute everything in log space, 186 | # only converting to probability space at the end 187 | # Hint: You should use the logsumexp function here to do 188 | # probability normalization in log space to avoid numerical issues 189 | 190 | class_prob = np.zeros((N,K)) 191 | pair_prob = np.zeros((V,K*K)) 192 | log_likelihood[iteration] = 0 193 | 194 | ################ 195 | # Your Code Here 196 | ################ 197 | 198 | print('EM iteration %d: log likelihood: %f' % (iteration, log_likelihood[iteration])) 199 | if iteration > 0 and log_likelihood[iteration] < log_likelihood[iteration-1]: 200 | break 201 | 202 | log_likelihood = log_likelihood[:iteration+1] 203 | return P, log_likelihood, class_prob, pair_prob 204 | 205 | 206 | def recognize_actions(dataset_train, dataset_test, G, max_iter, return_Ps=False): 207 | """ 208 | Args: 209 | dataset_train: dataset for training models, see the PA for details 210 | dataset_test: dataset for testing models, see the PA for details 211 | G: graph parameterization as explained in the PA description 212 | max_iter: max number of iterations to run EM for 213 | return_Ps: (not in the original assignment) if True, also return Ps. 214 | 215 | Returns: 216 | accuracy: recognition accuracy, defined as (#correctly classified examples / #total examples) 217 | predicted_labels: (N,) shaped numpy array with the predicted labels for 218 | each of the instances in dataset_test, with N being the number of test instances 219 | 220 | Copyright (C) Daphne Koller, Stanford University, 2012 221 | """ 222 | accuracy = 0.0 223 | pred = np.zeros(3, dtype=int) # replace this line with an appropriate one 224 | 225 | # Train a model for each action 226 | # Note that all actions share the same graph parameterization and number of max iterations 227 | 228 | ################ 229 | # Your Code Here 230 | ################ 231 | 232 | 233 | # Classify each of the instances in dataset_test 234 | # Compute and return the predicted labels and accuracy 235 | # Accuracy is defined as (#correctly classified examples / #total examples) 236 | # Note that all actions share the same graph parameterization 237 | 238 | ################ 239 | # Your Code Here 240 | ################ 241 | 242 | 243 | if return_Ps: 244 | return accuracy, pred, Ps 245 | else: 246 | return accuracy, pred -------------------------------------------------------------------------------- /Learning-With-Incomplete-Data/submit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | 7 | import helper 8 | import sol 9 | 10 | sys.path.insert(0, '..') 11 | 12 | import commons 13 | 14 | def load_mat(part_id): 15 | if part_id%2 == 1: 16 | mat = loadmat('./data/PA9SampleCases.mat', simplify_cells=True)['exampleINPUT'] 17 | else: 18 | mat = loadmat('./data/submit_input.mat', simplify_cells=True)['INPUT'] 19 | return mat 20 | 21 | class Grader(commons.SubmissionBase): 22 | def __init__(self): 23 | part_names = [None, 24 | 'BwB8H', 'CWFts', 25 | '0tRrx', 'EgIZ9', 26 | 'Ev05z', 'zPvH5', 27 | 'BGZlk'] 28 | super().__init__('Learning with Incomplete Data', 'HRaFgotIEeaoKxJCmMZ6SQ', part_names) 29 | 30 | def __iter__(self): 31 | for part_id in range(1, len(self.part_names)): 32 | try: 33 | if part_id in (1, 2): 34 | mat = load_mat(part_id) 35 | G = mat['t1a2'] 36 | G[1:, 1] -= 1 37 | P, log_likelihood, class_prob = sol.em_cluster(mat['t1a1'], G, mat['t1a3'], mat['t1a4']) 38 | tmp = np.r_[P['c'], 39 | np.concatenate([clg['sigma_x'] for clg in P['clg']]), 40 | np.concatenate([clg['sigma_y'] for clg in P['clg']]), 41 | np.concatenate([clg['sigma_angle'] for clg in P['clg']]), 42 | log_likelihood, np.ravel(class_prob, order='F')] 43 | res = commons.sprintf("%.4f", tmp) 44 | elif part_id in (3, 4): 45 | mat = load_mat(part_id) 46 | action_data = mat['t2a1'] 47 | for a in action_data: 48 | a['marg_ind'] -= 1 49 | a['pair_ind'] -= 1 50 | G = mat['t2a3'] 51 | G[1:, 1] -= 1 52 | P, log_likelihood, class_prob, pair_prob = sol.em_hmm(action_data, mat['t2a2'], G, mat['t2a4'], mat['t2a5'], mat['t2a6']) 53 | tmp = np.r_[P['c'], 54 | np.concatenate([clg['sigma_x'] for clg in P['clg']]), 55 | np.concatenate([clg['sigma_y'] for clg in P['clg']]), 56 | np.concatenate([clg['sigma_angle'] for clg in P['clg']]), 57 | log_likelihood, 58 | np.ravel(class_prob, order='F'), 59 | np.ravel(pair_prob, order='F')] 60 | res = commons.sprintf("%.4f", tmp) 61 | elif part_id in (5, 6): 62 | mat = load_mat(part_id) 63 | for actionData in mat['t3a1']: 64 | for action in actionData['actionData']: 65 | action['marg_ind'] -= 1
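                    # marg_ind and pair_ind arrive 1-based from MATLAB; shift them to 0-based
66 | 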
action['pair_ind'] -= 1 67 | 68 | for action in mat['t3a2']['actionData']: 69 | action['marg_ind'] -= 1 70 | action['pair_ind'] -= 1 71 | mat['t3a2']['labels'] -= 1 72 | 73 | G = mat['t3a3'] 74 | G[1:, 1] -= 1 75 | acc, pred = sol.recognize_actions(mat['t3a1'], mat['t3a2'], G, mat['t3a4']) 76 | tmp = np.r_[acc, pred+1] 77 | res = commons.sprintf("%.4f", tmp) 78 | elif part_id == 7: 79 | if not os.path.isfile('./Predictions.npy'): 80 | print("Warning: Prediction.py is not generated. Not grading it.") 81 | res = 0 82 | else: 83 | mat = np.load('./Predictions.npy') 84 | if max(mat) < 3: 85 | mat += 1 86 | tmp = np.r_[len(mat), mat] 87 | res = '\n'.join(map(str, tmp)) 88 | else: 89 | raise KeyError 90 | yield self.part_names[part_id], res 91 | except KeyError: 92 | yield self.part_names[part_id], 0 93 | 94 | 95 | if __name__ == '__main__': 96 | grader = Grader() 97 | grader.grade() -------------------------------------------------------------------------------- /Learning-With-Incomplete-Data/vis_helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | import numpy as np 5 | from scipy.special import logsumexp 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | from matplotlib.animation import FuncAnimation 9 | import matplotlib as mpl 10 | 11 | 12 | def show_pose(pose): 13 | """ 14 | Original Authors: Huayan Wang, Andrew Duchi 15 | """ 16 | pose[:, 0] += 100 17 | pose[:, 1] += 150 18 | 19 | part_length = np.array([60, 20, 32, 33, 32, 33, 46, 49, 46, 49]) 20 | part_width = np.array([18, 10, 7, 5, 7, 5, 10, 7, 10, 7]) 21 | 22 | img = np.zeros((300, 300), dtype=np.uint8) 23 | 24 | for i in range(10): 25 | startpt = pose[i, :2].round().astype(int) 26 | axis = np.r_[np.sin(pose[i, 2] - np.pi / 2), np.cos(pose[i, 2] - np.pi / 2)] 27 | xaxis = np.r_[np.cos(pose[i, 2] - np.pi / 2), -np.sin(pose[i, 2] - np.pi / 2)] 28 | endpt = (startpt + part_length[i] * axis).round().astype(int) 29 | 30 | corner1 = (startpt + xaxis * part_width[i]).round().astype(int) 31 | corner2 = (startpt - xaxis * part_width[i]).round().astype(int) 32 | corner3 = (endpt + xaxis * part_width[i]).round().astype(int) 33 | corner4 = (endpt - xaxis * part_width[i]).round().astype(int) 34 | 35 | img = cv2.line(img, tuple(corner1[::-1]), tuple(corner2[::-1]), 255, 2) 36 | img = cv2.line(img, tuple(corner3[::-1]), tuple(corner4[::-1]), 255, 2) 37 | img = cv2.line(img, tuple(corner1[::-1]), tuple(corner3[::-1]), 255, 2) 38 | img = cv2.line(img, tuple(corner2[::-1]), tuple(corner4[::-1]), 255, 2) 39 | 40 | img = cv2.rectangle(img, tuple(startpt[::-1] - 4), tuple(startpt[::-1] + 4), 255, -1) 41 | return img 42 | 43 | 44 | def sample_pose(P, G, k): 45 | """ 46 | Args: 47 | P: 48 | G: 49 | k: None for unknown class, else label $\in$ 0, 1, 2, ... 
, k-1 50 | """ 51 | sample = np.zeros((10, 3)) 52 | if k is None: 53 | k = np.random.choice(len(P['c']), p=P['c']) 54 | 55 | remaining = set(range(10)) 56 | while remaining: 57 | i = remaining.pop() 58 | clg = P['clg'][i] 59 | par = G[i, 1] 60 | 61 | if G[i, 0] == 0: 62 | sample[i, 0] = clg['mu_y'][k] + clg['sigma_y'][k]*np.random.randn() 63 | sample[i, 1] = clg['mu_x'][k] + clg['sigma_x'][k]*np.random.randn() 64 | sample[i, 2] = clg['mu_angle'][k] + clg['sigma_angle'][k]*np.random.randn() 65 | elif G[i, 0] == 1: 66 | if par in remaining: 67 | remaining.add(i) 68 | continue 69 | 70 | muy = (clg['theta'][k,0] + 71 | clg['theta'][k,1] * sample[par,0] + 72 | clg['theta'][k,2] * sample[par,1] + 73 | clg['theta'][k,3] * sample[par,2]) 74 | mux = (clg['theta'][k,4] + 75 | clg['theta'][k,5] * sample[par,0] + 76 | clg['theta'][k,6] * sample[par,1] + 77 | clg['theta'][k,7] * sample[par,2]) 78 | muangle = (clg['theta'][k,8] + 79 | clg['theta'][k,9] * sample[par,0] + 80 | clg['theta'][k,10] * sample[par,1] + 81 | clg['theta'][k,11] * sample[par,2]) 82 | 83 | sample[i, 0] = muy + clg['sigma_y'][k]*np.random.randn() 84 | sample[i, 1] = mux + clg['sigma_x'][k]*np.random.randn() 85 | sample[i, 2] = muangle + clg['sigma_angle'][k]*np.random.randn() 86 | elif G[i, 0] == 2: 87 | if par in remaining: 88 | remaining.add(i) 89 | continue 90 | 91 | muy = (clg['gamma'][k,0] + 92 | clg['gamma'][k,1] * sample[par,0] + 93 | clg['gamma'][k,2] * sample[par,1] + 94 | clg['gamma'][k,3] * sample[par,2]) 95 | mux = (clg['gamma'][k,4] + 96 | clg['gamma'][k,5] * sample[par,0] + 97 | clg['gamma'][k,6] * sample[par,1] + 98 | clg['gamma'][k,7] * sample[par,2]) 99 | muangle = (clg['gamma'][k,8] + 100 | clg['gamma'][k,9] * sample[par,0] + 101 | clg['gamma'][k,10] * sample[par,1] + 102 | clg['gamma'][k,11] * sample[par,2]) 103 | 104 | sample[i, 0] = muy + clg['sigma_y'][k]*np.random.randn() 105 | sample[i, 1] = mux + clg['sigma_x'][k]*np.random.randn() 106 | sample[i, 2] = muangle + clg['sigma_angle'][k]*np.random.randn() 107 | return sample 108 | 109 | 110 | def visualize_models(P, G): 111 | K = len(P['c']) 112 | figs = [] 113 | for k in range(K): 114 | if G.ndim == 2: 115 | pose = sample_pose(P, G, k) 116 | else: 117 | pose = sample_pose(P, G[:, :, k], k) 118 | pose = show_pose(pose) 119 | figs.append(pose) 120 | return figs 121 | 122 | 123 | def visualize_dataset(dataset): 124 | images = [] 125 | for pose in dataset: 126 | images.append(show_pose(pose)) 127 | return images 128 | 129 | 130 | def create_html5_animation(*images, labels=None, nframes=10000, interval=500): 131 | nrows = len(images) 132 | nframes = min(nframes, min(map(len, images))) 133 | width = 3 134 | height = nrows*3 135 | 136 | fig, axs = plt.subplots(nrows, 1, figsize=(width, height), squeeze=False) 137 | ims = [] 138 | for i in range(nrows): 139 | ims.append(axs[i, 0].imshow(images[i][0], cmap='binary')) 140 | axs[i, 0].set_axis_off() 141 | if labels is not None: 142 | axs[i, 0].set_title(labels[i]) 143 | plt.tight_layout() 144 | 145 | def init(): 146 | for i in range(nrows): 147 | ims[i].set_data(images[i][0]) 148 | 149 | def animate(j, *args, **kwargs): 150 | for i in range(nrows): 151 | ims[i].set_data(images[i][j]) 152 | 153 | ani = FuncAnimation(fig, animate, frames=nframes, init_func=init, interval=interval) 154 | ani_html = ani.to_html5_video(); 155 | plt.close() 156 | return ani_html 157 | -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/.gitignore: 
-------------------------------------------------------------------------------- 1 | factors.fg 2 | inf.log 3 | -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/README.md: -------------------------------------------------------------------------------- 1 | Although the assignment is worth 90 marks, 2 | due to a bug at Coursera, the triplet-factor part fails in the grader script 3 | (see: the discussion forums for this week). 4 | 5 | Thus you will only be able to achieve 75 marks, though this is enough for honors. 6 | 7 | To check whether your implementation of that part is correct, use `check.ipynb`. 8 | -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/convert_mats.py: -------------------------------------------------------------------------------- 1 | from string import ascii_lowercase 2 | import pickle 3 | from collections import namedtuple 4 | import os, sys 5 | 6 | from scipy.io import loadmat 7 | import numpy as np 8 | 9 | 10 | sys.path.insert(0, os.path.abspath(os.path.pardir)) 11 | import commons 12 | from commons import utils 13 | from commons.factor import Factor 14 | 15 | 16 | def pa3_data(): 17 | allWords = loadmat('./data/PA3Data.mat')['allWords'] 18 | 19 | data = [] 20 | for datum in allWords: 21 | datum = datum[0] 22 | datum_new = {'img': [], 'ground_truth': [], 'word': []} 23 | for img, ground_truth in zip(datum['img'], datum['groundTruth']): 24 | img = img[0] 25 | ground_truth = ground_truth[0].item() - 1 # MATLAB uses 1-based indexing 26 | datum_new['img'].append(img) 27 | datum_new['ground_truth'].append(ground_truth) 28 | datum_new['word'].append(ascii_lowercase[ground_truth]) 29 | data.append(datum_new) 30 | 31 | return data 32 | 33 | 34 | def pa3_models(): 35 | mat = loadmat('./data/PA3Models.mat', simplify_cells=True) 36 | 37 | data = { 38 | 'image_model': mat['imageModel'], 39 | 'pairwise_model': mat['pairwiseModel'], 40 | 'triplet_list': [] 41 | } 42 | 43 | for triplet in mat['tripletList']: 44 | chars = tuple(triplet['chars']-1) # shift so that 'a' is 0, not 1 45 | factor_val = triplet['factorVal'] 46 | data['triplet_list'].append((chars, factor_val)) 47 | 48 | return data 49 | 50 | 51 | def pa3_sample_cases(is_test=False): 52 | if is_test: 53 | mat = loadmat('./data/PA3TestCases.mat', simplify_cells=True) 54 | else: 55 | mat = loadmat('./data/PA3SampleCases.mat', simplify_cells=True) 56 | out = {} 57 | for k, v in mat.items(): 58 | if k.startswith('__'): 59 | continue 60 | if k.lower().endswith('imagesinput'): 61 | char_new = {'img': [], 'ground_truth': [], 'word': []} 62 | for char in v: 63 | char_new['img'].append(char['img']) 64 | if 'groundTruth' in char: 65 | ground_truth = char['groundTruth'] - 1 # MATLAB uses 1-based indexing 66 | char_new['ground_truth'].append(ground_truth) 67 | char_new['word'].append(ascii_lowercase[ground_truth]) 68 | out['part%s_sample_image_input'%k[4]] = char_new 69 | elif k.lower().endswith('factorsoutput'): 70 | factors = [] 71 | for factor_dict in v: 72 | factors.append(Factor.from_matlab(factor_dict)) 73 | out['part%s_sample_factors_output'%k[4]] = factors 74 | elif k.lower().endswith('factoroutput'): 75 | factor = Factor.from_matlab(v) 76 | out['part%s_sample_factor_output'%k[4]] = factor 77 | elif k.lower().endswith('factorsinput'): 78 | factors = [] 79 | for factor_dict in v: 80 | factors.append(Factor.from_matlab(factor_dict)) 81 | out['part%s_sample_factors_input'%k[4]] = factors 82 | else: 83 | raise NotImplementedError(k) 84 | 85 | return out 86 | 
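# Hedged usage sketch (added for illustration; not part of the original
# assignment): this is roughly how the notebooks consume the converters above.
# Only keys actually built by pa3_data()/pa3_models()/pa3_sample_cases() are used.
#
#     words = pa3_data()                 # list of {'img', 'ground_truth', 'word'}
#     models = pa3_models()              # 'image_model', 'pairwise_model', 'triplet_list'
#     cases = pa3_sample_cases()         # per-part sample inputs/outputs
#     K = models['image_model']['K']     # number of character classes
#     print(len(words), 'words;', K, 'classes')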
87 | 88 | if __name__ == '__main__': 89 | pa3_data() 90 | pa3_models() -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/helper.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import itertools 3 | from string import ascii_lowercase 4 | 5 | import numpy as np 6 | 7 | import sol 8 | 9 | 10 | def factors_to_fg(factors, file_path): 11 | """Converts factor graph into .fg file 12 | 13 | This should only work if your factor names are 14 | integer and domains of that are also integers 15 | in the range of [0, cardinality(variable)) 16 | 17 | Luckily PGM course assignments has this structure. 18 | """ 19 | lines = [len(factors), ""] 20 | 21 | for f in factors: 22 | lines.append("%d" % (len(f.vars, ))) 23 | lines.append(" ".join(map(str, f.vars))) 24 | lines.append(" ".join(str(len(d)) for d in f.domains)) 25 | placeholder_idx = len(lines) 26 | lines.append(None) # will be replace by nonzero count once we know 27 | 28 | domains = reversed(f.domains) 29 | # libDAI expects first variable to change fastest 30 | # but itertools.product changes the last element fastest 31 | # hence reversed list 32 | n_nonzero = 0 33 | for i, assignment in enumerate(itertools.product(*domains)): 34 | assignment = tuple(reversed(assignment)) 35 | # if abs(f[assignment]) < 1e-5: 36 | # continue 37 | n_nonzero += 1 38 | line = "%d %.9g" % (i, f[assignment]) 39 | lines.append(line) 40 | lines[placeholder_idx] = "%d" % (n_nonzero, ) 41 | lines.append("") 42 | 43 | with open(file_path, 'wt') as f: 44 | for line in lines: 45 | print(line, file=f) 46 | 47 | 48 | def image_similarity(img1, img2): 49 | mean_sim = 0.283 # Avg sim score computed over held - out data. 50 | img1, img2 = img1.reshape(-1), img2.reshape(-1) 51 | cosine_dist = (img1 @ img2)/(np.sqrt((img1**2).sum()) * np.sqrt((img2**2).sum())) 52 | diff = (cosine_dist - mean_sim)**2 53 | 54 | if cosine_dist > mean_sim: 55 | return 1 + 5*diff 56 | else: 57 | return 1 / (1 + 5*diff) 58 | 59 | 60 | def compute_image_factor(img, img_model): 61 | img = np.array(img) 62 | img = img.T.reshape(-1) # matlab is column first oriented! 63 | X = img 64 | N = len(X) 65 | K = img_model['K'] 66 | 67 | theta = np.array(img_model['params'][:N*(K-1)]).reshape(N, K-1).T 68 | bias = np.array(img_model['params'][N*(K-1):]).reshape(-1) 69 | 70 | W = (theta @ X) + bias 71 | W = np.concatenate([W, [0]]) 72 | W -= W.max() 73 | W = np.exp(W) 74 | return W/W.sum() 75 | 76 | 77 | def run_inference(factors): 78 | fg_path = './factors.fg' 79 | factors_to_fg(factors, fg_path) 80 | 81 | output = subprocess.run(['./inference/doinference-linux', './factors.fg', 'map'], 82 | text=True, capture_output=True) 83 | 84 | if output.returncode != 0: 85 | raise Exception("doinference command failed:" + output.stderr) 86 | 87 | lines = output.stdout.rstrip().split('\n') 88 | n_lines = int(lines[0]) 89 | if len(lines) != n_lines+1: 90 | raise ValueError("Parsing error") 91 | 92 | values = [s.split(' ') for s in lines[1:]] 93 | return {int(k): int(v)-1 for k, v in values} 94 | # don't really know why -1 is needed here, but it works! 
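# For reference, a minimal sketch of the libDAI .fg layout that factors_to_fg()
# above emits for a single factor over variables 0 and 1, both of cardinality 2
# (the values are made up for illustration):
#
#     1          <- number of factors in the file
#                <- blank separator line
#     2          <- number of variables in this factor
#     0 1        <- variable names
#     2 2        <- their cardinalities
#     4          <- number of "index value" lines that follow
#     0 0.59     <- linear index (first variable changes fastest), value
#     1 0.41
#     2 0.22
#     3 0.78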
95 | 96 | 97 | def build_ocr_network(images, image_model, pairwise_model=None, triplet_list=None): 98 | factors = sol.compute_single_factors(images, image_model) 99 | 100 | if pairwise_model is not None: 101 | # factors.extend(sol.compute_equal_pairwise_factors(images, image_model['K'])) 102 | factors.extend(sol.compute_pairwise_factors(images, pairwise_model, image_model['K'])) 103 | 104 | if triplet_list is not None: 105 | factors.extend(sol.compute_triplet_factors(images, triplet_list, image_model['K'])) 106 | 107 | if not image_model.get('ignore_similarity', True): 108 | all_sim_factors = sol.compute_all_similarity_factors(images, image_model['K']) 109 | factors.extend(sol.choose_top_similarity_factors(all_sim_factors, 2)) 110 | 111 | return factors 112 | 113 | 114 | def compute_word_predictions(all_words, image_model, pairwise_model=None, triplet_list=None): 115 | predictions = [] 116 | for i, word in enumerate(all_words): 117 | factors = build_ocr_network(word, image_model, pairwise_model, triplet_list) 118 | prediction = run_inference(factors) 119 | predictions.append([prediction[i] for i in range(len(prediction))]) 120 | return predictions 121 | 122 | 123 | def score_predictions(words, predictions, show_output=True): 124 | assert len(words) == len(predictions), "Length mismatch" 125 | 126 | n_words_correct = n_words_total = 0 127 | n_chars_correct = n_chars_total = 0 128 | 129 | for word, pred in zip(words, predictions): 130 | n_words_total += 1 131 | n_chars_total += len(pred) 132 | 133 | n_chars_tmp = sum(1 for x, y in zip(word['ground_truth'], pred) if x==y) 134 | n_chars_correct += n_chars_tmp 135 | 136 | if n_chars_tmp == len(pred): 137 | n_words_correct += 1 138 | 139 | if show_output: 140 | correct = ''.join(ascii_lowercase[c] for c in word['ground_truth']) 141 | predicted = ''.join(ascii_lowercase[c] for c in pred) 142 | print("%s predicted as %s" % (correct, predicted)) 143 | 144 | char_acc = n_chars_correct/n_chars_total 145 | word_acc = n_words_correct/n_words_total 146 | if show_output: 147 | print("Char accuracy: %.3f" % char_acc) 148 | print("Word accuracy: %.3f" % word_acc) 149 | 150 | return char_acc, word_acc 151 | 152 | 153 | def score_model(words, image_model, pairwise_model=None, triplet_list=None): 154 | preds = compute_word_predictions(words, image_model, pairwise_model, triplet_list) 155 | return score_predictions(words, preds) 156 | -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/inference/doinference-linux: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DhruvPatel01/coursera_pgm_python/ff2921fde29a8b0f1cc336f1263b625940bc95d1/Markov-Networks-for-OCR/inference/doinference-linux -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/inference/doinference-mac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DhruvPatel01/coursera_pgm_python/ff2921fde29a8b0f1cc336f1263b625940bc95d1/Markov-Networks-for-OCR/inference/doinference-mac -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/inference/doinference.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DhruvPatel01/coursera_pgm_python/ff2921fde29a8b0f1cc336f1263b625940bc95d1/Markov-Networks-for-OCR/inference/doinference.exe 
-------------------------------------------------------------------------------- /Markov-Networks-for-OCR/inference/inference-src.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DhruvPatel01/coursera_pgm_python/ff2921fde29a8b0f1cc336f1263b625940bc95d1/Markov-Networks-for-OCR/inference/inference-src.zip -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/sol.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.insert(0, '..') 4 | import helper 5 | from commons.factor import Factor 6 | 7 | # Note: Variable names start with 0, i.e. the first node in the chain has name `0`. 8 | # Unlike the MATLAB assignment, use zero-based indexing for both variables and values, 9 | # i.e. character `a` has index 0 and `z` has index 25. 10 | 11 | 12 | def compute_single_factors(images, image_model): 13 | n = len(images['img']) 14 | factors = [] # fill this array with factors 15 | # Solution Start 16 | 17 | # Solution End 18 | 19 | return factors 20 | 21 | 22 | def compute_equal_pairwise_factors(images, K): 23 | n = len(images['img']) 24 | factors = [] # fill this array with factors; the first factor will have scope [0, 1], etc. 25 | 26 | # Solution Start 27 | 28 | # Solution End 29 | 30 | return factors 31 | 32 | 33 | def compute_pairwise_factors(images, pairwise_model, K): 34 | n = len(images['img']) 35 | factors = [] 36 | if n < 2: 37 | return factors 38 | 39 | # Solution Start 40 | 41 | 42 | # Solution End 43 | 44 | return factors 45 | 46 | 47 | def compute_triplet_factors(images, triplet_list, K): 48 | n = len(images['img']) 49 | factors = [] 50 | if n < 3: 51 | return factors 52 | 53 | # Solution Start 54 | 55 | 56 | 57 | # Solution End 58 | 59 | return factors 60 | 61 | 62 | def compute_similarity_factor(images, K, i, j): 63 | f = Factor([i, j], [K, K], init=1.)
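    # A hedged sketch of one possible approach (based on the PA description,
    # not necessarily the graded answer): score only agreeing assignments with
    # helper.image_similarity and leave the rest at the init value of 1, e.g.
    #
    #     sim = helper.image_similarity(images['img'][i], images['img'][j])
    #     for c in range(K):
    #         f[c, c] = sim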
64 | 65 | # Solution Start 66 | 67 | 68 | 69 | # Solution End 70 | 71 | return f 72 | 73 | 74 | def compute_all_similarity_factors(images, K): 75 | n = len(images['img']) 76 | factors = [] 77 | 78 | # Solution Start 79 | 80 | 81 | 82 | # Solution End 83 | 84 | return factors 85 | 86 | 87 | def choose_top_similarity_factors(factors, F): 88 | if len(factors) <= F: 89 | return factors 90 | 91 | new_factors = [] 92 | # Solution Start 93 | 94 | 95 | 96 | # Solution End 97 | 98 | return new_factors 99 | 100 | -------------------------------------------------------------------------------- /Markov-Networks-for-OCR/submit.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | import convert_mats 7 | 8 | import helper 9 | import sol 10 | 11 | sys.path.insert(0, '..') 12 | 13 | import commons 14 | from commons.factor import Factor 15 | 16 | 17 | class Grader(commons.SubmissionBase): 18 | def __init__(self): 19 | part_names = [None, 'Ga9CX', 'Y6ud3', 'YX6FP', 20 | 'sVpuc', 'ZzAEz', 'jF5vU', 'IQZRx', 21 | 'bWL2q', 'TfTAH', '44rjP', 'eGTyV', 'VfH4h'] 22 | super().__init__('Markov Networks for OCR', '1RFc-gNfEeapUhL5oS3IIQ', part_names) 23 | 24 | def __iter__(self): 25 | for part_id in range(1, len(self.part_names)): 26 | models = convert_mats.pa3_models() 27 | if part_id % 2 == 0: 28 | data = convert_mats.pa3_sample_cases(is_test=True) 29 | else: 30 | data = convert_mats.pa3_sample_cases(is_test=False) 31 | 32 | try: 33 | if part_id == 1 or part_id == 2: 34 | inp = data['part1_sample_image_input'] 35 | F = sol.compute_single_factors(inp, models['image_model']) 36 | F = [sort_factor(f) for f in F] 37 | F = sorted(F, key=lambda f: tuple(f.vars)) 38 | res = serialize_factors_fg_grading(F) 39 | elif part_id == 3 or part_id == 4: 40 | inp = data['part2_sample_image_input'] 41 | F = sol.compute_pairwise_factors(inp, 42 | models['pairwise_model'], 43 | models['image_model']['K']) 44 | F = [sort_factor(f) for f in F] 45 | F = sorted(F, key=lambda f: tuple(f.vars)) 46 | res = serialize_factors_fg_grading(F) 47 | elif part_id == 5 or part_id == 6: 48 | print("NOTE: compute_triplet_factors(15 marks) will not be submitted, as there is") 49 | print("a known bug at Coursera for this assignment.") 50 | # inp = data['part3_sample_image_input'] 51 | # F = sol.compute_triplet_factors(inp, models['triplet_list'], models['image_model']['K']) 52 | # F = [sort_factor(f) for f in F] 53 | # res = serialize_factors_fg_grading(F, 2) 54 | res = '0' 55 | elif part_id == 7: 56 | inp = data['part4_sample_image_input'] 57 | F = sol.compute_similarity_factor(inp, models['image_model']['K'], 0, 1) 58 | F = sort_factor(F) 59 | res = serialize_factors_fg_grading([F]) 60 | elif part_id == 8: 61 | inp = data['part4_sample_image_input'] 62 | F = sol.compute_similarity_factor(inp, models['image_model']['K'], 2, 3) 63 | F = sort_factor(F) 64 | res = serialize_factors_fg_grading([F]) 65 | elif part_id == 9 or part_id == 10: 66 | inp = data['part5_sample_image_input'] 67 | F = sol.compute_all_similarity_factors(inp, models['image_model']['K']) 68 | F = [sort_factor(f) for f in F] 69 | F = sorted(F, key=lambda f: tuple(f.vars)) 70 | res = serialize_factors_fg_grading(F) 71 | elif part_id == 11 or part_id == 12: 72 | inp = data['part6_sample_factors_input'] 73 | F = sol.choose_top_similarity_factors(inp, 2) 74 | F = [sort_factor(f) for f in F] 75 | F = sorted(F, key=lambda f: tuple(f.vars)) 76 | res = 
serialize_factors_fg_grading(F) 77 | yield self.part_names[part_id], res 78 | except KeyError: 79 | raise 80 | 81 | 82 | def serialize_factors_fg_grading(factors, skip=1) -> str: 83 | lines = ["%d\n" % len(factors)] 84 | 85 | for f in factors: 86 | var = [v+1 for v in f.vars] 87 | lines.append("%d" % (len(var), )) 88 | lines.append(" ".join(map(str, var))) 89 | lines.append(" ".join(str(len(d)) for d in f.domains)) 90 | placeholder_idx = len(lines) 91 | lines.append(None) # will be replaced by the line count once we know it 92 | 93 | # libDAI expects the first variable to change fastest, 94 | # but itertools.product changes the last element fastest, 95 | # hence the reversed list 96 | domains = reversed(f.domains) 97 | num_lines = 0 98 | new_lines = [] 99 | for i, assignment in enumerate(itertools.product(*domains)): 100 | num_lines += 1 101 | val = f[tuple(reversed(assignment))] 102 | if val != 1: 103 | new_lines.append("%0.8g" % (val, )) 104 | new_lines = new_lines[::skip] 105 | lines[placeholder_idx] = "%d" % (num_lines, ) 106 | lines.extend(new_lines) 107 | lines.append("") 108 | 109 | return "\n".join(lines) 110 | 111 | def sort_factor(F): 112 | domains_d = dict(zip(F.vars, F.domains)) 113 | var = sorted(F.vars) 114 | domains = [domains_d[v] for v in var] 115 | newF = Factor(var, domains) 116 | for k in F: 117 | assignment = dict(zip(F.vars, k)) 118 | newF[assignment] = F[k] 119 | return newF 120 | 121 | 122 | if __name__ == '__main__': 123 | grader = Grader() 124 | grader.grade() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What? 2 | 3 | ![Claps](clap.gif) 4 | 5 | This repository contains Python skeletons for the programming 6 | assignments of the Coursera course "Probabilistic Graphical Models" by Daphne Koller. 7 | 8 | All the assignments from parts A, B, and C have been ported. 9 | 10 | The course assignments are written in MATLAB/Octave. I personally find 11 | that language harder to work with these days, and newcomers are generally 12 | more familiar with Python than MATLAB. 13 | 14 | Also, the course assignments provide a lot of boilerplate code, and many of 15 | the details live in that boilerplate. I felt that implementing the assignments and the boilerplate 16 | in Python would be a better way to master the course material. 17 | 18 | # How? 19 | 20 | Download the original zip and extract it somewhere. Copy all `*.mat` files 21 | from that directory into the `data` directory of the respective assignment here. 22 | 23 | All functions that you will have to implement are in `solution.py`. 24 | Once you have finished one function implementation, you can check 25 | your implementation locally in `check.ipynb`. This notebook does 26 | the work of reading the `.mat` files and converting them into the appropriate 27 | structures. 28 | 29 | 30 | Once you are satisfied with your solution, you can 31 | submit it by running the `submit.py` script. The script 32 | will ask you for your email address and token. 33 | 34 | 35 | ## Before you start the second assignment 36 | 37 | Please go through my implementation of [factor](commons/factor.py). 38 | This is the most important/common class that you will work with. 39 | 40 | **NOTE**: Please do not view that file during assignment 1, as it might 41 | spoil a couple of answers for that assignment. The same holds in the original 42 | MATLAB code: answers to some of the questions are implemented there 43 | for use in later assignments, so you do not have to keep reinventing the wheel. 44 | 45 | 46 | Factor is basically a lookup table (a dictionary) made of discrete variables. 47 | Each variable has some cardinality. Say you want to create a factor over two binary variables: use `Factor([0, 1], [2, 2])`. The first argument is a list 48 | of variable names; the second is the cardinality of each variable in the first 49 | argument. 50 | 51 | - Also note that I've used 0-based indexing, unlike the original MATLAB code, so variable `1` can 52 | take either value `0` or `1`. 53 | 54 | ## Note: 55 | - Although I've tested the assignments (in almost all cases I got 100/100), there 56 | could be hidden bugs which mark your solution wrong even though it is correct. If you feel that your solution is correct but it is marked wrong, please first debug 57 | in `check.ipynb`, check whether the expected output matches yours, and then raise an issue. I'll look into it. 58 | 59 | # Acknowledgements 60 | - The Python grader is derived from [Gerges Dib](https://github.com/dibgerge/ml-coursera-python-assignments)'s code, which itself is derived from [Marcel Stampfer](https://github.com/mstampfer/Coursera-Stanford-ML-Python)'s code. 61 | -------------------------------------------------------------------------------- /Sampling-Methods/crandom.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following code was converted from Octave/MATLAB to Python 3 | so that this Python assignment can be submitted successfully 4 | on Coursera. 5 | 6 | Note: I don't think this code works! 7 | 8 | Ported by: Dhruv Patel (https://github.com/DhruvPatel01) 9 | """ 10 | import math 11 | 12 | import numpy as np 13 | 14 | 15 | # x_i is the global state. Never let it become an integer. 16 | x_i = 1.0 17 | p1 = 160481183 18 | p2 = 179424673 19 | 20 | 21 | def seed(arg=1.): 22 | global x_i 23 | x_i = arg * 1. # the seed needs to be a float to be compatible with MATLAB 24 | 25 | 26 | def mod(x, y): # a custom mod is required, as Python's % operator does not behave like MATLAB's mod 27 | return x - math.floor(x/y) * y 28 | 29 | 30 | def randi(max_val, n_rows=None, n_cols=None): 31 | global x_i 32 | 33 | if n_rows is None: 34 | x_i = mod(x_i * (p1 + 1) + p1, p2) 35 | return int(x_i % max_val) 36 | 37 | if n_cols is None: 38 | n_cols = n_rows 39 | 40 | rand_matrix = np.zeros((n_rows, n_cols), dtype=np.int64) 41 | for i in range(n_rows): 42 | for j in range(n_cols): 43 | x_i = mod(x_i * (p1 + 1) + p1, p2) 44 | rand_matrix[i, j] = int(x_i % max_val) 45 | return rand_matrix 46 | 47 | 48 | def rand(n_rows=None, n_cols=None): 49 | if n_rows is None: 50 | return randi(1e6)/1e6 51 | 52 | if n_cols is None: 53 | n_cols = n_rows 54 | 55 | return randi(1e6, n_rows, n_cols)/1e6 56 | 57 | 58 | def randsample(vals: int, num_samples: int, p=None): 59 | """ 60 | Returns num_samples random integers (with replacement) from [0, vals). 61 | 62 | Note: The original Octave code seems to have a bug 63 | when the replacement flag is False. However, since 64 | none of the tasks requires replacement to be False, I have 65 | not implemented a bug-free version of sampling without replacement. 66 | If you need it, please implement it yourself and open a 67 | PR so I can have it too! 68 | 69 | Args: 70 | vals: 1 plus the highest value required 71 | num_samples: how many samples are required 72 | p: An array of length vals, giving weights.
If None all weights 73 | are same 74 | 75 | Returns: 76 | Array of n longs 77 | """ 78 | assert vals > 0, "Only +ve vals is allowed" 79 | if p is None: 80 | w = np.linspace(0, 1, vals+1) 81 | else: 82 | p = np.array(p) 83 | w = np.r_[0, (p/p.sum()).cumsum()] 84 | 85 | w = w[None, :] 86 | 87 | probs = rand(num_samples, 1) 88 | idx = (w[:, :-1] <= probs) & (w[:, 1:] >= probs) 89 | _, sample = idx.nonzero() 90 | return sample 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /Sampling-Methods/drandom.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | seeds = {} 4 | 5 | disable = False 6 | 7 | for s in [1, 2, 26288942]: 8 | with open(f'./data/seed{s}.txt') as fin: 9 | data = fin.read() 10 | seeds[s] = list(map(int, data.split())) 11 | 12 | current_store = seeds[1] 13 | current_index = 0 14 | 15 | 16 | def seed(arg): 17 | assert arg in seeds, "Invalid seed" 18 | global current_store, current_index 19 | current_store = seeds[arg] 20 | current_index = 0 21 | 22 | 23 | def randi(max_val, n_rows=None, n_cols=None): 24 | global current_index 25 | if n_rows is None: 26 | current_index += 1 27 | return current_store[current_index-1] % max_val 28 | 29 | if n_cols is None: 30 | n_cols = n_rows 31 | 32 | rand_matrix = np.zeros((n_rows, n_cols), dtype=np.int64) 33 | for i in range(n_rows): 34 | for j in range(n_cols): 35 | current_index += 1 36 | rand_matrix[i, j] = current_store[current_index-1] % max_val 37 | return rand_matrix 38 | 39 | 40 | def rand(n_rows=None, n_cols=None): 41 | """ 42 | Args: 43 | (None, None) returns random scalar in between 0 and 1. 44 | (n_rows, None) returns random matrix of (n_rows, n_rows) size 45 | (n_rows, n_cols) returns random matrix of (n_rows, n_cols) size 46 | """ 47 | if n_rows is None: 48 | if disable: 49 | return np.random.rand() 50 | return randi(1e6)/1e6 51 | 52 | if n_cols is None: 53 | n_cols = n_rows 54 | 55 | if disable: 56 | return np.random.rand(n_rows, n_cols) 57 | else: 58 | return randi(1e6, n_rows, n_cols)/1e6 59 | 60 | 61 | def randsample(vals: int, num_samples: int, p=None): 62 | """ 63 | Returns n random integers(with replacement) from [0, vals). 64 | 65 | Note: Original Octave code seems to have a bug 66 | when replacement flag is False. However since 67 | none of the tasks requires replacement to be False, I've 68 | not implemented bug free version of without replacement. 69 | If you need it please implement by yourself and open a 70 | PR request so I can have that too! 71 | 72 | Args: 73 | vals: 1 plus the highest value required 74 | num_samples: how many samples are required? 75 | p: An array of length V, gives weights. 
If None all weights 76 | are same 77 | 78 | Returns: 79 | Array of n longs 80 | """ 81 | assert vals > 0, "Only +ve vals is allowed" 82 | if p is None: 83 | w = np.linspace(0, 1, vals+1) 84 | else: 85 | p = np.array(p) 86 | w = np.r_[0, (p/p.sum()).cumsum()] 87 | 88 | w = w[None, :] 89 | probs = rand(num_samples, 1) 90 | idx = (w[:, :-1] <= probs) & (w[:, 1:] >= probs) 91 | _, sample = idx.nonzero() 92 | return sample -------------------------------------------------------------------------------- /Sampling-Methods/helper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | 5 | sys.path.insert(0, '../') 6 | from commons.factor import Factor 7 | 8 | 9 | def extract_marginals_from_samples(G, collected_samples): 10 | M = [Factor([i], [card], init=0.) for i, card in enumerate((G['card']))] 11 | 12 | for sample in collected_samples: 13 | for i, v in enumerate(sample): 14 | M[i][v] += 1/len(collected_samples) 15 | return M 16 | 17 | 18 | def variable_2_factor(V, F): 19 | var2f = {i: set() for i in range(V)} 20 | for i, f in enumerate(F): 21 | for j in f.vars: 22 | var2f[j].add(i) 23 | return var2f 24 | 25 | 26 | def construct_toy_network(on_diag_weight, off_diag_weight): 27 | n = 4 28 | k = 2 29 | V = n * n 30 | 31 | G = { 32 | 'names': ['pixel%d' % i for i in range(V)], 33 | 'card': [2]*V 34 | } 35 | 36 | adj_list = {i: set() for i in range(V)} 37 | for i in range(V): 38 | for j in range(i+1, V): 39 | idx_i = np.array(np.unravel_index(i, [n, n], order='F')) 40 | idx_j = np.array(np.unravel_index(j, [n, n], order='F')) 41 | if abs(idx_i - idx_j).sum() == 1: 42 | adj_list[i].add(j) 43 | adj_list[j].add(i) 44 | G['adj_list'] = adj_list 45 | 46 | factors = [] 47 | for i in range(V): 48 | f = Factor([i, ], [2, ]) 49 | if i < V//2: 50 | f[0] = .4 51 | f[1] = .6 52 | else: 53 | f[0] = .6 54 | f[1] = .4 55 | factors.append(f) 56 | 57 | for u, vs in adj_list.items(): 58 | for v in vs: 59 | if u >= v: 60 | continue 61 | 62 | f = Factor([u, v], [2, 2]) 63 | f[0, 0] = f[1, 1] = on_diag_weight 64 | f[0, 1] = f[1, 0] = off_diag_weight 65 | factors.append(f) 66 | 67 | G['var2factors'] = variable_2_factor(V, factors) 68 | return G, factors 69 | 70 | 71 | -------------------------------------------------------------------------------- /Sampling-Methods/submit.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | 7 | import helper 8 | import solution 9 | import drandom 10 | 11 | sys.path.insert(0, '..') 12 | 13 | import commons 14 | from commons.factor import Factor 15 | 16 | 17 | def _get_toy1(m2=False): 18 | mat = loadmat('./data/submit_input.mat', simplify_cells=True)['INPUT'] 19 | if m2: 20 | G = mat['toyNet_m2'] 21 | else: 22 | G = mat['toyNet'] 23 | G['adj_list'] = commons.adj_matrix_to_adj_list(G['edges']) 24 | G['var2factors'] = np.array([x - 1 for x in G['var2factors']], dtype='object') 25 | G['q_list'][:, 0] -= 1 26 | G['q_list'][:, 1] -= 1 27 | F = np.array([Factor.from_matlab(f) for f in mat['toyFac']]) 28 | return mat, G, F, mat['A0'] - 1 29 | 30 | def _get_toy2(m2=False): 31 | mat = loadmat('./data/submit_input.mat', simplify_cells=True)['INPUT'] 32 | if m2: 33 | G = mat['toyNet2_m2'] 34 | else: 35 | G = mat['toyNet2'] 36 | G['adj_list'] = commons.adj_matrix_to_adj_list(G['edges']) 37 | G['var2factors'] = np.array([x - 1 for x in G['var2factors']], dtype='object') 38 | G['q_list'][:, 0] -= 1 
39 | G['q_list'][:, 1] -= 1 40 | F = np.array([Factor.from_matlab(f) for f in mat['toyFac2']]) 41 | return mat, G, F, mat['A0'] - 1 42 | 43 | class Grader(commons.SubmissionBase): 44 | def __init__(self): 45 | part_names = [None, 46 | 'oLlCG', 'u0bdr', 'ZvlVb', 'kAwIs', 47 | 'BnmgG', 'x4EL4', '7vRgM', 'o2kO8', 48 | 'E3nMZ', 'NYOmz', 'IhNcD', 'IWmCs', 49 | 'R9Ium', 'RiDeK'] 50 | super().__init__('Sampling Methods', 'RT4_2ANgEeacSgpo_ExyYQ', part_names) 51 | 52 | def __iter__(self): 53 | for part_id in range(1, len(self.part_names)): 54 | try: 55 | if part_id == 1: 56 | drandom.seed(1) 57 | mat, G, F, A = _get_toy1() 58 | V = [0] 59 | res = solution.block_log_distribution(V, G, F, A.copy()) 60 | elif part_id == 2: 61 | drandom.seed(2) 62 | mat, G, F, A = _get_toy1() 63 | res1 = solution.block_log_distribution([9], G, F, A.copy()) 64 | res2 = solution.block_log_distribution([14], G, F, A.copy()) 65 | res = np.r_[res1, res2] 66 | elif part_id == 3: 67 | mat, G, F, A = _get_toy1() 68 | drandom.seed(1) 69 | out = [A.copy()] 70 | for i in range(10): 71 | out.append(solution.gibbs_trans(out[-1].copy(), G, F)) 72 | res = serialize_matrix(out) 73 | elif part_id == 4: 74 | mat, G, F, A = _get_toy2() 75 | drandom.seed(2) 76 | out = [A.copy()] 77 | for i in range(20): 78 | out.append(solution.gibbs_trans(out[-1].copy(), G, F)) 79 | res = serialize_matrix(out) 80 | with open('/tmp/pyPart4.txt','w') as f: 81 | f.write(res) 82 | elif part_id == 5: 83 | drandom.seed(1) 84 | mat, G, F, A = _get_toy1() 85 | M, all_samples = solution.mcmc_inference(G, F, None, "Gibbs", 0, 500, 1, A.copy()) 86 | for f in M: 87 | f.vars = [v+1 for v in f.vars] 88 | res = serialize_factors_fg_grading(M) 89 | 90 | M, all_samples = solution.mcmc_inference(G, F, None, "MHGibbs", 0, 500, 1, A.copy()) 91 | for f in M: 92 | f.vars = [v+1 for v in f.vars] 93 | res = res.strip()+'\n'+serialize_factors_fg_grading(M) 94 | with open('/tmp/pyPart5.txt','w') as f: 95 | f.write(res) 96 | elif part_id == 6: 97 | drandom.seed(1) 98 | mat, G, F, A = _get_toy2() 99 | M, all_samples = solution.mcmc_inference(G, F, None, "Gibbs", 0, 500, 1, A.copy()) 100 | for f in M: 101 | f.vars = [v+1 for v in f.vars] 102 | res = serialize_factors_fg_grading(M) 103 | 104 | M, all_samples = solution.mcmc_inference(G, F, None, "MHGibbs", 0, 500, 1, A.copy()) 105 | for f in M: 106 | f.vars = [v+1 for v in f.vars] 107 | res = res.strip()+'\n'+serialize_factors_fg_grading(M) 108 | with open('/tmp/pyPart5.txt','w') as f: 109 | f.write(res) 110 | elif part_id == 7: 111 | mat, G, F, A = _get_toy1() 112 | drandom.seed(1) 113 | out = [A.copy()] 114 | for i in range(10): 115 | out.append(solution.mh_uniform_trans(out[-1].copy(), G, F)) 116 | res = serialize_matrix(out) 117 | elif part_id == 8: 118 | mat, G, F, A = _get_toy2() 119 | drandom.seed(2) 120 | out = [A.copy()] 121 | for i in range(20): 122 | out.append(solution.mh_uniform_trans(out[-1].copy(), G, F)) 123 | res = serialize_matrix(out) 124 | elif part_id == 9: 125 | mat, G, F, A = _get_toy1() 126 | drandom.seed(1) 127 | out = [A.copy()] 128 | for i in range(10): 129 | out.append(solution.mhsw_trans(out[-1].copy(), G, F, 1)) 130 | res = serialize_matrix(out) 131 | elif part_id == 10: 132 | mat, G, F, A = _get_toy2() 133 | drandom.seed(2) 134 | out = [A.copy()] 135 | for i in range(20): 136 | out.append(solution.mhsw_trans(out[-1].copy(), G, F, 1)) 137 | res = serialize_matrix(out) 138 | elif part_id == 11: 139 | mat, G, F, A = _get_toy1(m2=True) 140 | drandom.seed(1) 141 | out = [A.copy()] 142 | for i in range(20): 
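                        # chain 20 MH Swendsen-Wang (variant 2) transitions from the initial assignment A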
143 | out.append(solution.mhsw_trans(out[-1].copy(), G, F, 2)) 144 | res = serialize_matrix(out) 145 | elif part_id == 12: 146 | mat, G, F, A = _get_toy2(m2=True) 147 | drandom.seed(2) 148 | out = [A.copy()] 149 | for i in range(20): 150 | out.append(solution.mhsw_trans(out[-1].copy(), G, F, 2)) 151 | res = serialize_matrix(out) 152 | elif part_id == 13: 153 | drandom.seed(1) 154 | mat, G, F, A = _get_toy1() 155 | M, all_samples = solution.mcmc_inference(G, F, None, "MHUniform", 0, 500, 1, A.copy()) 156 | for f in M: f.vars = [v+1 for v in f.vars] 157 | res = serialize_factors_fg_grading(M) 158 | 159 | M, all_samples = solution.mcmc_inference(G, F, None, "MHSwendsenWang1", 0, 500, 1, A.copy()) 160 | for f in M: f.vars = [v+1 for v in f.vars] 161 | res = res + '\n' + serialize_factors_fg_grading(M) 162 | 163 | M, all_samples = solution.mcmc_inference(G, F, None, "MHSwendsenWang2", 0, 500, 1, A.copy()) 164 | for f in M: f.vars = [v+1 for v in f.vars] 165 | res = res + '\n' + serialize_factors_fg_grading(M) 166 | elif part_id == 14: 167 | drandom.seed(2) 168 | mat, G, F, A = _get_toy2() 169 | M, all_samples = solution.mcmc_inference(G, F, None, "MHUniform", 0, 500, 1, A.copy()) 170 | for f in M: f.vars = [v+1 for v in f.vars] 171 | res = serialize_factors_fg_grading(M) 172 | 173 | M, all_samples = solution.mcmc_inference(G, F, None, "MHSwendsenWang1", 0, 500, 1, A.copy()) 174 | for f in M: f.vars = [v+1 for v in f.vars] 175 | res = res+ '\n' + serialize_factors_fg_grading(M) 176 | 177 | M, all_samples = solution.mcmc_inference(G, F, None, "MHSwendsenWang2", 0, 500, 1, A.copy()) 178 | for f in M: f.vars = [v+1 for v in f.vars] 179 | res = res + '\n' + serialize_factors_fg_grading(M) 180 | else: 181 | raise KeyError 182 | 183 | yield self.part_names[part_id], res 184 | except KeyError: 185 | yield self.part_names[part_id], 0 186 | 187 | 188 | def serialize_matrix(matrix, add_one=True) -> str: 189 | res = '' 190 | for l in matrix: 191 | if add_one: 192 | l += 1 193 | res = res + '\n' 194 | for x in l: 195 | res += '%d ' % x 196 | return res 197 | 198 | def serialize_factors_fg_grading(factors) -> str: 199 | lines = ["%d\n" % len(factors)] 200 | 201 | for f in factors: 202 | lines.append("%d" % (len(f.vars, ))) 203 | lines.append(" ".join(map(str, f.vars))) 204 | lines.append(" ".join(str(len(d)) for d in f.domains)) 205 | placeholder_idx = len(lines) 206 | lines.append(None) # will be replace by nonzero count once we know 207 | 208 | # libDAI expects first variable to change fastest 209 | # but itertools.product changes the last element fastest 210 | # hence reversed list 211 | domains = reversed(f.domains) 212 | num_lines = 0 213 | for i, assignment in enumerate(itertools.product(*domains)): 214 | num_lines += 1 215 | val = f[tuple(reversed(assignment))] 216 | # if abs(val) <= 1e-40 or abs(val - 1) <= 1e-40 or np.isinf(val) or np.isnan(val): 217 | # continue 218 | lines.append("%d %0.8g" % (i, val, )) 219 | lines[placeholder_idx] = "%d" % (num_lines, ) 220 | lines.append("") 221 | 222 | return "\n".join(lines) 223 | 224 | 225 | def serialize_compact_tree(tree, skip=1) -> str: 226 | adj_list = tree['adj_list'] 227 | N = len(adj_list) 228 | lines = ['%d' % N] 229 | 230 | for i in range(N): 231 | nbs = adj_list[i] 232 | lines.append(" ".join("1" if i in nbs else "0" for i in range(N))) 233 | 234 | # convert 0 based index into 1 based index!! Uuhhh!! 
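    # (the libDAI-style grader expects 1-based variable names, so shift them up
    # before serializing and back down again afterwards)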
235 | for factor in tree['clique_list']: 236 | factor.vars = [v+1 for v in factor.vars] 237 | 238 | factor_graph = serialize_factors_fg_grading(tree['clique_list'], skip) 239 | lines.append(factor_graph) 240 | 241 | # convert 1 based index into 0 based index!! Uuhhh!! 242 | for factor in tree['clique_list']: 243 | factor.vars = [v - 1 for v in factor.vars] 244 | 245 | return '\n'.join(lines) 246 | 247 | 248 | if __name__ == '__main__': 249 | grader = Grader() 250 | grader.grade() 251 | -------------------------------------------------------------------------------- /Sampling-Methods/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def smooth(Y, window=5): 4 | Y = Y.squeeze() 5 | if window%2 == 0: 6 | window = window+1 7 | mid = (window-1)//2 8 | 9 | smoother = np.zeros((len(Y), len(Y))) 10 | for i in range(len(Y)): 11 | dev = min(mid, min(i, len(Y)-1-i)) 12 | smoother[i, i-dev:i+dev+1] = 1 13 | col = smoother.sum(axis=1) 14 | return (smoother @ Y)/col 15 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/Credit_net.net: -------------------------------------------------------------------------------- 1 | net 2 | { 3 | propagationenginegenerator1791944048146838126L = "edu.ucla.belief.approx.BeliefPropagationSettings@62c5294d"; 4 | huginenginegenerator3061656038650325130L = "edu.ucla.belief.inference.JoinTreeSettings@66867263"; 5 | recoveryenginegenerator6944530267470113528l = "edu.ucla.util.SettingsImpl@15eccd21"; 6 | node_size = (130.0 55.0); 7 | } 8 | 9 | node Income 10 | { 11 | states = ("High" "Medium" "Low" ); 12 | position = (-16 -208); 13 | excludepolicy = "include whole CPT"; 14 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 15 | ismapvariable = "false"; 16 | ID = "Profession"; 17 | DSLxPOSITIONxHEIGHT = "30"; 18 | DSLxTYPE = "CPT"; 19 | label = "Income"; 20 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 21 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 22 | DSLxPOSITIONxWIDTH = "83"; 23 | diagnosistype = "AUXILIARY"; 24 | } 25 | node PaymentHistory 26 | { 27 | states = ("Excellent" "Acceptable" "Unacceptable" ); 28 | position = (159 -208); 29 | excludepolicy = "include whole CPT"; 30 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 31 | ismapvariable = "false"; 32 | ID = "PaymentHistory"; 33 | DSLxPOSITIONxHEIGHT = "30"; 34 | DSLxTYPE = "CPT"; 35 | label = "Payment History"; 36 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 37 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 38 | DSLxPOSITIONxWIDTH = "93"; 39 | diagnosistype = "AUXILIARY"; 40 | } 41 | node Age 42 | { 43 | states = ("Between16and21" "Between22and64" "Over65" ); 44 | position = (340 -208); 45 | excludepolicy = "include whole CPT"; 46 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 47 | ismapvariable = "false"; 48 | ID = "Age"; 49 | DSLxPOSITIONxHEIGHT = "30"; 50 | DSLxTYPE = "CPT"; 51 | label = "Age"; 52 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 53 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 54 | DSLxPOSITIONxWIDTH = "70"; 55 | diagnosistype = "AUXILIARY"; 56 | } 57 | node Assets 58 | { 59 | states = ("High" "Medium" "Low" ); 60 | position = (154 -305); 61 | excludepolicy = "include whole CPT"; 62 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 63 | ismapvariable = "false"; 64 | ID = "Worth"; 65 | DSLxPOSITIONxHEIGHT = "30"; 66 | label = "Assets"; 67 | DSLxTYPE = "CPT"; 68 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 69 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 70 | 
DSLxPOSITIONxWIDTH = "70"; 71 | diagnosistype = "AUXILIARY"; 72 | } 73 | node DebtIncomeRatio 74 | { 75 | states = ("Low" "High" ); 76 | position = (-19 -297); 77 | excludepolicy = "include whole CPT"; 78 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 79 | ismapvariable = "false"; 80 | ID = "RatioDebInc"; 81 | DSLxPOSITIONxHEIGHT = "30"; 82 | DSLxTYPE = "CPT"; 83 | label = "Ratio of Debts To Income"; 84 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 85 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 86 | DSLxPOSITIONxWIDTH = "81"; 87 | diagnosistype = "AUXILIARY"; 88 | } 89 | node Reliability 90 | { 91 | states = ("Reliable" "Unreliable" ); 92 | position = (345 -306); 93 | excludepolicy = "include whole CPT"; 94 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 95 | ismapvariable = "false"; 96 | ID = "Reliability"; 97 | DSLxPOSITIONxHEIGHT = "30"; 98 | DSLxTYPE = "CPT"; 99 | label = "Reliability"; 100 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 101 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 102 | DSLxPOSITIONxWIDTH = "70"; 103 | diagnosistype = "AUXILIARY"; 104 | } 105 | node CreditWorthiness 106 | { 107 | states = ("Positive" "Negative" ); 108 | position = (158 -390); 109 | excludepolicy = "include whole CPT"; 110 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 111 | ismapvariable = "false"; 112 | ID = "CreditWorthiness"; 113 | DSLxPOSITIONxHEIGHT = "30"; 114 | label = "Credit Worthiness"; 115 | DSLxTYPE = "CPT"; 116 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 117 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 118 | DSLxPOSITIONxWIDTH = "100"; 119 | diagnosistype = "AUXILIARY"; 120 | } 121 | node FutureIncome 122 | { 123 | states = ("Promising" "Not_promising" ); 124 | position = (-20 -386); 125 | excludepolicy = "include whole CPT"; 126 | DSLxEXTRA_DEFINITIONxRANKED = "false"; 127 | ismapvariable = "false"; 128 | ID = "FutureIncome"; 129 | DSLxPOSITIONxHEIGHT = "30"; 130 | label = "Future Income"; 131 | DSLxTYPE = "CPT"; 132 | DSLxEXTRA_DEFINITIONxMANDATORY = "false"; 133 | DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY"; 134 | DSLxPOSITIONxWIDTH = "90"; 135 | diagnosistype = "AUXILIARY"; 136 | } 137 | potential ( Income | ) 138 | { 139 | data = ( 0.3333333333333333 0.3333333333333333 0.3333333333333333 ); 140 | } 141 | potential ( PaymentHistory | ) 142 | { 143 | data = ( 0.3333333333333333 0.3333333333333333 0.3333333333333333 ); 144 | } 145 | potential ( Age | ) 146 | { 147 | data = ( 0.3333333333333333 0.3333333333333333 0.3333333333333333 ); 148 | } 149 | potential ( Assets | ) 150 | { 151 | data = ( 0.3333333333333333 0.3333333333333333 0.3333333333333333 ); 152 | } 153 | potential ( DebtIncomeRatio | ) 154 | { 155 | data = ( 0.5 0.5 ); 156 | } 157 | potential ( Reliability | ) 158 | { 159 | data = ( 0.5 0.5 ); 160 | } 161 | potential ( CreditWorthiness | ) 162 | { 163 | data = ( 0.5 0.5 ); 164 | } 165 | potential ( FutureIncome | ) 166 | { 167 | data = ( 0.5 0.5 ); 168 | } 169 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/README.md: -------------------------------------------------------------------------------- 1 | 1. Copy Credit_net.net file from the ZIP file you downloaded from Coursera here. 2 | 2. Also copy `submit_input.mat` from there into a new directory called `./data`. 3 | this file is required for `submit.py` to work. 4 | 5 | In general, for all the remaining assignments, copy all the `*.mat` files from 6 | the source ZIP folder to `./data` folder for the appropriate assignment. 7 | 8 | 3. 
You can run the `factor_tutorial.ipynb` to understand the factor implementations 9 | (as opposed to `FactorTutorial.m`, which you would have read in the Octave/Matlab version 10 | of the assignment. 11 | 12 | 4. You can check your implementation in `check.ipynb` notebook. 13 | 14 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/check.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from scipy.io import loadmat\n", 20 | "\n", 21 | "import sol\n", 22 | "import helper" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 5, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "A = sol.Factor([0], [2])\n", 32 | "A[0] = .11\n", 33 | "A[1] = .89\n", 34 | "\n", 35 | "B = sol.Factor([1, 0], [2, 2])\n", 36 | "B[0, 0] = .59\n", 37 | "B[1, 0] = .41\n", 38 | "B[0, 1] = .22\n", 39 | "B[1, 1] = .78\n", 40 | "\n", 41 | "C = sol.Factor([2, 1], [2, 2])\n", 42 | "C[0, 0] = .39\n", 43 | "C[1, 0] = .61\n", 44 | "C[0, 1] = .06\n", 45 | "C[1, 1] = .94" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "def print_factor(F):\n", 55 | " for k, v in F.val.items():\n", 56 | " print(k, round(v, 4))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# Factor Product\n", 64 | "\n", 65 | "`print_factor(sol.factor_product(A, B))` should give:\n", 66 | "\n", 67 | "```\n", 68 | "(0, 0) 0.0649\n", 69 | "(0, 1) 0.0451\n", 70 | "(1, 0) 0.1958\n", 71 | "(1, 1) 0.6942\n", 72 | "```" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 7, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "(0, 0) 0.0649\n", 85 | "(0, 1) 0.0451\n", 86 | "(1, 0) 0.1958\n", 87 | "(1, 1) 0.6942\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "print_factor(sol.factor_product(A, B))" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Factor Marginalization\n", 100 | "\n", 101 | "`print_factor(sol.factor_marginalization(B, [1]))` should print:\n", 102 | "\n", 103 | "```\n", 104 | "(0,) 1.0\n", 105 | "(1,) 1.0\n", 106 | "```" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 8, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "(0,) 1.0\n", 119 | "(1,) 1.0\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "print_factor(sol.factor_marginalization(B, [1]))" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Observe Evidence\n", 132 | "\n", 133 | "```\n", 134 | "obs == [{(0,): 0.11, (1,): 0.89},\n", 135 | " {(0, 0): 0.59, (0, 1): 0.22, (1, 0): 0, (1, 1): 0},\n", 136 | " {(0, 0): 0, (0, 1): 0, (1, 0): 0.61, (1, 1): 0}]\n", 137 | "```" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "# Make copy of A, B, C\n", 147 | "Fs = []\n", 148 | "for F in [A, B, C]:\n", 149 | " new = sol.Factor(F.vars, 
F.card)\n", 150 | " for k, v in F.val.items():\n", 151 | " new[k] = v\n", 152 | " Fs.append(new)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "obs = sol.observe_evidence(Fs, {1: 0, 2: 1})" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 11, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "[{(0,): 0.11, (1,): 0.89},\n", 173 | " {(0, 0): 0.59, (0, 1): 0.22, (1, 0): 0, (1, 1): 0},\n", 174 | " {(0, 0): 0, (0, 1): 0, (1, 0): 0.61, (1, 1): 0}]" 175 | ] 176 | }, 177 | "execution_count": 11, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "obs" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "## Compute Joint Distribution\n", 191 | "\n", 192 | "Should print: \n", 193 | "\n", 194 | "```\n", 195 | "[0, 1, 2]\n", 196 | "\n", 197 | "(0, 0, 0) 0.0253\n", 198 | "(0, 0, 1) 0.0396\n", 199 | "(0, 1, 0) 0.0027\n", 200 | "(0, 1, 1) 0.0424\n", 201 | "(1, 0, 0) 0.0764\n", 202 | "(1, 0, 1) 0.1194\n", 203 | "(1, 1, 0) 0.0417\n", 204 | "(1, 1, 1) 0.6525\n", 205 | "```" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 12, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "F = sol.compute_joint_distribution([A, B, C])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 13, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "[0, 1, 2]\n", 227 | "\n", 228 | "(0, 0, 0) 0.0253\n", 229 | "(0, 0, 1) 0.0396\n", 230 | "(0, 1, 0) 0.0027\n", 231 | "(0, 1, 1) 0.0424\n", 232 | "(1, 0, 0) 0.0764\n", 233 | "(1, 0, 1) 0.1194\n", 234 | "(1, 1, 0) 0.0417\n", 235 | "(1, 1, 1) 0.6525\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "print(F.vars)\n", 241 | "print()\n", 242 | "print_factor(F)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## Compute Marginal\n", 250 | "\n", 251 | "`print_factor(F)` should print:\n", 252 | "\n", 253 | "```\n", 254 | "(0, 0) 0.0858\n", 255 | "(0, 1) 0.1342\n", 256 | "(1, 0) 0.0468\n", 257 | "(1, 1) 0.7332\n", 258 | "```\n" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 14, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# Make copy of A, B, C\n", 268 | "Fs = []\n", 269 | "for F in [A, B, C]:\n", 270 | " new = sol.Factor(F.vars, F.card)\n", 271 | " for k, v in F.val.items():\n", 272 | " new[k] = v\n", 273 | " Fs.append(new)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 15, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "(0, 0) 0.0858\n", 286 | "(0, 1) 0.1342\n", 287 | "(1, 0) 0.0468\n", 288 | "(1, 1) 0.7332\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "F = sol.compute_marginal({1, 2}, Fs, {0: 1})\n", 294 | "\n", 295 | "print_factor(F)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": 
".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.9.7" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 4 327 | } 328 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/factor_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 26, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import itertools\n", 10 | "import pprint" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "In this an subsequent assignments we will be working with factors.\n", 18 | "\n", 19 | "In Python assignments, factors are instances of class `Factor`. This tutorial goes through the \n", 20 | "basic implementation of it." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 12, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "class Factor:\n", 30 | " def __init__(self, var, domains, init=None):\n", 31 | " self.domains = [range(d) for d in domains]\n", 32 | " self.var = var\n", 33 | " self.val = {}\n", 34 | " \n", 35 | " for assignment in itertools.product(*self.domains):\n", 36 | " self.val[assignment] = init\n", 37 | " \n", 38 | " def __getitem__(self, assignment):\n", 39 | " if isinstance(assignment, dict):\n", 40 | " assignment = tuple(assignment[v] for v in self.var)\n", 41 | " return self.val[assignment]\n", 42 | " \n", 43 | " def __setitem__(self, assignment, value):\n", 44 | " if isinstance(assignment, dict):\n", 45 | " assignment = tuple(assignment[v] for v in self.var)\n", 46 | " self.val[assignment] = value\n", 47 | " \n", 48 | " def __repr__(self):\n", 49 | " return repr(self.val)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Creating a factor\n", 57 | "\n", 58 | "`phi = Factor([2, 0, 1], [2, 2, 2], init=1.0)`\n", 59 | "\n", 60 | "creates a factor over variables X_2, X_0, X_1, which are all binary\n", 61 | "valued, because `phi.domains[i]` (the cardinality of X_i, |Val(X_i)|) is 2.\n", 62 | "\n", 63 | "phi(X_2, X_0, X_1) = 1 for any assignment to the variables." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 28, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "phi = Factor([2, 0, 1], [2, 2, 2], init=1.0)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 29, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "{(0, 0, 0): 1.0, (0, 0, 1): 1.0, (0, 1, 0): 1.0, (0, 1, 1): 1.0, (1, 0, 0): 1.0, (1, 0, 1): 1.0, (1, 1, 0): 1.0, (1, 1, 1): 1.0}" 84 | ] 85 | }, 86 | "execution_count": 29, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "phi" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Getting and setting values of factor\n", 100 | "\n", 101 | "There are two ways to get the value of factor. Indexing by\n", 102 | "1. a tuple\n", 103 | "2. a dictionary\n", 104 | "\n", 105 | "When index is a tuple, the value ordering must be same as provided in `var` argument while constructing the factor. 
Indexing with a dictionary lets you stop worrying about the order.\n", 106 | "\n", 107 | "Say you want to get the value of the assignment $X_0 = 1, X_1 = 0, X_2 = 1$; the following are the ways.\n", 108 | "\n", 109 | "- `phi.var = [2, 0, 1]`, so the tuple index must be $(X_2, X_0, X_1)$" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 30, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "1.0" 121 | ] 122 | }, 123 | "execution_count": 30, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "phi[1, 1, 0]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 31, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "1.0" 141 | ] 142 | }, 143 | "execution_count": 31, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "# alternatively, you can index with a dict\n", 150 | "phi[{0: 1, \n", 151 | "     1: 0, \n", 152 | "     2: 1}]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "Setting values is similar." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 32, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "phi[{0: 1, \n", 169 | "     1: 0, \n", 170 | "     2: 1}] = 6.0" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 35, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "assert phi[1, 1, 0] == 6." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 36, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "{(): None}" 191 | ] 192 | }, 193 | "execution_count": 36, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "Factor([], [])" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 3", 213 | "language": "python", 214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.9.7" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 4 231 | } 232 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import sol 4 | 5 | def from_matlab(factor_dict, start_from_zero=True): 6 |     """This method is used for course assignments. 7 | 8 |     A factor saved in .mat format can be loaded with `scipy.io.loadmat`, and the 9 |     resulting dict can be passed to this function to create a Factor object. 
10 | """ 11 | var = factor_dict['var'] 12 | if start_from_zero: 13 | var = var - 1 14 | 15 | card = factor_dict['card'] 16 | if not isinstance(var, np.ndarray): 17 | var = [int(var)] 18 | card = [int(card)] 19 | else: 20 | var = var.astype(int).tolist() 21 | card = card.astype(int).tolist() 22 | 23 | f = sol.Factor(var, card) 24 | for i, val in enumerate(factor_dict['val']): 25 | assignment = np.unravel_index(i, card, order='F') 26 | f[assignment] = val 27 | 28 | return f 29 | 30 | 31 | def from_mat_struct(struct, start_from_zero=True): 32 | var = struct.var 33 | if start_from_zero: 34 | var = var - 1 35 | 36 | card = struct.card 37 | if not isinstance(var, np.ndarray): 38 | var = [var] 39 | card = [card] 40 | else: 41 | var = var.tolist() 42 | card = card.tolist() 43 | 44 | f = sol.Factor(var, card) 45 | for i, val in enumerate(struct.val): 46 | assignment = np.unravel_index(i, card, order='F') 47 | f[assignment] = val 48 | 49 | return f 50 | 51 | 52 | -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/sol.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | class Factor: 4 | def __init__(self, var, card, init=0.0): 5 | self.card = card 6 | self.domains = [range(d) for d in card] 7 | self.vars = var 8 | self.val = {} 9 | 10 | for assignment in itertools.product(*self.domains): 11 | self[assignment] = init 12 | 13 | def __getitem__(self, assignment): 14 | if isinstance(assignment, (int, float)): 15 | assignment = (assignment, ) 16 | elif isinstance(assignment, dict): 17 | assignment = tuple(assignment[v] for v in self.vars) 18 | return self.val[assignment] 19 | 20 | def __setitem__(self, assignment, value): 21 | if isinstance(assignment, (int, float)): 22 | assignment = (assignment, ) 23 | elif isinstance(assignment, dict): 24 | assignment = tuple(assignment[v] for v in self.vars) 25 | self.val[assignment] = value 26 | 27 | def __repr__(self): 28 | return repr(self.val) 29 | 30 | 31 | def factor_product(A: Factor, B: Factor): 32 | """Return a product of factor 33 | 34 | DO NOT DELETE ANYTHING BEFORE `# Solution Start` 35 | """ 36 | 37 | if not A.vars: 38 | return B 39 | if not B.vars: 40 | return A 41 | 42 | C = Factor([], []) 43 | 44 | # Solution Start 45 | 46 | 47 | # Solution End 48 | 49 | return C 50 | 51 | 52 | def factor_marginalization(A, V): 53 | """ 54 | Return a new factor B with variables V marginalized out of A. 55 | 56 | Hint: You can index a factor with a dict. 57 | This dict can contain variables that are not part of factor itself. 58 | they will be ignored. 59 | """ 60 | 61 | if not A.vars or not V: 62 | return A 63 | 64 | B_vars = sorted(set(A.vars) - set(V)) 65 | 66 | A_card = dict(zip(A.vars, A.domains)) 67 | B_card = [len(A_card[k]) for k in B_vars] 68 | 69 | B = Factor(B_vars, B_card, init=0.0) 70 | 71 | # Solution Start 72 | 73 | 74 | # Solution End 75 | 76 | return B 77 | 78 | 79 | def observe_evidence(Fs, E): 80 | """ 81 | For each factor F in Fs 82 | Overwrite entries in F that are not consistent with E to 0. 83 | 84 | E is a dictionary with keys = variables and values = observed values. 85 | """ 86 | 87 | # Solution Start 88 | 89 | 90 | # Solution End 91 | 92 | return Fs 93 | 94 | 95 | def compute_joint_distribution(Fs): 96 | """ 97 | Compute the joint distribution. 
98 | """ 99 | 100 | F = Factor([], []) 101 | 102 | # Solution Start 103 | 104 | 105 | # Solution End 106 | 107 | return F 108 | 109 | 110 | 111 | def compute_marginal(V, Fs, E): 112 | """ 113 | computes the marginal over variables V in the distribution induced by the set of factors Fs, given evidence E 114 | """ 115 | 116 | marginal = Factor([], []) 117 | 118 | # Solution Start 119 | 120 | 121 | # Solution End 122 | 123 | return marginal -------------------------------------------------------------------------------- /Simple-BN-Knowledge-Engineering/submit.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | 7 | import helper 8 | 9 | sys.path.insert(0, '..') 10 | 11 | import commons 12 | from commons.factor import Factor 13 | import sol 14 | 15 | 16 | class Grader(commons.SubmissionBase): 17 | def __init__(self): 18 | part_names = [None, 19 | 'COq9M','1AayO','bsUeZ','Y0uy4', 20 | '5nfbi','VNVAv','c8RCS','rFh8a', 21 | 'ThtGy','lM96X','gde9N'] 22 | super().__init__('Simple BN Knowledge Engineering', 'jk3STQNfEeadkApJXdJa6Q', part_names) 23 | 24 | def __iter__(self): 25 | d = loadmat('./data/submit_input.mat', simplify_cells=True) 26 | 27 | for part_id in range(1, len(self.part_names)): 28 | try: 29 | if part_id == 1: 30 | net = convert_network('./Credit_net.net') 31 | res = serialize_factors(net) 32 | elif part_id == 2: 33 | A, B = (helper.from_matlab(f) for f in d['PART2']['SAMPLEINPUT']) 34 | C = sol.factor_product(A, B) 35 | res = serialize_factors([C]) 36 | elif part_id == 3: 37 | A, B = (helper.from_matlab(f) for f in d['PART2']['INPUT1']) 38 | C, D = (helper.from_matlab(f) for f in d['PART2']['INPUT2']) 39 | F = [sol.factor_product(A, B), sol.factor_product(C, D)] 40 | res = serialize_factors(F) 41 | elif part_id == 4: 42 | A = helper.from_matlab(d['PART3']['SAMPLEINPUT'][0]) 43 | V = set([1]) 44 | C = sol.factor_marginalization(A, V) 45 | res = serialize_factors([C]) 46 | elif part_id == 5: 47 | A, V1 = helper.from_matlab(d['PART3']['INPUT1'][0]), [d['PART3']['INPUT1'][1]-1] 48 | B, V2 = helper.from_matlab(d['PART3']['INPUT2'][0]), [d['PART3']['INPUT2'][1]-1] 49 | F = [sol.factor_marginalization(A, V1), sol.factor_marginalization(B, V2)] 50 | res = serialize_factors(F) 51 | elif part_id == 6: 52 | Fs = d['PART4']['SAMPLEINPUT'][0] 53 | Fs = [helper.from_mat_struct(s) for s in Fs] 54 | E = dict(d['PART4']['SAMPLEINPUT'][1] - 1 ) 55 | O = sol.observe_evidence(Fs, E) 56 | res = serialize_factors(O) 57 | elif part_id == 7: 58 | Fs = d['PART4']['INPUT1'][0] 59 | Fs = [helper.from_mat_struct(s) for s in Fs] 60 | E = dict(d['PART4']['INPUT1'][1] - 1 ) 61 | O = sol.observe_evidence(Fs, E) 62 | res = serialize_factors(O) 63 | elif part_id == 8: 64 | Fs = [helper.from_matlab(f) for f in d['PART5']['SAMPLEINPUT']] 65 | J = sol.compute_joint_distribution(Fs) 66 | res = serialize_factors([J]) 67 | elif part_id == 9: 68 | Fs = [helper.from_matlab(f) for f in d['PART5']['INPUT1']] 69 | J = sol.compute_joint_distribution(Fs) 70 | res = serialize_factors([J]) 71 | elif part_id == 10: 72 | V = set(d['PART6']['SAMPLEINPUT'][0]-1) 73 | Fs = [helper.from_mat_struct(f) for f in d['PART6']['SAMPLEINPUT'][1]] 74 | E = dict(d['PART6']['SAMPLEINPUT'][2][None]-1) 75 | F = sol.compute_marginal(V, Fs, E) 76 | res = serialize_factors([F]) 77 | elif part_id == 11: 78 | res = [] 79 | for i in ['INPUT1', 'INPUT2', 'INPUT3', 'INPUT4']: 80 | V = d['PART6'][i][0]-1 81 | if isinstance(V, 
int): 82 |                             V = [V] 83 |                         V = set(V) 84 |                         Fs = [helper.from_mat_struct(f) for f in d['PART6'][i][1]] 85 |                         E = d['PART6'][i][2]-1 86 |                         if E.ndim == 2: 87 |                             E = dict(E) 88 |                         else: 89 |                             E = {} 90 |                         F = sol.compute_marginal(V, Fs, E) 91 |                         res.append(F) 92 |                     res = serialize_factors(res) 93 |                 else: 94 |                     raise KeyError 95 | 96 |                 yield self.part_names[part_id], res 97 |             except KeyError: 98 |                 yield self.part_names[part_id], 0 99 | 100 | 101 | def serialize_factors(factors, skip=1) -> str: 102 |     lines = ["%d\n" % len(factors)] 103 | 104 |     for f in factors: 105 |         var = [v+1 for v in f.vars] 106 |         lines.append("%d" % (len(var), )) 107 |         lines.append(" ".join(map(str, var))) 108 |         lines.append(" ".join(str(len(d)) for d in f.domains)) 109 |         placeholder_idx = len(lines) 110 |         lines.append(None)  # will be replaced by the number of value lines once we know it 111 | 112 |         # libDAI expects first variable to change fastest 113 |         # but itertools.product changes the last element fastest 114 |         # hence the reversed list 115 |         domains = reversed(f.domains) 116 |         num_lines = 0 117 |         new_lines = [] 118 |         for i, assignment in enumerate(itertools.product(*domains)): 119 |             num_lines += 1 120 |             val = f[tuple(reversed(assignment))] 121 |             new_lines.append("%d %0.8g" % (i, val, )) 122 |         new_lines = new_lines[::skip] 123 |         lines[placeholder_idx] = "%d" % (num_lines, ) 124 |         lines.extend(new_lines) 125 |         lines.append("") 126 | 127 |     return "\n".join(lines) 128 | 129 | 130 | nodes = ['DebtIncomeRatio', 'Assets', 'CreditWorthiness', 'Income', 'PaymentHistory', 'FutureIncome', 'Reliability', 'Age'] 131 | var_idx = {k: i for i, k in enumerate(nodes)} 132 | 133 | def convert_network(fname='./Credit_net.net'): 134 |     var_states = {} 135 |     factors = [] 136 |     tbl = str.maketrans("", "", "();") 137 | 138 |     with open(fname) as fin: 139 |         for line in fin: 140 |             line = line.strip() 141 |             if not line: 142 |                 continue 143 | 144 |             if 'node ' in line: 145 |                 node = line.split()[1] 146 |                 while True: 147 |                     line = fin.readline().strip() 148 |                     if 'states' in line: 149 |                         states = line.split(' = ')[1] 150 |                         lidx = states.index('(') 151 |                         ridx = states.index(')') 152 |                         states = [s[1:-1] for s in states[lidx+1:ridx].split()] 153 |                     elif line == "}": 154 |                         break 155 |                 var_states[node] = states 156 |             elif 'potential ' in line: 157 |                 lidx = line.index('(') 158 |                 ridx = line.index(')') 159 |                 lst = line[lidx+1:ridx].split() 160 |                 bar_idx = lst.index("|") 161 |                 var_names = lst[:bar_idx] 162 |                 par_names = lst[bar_idx+1:] 163 | 164 |                 while True: 165 |                     line = fin.readline().strip() 166 |                     if 'data' in line: 167 |                         lidx = line.index('(') 168 |                         s = line[lidx:] 169 |                         while ';' not in line: 170 |                             line = fin.readline() 171 |                             s += line 172 |                         s = s.translate(tbl).split() 173 |                         s = list(map(float, s)) 174 |                     elif line == '}': 175 |                         break 176 | 177 |                 var = list(reversed(par_names + var_names)) 178 |                 card = [len(var_states[v]) for v in var] 179 |                 F = Factor(var, card) 180 |                 rev_domains = reversed(F.domains) 181 |                 for assn, v in zip(itertools.product(*rev_domains), s): 182 |                     F[tuple(reversed(assn))] = v 183 |                 factors.append(F) 184 | 185 |     for factor in factors: 186 |         try: 187 |             factor.vars = [var_idx[v] for v in factor.vars] 188 |         except KeyError: 189 |             print("Unwanted variable found. Did you change variable names? 
") 190 | raise 191 | 192 | return factors 193 | 194 | 195 | if __name__ == '__main__': 196 | grader = Grader() 197 | grader.grade() -------------------------------------------------------------------------------- /clap.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DhruvPatel01/coursera_pgm_python/ff2921fde29a8b0f1cc336f1263b625940bc95d1/clap.gif -------------------------------------------------------------------------------- /commons/__init__.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import json 3 | from collections import OrderedDict 4 | import numpy as np 5 | import os 6 | 7 | import requests 8 | from scipy.sparse import coo_matrix 9 | 10 | 11 | class SubmissionBase: 12 | """ 13 | Author: Gerges Dib (https://github.com/dibgerge) 14 | This is adapted from Gerges Dib's repository for ML assignments to work with PGM assignments. 15 | """ 16 | submit_url = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissionsController.v1' 17 | save_file = 'token.pkl' 18 | 19 | def __init__(self, assignment_name, assignment_key, part_names): 20 | self.assignment_name = assignment_name 21 | self.assignment_key = assignment_key 22 | self.part_names = part_names 23 | self.login = None 24 | self.token = None 25 | self.functions = OrderedDict() 26 | self.args = dict() 27 | 28 | def grade(self): 29 | print('\nSubmitting Solutions | Programming Exercise %s\n' % self.assignment_name) 30 | self.login_prompt() 31 | 32 | # Evaluate the different parts of exercise 33 | parts = OrderedDict() 34 | for part_id, result in self: 35 | if not isinstance(result, str): 36 | output = sprintf('%0.5f ', result) 37 | else: 38 | output = result.strip() 39 | #print(part_id, output) 40 | parts[str(part_id)] = {'output': output} 41 | ret = self.request(parts) 42 | if 'errorMessage' in ret: 43 | print(ret['errorMessage']) 44 | else: 45 | print(ret) 46 | print("Submitted successfully, view results on assignment page.") 47 | 48 | def login_prompt(self): 49 | if os.path.isfile(self.save_file): 50 | with open(self.save_file, 'rb') as f: 51 | login, token = pickle.load(f) 52 | reenter = input('Use token from last successful submission (%s)? 
(Y/n/N): ' 53 |                             '\n Y: use both previous email and token' 54 |                             '\n n: use only email' 55 |                             '\n N: reenter email and token: ' % (login, )) 56 | 57 |             self.login = None 58 |             if reenter == '' or reenter[0] == 'Y' or reenter[0] == 'y': 59 |                 self.login, self.token = login, token 60 |                 return 61 |             elif reenter == 'n': 62 |                 self.login = login 63 |                 os.remove(self.save_file) 64 |             else: 65 |                 os.remove(self.save_file) 66 | 67 |         if not self.login: 68 |             self.login = input('Login (email address): ') 69 |         self.token = input('Token: ') 70 | 71 |         # Save the entered credentials 72 |         if not os.path.isfile(self.save_file): 73 |             with open(self.save_file, 'wb') as f: 74 |                 pickle.dump((self.login, self.token), f) 75 | 76 |     def request(self, parts): 77 |         params = { 78 |             'assignmentKey': self.assignment_key, 79 |             'secret': self.token, 80 |             'parts': parts, 81 |             'submitterEmail': self.login} 82 |         with open('/tmp/python_post', 'w') as f: 83 |             f.write(json.dumps(params)) 84 |         req = requests.post(self.submit_url, data={'jsonBody': json.dumps(params)}) 85 |         return req.json() 86 | 87 |     def __iter__(self): 88 |         for part_id in self.functions: 89 |             yield part_id 90 | 91 |     def __setitem__(self, key, value): 92 |         self.functions[key] = value 93 | 94 | 95 | def sprintf(fmt, arg): 96 |     """ Emulates (part of) Octave sprintf function. """ 97 |     if isinstance(arg, tuple): 98 |         # for multiple return values, only use the first one 99 |         arg = arg[0] 100 | 101 |     if isinstance(arg, (np.ndarray, list)): 102 |         # concatenates all elements, column by column 103 |         return ' '.join(fmt % e for e in np.asarray(arg).ravel('F')) 104 |     else: 105 |         return fmt % arg 106 | 107 | 108 | def adj_matrix_to_adj_list(matrix): 109 |     edges = {} 110 |     for i in range(len(matrix)): 111 |         nbs = set() 112 |         for j in range(len(matrix)): 113 |             if matrix[i, j] == 1: 114 |                 nbs.add(j) 115 |         edges[i] = nbs 116 |     return edges 117 | 118 | 119 | def adj_list_to_csgraph(adjlist): 120 |     # Build the graph in COO form (data, (rows, cols)), then convert to CSR. 121 |     rows, cols = [], [] 122 |     for u in adjlist: 123 |         for v in adjlist[u]: 124 |             rows.append(u); cols.append(v) 125 |     n = len(adjlist) 126 |     return coo_matrix((np.ones(len(rows)), (rows, cols)), shape=(n, n)).tocsr() -------------------------------------------------------------------------------- /commons/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import warnings 3 | 4 | 5 | def index_2_assignment(I, C, zero_based_index=True): 6 |     """Assignments are zero based. 7 |     e.g. Binary assignment is either 0 or 1 8 |     """ 9 |     warnings.warn("Use np.unravel_index instead.", DeprecationWarning) 10 |     if isinstance(C, np.ndarray): 11 |         C = C.tolist() 12 | 13 |     if not isinstance(I, np.ndarray): 14 |         I = np.array(I) 15 | 16 |     if I.ndim == 1:  # to apply broadcasting 17 |         I = I[:, None] 18 | 19 |     if not zero_based_index: 20 |         I = I - 1  # avoid mutating the caller's array 21 | 22 |     cumprod = np.cumprod([1] + C[:-1]) 23 |     return (I // cumprod) % C 24 | --------------------------------------------------------------------------------
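A note on the index convention that `helper.from_matlab` relies on (and that `commons/utils.py` deprecates in favour of `np.unravel_index`): MATLAB lays a factor's `val` array out in column-major order, so the first variable changes fastest. The snippet below is a standalone sketch, not part of the repository; the `card` value is an arbitrary example.

import numpy as np

# Column-major (order='F') unraveling: the *first* variable changes fastest,
# which matches how MATLAB enumerates the `val` array of a saved factor.
card = [2, 3]
for i in range(int(np.prod(card))):
    print(i, np.unravel_index(i, card, order='F'))
# 0 (0, 0)
# 1 (1, 0)
# 2 (0, 1)
# 3 (1, 1)
# 4 (0, 2)
# 5 (1, 2)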
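Relatedly, `serialize_factors` in `Simple-BN-Knowledge-Engineering/submit.py` reverses the domains before calling `itertools.product`, because the serialized format wants the first variable to change fastest while `itertools.product` varies its last iterable fastest. A minimal sketch of that trick follows; the `domains` value here is an arbitrary example.

import itertools

domains = [range(2), range(3)]
# itertools.product varies the *last* iterable fastest:
fastest_last = list(itertools.product(*domains))
print(fastest_last[:4])   # [(0, 0), (0, 1), (0, 2), (1, 0)]
# Reversing the domains and then reversing each assignment makes the
# *first* position vary fastest, as the serialized format expects:
fastest_first = [tuple(reversed(a)) for a in itertools.product(*reversed(domains))]
print(fastest_first[:4])  # [(0, 0), (1, 0), (0, 1), (1, 1)]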
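Finally, a round-trip sketch of the graph helpers in `commons/__init__.py`, assuming the corrected `adj_list_to_csgraph` above. `connected_components` comes from `scipy.sparse.csgraph`, and the adjacency matrix here is an arbitrary example.

import numpy as np
from scipy.sparse.csgraph import connected_components

from commons import adj_matrix_to_adj_list, adj_list_to_csgraph

# Adjacency matrix -> adjacency list -> CSR graph usable with scipy.sparse.csgraph.
M = np.array([[0, 1, 0],
              [1, 0, 0],
              [0, 0, 0]])
adj = adj_matrix_to_adj_list(M)   # {0: {1}, 1: {0}, 2: set()}
g = adj_list_to_csgraph(adj)      # 3x3 CSR adjacency matrix
n_components, labels = connected_components(g, directed=False)
print(n_components, labels)       # 2 [0 0 1]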