\n",
1182 | "\n",
1195 | "
\n",
1196 | " \n",
1197 | " \n",
1198 | " | \n",
1199 | " - | \n",
1200 | " G | \n",
1201 | " C | \n",
1202 | " A | \n",
1203 | " T | \n",
1204 | " G | \n",
1205 | " C | \n",
1206 | " U | \n",
1207 | " A | \n",
1208 | "
\n",
1209 | " \n",
1210 | " \n",
1211 | " \n",
1212 | " - | \n",
1213 | " 0 | \n",
1214 | " -1 | \n",
1215 | " -2 | \n",
1216 | " -3 | \n",
1217 | " -4 | \n",
1218 | " -5 | \n",
1219 | " -6 | \n",
1220 | " -7 | \n",
1221 | " -8 | \n",
1222 | "
\n",
1223 | " \n",
1224 | " G | \n",
1225 | " -1 | \n",
1226 | " 1 | \n",
1227 | " 0 | \n",
1228 | " -1 | \n",
1229 | " -2 | \n",
1230 | " -3 | \n",
1231 | " -4 | \n",
1232 | " -5 | \n",
1233 | " -6 | \n",
1234 | "
\n",
1235 | " \n",
1236 | " A | \n",
1237 | " -2 | \n",
1238 | " 0 | \n",
1239 | " 0 | \n",
1240 | " 1 | \n",
1241 | " 0 | \n",
1242 | " -1 | \n",
1243 | " -2 | \n",
1244 | " -3 | \n",
1245 | " -4 | \n",
1246 | "
\n",
1247 | " \n",
1248 | " T | \n",
1249 | " -3 | \n",
1250 | " -1 | \n",
1251 | " -1 | \n",
1252 | " 0 | \n",
1253 | " 2 | \n",
1254 | " 1 | \n",
1255 | " 0 | \n",
1256 | " -1 | \n",
1257 | " -2 | \n",
1258 | "
\n",
1259 | " \n",
1260 | " T | \n",
1261 | " -4 | \n",
1262 | " -2 | \n",
1263 | " -2 | \n",
1264 | " -1 | \n",
1265 | " 1 | \n",
1266 | " 1 | \n",
1267 | " 0 | \n",
1268 | " -1 | \n",
1269 | " -2 | \n",
1270 | "
\n",
1271 | " \n",
1272 | " A | \n",
1273 | " -5 | \n",
1274 | " -3 | \n",
1275 | " -3 | \n",
1276 | " -1 | \n",
1277 | " 0 | \n",
1278 | " 0 | \n",
1279 | " 0 | \n",
1280 | " -1 | \n",
1281 | " 0 | \n",
1282 | "
\n",
1283 | " \n",
1284 | " C | \n",
1285 | " -6 | \n",
1286 | " -4 | \n",
1287 | " -2 | \n",
1288 | " -2 | \n",
1289 | " -1 | \n",
1290 | " -1 | \n",
1291 | " 1 | \n",
1292 | " 0 | \n",
1293 | " -1 | \n",
1294 | "
\n",
1295 | " \n",
1296 | " A | \n",
1297 | " -7 | \n",
1298 | " -5 | \n",
1299 | " -3 | \n",
1300 | " -1 | \n",
1301 | " -2 | \n",
1302 | " -2 | \n",
1303 | " 0 | \n",
1304 | " 0 | \n",
1305 | " 1 | \n",
1306 | "
\n",
1307 | " \n",
1308 | "
\n",
1309 | "
"
1310 | ],
1311 | "text/plain": [
1312 | " - G C A T G C U A\n",
1313 | "- 0 -1 -2 -3 -4 -5 -6 -7 -8\n",
1314 | "G -1 1 0 -1 -2 -3 -4 -5 -6\n",
1315 | "A -2 0 0 1 0 -1 -2 -3 -4\n",
1316 | "T -3 -1 -1 0 2 1 0 -1 -2\n",
1317 | "T -4 -2 -2 -1 1 1 0 -1 -2\n",
1318 | "A -5 -3 -3 -1 0 0 0 -1 0\n",
1319 | "C -6 -4 -2 -2 -1 -1 1 0 -1\n",
1320 | "A -7 -5 -3 -1 -2 -2 0 0 1"
1321 | ]
1322 | },
1323 | "execution_count": 28,
1324 | "metadata": {},
1325 | "output_type": "execute_result"
1326 | }
1327 | ],
1328 | "source": [
1329 | "# Example values\n",
1330 | "a = list('GCATGCUA')\n",
1331 | "b = list('GATTACA')\n",
1332 | "\n",
1333 | "# Fill the DP score table (rows follow b, columns follow a)\n",
1334 | "dp_table = calc_seq_align(a, b)\n",
1335 | "pd.DataFrame(dp_table, index=['-'] + b, columns=['-'] + a)"
1336 | ]
1337 | },
1338 | {
1339 | "cell_type": "markdown",
1340 | "metadata": {},
1341 | "source": [
1342 | "Filling the table has a time complexity of $ \\Theta(nm) $ and a space complexity of $ \\Theta((n + 1)(m + 1)) $, where $ n $ and $ m $ are the lengths of the two sequences."
1343 | ]
1344 | },
1345 | {
1346 | "cell_type": "markdown",
1347 | "metadata": {},
1348 | "source": [
1349 | "#### Calculate the Sequence Alignment result\n",
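1350 | "Greedy traceback: starting from the bottom-right cell of the DP table, repeatedly move to the first predecessor (diagonal, then up, then left) whose score explains the current cell."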
1351 | ]
1352 | },
1353 | {
1354 | "cell_type": "code",
1355 | "execution_count": 29,
1356 | "metadata": {},
1357 | "outputs": [],
1358 | "source": [
1359 | "def get_seq_align(matrix, a, b, verbose=False):\n",
1360 | "    \"\"\"Trace back through the DP table `matrix` and return `(alignmentA, alignmentB)`.\n",
1361 | "\n",
1362 | "    Rows of `matrix` follow `b` and columns follow `a`. Starting at the bottom-right cell,\n",
1363 | "    the first predecessor (diagonal, up, left) whose value plus the score `s` of that move\n",
1364 | "    equals the current cell is taken; '-' marks a gap. `verbose` prints each visited (i, j).\n",
1365 | "    \"\"\"\n",
1366 | "    rev_a, rev_b = [], []  # built back-to-front; prepending to a str copies it every step\n",
1367 | "    i, j = len(b), len(a)\n",
1368 | "    while True:\n",
1369 | "        if verbose:\n",
1370 | "            print(i, j)\n",
1371 | "        if i > 0 and j > 0 and matrix[i][j] == matrix[i - 1][j - 1] + s(a[j - 1], b[i - 1]):\n",
1372 | "            rev_a.append(a[j - 1])  # diagonal: pair a[j - 1] with b[i - 1]\n",
1373 | "            rev_b.append(b[i - 1])\n",
1374 | "            i, j = i - 1, j - 1\n",
1375 | "        elif i > 0 and matrix[i][j] == matrix[i - 1][j] + s('-', b[i - 1]):\n",
1376 | "            rev_a.append(\"-\")  # up: gap in a\n",
1377 | "            rev_b.append(b[i - 1])\n",
1378 | "            i -= 1\n",
1379 | "        elif j > 0 and matrix[i][j] == matrix[i][j - 1] + s(a[j - 1], '-'):\n",
1380 | "            rev_a.append(a[j - 1])  # left: gap in b\n",
1381 | "            rev_b.append(\"-\")\n",
1382 | "            j -= 1\n",
1383 | "        else:\n",
1384 | "            break  # reached (0, 0), or no predecessor explains this cell\n",
1385 | "    return (\"\".join(reversed(rev_a)), \"\".join(reversed(rev_b)))"
1386 | ]
1387 | },
1388 | {
1389 | "cell_type": "code",
1390 | "execution_count": 30,
1391 | "metadata": {},
1392 | "outputs": [
1393 | {
1394 | "data": {
1395 | "text/plain": [
1396 | "('GCA-TGCUA', 'G-ATTAC-A')"
1397 | ]
1398 | },
1399 | "execution_count": 30,
1400 | "metadata": {},
1401 | "output_type": "execute_result"
1402 | }
1403 | ],
1404 | "source": [
1405 | "# Trace back through the DP table to recover the aligned sequences\n",
1406 | "get_seq_align(dp_table, a, b)"
1407 | ]
1408 | },
1409 | {
1410 | "cell_type": "markdown",
1411 | "metadata": {},
1412 | "source": [
1413 | "The traceback takes at most $ n + m $ steps, giving a time complexity of $ \\Theta(n + m) $, and a space complexity of $ \\Theta((n + 1)(m + 1)) $, since it reads the full DP table."
1414 | ]
1415 | },
1416 | {
1417 | "cell_type": "markdown",
1418 | "metadata": {},
1419 | "source": [
1420 | "## 4.7. All-Pairs Shortest Path"
1421 | ]
1422 | },
1423 | {
1424 | "cell_type": "markdown",
1425 | "metadata": {},
1426 | "source": [
1427 | "The all-pairs shortest path problem is the determination of the shortest graph distances between every pair of vertices in a given graph. The problem can be solved using n applications of Dijkstra's algorithm or all at once using the Floyd-Warshall algorithm