├── README.md ├── information_retrieval.ipynb ├── output_kmeans.csv ├── pipeline.png └── siamese.png /README.md: -------------------------------------------------------------------------------- 1 | # Information Retrieval System 2 | 3 |
4 | Motivation
5 |
25 |
26 |
59 |
60 |
\n", 133 | " | id | \n", 134 | "qid1 | \n", 135 | "qid2 | \n", 136 | "question1 | \n", 137 | "question2 | \n", 138 | "is_duplicate | \n", 139 | "
---|---|---|---|---|---|---|
0 | \n", 144 | "0 | \n", 145 | "1 | \n", 146 | "2 | \n", 147 | "What is the step by step guide to invest in sh... | \n", 148 | "What is the step by step guide to invest in sh... | \n", 149 | "0 | \n", 150 | "
1 | \n", 153 | "1 | \n", 154 | "3 | \n", 155 | "4 | \n", 156 | "What is the story of Kohinoor (Koh-i-Noor) Dia... | \n", 157 | "What would happen if the Indian government sto... | \n", 158 | "0 | \n", 159 | "
2 | \n", 162 | "2 | \n", 163 | "5 | \n", 164 | "6 | \n", 165 | "How can I increase the speed of my internet co... | \n", 166 | "How can Internet speed be increased by hacking... | \n", 167 | "0 | \n", 168 | "
3 | \n", 171 | "3 | \n", 172 | "7 | \n", 173 | "8 | \n", 174 | "Why am I mentally very lonely? How can I solve... | \n", 175 | "Find the remainder when [math]23^{24}[/math] i... | \n", 176 | "0 | \n", 177 | "
4 | \n", 180 | "4 | \n", 181 | "9 | \n", 182 | "10 | \n", 183 | "Which one dissolve in water quikly sugar, salt... | \n", 184 | "Which fish would survive in salt water? | \n", 185 | "0 | \n", 186 | "
\n", 321 | " | id | \n", 322 | "qid1 | \n", 323 | "qid2 | \n", 324 | "question1 | \n", 325 | "question2 | \n", 326 | "is_duplicate | \n", 327 | "
---|---|---|---|---|---|---|
0 | \n", 332 | "0 | \n", 333 | "1 | \n", 334 | "2 | \n", 335 | "What is the step by step guide to invest in sh... | \n", 336 | "What is the step by step guide to invest in sh... | \n", 337 | "0 | \n", 338 | "
1 | \n", 341 | "1 | \n", 342 | "3 | \n", 343 | "4 | \n", 344 | "What is the story of Kohinoor (Koh-i-Noor) Dia... | \n", 345 | "What would happen if the Indian government sto... | \n", 346 | "0 | \n", 347 | "
2 | \n", 350 | "2 | \n", 351 | "5 | \n", 352 | "6 | \n", 353 | "How can I increase the speed of my internet co... | \n", 354 | "How can Internet speed be increased by hacking... | \n", 355 | "0 | \n", 356 | "
3 | \n", 359 | "3 | \n", 360 | "7 | \n", 361 | "8 | \n", 362 | "Why am I mentally very lonely? How can I solve... | \n", 363 | "Find the remainder when [math]23^{24}[/math] i... | \n", 364 | "0 | \n", 365 | "
4 | \n", 368 | "4 | \n", 369 | "9 | \n", 370 | "10 | \n", 371 | "Which one dissolve in water quikly sugar, salt... | \n", 372 | "Which fish would survive in salt water? | \n", 373 | "0 | \n", 374 | "
\n", 633 | " | id | \n", 634 | "qid1 | \n", 635 | "qid2 | \n", 636 | "question1 | \n", 637 | "question2 | \n", 638 | "is_duplicate | \n", 639 | "
---|---|---|---|---|---|---|
0 | \n", 644 | "0 | \n", 645 | "1 | \n", 646 | "2 | \n", 647 | "[step, step, guide, invest, share, market, india] | \n", 648 | "[step, step, guide, invest, share, market] | \n", 649 | "0 | \n", 650 | "
1 | \n", 653 | "1 | \n", 654 | "3 | \n", 655 | "4 | \n", 656 | "[story, kohinoor, koh, noor, diamond] | \n", 657 | "[would, happen, indian, government, stole, koh... | \n", 658 | "0 | \n", 659 | "
2 | \n", 662 | "2 | \n", 663 | "5 | \n", 664 | "6 | \n", 665 | "[increase, speed, internet, connection, using,... | \n", 666 | "[internet, speed, increased, hacking, dns] | \n", 667 | "0 | \n", 668 | "
3 | \n", 671 | "3 | \n", 672 | "7 | \n", 673 | "8 | \n", 674 | "[mentally, lonely, solve] | \n", 675 | "[find, remainder, math, 23, 24, math, divided,... | \n", 676 | "0 | \n", 677 | "
4 | \n", 680 | "4 | \n", 681 | "9 | \n", 682 | "10 | \n", 683 | "[one, dissolve, water, quikly, sugar, salt, me... | \n", 684 | "[fish, would, survive, salt, water] | \n", 685 | "0 | \n", 686 | "
\n", 864 | " | id | \n", 865 | "qid1 | \n", 866 | "qid2 | \n", 867 | "question1 | \n", 868 | "question2 | \n", 869 | "is_duplicate | \n", 870 | "
---|---|---|---|---|---|---|
0 | \n", 875 | "0 | \n", 876 | "1 | \n", 877 | "2 | \n", 878 | "[1, 1, 2, 3, 4, 5, 6] | \n", 879 | "[1, 1, 2, 3, 4, 5] | \n", 880 | "0 | \n", 881 | "
1 | \n", 884 | "1 | \n", 885 | "3 | \n", 886 | "4 | \n", 887 | "[7, 8, 9, 10, 11] | \n", 888 | "[12, 13, 14, 15, 16, 8, 9, 10, 11, 17] | \n", 889 | "0 | \n", 890 | "
2 | \n", 893 | "2 | \n", 894 | "5 | \n", 895 | "6 | \n", 896 | "[18, 19, 20, 21, 22, 23] | \n", 897 | "[20, 19, 24, 25, 26] | \n", 898 | "0 | \n", 899 | "
3 | \n", 902 | "3 | \n", 903 | "7 | \n", 904 | "8 | \n", 905 | "[27, 28, 29] | \n", 906 | "[30, 31, 32, 33, 34, 32, 35, 34, 33] | \n", 907 | "0 | \n", 908 | "
4 | \n", 911 | "4 | \n", 912 | "9 | \n", 913 | "10 | \n", 914 | "[36, 37, 38, 39, 40, 41, 42, 43, 44, 45] | \n", 915 | "[46, 12, 47, 41, 38] | \n", 916 | "0 | \n", 917 | "
\n", 2178 | " | Query | \n", 2179 | "Closest-1 | \n", 2180 | "Closest-2 | \n", 2181 | "Closest-3 | \n", 2182 | "
---|---|---|---|---|
0 | \n", 2187 | "What are the things that we can do to bring change in Indian education system? | \n", 2188 | "What are the things that we can do to bring change in Indian education system? | \n", 2189 | "What are the things that we can do to bring change in Indian education system? | \n", 2190 | "What are the things that we can do to bring change in Indian education system? | \n", 2191 | "
1 | \n", 2194 | "Is Donald Trump in league with Putin? | \n", 2195 | "Is Donald Trump in league with Putin? | \n", 2196 | "Is Donald Trump in league with Putin? | \n", 2197 | "What are some ways to contact Jesse Ventura? | \n", 2198 | "
2 | \n", 2201 | "What does Bootstrap do? | \n", 2202 | "What is Kairos like? | \n", 2203 | "What is frugal? | \n", 2204 | "What's so special about Grana? | \n", 2205 | "
3 | \n", 2208 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2209 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2210 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2211 | "Technology: Are we compromising on wisdom over information? | \n", 2212 | "
4 | \n", 2215 | "How can I increase my typing speed fast? | \n", 2216 | "How can I increase my typing speed fast? | \n", 2217 | "How can I increase my typing speed fast? | \n", 2218 | "How can I increase my typing speed fast? | \n", 2219 | "
\n", 2521 | " | Query | \n", 2522 | "Closest-1 | \n", 2523 | "Closest-2 | \n", 2524 | "Closest-3 | \n", 2525 | "
---|---|---|---|---|
0 | \n", 2530 | "What are the things that we can do to bring change in Indian education system? | \n", 2531 | "What are the things that we can do to bring change in Indian education system? | \n", 2532 | "What are the things that we can do to bring change in Indian education system? | \n", 2533 | "What are the things that we can do to bring change in Indian education system? | \n", 2534 | "
1 | \n", 2537 | "Is Donald Trump in league with Putin? | \n", 2538 | "Is Donald Trump in league with Putin? | \n", 2539 | "Is Donald Trump in league with Putin? | \n", 2540 | "What are some ways to contact Jesse Ventura? | \n", 2541 | "
2 | \n", 2544 | "What does Bootstrap do? | \n", 2545 | "What is Kairos like? | \n", 2546 | "What is frugal? | \n", 2547 | "What is a Discord chat? | \n", 2548 | "
3 | \n", 2551 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2552 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2553 | "Why did Sanskrit fail to become a suitable language for computers? | \n", 2554 | "Technology: Are we compromising on wisdom over information? | \n", 2555 | "
4 | \n", 2558 | "How can I increase my typing speed fast? | \n", 2559 | "How can I increase my typing speed fast? | \n", 2560 | "How can I increase my typing speed fast? | \n", 2561 | "How can I increase my typing speed fast? | \n", 2562 | "