├── .gitignore ├── ans ├── corpus-final09.xls ├── g4pE_taska_ans.txt ├── g0pA_taske_ans.txt ├── g0pC_taskb_ans.txt ├── g1pA_taske_ans.txt ├── g1pB_taska_ans.txt ├── g2pB_taska_ans.txt ├── g2pE_taske_ans.txt ├── g3pA_taske_ans.txt ├── g4pB_taska_ans.txt ├── g0pB_taska_ans.txt ├── g0pB_taskb_ans.txt ├── g0pD_taske_ans.txt ├── g0pE_taskc_ans.txt ├── g0pE_taskd_ans.txt ├── g1pA_taskb_ans.txt ├── g1pD_taske_ans.txt ├── g2pA_taska_ans.txt ├── g2pC_taskc_ans.txt ├── g2pC_taske_ans.txt ├── g3pA_taska_ans.txt ├── g3pA_taskb_ans.txt ├── g3pB_taskb_ans.txt ├── g3pC_taskc_ans.txt ├── g4pB_taskb_ans.txt ├── g4pD_taskc_ans.txt ├── g4pE_taskd_ans.txt ├── g4pE_taske_ans.txt ├── g0pA_taska_ans.txt ├── g1pD_taskc_ans.txt ├── g1pD_taskd_ans.txt ├── g2pA_taskb_ans.txt ├── g2pA_taske_ans.txt ├── g2pE_taskc_ans.txt ├── g2pE_taskd_ans.txt ├── g3pC_taskd_ans.txt ├── g4pC_taskb_ans.txt ├── g4pC_taskc_ans.txt ├── g0pC_taskc_ans.txt ├── g0pD_taskd_ans.txt ├── g1pA_taska_ans.txt ├── g1pB_taskb_ans.txt ├── g2pB_taskb_ans.txt ├── g2pC_taskb_ans.txt ├── g3pB_taska_ans.txt ├── g3pC_taskb_ans.txt ├── g4pD_taskb_ans.txt ├── g4pD_taskd_ans.txt ├── g0pC_taska_ans.txt ├── g0pE_taska_ans.txt ├── g2pC_taska_ans.txt ├── g2pE_taska_ans.txt ├── g3pB_taske_ans.txt ├── g4pC_taska_ans.txt ├── g0pB_taske_ans.txt ├── g0pC_taske_ans.txt ├── g0pD_taska_ans.txt ├── g1pB_taskd_ans.txt ├── g1pB_taske_ans.txt ├── g1pD_taska_ans.txt ├── g3pC_taska_ans.txt ├── g4pD_taska_ans.txt ├── g4pD_taske_ans.txt ├── g0pA_taskc_ans.txt ├── g0pB_taskd_ans.txt ├── g0pD_taskc_ans.txt ├── g0pE_taske_ans.txt ├── g1pB_taskc_ans.txt ├── g2pB_taskc_ans.txt ├── g2pB_taske_ans.txt ├── g2pC_taskd_ans.txt ├── g3pA_taskc_ans.txt ├── g3pB_taskc_ans.txt ├── g3pB_taskd_ans.txt ├── g3pC_taske_ans.txt ├── g4pB_taskc_ans.txt ├── g4pB_taske_ans.txt ├── g4pC_taske_ans.txt ├── g0pB_taskc_ans.txt ├── g1pA_taskc_ans.txt ├── g1pA_taskd_ans.txt ├── g2pA_taskc_ans.txt ├── g2pA_taskd_ans.txt ├── g4pB_taskd_ans.txt ├── g4pE_taskb_ans.txt ├── 
g4pE_taskc_ans.txt ├── g0pA_taskb_ans.txt ├── g0pA_taskd_ans.txt ├── g0pC_taskd_ans.txt ├── g0pE_taskb_ans.txt ├── g1pD_taskb_ans.txt ├── g2pB_taskd_ans.txt ├── g2pE_taskb_ans.txt ├── g3pA_taskd_ans.txt ├── g4pC_taskd_ans.txt └── g0pD_taskb_ans.txt ├── README.md ├── ssk.py ├── main.py ├── network.py └── plagiarism.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.pdf 3 | 4 | *.txt 5 | !ans/* 6 | 7 | *.pyc 8 | 9 | tester.py 10 | -------------------------------------------------------------------------------- /ans/corpus-final09.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgera/plagiarism/HEAD/ans/corpus-final09.xls -------------------------------------------------------------------------------- /ans/g4pE_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, inher, als, child, parent, us, main, bas, progr, new 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 0 plagiated blocks (01:22:56) 5 | -------------------------------------------------------------------------------- /ans/g0pA_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: solut, dyn, progr, opt, comput, probl, ma, how, val, conquer 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 0 plagiated blocks (01:22:48) 5 | -------------------------------------------------------------------------------- /ans/g0pC_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: rank, pag, link, us, word, web, googl, method, ke, probl 2 | Keywords for text 2: pag, pagerank, 
link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 0 plagiated blocks (01:22:46) 5 | -------------------------------------------------------------------------------- /ans/g1pA_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: us, solv, dyn, recalcul, als, exampl, probl, mea, order, solut 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 0 plagiated blocks (01:22:50) 5 | -------------------------------------------------------------------------------- /ans/g1pB_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, exampl, class, kind, object, wa, split, fiction, string, histor 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 0 plagiated blocks (01:22:48) 5 | -------------------------------------------------------------------------------- /ans/g2pB_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, subclass, method, tim, us, could, variabl, new, part, extend 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:51) 4 | Search ended, found 0 plagiated blocks (01:22:51) 5 | -------------------------------------------------------------------------------- /ans/g2pE_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: solut, subprobl, opt, val, solv, dyn, comput, would, first, ma 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:54) 4 | 
Search ended, found 0 plagiated blocks (01:22:54) 5 | -------------------------------------------------------------------------------- /ans/g3pA_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probl, progr, rout, calcul, comput, us, common, reduc, effic, nod 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 0 plagiated blocks (01:22:56) 5 | -------------------------------------------------------------------------------- /ans/g4pB_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: attribut, method, object, class, inher, an, how, subclass, liv, cast 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 0 plagiated blocks (01:22:56) 5 | -------------------------------------------------------------------------------- /ans/g0pB_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, circl, shap, public, ava, characterist, offic, inher, hous, subclass 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:45) 4 | Search ended, found 0 plagiated blocks (01:22:45) 5 | -------------------------------------------------------------------------------- /ans/g0pB_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pag, link, sit, webs, particular, man, googl, vis, import, us 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 0 plagiated blocks (01:22:46) 5 | 
-------------------------------------------------------------------------------- /ans/g0pD_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: subprobl, how, conquer, solut, look, exampl, solv, experienc, opt, wit 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 0 plagiated blocks (01:22:48) 5 | -------------------------------------------------------------------------------- /ans/g0pE_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, spac, document, quer, develop, scor, step, oper, first, retr 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 0 plagiated blocks (01:22:47) 5 | -------------------------------------------------------------------------------- /ans/g0pE_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: prob, theor, import, probabl, ba, mea, vast, earlier, cond, sav 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 0 plagiated blocks (01:22:47) 5 | -------------------------------------------------------------------------------- /ans/g1pA_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, pag, link, syst, rank, us, giv, accord, googl, fals 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 0 plagiated blocks (01:22:49) 5 | -------------------------------------------------------------------------------- 
/ans/g1pD_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probl, solut, us, sub, solv, opt, find, method, process, substructur 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 0 plagiated blocks (01:22:50) 5 | -------------------------------------------------------------------------------- /ans/g2pA_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: train, transport, car, class, inher, method, extend, could, exampl, add 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 0 plagiated blocks (01:22:51) 5 | -------------------------------------------------------------------------------- /ans/g2pC_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: document, vector, quer, similar, term, spac, model, tak, us, repres 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 0 plagiated blocks (01:22:52) 5 | -------------------------------------------------------------------------------- /ans/g2pC_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: solv, probl, us, progr, subprobl, dyn, solut, opt, approach, memoiz 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 0 plagiated blocks (01:22:54) 5 | -------------------------------------------------------------------------------- /ans/g3pA_taska_ans.txt: 
-------------------------------------------------------------------------------- 1 | Keywords for text 1: class, inher, us, child, parent, dat, function, subclass, object, languag 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 0 plagiated blocks (01:22:54) 5 | -------------------------------------------------------------------------------- /ans/g3pA_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, pag, link, googl, import, search, val, affect, us, inc 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 0 plagiated blocks (01:22:54) 5 | -------------------------------------------------------------------------------- /ans/g3pB_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, web, googl, pag, val, algorithm, import, rel, sit, tri 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 0 plagiated blocks (01:22:54) 5 | -------------------------------------------------------------------------------- /ans/g3pC_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: us, pag, vector, quer, comput, result, repres, cosin, model, alwa 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 0 plagiated blocks (01:22:55) 5 | -------------------------------------------------------------------------------- /ans/g4pB_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | 
Keywords for text 1: link, pag, rank, higher, number, farm, on, user, sit, search 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 0 plagiated blocks (01:22:57) 5 | -------------------------------------------------------------------------------- /ans/g4pD_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: term, word, formul, appear, frequenc, us, giv, document, tim, cred 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 0 plagiated blocks (01:22:57) 5 | -------------------------------------------------------------------------------- /ans/g4pE_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: search, ba, bayesia, scenar, engin, term, wa, call, sidf, eval 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:58) 4 | Search ended, found 0 plagiated blocks (01:22:58) 5 | -------------------------------------------------------------------------------- /ans/g4pE_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: us, dyn, progr, languag, comput, solv, develop, opt, new, mathematicia 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:59) 4 | Search ended, found 0 plagiated blocks (01:22:59) 5 | -------------------------------------------------------------------------------- /ans/g0pA_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: student, class, would, postgrad, nod, new, inher, relationship, 
kind, extend 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:45) 4 | Search ended, found 0 plagiated blocks (01:22:45) 5 | -------------------------------------------------------------------------------- /ans/g1pD_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, spac, two, document, us, higher, retr, repres, inform, compar 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 0 plagiated blocks (01:22:49) 5 | -------------------------------------------------------------------------------- /ans/g1pD_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, us, record, giv, jo, event, correct, lik, calcul, theor 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 0 plagiated blocks (01:22:50) 5 | -------------------------------------------------------------------------------- /ans/g2pA_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, link, pag, rat, googl, result, vot, webs, algorithm, actua 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:51) 4 | Search ended, found 0 plagiated blocks (01:22:51) 5 | -------------------------------------------------------------------------------- /ans/g2pA_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: solut, subprobl, opt, progr, dyn, comput, solv, val, algorithm, correct 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, 
progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 0 plagiated blocks (01:22:53) 5 | -------------------------------------------------------------------------------- /ans/g2pE_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: retr, inform, document, scienc, search, syst, us, librar, provid, wid 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 0 plagiated blocks (01:22:52) 5 | -------------------------------------------------------------------------------- /ans/g2pE_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, event, theor, occur, number, tota, usua, val, relat, cond 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 0 plagiated blocks (01:22:53) 5 | -------------------------------------------------------------------------------- /ans/g3pC_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, giv, cond, ba, prior, comput, us, probabilt, observ 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 0 plagiated blocks (01:22:55) 5 | -------------------------------------------------------------------------------- /ans/g4pC_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: web, algorithm, pagerank, pag, link, search, googl, val, us, sinc 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks 
(01:22:57) 4 | Search ended, found 0 plagiated blocks (01:22:57) 5 | -------------------------------------------------------------------------------- /ans/g4pC_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: document, uniq, word, elimin, identif, extract, repres, vector, step, ide 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 0 plagiated blocks (01:22:57) 5 | -------------------------------------------------------------------------------- /ans/g0pC_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, document, inord, similar, word, cosin, comput, approach, find, dimens 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 0 plagiated blocks (01:22:47) 5 | -------------------------------------------------------------------------------- /ans/g0pD_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: prob, volum, avoid, connect, sinc, import, cond, probabl, calcul, inform 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 0 plagiated blocks (01:22:47) 5 | -------------------------------------------------------------------------------- /ans/g1pA_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: object, inher, class, propert, new, process, subclass, orient, relationship, similar 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 0 plagiated 
blocks (01:22:48) 5 | -------------------------------------------------------------------------------- /ans/g1pB_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pag, link, rank, popular, algorithm, number, rand, outbound, webs, divid 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 0 plagiated blocks (01:22:49) 5 | -------------------------------------------------------------------------------- /ans/g2pB_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, googl, us, sit, pag, factor, number, popular, keyword, algorithm 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:51) 4 | Search ended, found 0 plagiated blocks (01:22:52) 5 | -------------------------------------------------------------------------------- /ans/g2pC_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: link, pag, pagerank, import, webpag, outbound, man, accord, us, method 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 0 plagiated blocks (01:22:52) 5 | -------------------------------------------------------------------------------- /ans/g3pB_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, defin, attribut, subclass, two, repres, behaviour, class, said, characterist 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 0 plagiated blocks (01:22:54) 5 | 
-------------------------------------------------------------------------------- /ans/g3pC_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pag, us, link, pagerank, algorithm, import, wid, googl, rank, structur 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 0 plagiated blocks (01:22:55) 5 | -------------------------------------------------------------------------------- /ans/g4pD_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, probabl, val, distribut, document, collect, pag, init, would, assum 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 0 plagiated blocks (01:22:57) 5 | -------------------------------------------------------------------------------- /ans/g4pD_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: theor, probabl, subjectivist, us, ba, cond, scienc, insight, clarif, centra 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:58) 4 | Search ended, found 0 plagiated blocks (01:22:58) 5 | -------------------------------------------------------------------------------- /ans/g0pC_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, inher, cod, wa, new, similar, orient, cheater, als, genera 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:45) 4 | Search ended, found 1 plagiated blocks (01:22:45) 5 | 6 | All plagiated blocks were written to log.txt 7 | 
-------------------------------------------------------------------------------- /ans/g0pE_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, class, appl, fru, new, cod, call, genera, ancestor, shar 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:45) 4 | Search ended, found 2 plagiated blocks (01:22:45) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pC_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, class, object, cod, new, ancestor, defin, access, us, singl 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:51) 4 | Search ended, found 2 plagiated blocks (01:22:51) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pE_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, inher, appl, cod, known, fru, new, genera, process, usua 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:51) 4 | Search ended, found 2 plagiated blocks (01:22:51) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pB_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: progr, opt, subprobl, us, probl, sa, mea, plan, dyn, term 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 
2 plagiated blocks (01:22:56) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pC_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, class, appl, fru, new, cod, call, genera, shar, categor 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 2 plagiated blocks (01:22:56) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pB_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: opt, progr, us, path, comput, probl, method, on, subprobl, process 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 2 plagiated blocks (01:22:48) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pC_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: progr, dyn, probl, opt, comput, term, action, bellma, solv, best 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 1 plagiated blocks (01:22:48) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pD_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: class, appl, inher, fru, new, ancestor, genera, exist, expos, orang 2 | Keywords for text 2: inher, class, appl, fru, new, 
cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:45) 4 | Search ended, found 2 plagiated blocks (01:22:45) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pB_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, event, theor, giv, us, ba, doctor, relat, wa, cond 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 1 plagiated blocks (01:22:50) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pB_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: progr, probl, solv, need, sub, approach, dyn, opt, function, on 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:50) 4 | Search ended, found 1 plagiated blocks (01:22:50) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pD_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, us, class, new, method, occasion, du, genera, propert, advantag 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 1 plagiated blocks (01:22:49) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pC_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: appl, inher, 
fru, class, new, genera, instanc, expos, ancestor, orang 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:54) 4 | Search ended, found 2 plagiated blocks (01:22:54) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pD_taska_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: inher, class, provid, modul, refer, cod, basic, econom, cogn, genera 2 | Keywords for text 2: inher, class, appl, fru, new, cod, call, genera, shar, categor 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 1 plagiated blocks (01:22:56) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pD_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: comput, progr, subprobl, opt, term, overlap, us, mea, solv, dyn 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:58) 4 | Search ended, found 1 plagiated blocks (01:22:59) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pA_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: term, vector, document, word, model, us, match, repres, spac, val 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 2 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pB_taskd_ans.txt: 
-------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, cond, giv, prior, margin, form, two, der 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 2 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pD_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: term, vector, word, document, index, inform, longer, val, wa, keyword 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 1 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pE_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: progr, action, opt, plan, dyn, method, exhib, schedul, event, propert 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:48) 4 | Search ended, found 1 plagiated blocks (01:22:48) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pB_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: document, vector, term, us, val, word, keyword, match, inform, spac 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 2 plagiated blocks (01:22:49) 5 | 6 | All plagiated blocks were 
written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pB_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, model, spac, term, us, differ, document, word, possibl, val 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 1 plagiated blocks (01:22:52) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pB_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: opt, progr, us, probl, solut, solv, process, comput, find, subprobl 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 3 plagiated blocks (01:22:53) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pC_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, giv, student, wear, ba, trouser, us, cond, girl 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 1 plagiated blocks (01:22:53) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pA_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: document, vector, term, repres, model, match, val, us, word, quer 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | 
Searching for plagiated blocks (01:22:55) 4 | Search ended, found 3 plagiated blocks (01:22:55) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pB_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, document, model, term, val, spac, word, within, bas, occur 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 1 plagiated blocks (01:22:55) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pB_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: ba, theor, probabl, test, us, drug, identif, tim, how, connect 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 1 plagiated blocks (01:22:55) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pC_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: progr, opt, thus, comput, dyn, method, mathemat, plan, schedul, utilis 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:56) 4 | Search ended, found 1 plagiated blocks (01:22:56) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pB_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: term, vector, document, val, word, 
inform, model, equa, cosinus, angl 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 1 plagiated blocks (01:22:57) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pB_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: opt, subprobl, comput, progr, solut, probl, us, solv, approach, overlap 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:58) 4 | Search ended, found 4 plagiated blocks (01:22:58) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pC_taske_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: opt, progr, us, probl, substructur, comput, solut, bellma, dyn, process 2 | Keywords for text 2: opt, subprobl, comput, probl, solut, progr, solv, us, need, overlap 3 | Searching for plagiated blocks (01:22:58) 4 | Search ended, found 2 plagiated blocks (01:22:58) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pB_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, document, term, word, repres, spac, model, keyword, weight, match 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 3 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pA_taskc_ans.txt: 
-------------------------------------------------------------------------------- 1 | Keywords for text 1: document, term, vector, model, us, known, repres, weight, val, frequenc 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 1 plagiated blocks (01:22:49) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pA_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, event, ba, first, happ, als, cond, bayesia, second, express 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 1 plagiated blocks (01:22:50) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pA_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: term, document, vector, word, val, match, repres, inform, differ, spac 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 2 plagiated blocks (01:22:52) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pA_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: student, probabl, wear, ba, girl, observ, theor, giv, trouser, comput 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 1 plagiated blocks (01:22:52) 5 | 6 | All 
plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pB_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, giv, margin, cond, prior, observ, usua, relat 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 2 plagiated blocks (01:22:57) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pE_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pag, pagerank, link, googl, import, algorithm, patent, web, vot, numer 2 | Keywords for text 2: pagerank, patent, pag, link, googl, wid, numer, process, web, hyperlink 3 | Searching for plagiated blocks (01:27:53) 4 | Search ended, found 4 plagiated blocks (01:27:53) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pE_taskc_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: vector, spac, model, enhanc, document, term, word, depend, similar, genera 2 | Keywords for text 2: term, document, vector, word, model, val, repres, inform, spac, match 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 1 plagiated blocks (01:22:57) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pA_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, pag, algorithm, googl, numer, assign, weight, search, denot, als 2 | Keywords for text 2: pag, pagerank, link, googl, 
patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 4 plagiated blocks (01:22:46) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pA_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, ba, theor, student, frequentist, rand, us, bayesia, fema, wear 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 1 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pC_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, bayesia, frequentist, comput, rand, debat, us, assign 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:47) 4 | Search ended, found 1 plagiated blocks (01:22:47) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pE_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, googl, us, within, algorithm, link, measur, director, search, val 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 1 plagiated blocks (01:22:46) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g1pD_taskb_ans.txt: 
-------------------------------------------------------------------------------- 1 | Keywords for text 1: pagerank, probabl, document, link, rand, numer, click, damp, val, person 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:49) 4 | Search ended, found 2 plagiated blocks (01:22:49) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pB_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, giv, cond, previous, bayesia, frequentist, rand, debat 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:53) 4 | Search ended, found 2 plagiated blocks (01:22:53) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g2pE_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: patent, googl, pagerank, univers, assign, million, us, shar, numer, element 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:52) 4 | Search ended, found 1 plagiated blocks (01:22:52) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g3pA_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, margin, giv, cond, observ, bayesia, frequentist, prior 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:55) 4 | Search ended, found 2 plagiated blocks 
(01:22:55) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g4pC_taskd_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: probabl, theor, ba, giv, margin, cond, bayesia, frequentist, observ, prior 2 | Keywords for text 2: probabl, theor, ba, margin, giv, cond, bayesia, frequentist, observ, prior 3 | Searching for plagiated blocks (01:22:57) 4 | Search ended, found 3 plagiated blocks (01:22:58) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /ans/g0pD_taskb_ans.txt: -------------------------------------------------------------------------------- 1 | Keywords for text 1: googl, patent, algorithm, measur, stanford, us, purpos, search, pagerank, hyperlink 2 | Keywords for text 2: pag, pagerank, link, googl, patent, algorithm, web, import, numer, assign 3 | Searching for plagiated blocks (01:22:46) 4 | Search ended, found 1 plagiated blocks (01:22:46) 5 | 6 | All plagiated blocks were written to log.txt 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | plagiarism 2 | ========== 3 | 4 | Requirements: 5 | * Python 3.3.1 6 | * pdfminer3k: https://pypi.python.org/pypi/pdfminer3k/ 7 | * distribute: https://pypi.python.org/pypi/distribute 8 | * nltk: https://github.com/nltk/nltk/ 9 | * nltk-data (stopwords): http://nltk.org/data.html 10 | * bs4: http://www.crummy.com/software/BeautifulSoup/ 11 | 12 | Usage: 13 | 14 | To search for plagiated documents in google: 15 | 16 | main.py localfile.txt 17 | main.py localfile.pdf 18 | main.py http://example.ru/somefile.txt 19 | main.py ftp://example.com/somefile.pdf 20 | 21 | Press Ctrl-C to skip any file you don't want to test 22 | 23 | To compare two documents: 
# ---------------------------------------------------------------------------
# /README.md (continued)
# ---------------------------------------------------------------------------
#
#     main.py localfile.txt ftp://example.com/somefile.pdf
#     main.py http://example.ru/somefile.txt localfile.pdf
#     main.py localfile1.pdf localfile2.pdf
#
# Notice, that somefile.txt must have 'utf-8' encoding.
# To change encoding search for data.decode('utf-8') in plagiarism.py
#
# English or russian documents expected. For other languages just change
# global langs variable in main.py

# ---------------------------------------------------------------------------
# /ssk.py
# ---------------------------------------------------------------------------
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import math


class SSK:
    """Normalised string subsequence kernel between two sequences.

    ``s`` and ``t`` may be any indexable sequences whose items compare with
    ``==`` (strings, lists of words, ...).  ``l`` is the decay factor applied
    per position spanned by a matching subsequence.
    """

    def __init__(self, s, t, l = 0.5):
        self.s = s
        self.t = t
        self.l = l

    def __solve(self, s, t, p):
        """Return ``[K_1, ..., K_p]``: unnormalised kernels of ``s`` and ``t``.

        ``result[k - 1]`` is the kernel for subsequences of length ``k``.
        If either sequence is shorter than ``p`` a list of ``p`` zeros is
        returned (no common subsequence of length ``p`` can exist).
        """
        result = [0.0 for x in range(0, p)]

        n = len(s)
        m = len(t)

        # A sequence shorter than p has no subsequence of length p.  Guarding
        # on min() (not max()) also keeps every ``[k - 1]`` row index below
        # inside the (n + 1)-row tables.
        if p < 1 or p > min(n, m):
            return result

        # d10/d11 hold the auxiliary kernel for the previous/current length,
        # d20/d21 the matching running sums; the pairs are swapped after
        # every length so the tables are reused rather than reallocated.
        d10 = [[0.0 for x in range(0, m + 1)] for x in range(0, n + 1)]
        d20 = [[0.0 for x in range(0, m + 1)] for x in range(0, n + 1)]
        d11 = [[0.0 for x in range(0, m + 1)] for x in range(0, n + 1)]
        d21 = [[0.0 for x in range(0, m + 1)] for x in range(0, n + 1)]
        d0 = [0.0 for x in range(0, n + 1)]

        # Base case: the auxiliary kernel for length 0 is 1 everywhere.
        for i in range(0, n + 1):
            for j in range(0, m + 1):
                d10[i][j] = 1.0

        l = self.l
        l2 = l * l
        d0[0] = 0

        # Kernel for subsequences of length 1.
        for i in range(1, n + 1):
            d0[i] = d0[i - 1]
            for j in range(0, m):
                if t[j] == s[i - 1]:
                    d0[i] += d10[i - 1][j] * l2

        result[0] = d0[n]

        for k in range(1, p):
            # Running sum over t for the current length.
            for j in range(0, m + 1):
                d21[k - 1][j] = 0.0
            for i in range(k, n + 1):
                for j in range(0, k):
                    d21[i][j] = 0.0
                for j in range(k, m + 1):
                    if s[i - 1] == t[j - 1]:
                        d21[i][j] = l * (d21[i][j - 1] + l * d10[i - 1][j - 1])
                    else:
                        d21[i][j] = l * (d21[i][j - 1])

            # Auxiliary kernel for the current length.
            for j in range(0, m + 1):
                d11[k - 1][j] = 0.0
            for i in range(k, n + 1):
                for j in range(0, k):
                    d11[i][j] = 0.0
                for j in range(k, m + 1):
                    d11[i][j] = l * d11[i - 1][j] + d21[i][j]

            # The tables just filled become "previous" for the next length.
            d11, d10 = d10, d11
            d21, d20 = d20, d21

            # Kernel for subsequences of length k + 1.
            for i in range(0, n + 1):
                d0[i] = 0.0
            for i in range(k + 1, n + 1):
                d0[i] = d0[i - 1]
                for j in range(0, m):
                    if t[j] == s[i - 1]:
                        d0[i] += d10[i - 1][j] * l2
            result[k] = d0[n]

        return result

    def solve(self, p):
        """Return the kernel for subsequences of length ``p``, normalised.

        The value is ``K(s, t) / sqrt(K(s, s) * K(t, t))``; ``0.0`` is
        returned when the normaliser is zero (for instance when a sequence
        is shorter than ``p``).

        Raises:
            ValueError: if ``p`` is smaller than 1.
        """
        if p < 1:
            raise ValueError("p must be a positive integer")
        st = self.__solve(self.s, self.t, p)[p - 1]
        ss = self.__solve(self.s, self.s, p)[p - 1]
        tt = self.__solve(self.t, self.t, p)[p - 1]
        norm = math.sqrt(ss * tt)
        if norm == 0.0:
            return 0.0
        return st / norm


# ---------------------------------------------------------------------------
# /main.py
# ---------------------------------------------------------------------------
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import re
import string

langs = ["english", "russian"]

# Locations starting with one of these are downloaded; anything else is
# treated as a local path.
_REMOTE_PREFIXES = ('http://', 'https://', 'ftp://')


def _loadtext(location):
    """Return the text of ``location`` (URL or local path), or None on failure."""
    # Imported here so that merely importing this module does not load the
    # PDF/NLP dependencies of plagiarism.py.
    from plagiarism import downloadfile, readfile
    if location.startswith(_REMOTE_PREFIXES):
        return downloadfile(location)
    return readfile(location)


def writelog(log, file1, keywords1, file2, keywords2, blocks, local = False):
    """Append one comparison report to the writable text stream ``log``.

    ``blocks`` is an iterable of ``(ssk, s, t)`` tuples where ``s`` and ``t``
    are sequences of words.  ``local`` selects the labels used for the second
    document: a second source file, or a file found through Google.
    """
    file2_header = "Source file 2: " if local else "Googled file: "
    # The second block used to be labelled "Googled:" even for local files.
    block2_label = "Source 2: " if local else "Googled: "

    log.write("=" * 30 + "\n")
    log.write("Source file: " + file1 + "\n")
    log.write("Keywords 1: " + ", ".join(keywords1) + "\n")
    log.write(file2_header + file2 + "\n")
    log.write("Keywords 2: " + ", ".join(keywords2) + "\n")
    for ssk, s, t in blocks:
        log.write("-" * 30 + "\n")
        log.write("Plagiated block with ssk: " + "%0.5f\n" % ssk)
        log.write("Source: " + " ".join(s) + "\n")
        log.write(block2_label + " ".join(t) + "\n")


def _compare_files(log, argv, text):
    """Compare ``text`` (from argv[1]) with the document named by argv[2]."""
    from plagiarism import getkeywords, evaluate, timestr

    text2 = _loadtext(argv[2])
    if text2 is None:
        # Previously a missing second file was silently compared as the
        # literal text "None".
        print("File don't exist or do not have .pdf or .txt extension")
        return

    keywords = getkeywords(text, langs=langs)
    keywords2 = getkeywords(text2, langs=langs)
    print("Keywords for text 1: ", ", ".join(keywords))
    print("Keywords for text 2: ", ", ".join(keywords2))
    print("Searching for plagiated blocks ({0})".format(timestr()))
    blocks = evaluate(text, text2, langs=langs, debug=False)
    print("Search ended, found {0} plagiated blocks ({1})".format(len(blocks), timestr()))
    if len(blocks) > 0:
        writelog(log, argv[1], keywords, argv[2], keywords2, blocks, local=True)
        print("\nAll plagiated blocks were written to log.txt")


def _search_google(log, argv, text):
    """Google the keywords of ``text`` and compare it with every PDF found."""
    from plagiarism import downloadfile, getkeywords, evaluate, timestr
    from network import google

    keywords = getkeywords(text, langs=langs)
    print("Keywords for source: " + ", ".join(keywords))
    query = "filetype:pdf " + " ".join(keywords)
    print("Googling: ", query)
    results = google(query).get_results(0)
    if len(results) == 0:
        print("Sorry, googling failed, maybe we are banned")
        return
    print("Google'd ", len(results), " documents:")
    for i, result in enumerate(results):
        print(str(i+1) + ": " + result['url'])

    logged = False
    for i, result in enumerate(results):
        try:
            print("\nProcessing file " + str(i+1) + ": " + result['url'])
            text2 = downloadfile(result['url'])
            if text2 is None:
                print("This file appears to be invalid .pdf file")
                continue
            keywords2 = getkeywords(text2, langs=langs)
            print("Keywords: ", ", ".join(keywords2))
            print("Searching for plagiated blocks ({0})".format(timestr()))
            blocks = evaluate(text, text2, langs=langs, debug=False)
            print("Search ended, found {0} plagiated blocks ({1})".format(len(blocks), timestr()))
        except KeyboardInterrupt:
            # Ctrl-C skips the current document only.
            print("Interrupted by User")
            continue
        if len(blocks) > 0:
            writelog(log, argv[1], keywords, result['url'], keywords2, blocks)
            logged = True
    if logged:
        print("\nAll plagiated blocks were written to log.txt")


def main(argc, argv):
    """Command-line entry point.

    With one document argument the document is compared against PDFs found
    through Google; with exactly two, the two documents are compared with
    each other.  Matching blocks are written to ``log.txt``.
    """
    if argc < 2:
        print("No input file specified")
        return

    text = _loadtext(argv[1])
    if text is None:
        print("File don't exist or do not have .pdf or .txt extension")
        return

    # The with-block closes log.txt on every exit path, including the early
    # returns inside the helpers.
    with open("log.txt", 'w', encoding='utf-8') as log:
        if argc == 3:
            _compare_files(log, argv, text)
        else:
            _search_google(log, argv, text)


if __name__ == "__main__":
    main(len(sys.argv), sys.argv)


# ---------------------------------------------------------------------------
# /network.py
# ---------------------------------------------------------------------------
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import urllib.request
import random
import urllib.parse
import re

# User-Agent strings; one is picked at random for every request.
browsers = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.48 Safari/525.19',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
    'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.5) Gecko/2008121621 Ubuntu/8.04 (hardy) Firefox/3.0.5',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
]


def download(url):
    """Fetch ``url`` and return the response body as bytes.

    Returns None when ``url`` is shorter than three characters or the
    request fails for any reason.  KeyboardInterrupt is deliberately not
    caught, so Ctrl-C during a slow download reaches the caller.
    """
    if len(url) < 3:
        return None
    headers = {
        'User-Agent': random.choice(browsers),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5'
    }
    try:
        request = urllib.request.Request(url=url, headers=headers)
        # The with-block closes the connection once the body has been read.
        with urllib.request.urlopen(request, timeout = 5) as response:
            return response.read()
    except Exception:
        # Best effort: any network/protocol error means "no data".
        return None


class google:
    """Minimal scraper for Google web-search result pages."""

    SEARCH_URL = "http://www.google.%(tld)s/search?hl=%(lang)s&newwindow=1&output=search&sclient=psy-ab&q=%(query)s"
    NEXT_PAGE = "http://www.google.%(tld)s/search?hl=%(lang)s&newwindow=1&q=%(query)s&start=%(start)d&sa=N"

    def __init__(self, query, lang="en", tld="com"):
        self.query = query
        self.lang = lang
        self.tld = tld
        self.num = 10  # results per page, used to compute the start offset

    def __fetch_page(self, page):
        """Download result page number ``page`` (0-based); bytes or None."""
        pattern = google.NEXT_PAGE if page > 0 else google.SEARCH_URL
        url = pattern % {
            'query': urllib.parse.quote_plus(self.query),
            'start': page * self.num,
            'tld' : self.tld,
            'lang' : self.lang
        }
        return download(url)

    def get_results(self, page):
        """Return the hits on result page ``page`` as ``{"name", "url"}`` dicts.

        An empty list is returned when the page could not be downloaded.
        Entries without a link of the expected ``/url?q=http...&`` form are
        skipped.
        """
        # Imported here so that download() stays usable without bs4.
        from bs4 import BeautifulSoup

        text = self.__fetch_page(page)
        if text is None:
            return []
        soup = BeautifulSoup(text, "html.parser")

        res = []
        for result in soup.find_all('li', {'class': 'g'}):
            a = result.find('a')
            if a is None:
                continue
            match = re.match(r'/url\?q=(http[^&]+)&', a.get('href', ''))
            if match is None:
                continue
            res.append({"name" : a.text, "url" : urllib.parse.unquote(match.group(1))})

        return res
# ---------------------------------------------------------------------------
# /plagiarism.py
# ---------------------------------------------------------------------------
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

from io import StringIO, BytesIO
import sys
import re
import string
import logging
from collections import Counter, defaultdict
from datetime import datetime

from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter

from nltk.corpus import stopwords
from nltk.tokenize import TreebankWordTokenizer
from nltk.stem import SnowballStemmer
import nltk.data

from network import download
from ssk import SSK


def timestr():
    """Return the current local time formatted as ``HH:MM:SS``."""
    return datetime.now().strftime('%H:%M:%S')


def decodepdf(fp, debug = False):
    """Extract and return the text of the PDF read from binary stream ``fp``.

    Log records up to WARNING level are suppressed while the PDF is being
    processed and re-enabled afterwards, even if processing fails.
    """
    with StringIO() as outfp:
        rsrcmgr = PDFResourceManager()
        device = TextConverter(rsrcmgr, outfp)
        logging.disable(logging.WARNING)
        try:
            if debug: print("processing pdf begin ({0})".format(timestr()))
            process_pdf(rsrcmgr, device, fp)
            if debug: print("processing pdf ended ({0})".format(timestr()))
        finally:
            # Without this, an exception would leave logging disabled for
            # the rest of the process.
            logging.disable(logging.NOTSET)
        return outfp.getvalue()


def decodetxt(fp):
    """Return the whole content of the open file object ``fp``."""
    return fp.read()


def readfile(file, debug = False):
    """Return the text of the local ``.txt`` or ``.pdf`` file ``file``.

    Text files are read as UTF-8.  Returns None when the extension is
    neither ``.txt`` nor ``.pdf`` or when the file cannot be read or
    decoded.  KeyboardInterrupt is not caught.
    """
    try:
        if file.endswith('.txt'):
            with open(file, 'r', encoding='utf-8') as fp:
                return fp.read()
        elif file.endswith('.pdf'):
            with open(file, 'rb') as fp:
                return decodepdf(fp, debug=debug)
    except Exception:
        # Best effort: an unreadable or undecodable file is reported as None.
        pass
    return None


def downloadfile(url, debug = False):
    """Download ``url`` and return its text, or None on any failure.

    ``.txt`` resources are decoded as UTF-8 and ``.pdf`` resources go
    through decodepdf(); other extensions yield None.  KeyboardInterrupt is
    not caught.
    """
    data = download(url)
    if data is None:
        return None
    try:
        if url.endswith('.txt'):
            return data.decode('utf-8')
        elif url.endswith('.pdf'):
            return decodepdf(BytesIO(data), debug=debug)
    except Exception:
        # Best effort: undecodable or malformed content is reported as None.
        pass
    return None


# Cache of getwords() results, keyed by (text, tuple(langs)).
words_cache = {}

# Matches any ASCII punctuation character, ASCII digit or en dash.
_PUNCT_OR_DIGIT = re.compile('[%s0-9–]' % re.escape(string.punctuation))


def getwords(text, langs=("english", "russian"), debug = False):
    """Return the normalised word list of ``text``.

    The text is tokenised, then tokens of fewer than three characters and
    tokens containing punctuation or digits are dropped.  The rest are
    lower-cased, stop words of every language in ``langs`` are removed, and
    the Snowball stemmer of every language in ``langs`` is applied, in
    order, to every remaining word.  Results are cached per
    ``(text, langs)``; the cached list is returned as is, so callers must
    not modify it.
    """
    key = (text, tuple(langs))
    if key in words_cache:
        if debug: print("found words in cache")
        return words_cache[key]

    if debug: print("tokenize begin ({0})".format(timestr()))
    words = TreebankWordTokenizer().tokenize(str(text))
    if debug: print("tokenize ended ({0})".format(timestr()))

    if debug: print("del short words begin ({0})".format(timestr()))
    words = [word for word in words if len(word) > 2]
    if debug: print("del short words ended ({0})".format(timestr()))

    if debug: print("punctuation begin ({0})".format(timestr()))
    words = [word for word in words if not _PUNCT_OR_DIGIT.search(word)]
    if debug: print("punctuation ended ({0})".format(timestr()))

    if debug: print("stopwords begin ({0})".format(timestr()))
    words = [word.lower() for word in words]
    # One set for all languages: constant-time membership tests instead of
    # scanning each stop-word list for every word.
    stops = set()
    for lang in langs:
        stops.update(stopwords.words(lang))
    words = [word for word in words if word not in stops]
    if debug: print("stopwords ended ({0})".format(timestr()))

    if debug: print("stemming begin ({0})".format(timestr()))
    # NOTE(review): every stemmer is applied to every word, whatever the
    # word's language.  Kept unchanged because the recorded outputs in ans/
    # depend on the resulting stems.
    for lang in langs:
        stemmer = SnowballStemmer(lang)
        words = [stemmer.stem(word) for word in words]
    if debug: print("stemming ended ({0})".format(timestr()))

    words_cache[key] = words
    return words


def getkeywords(text, langs=("english", "russian"), num = 10, debug = False):
    """Return the ``num`` most frequent normalised words of ``text``.

    Words are ordered by decreasing frequency; words with equal frequency
    keep their order of first appearance in the text.
    """
    words = getwords(text, langs, debug=debug)

    wordsCount = Counter(words)

    if debug: print("sorting begin ({0})".format(timestr()))
    # sorted() is stable, also with reverse=True, so ties keep text order.
    top = sorted(wordsCount.items(), key=lambda x: x[1], reverse=True)[:num]
    if debug: print("sorting ended ({0})".format(timestr()))

    return [word for (word, cnt) in top]


def evaluate(text1, text2, langs = ("english", "russian"), debug = False,
             block_sz = 100, threshold = 0.1, p = 3):
    """Return the pairs of word blocks of the two texts that look alike.

    Both texts are normalised with getwords() and cut into consecutive
    blocks of ``block_sz`` words.  Every block of ``text1`` is compared
    with every block of ``text2`` using the subsequence kernel for
    subsequences of length ``p``.

    Returns:
        A list of ``(score, block1, block2)`` tuples for all pairs whose
        score is greater than ``threshold``.
    """
    text1 = getwords(text1, langs=langs, debug=debug)
    text2 = getwords(text2, langs=langs, debug=debug)

    text1s = [text1[i:i+block_sz] for i in range(0, len(text1), block_sz)]
    text2s = [text2[i:i+block_sz] for i in range(0, len(text2), block_sz)]

    blocks = []
    for s in text1s:
        for t in text2s:
            res = SSK(s, t).solve(p)
            if res > threshold:
                blocks.append((res, s, t))

    return blocks