├── .gradle ├── 4.3 │ ├── fileChanges │ │ └── last-build.bin │ ├── fileHashes │ │ ├── fileHashes.bin │ │ ├── fileHashes.lock │ │ └── resourceHashesCache.bin │ ├── taskHistory │ │ ├── taskHistory.bin │ │ └── taskHistory.lock │ └── fileContent │ │ ├── fileContent.lock │ │ └── annotation-processors.bin └── buildOutputCleanup │ ├── cache.properties │ ├── outputFiles.bin │ └── buildOutputCleanup.lock ├── .gitattributes ├── bin ├── Library.class ├── main │ └── Main.class ├── LibraryTest.class ├── base │ ├── Crawler.class │ ├── Element.class │ ├── PageRank.class │ ├── Scrapper.class │ ├── MongoConnect.class │ ├── SearchQuery.class │ └── SortedPageRank.class ├── methods │ └── Jakobi.class └── utils │ ├── SaveOpen.class │ └── SaveTest.class ├── .settings ├── org.eclipse.buildship.core.prefs └── org.eclipse.core.resources.prefs ├── src ├── main │ └── java │ │ ├── images │ │ └── logo.png │ │ ├── Library.java │ │ ├── utils │ │ ├── SaveTest.java │ │ └── SaveOpen.java │ │ ├── methods │ │ └── Jakobi.java │ │ ├── base │ │ ├── SortedPageRank.java │ │ ├── MongoConnect.java │ │ ├── Scrapper.java │ │ ├── Element.java │ │ ├── Crawler.java │ │ ├── SearchQuery.java │ │ └── PageRank.java │ │ └── main │ │ └── Main.java └── test │ └── java │ └── LibraryTest.java ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── README.md ├── .classpath ├── settings.gradle ├── .project ├── gradlew.bat └── gradlew /.gradle/4.3/fileChanges/last-build.bin: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /bin/Library.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/Library.class -------------------------------------------------------------------------------- /bin/main/Main.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/main/Main.class -------------------------------------------------------------------------------- /.gradle/buildOutputCleanup/cache.properties: -------------------------------------------------------------------------------- 1 | #Sat Jan 27 02:17:25 CET 2018 2 | gradle.version=4.3 3 | -------------------------------------------------------------------------------- /bin/LibraryTest.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/LibraryTest.class -------------------------------------------------------------------------------- /bin/base/Crawler.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/Crawler.class -------------------------------------------------------------------------------- /bin/base/Element.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/Element.class -------------------------------------------------------------------------------- /.settings/org.eclipse.buildship.core.prefs: -------------------------------------------------------------------------------- 1 | #Sat Jan 27 02:17:37 CET 2018 2 | connection.project.dir= 3 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/=UTF-8 3 | 
-------------------------------------------------------------------------------- /bin/base/PageRank.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/PageRank.class -------------------------------------------------------------------------------- /bin/base/Scrapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/Scrapper.class -------------------------------------------------------------------------------- /bin/methods/Jakobi.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/methods/Jakobi.class -------------------------------------------------------------------------------- /bin/utils/SaveOpen.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/utils/SaveOpen.class -------------------------------------------------------------------------------- /bin/utils/SaveTest.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/utils/SaveTest.class -------------------------------------------------------------------------------- /bin/base/MongoConnect.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/MongoConnect.class -------------------------------------------------------------------------------- /bin/base/SearchQuery.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/SearchQuery.class 
-------------------------------------------------------------------------------- /bin/base/SortedPageRank.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/bin/base/SortedPageRank.class -------------------------------------------------------------------------------- /src/main/java/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/src/main/java/images/logo.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /.gradle/4.3/fileHashes/fileHashes.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/fileHashes/fileHashes.bin -------------------------------------------------------------------------------- /.gradle/4.3/fileHashes/fileHashes.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/fileHashes/fileHashes.lock -------------------------------------------------------------------------------- /.gradle/4.3/taskHistory/taskHistory.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/taskHistory/taskHistory.bin -------------------------------------------------------------------------------- /.gradle/4.3/fileContent/fileContent.lock: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/fileContent/fileContent.lock -------------------------------------------------------------------------------- /.gradle/4.3/taskHistory/taskHistory.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/taskHistory/taskHistory.lock -------------------------------------------------------------------------------- /.gradle/buildOutputCleanup/outputFiles.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/buildOutputCleanup/outputFiles.bin -------------------------------------------------------------------------------- /.gradle/4.3/fileHashes/resourceHashesCache.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/fileHashes/resourceHashesCache.bin -------------------------------------------------------------------------------- /.gradle/4.3/fileContent/annotation-processors.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/4.3/fileContent/annotation-processors.bin -------------------------------------------------------------------------------- /.gradle/buildOutputCleanup/buildOutputCleanup.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciganche/Wiki-page-rank/HEAD/.gradle/buildOutputCleanup/buildOutputCleanup.lock -------------------------------------------------------------------------------- /src/main/java/Library.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This Java source file was generated by the Gradle 'init' task. 
3 | */ 4 | public class Library { 5 | public boolean someLibraryMethod() { 6 | return true; 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | zipStoreBase=GRADLE_USER_HOME 4 | zipStorePath=wrapper/dists 5 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.3-bin.zip 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WikiPageRank 2 | 3 | #### Language: Java 4 | 5 | #### Date: 20.2.2018 6 | 7 | #### Project Description: 8 | - Numerical Algorithms and Numerical Software student project 9 | - This program crawls through 8000 Wikipedia pages from a seed page and uses the gathered relational data to calculate the PageRank of every page using the Jacobi method. 10 | -------------------------------------------------------------------------------- /src/test/java/LibraryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This Java source file was generated by the Gradle 'init' task. 
3 | */ 4 | import org.junit.Test; 5 | import static org.junit.Assert.*; 6 | 7 | public class LibraryTest { 8 | @Test public void testSomeLibraryMethod() { 9 | Library classUnderTest = new Library(); 10 | assertTrue("someLibraryMethod should return 'true'", classUnderTest.someLibraryMethod()); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This settings file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * In a single project build this file can be empty or even removed. 6 | * 7 | * Detailed information about configuring a multi-project build in Gradle can be found 8 | * in the user guide at https://docs.gradle.org/4.3/userguide/multi_project_builds.html 9 | */ 10 | 11 | /* 12 | // To declare projects as part of a multi-project build use the 'include' method 13 | include 'shared' 14 | include 'api' 15 | include 'services:webservice' 16 | */ 17 | 18 | rootProject.name = 'EndzinSrc' 19 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | EndzinSrc 4 | Project EndzinSrc created by Buildship. 
5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.buildship.core.gradleprojectbuilder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.buildship.core.gradleprojectnature 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/main/java/utils/SaveTest.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | import java.io.OutputStreamWriter; 9 | import java.io.PrintWriter; 10 | import java.io.UnsupportedEncodingException; 11 | import java.io.Writer; 12 | 13 | public class SaveTest { 14 | 15 | 16 | private static PrintWriter writer = null; 17 | 18 | 19 | public SaveTest() { 20 | 21 | try { 22 | writer = new PrintWriter("query.txt", "UTF-8"); 23 | } catch (FileNotFoundException | UnsupportedEncodingException e) { 24 | // TODO Auto-generated catch block 25 | e.printStackTrace(); 26 | } 27 | } 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | public void upisi(String s) throws IOException 36 | { 37 | System.out.println(s); 38 | 39 | writer.write(s); 40 | } 41 | 42 | 43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/methods/Jakobi.java: -------------------------------------------------------------------------------- 1 | package methods; 2 | 3 | import java.util.Arrays; 4 | 5 | import javax.swing.plaf.synth.SynthSpinnerUI; 6 | 7 | public class Jakobi { 8 | 9 | public static float[] racunaj(float[][] A, float[] b, float[] x0, int itMax, float errMax) { 10 | int n = b.length; 11 | float[] x = new float[n]; 12 | float err; 13 | float sum; 14 | 15 | System.out.println("Jakobi starts."); 16 | for (int it = 0; it < itMax; it++ ){ 17 | err = 0; 18 | for (int i = 0; i < n; i++) { 
19 | sum = 0; 20 | for (int j = 0; j < i-1; j++) 21 | sum = sum + A[i][j]*x0[j]; 22 | for (int j = i+1; j < n; j++) 23 | sum = sum + A[i][j]*x0[j]; 24 | 25 | x[i] = (b[i] - sum)/A[i][i]; 26 | 27 | err = err + ((x[i]-x0[i])*(x[i]-x0[i])); 28 | 29 | } 30 | System.out.println("Iteration["+it+"] --- err: " + err); 31 | for (int i = 0; i < n; i++) 32 | x0[i] = x[i]; 33 | 34 | if (err < errMax*errMax) 35 | return x; 36 | } 37 | 38 | return x; 39 | } 40 | 41 | } -------------------------------------------------------------------------------- /src/main/java/base/SortedPageRank.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | import java.io.Serializable; 4 | 5 | public class SortedPageRank implements Comparable, Serializable { 6 | 7 | int rbr; 8 | int value; 9 | 10 | private String naziv; 11 | private String opis; 12 | private String URL; 13 | 14 | 15 | public SortedPageRank(int rbr, int value){ 16 | this.rbr = rbr; 17 | this.value = value; 18 | } 19 | 20 | public int compareTo(SortedPageRank o) { 21 | return this.value - o.value; 22 | } 23 | 24 | public int getValue(){ 25 | return value; 26 | } 27 | 28 | 29 | public int getRbr(){ 30 | return rbr; 31 | } 32 | public String getNaziv() { 33 | return naziv; 34 | } 35 | 36 | public void setNaziv(String sting) { 37 | this.naziv = sting; 38 | } 39 | 40 | public String getOpis() { 41 | return opis; 42 | } 43 | 44 | public void setOpis(String opis) { 45 | this.opis = opis; 46 | } 47 | 48 | public String getURL() { 49 | return URL; 50 | } 51 | 52 | public void setURL(String URL) { 53 | this.URL = URL; 54 | } 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/base/MongoConnect.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | import com.mongodb.BasicDBList; 4 | import com.mongodb.BasicDBObject; 5 | import com.mongodb.DB; 6 | import 
com.mongodb.DBCollection; 7 | import com.mongodb.DBCursor; 8 | import com.mongodb.DBObject; 9 | import com.mongodb.MongoClient; 10 | 11 | public class MongoConnect { 12 | 13 | public DB db; 14 | public static DBCollection coll; 15 | 16 | public MongoConnect(String dbName, String collName) throws InterruptedException { 17 | 18 | MongoClient cl = new MongoClient(); 19 | db = cl.getDB(dbName); 20 | coll = db.getCollection(collName); 21 | Thread.sleep(3000); 22 | } 23 | 24 | public static void insertIntoMongo(int rbr, String title, String desc, String url, int pripadnost, BasicDBList vodiNa) { 25 | String s; 26 | 27 | 28 | BasicDBObject doc = new BasicDBObject("naslov", title) 29 | .append("rbr", rbr) 30 | .append("opis", desc) 31 | .append("URL", url) 32 | .append("otkrivenNa", pripadnost) 33 | .append("vodiNa", vodiNa); 34 | coll.insert(doc); 35 | } 36 | 37 | public boolean existsinDB(String title) { 38 | DBObject query = new BasicDBObject("naslov", title); 39 | DBCursor result = coll.find(query); 40 | if (result.size() != 0 ) { 41 | System.out.println("Exists!"); 42 | return true; 43 | } else { 44 | System.out.println("Does not exist!"); 45 | return false; 46 | } 47 | 48 | } 49 | 50 | public int numberOfElementsinBase() { 51 | DBCursor result = coll.find(); 52 | return result.size(); 53 | 54 | } 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/base/Scrapper.java: -------------------------------------------------------------------------------- 1 | package base; 2 | import java.io.IOException; 3 | 4 | import org.jsoup.Jsoup; 5 | import org.jsoup.nodes.Document; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | import utils.SaveTest; 9 | 10 | 11 | public class Scrapper { 12 | 13 | public static String naziv; 14 | public static String opis; 15 | public static long id; 16 | public static String url; 17 | public static int niz[]; 18 | 19 | public static String[] 
daj_html(String source) { 20 | 21 | String retval[] = {null,null,null}; 22 | 23 | try { 24 | Document doc = Jsoup.connect(source).timeout(5*60*1000).get(); 25 | Elements cont_temp = doc.select("div.mw-content-ltr"); 26 | Elements head_temp = doc.select("h1.firstHeading"); 27 | 28 | for(Element pom:head_temp ) 29 | { 30 | naziv = pom.text(); 31 | } 32 | retval[0] = naziv; 33 | 34 | 35 | 36 | for(Element paragraf:cont_temp ) 37 | { 38 | 39 | if(paragraf.getElementsByTag("p").text().length() == 0) 40 | { 41 | Elements pom = paragraf.getElementsByTag("div"); 42 | 43 | for(Element nesto:pom) 44 | { 45 | 46 | if(nesto.getElementsByTag("p").text().length()==0) 47 | return retval; 48 | 49 | retval[1] = nesto.getElementsByTag("p").get(0).text(); 50 | 51 | 52 | retval[2] = nesto.getElementsByTag("p").html(); 53 | } 54 | 55 | } 56 | 57 | 58 | else 59 | { 60 | opis = paragraf.getElementsByTag("p").get(0).text(); 61 | retval[1] = opis; 62 | retval[2] = paragraf.getElementsByTag("p").html(); 63 | } 64 | 65 | } 66 | 67 | 68 | 69 | } catch (IOException e) { 70 | // TODO Auto-generated catch block 71 | e.printStackTrace(); 72 | 73 | } 74 | 75 | return retval; 76 | 77 | } 78 | 79 | 80 | 81 | } 82 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /src/main/java/base/Element.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.mongodb.BasicDBList; 7 | 8 | import utils.SaveTest; 9 | 10 | public class Element { 11 | 12 | private int rbr; 13 | private String URL; 14 | private int pripadnost; //na kom rbr stranice je otkrivena, init nikome 15 | private List pripada_mu; 16 | private String naziv; 17 | private String opis; 18 | 19 | public Element(String url) 20 | { 21 | this.URL = url; 22 | this.rbr = -1; 23 | this.pripadnost = -1; 24 | this.pripada_mu = new ArrayList(); 25 | } 26 | 27 | 28 | 29 | 30 | 31 | 32 | public int getRbr() { 33 | return rbr; 34 | } 35 | 36 | 37 | public void setRbr(int rbr) { 38 | this.rbr = rbr; 39 | } 40 | 41 | 42 | 43 | public String getURL() { 44 | return URL; 45 | } 46 | public void setURL(String uRL) { 47 | URL = uRL; 48 | } 49 | 50 | 51 | 52 | public List getPripada_mu() { 53 | return pripada_mu; 54 | } 55 | public void setPripada_mu(List pripada_mu) { 56 | this.pripada_mu = pripada_mu; 57 | } 58 | 59 | 60 | public int getPripadnost() { 61 | return pripadnost; 62 | } 63 | public void setPripadnost(int pripadnost) { 64 | this.pripadnost = pripadnost; 65 | } 66 | 67 | 68 | public void append(int l) 69 | { 70 | pripada_mu.add(l); 71 | } 72 | 73 | 74 | public String getNaziv() { 75 | return naziv; 76 | } 77 | public void setNaziv(String n) { 78 | naziv = n; 79 | } 80 | 81 | 82 | public String getOpis() { 83 | return opis; 84 | } 85 | public void setOpis(String o) { 86 | opis = o; 87 | } 88 | 89 | 90 | public void saveElement() 91 | { 92 | //utils.SaveTest.upisi(int.toString(rbr)+ ": "); 93 | //utils.SaveTest.upisi(naziv); 94 | 
//utils.SaveTest.upisi("\n\n"); 95 | //utils.SaveTest.upisi("Otkriven u: "+int.toString(pripadnost)+"\n"); 96 | //utils.SaveTest.upisi("Vodi na: [ "); 97 | 98 | BasicDBList vodiNa = new BasicDBList(); 99 | for(int i = 0;i o, String filename) { 69 | 70 | try { 71 | 72 | 73 | FileOutputStream fos = new FileOutputStream(filename + ".pr"); 74 | ObjectOutputStream oos = new ObjectOutputStream(fos); 75 | oos.writeObject(o); 76 | oos.close(); 77 | 78 | 79 | } catch (FileNotFoundException e) { 80 | e.printStackTrace(); 81 | } catch (IOException e) { 82 | e.printStackTrace(); 83 | 84 | } 85 | System.out.println("PageRank object saved as: " + filename + ".pr"); 86 | } 87 | 88 | public List openSortedPageRank(String loc) { 89 | List o = null; 90 | try { 91 | 92 | FileInputStream fis = new FileInputStream(loc); 93 | ObjectInputStream ois = new ObjectInputStream(fis); 94 | o = (List) ois.readObject(); 95 | ois.close(); 96 | 97 | } catch (FileNotFoundException e) { 98 | e.printStackTrace(); 99 | } catch (IOException e) { 100 | e.printStackTrace(); 101 | } catch (ClassNotFoundException e) { 102 | e.printStackTrace(); 103 | } 104 | System.out.println("Opened."); 105 | return o; 106 | 107 | } 108 | 109 | 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/base/Crawler.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | 4 | import java.util.ArrayList; 5 | import java.util.HashSet; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | import java.util.Queue; 9 | import java.util.Set; 10 | import java.util.regex.Matcher; 11 | import java.util.regex.Pattern; 12 | 13 | 14 | 15 | public class Crawler { 16 | 17 | public static Queue kju = new LinkedList<>(); 18 | public static Set marked = new HashSet<>(); 19 | public static String regex = "/wiki/(.+?)\""; 20 | public static int id; 21 | public static int temp_id; 22 | public static MongoConnect mongo; 23 | 24 | 
25 | public static List unique = new ArrayList(); 26 | 27 | public static void algoritam(String root, int n) 28 | { 29 | 30 | Element start = new Element(root); //inicijalizacija roota 31 | start.setRbr(id); //pripadnost ostaje na -1 jer od root-a se krece 32 | id++; 33 | unique.add(start); 34 | 35 | kju.add(start); 36 | while(!kju.isEmpty()) 37 | { 38 | 39 | Element curr = kju.poll(); 40 | String crawledURL = curr.getURL(); 41 | 42 | System.out.println("Kroluje se stranica: " + curr.getRbr()+ " izronjena iz " + Integer.toString(curr.getPripadnost())); 43 | 44 | 45 | 46 | String html = null; 47 | String naziv = null; 48 | String opis = null; 49 | 50 | 51 | String retval[]; 52 | retval = base.Scrapper.daj_html(crawledURL); 53 | 54 | 55 | 56 | curr.setNaziv(retval[0]); 57 | curr.setOpis(retval[1]); 58 | 59 | 60 | 61 | html = retval[2]; 62 | 63 | if(html==null) 64 | { 65 | curr.saveElement(); 66 | continue; 67 | } 68 | 69 | 70 | Pattern pattern = Pattern.compile(regex); 71 | Matcher matcher = pattern.matcher(html); 72 | 73 | while(matcher.find()) // trazenje linkova iz elementa curr, dodavanje istih na kraj 74 | { 75 | 76 | String w = matcher.group(); 77 | String adresa = "https://sr.wikipedia.org" + w.substring(0, w.length()-1); //secenje " karaktera zbog nacina definisanja regex-a 78 | 79 | if(adresa.startsWith("https://sr.wikipedia.org/w/index.php?title") || adresa.startsWith("https://sr.wikipedia.org/wiki/en:")) 80 | { 81 | System.out.println("Naleteli smo na praznu stranu wikipedije."); 82 | continue; 83 | } 84 | marked.add(adresa); 85 | 86 | 87 | 88 | 89 | 90 | boolean klasican_inc_id = true; 91 | for(int i = 0;in) 134 | { 135 | System.out.println("Done. 
Crawled " + n + " pages."); 136 | return; 137 | } 138 | } 139 | 140 | 141 | } 142 | 143 | 144 | } 145 | -------------------------------------------------------------------------------- /src/main/java/base/SearchQuery.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.List; 6 | import java.util.Scanner; 7 | 8 | import com.mongodb.BasicDBList; 9 | import com.mongodb.BasicDBObject; 10 | import com.mongodb.DBCollection; 11 | import com.mongodb.DBCursor; 12 | import com.mongodb.DBObject; 13 | 14 | 15 | 16 | 17 | public class SearchQuery { 18 | private DBCollection coll; 19 | private String typed; 20 | 21 | private List pageRankSorted; 22 | 23 | 24 | 25 | public SearchQuery(String typed, DBCollection coll, List rank) { 26 | setTyped(typed); 27 | setCollection(coll); 28 | setPageRankSortedList(rank); 29 | 30 | naslovQuery(); 31 | 32 | } 33 | 34 | public DBObject formQuery(String typed) { 35 | DBObject query = new BasicDBObject(); 36 | Scanner s = new Scanner(typed); 37 | String[] typedS = new String[10]; 38 | DBObject[] regex = new DBObject[10]; 39 | DBObject[] naslov = new DBObject[10]; 40 | DBObject[] opis = new DBObject[10]; 41 | 42 | BasicDBList or = new BasicDBList(); 43 | int k = 0; 44 | while(s.hasNext() && k < 10) { 45 | typedS[k] = s.next(); 46 | regex[k] = new BasicDBObject("$regex", typedS[k]); 47 | naslov[k] = new BasicDBObject("naslov", regex[k]); 48 | //opis[k] = new BasicDBObject("opis", regex[k]); 49 | 50 | or.add(naslov[k]); 51 | //or.add(opis[k]); 52 | k++; 53 | } 54 | 55 | s.close(); 56 | query = new BasicDBObject("$or", or); 57 | 58 | return query; 59 | } 60 | 61 | 62 | public DBCursor naslovQuery() { 63 | 64 | System.out.println("Eneted keywords: " + typed); 65 | System.out.println("СрчЕнџин: Searching " + base.PageRank.n + " pages."); 66 | 67 | 68 | DBObject query = formQuery(typed); 69 | DBObject fields = 
new BasicDBObject("naslov", true).append("_id", false).append("rbr", true).append("opis", true).append("URL", true); 70 | DBCursor result = coll.find(query, fields); 71 | int queryCount = result.size(); 72 | int k = 0; 73 | int kmax = 20; 74 | System.out.println("Pages found: " + queryCount); 75 | 76 | if(queryCount > 20) 77 | System.out.println("Printing " +kmax+"/" + queryCount +" pages."); 78 | 79 | while (result.hasNext()) { 80 | //System.out.println(result.next()); 81 | int i = (int) result.next().get("rbr"); 82 | String naslov_t = (String) result.curr().get("naslov"); 83 | String opis_t = (String) result.curr().get("opis"); 84 | String URL_t = (String) result.curr().get("URL"); 85 | 86 | for (int j = 0; j < pageRankSorted.size(); j++) { 87 | if (i == pageRankSorted.get(j).getRbr()) { 88 | k++; 89 | pageRankSorted.get(i).setNaziv(naslov_t); 90 | pageRankSorted.get(i).setOpis(opis_t); 91 | pageRankSorted.get(i).setURL(URL_t); 92 | 93 | if (k pageRankSorted) { 114 | this.pageRankSorted = pageRankSorted; 115 | 116 | } 117 | 118 | 119 | public void printQuery(int i) { 120 | 121 | System.out.println("Naslov: " + pageRankSorted.get(i).getNaziv()); 122 | System.out.println("Opis: " + pageRankSorted.get(i).getOpis()); 123 | System.out.println("URL: " + pageRankSorted.get(i).getURL()); 124 | System.out.println("PageRank = " + pageRankSorted.get(i).getValue()); 125 | System.out.println("\n"); 126 | 127 | 128 | } 129 | 130 | 131 | 132 | } 133 | -------------------------------------------------------------------------------- /src/main/java/main/Main.java: -------------------------------------------------------------------------------- 1 | package main; 2 | 3 | import java.io.IOException; 4 | import java.util.Scanner; 5 | 6 | import base.MongoConnect; 7 | import base.PageRank; 8 | import base.SearchQuery; 9 | 10 | public class Main { 11 | 12 | public static void main(String[] args) throws InterruptedException, IOException { 13 | 14 | 15 | System.out.println("Welcome to 
СрчЕнџин console app. "); 16 | 17 | Thread.sleep(1000); 18 | System.out.println("MongoDB setup: MongoDatabase will try to connect to localhost after entering database name and collection name. "); 19 | Thread.sleep(2000); 20 | System.out.println("Enter a database name: "); 21 | 22 | Scanner s = new Scanner(System.in); 23 | String dbName = s.next(); 24 | System.out.println("Database name set to: " + dbName); 25 | 26 | System.out.println("Enter a database collection name: "); 27 | String collName = s.next(); 28 | s = new Scanner(System.in); 29 | System.out.println("Collection name set to: " + collName); 30 | 31 | MongoConnect mongo = new MongoConnect(dbName, collName); 32 | 33 | 34 | System.out.println("Please select a working mode: "); 35 | System.out.println("\n"); 36 | System.out.println("\t1) Making dynamic search engine [Entering starting Serbian Wiki page, crawling, calculating PageRank аnd using СрчЕнџин.] "); 37 | System.out.println("\t2) Calculating PageRank Value and using СрчЕнџин for existing Wiki instances in database"); 38 | int choice = -1; 39 | int flag = 0; 40 | do { 41 | System.out.println("\n"); 42 | System.out.println("Enter a number: "); 43 | s = new Scanner(System.in); 44 | choice = s.nextInt(); 45 | 46 | if (choice == 1 || choice == 2 || choice == 0) { 47 | flag = 1; 48 | } 49 | 50 | 51 | } while (flag == 0); 52 | 53 | 54 | switch (choice) { 55 | case 0: 56 | System.out.println("Exiting СрчЕнџин... Хвала and goodbye."); 57 | return; 58 | 59 | case 1: 60 | System.out.println("Enter a starting Serbian Wiki web page for crawling: "); 61 | s = new Scanner(System.in); 62 | String urlRoot = s.next(); 63 | flag = 1; 64 | do { 65 | System.out.println("Enter a max number of crawled pages[not recommended more than 8000]: "); 66 | 67 | } while (flag == 0); 68 | s = new Scanner(System.in); 69 | choice = s.nextInt(); 70 | 71 | System.out.println("Crawling starts in 3 seconds... 
Sorry for very hysteric log."); 72 | Thread.sleep(3000); 73 | base.Crawler.algoritam(urlRoot, choice); 74 | System.out.println("Crawling finished."); 75 | System.out.println("\n"); 76 | 77 | System.out.println("Please enter floder location for saving H matrix and PageRank list: [format: D:\\folder\\]"); 78 | 79 | s = new Scanner(System.in); 80 | String loc = s.next(); 81 | 82 | 83 | System.out.println("Starting PageRank calculations in 3 seconds... A little better log."); 84 | PageRank pr = new PageRank(mongo.coll, loc, choice); 85 | System.out.println("PageRank calculations done. Log above."); 86 | System.out.println("\n"); 87 | 88 | System.out.println("Continuing to СрчЕнџин in 3 seconds..."); 89 | 90 | System.out.println("\n"); 91 | 92 | do { 93 | System.out.println("Enter a keyword[only Ћирилица]: [type \"0\" to exit the program.]"); 94 | s = new Scanner(System.in, "UTF-8"); 95 | String first = s.nextLine(); 96 | 97 | 98 | 99 | System.out.println(first); 100 | if (first.contains("exit")) return; 101 | 102 | SearchQuery sq = new SearchQuery(first, mongo.coll, pr.getPageRank()); 103 | 104 | } while(true); 105 | 106 | 107 | case 2: 108 | System.out.println("Trying to do calculations for existing files in specified database..."); 109 | Thread.sleep(1000); 110 | int num = mongo.numberOfElementsinBase(); 111 | if (num < 100) { 112 | System.out.println("Can't find more than 100 elements in database. Returning."); 113 | return; 114 | } 115 | System.out.println("\n"); 116 | System.out.println("Found " + num + " elements in database."); 117 | System.out.println("\n"); 118 | System.out.println("Please enter floder location for saving H matrix and PageRank list: [format: D:\\folder\\]"); 119 | 120 | s = new Scanner(System.in); 121 | String l = s.next(); 122 | 123 | 124 | 125 | PageRank p = new PageRank(mongo.coll, l, num); 126 | System.out.println("PageRank calculations done. 
Log above."); 127 | System.out.println("Continuing to СрчЕнџин in 3 seconds..."); 128 | 129 | 130 | do { 131 | System.out.println("Enter a keyword[only Ћирилица]: [type \"0\" to exit the program.]"); 132 | s = new Scanner(System.in, "UTF-8"); 133 | String first = s.nextLine(); 134 | 135 | 136 | 137 | 138 | if (first.contains("exit")) { 139 | System.out.println("Bye."); 140 | return; 141 | } 142 | 143 | SearchQuery sq = new SearchQuery(first, mongo.coll, p.getPageRank()); 144 | 145 | } while(true); 146 | 147 | 148 | 149 | 150 | 151 | default: 152 | break; 153 | } 154 | 155 | 156 | 157 | } 158 | 159 | } 160 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 
48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /src/main/java/base/PageRank.java: -------------------------------------------------------------------------------- 1 | package base; 2 | 3 | 4 | 5 | 6 | import java.io.Serializable; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.Collections; 10 | import java.util.List; 11 | 12 | import com.mongodb.BasicDBObject; 13 | import com.mongodb.DBCollection; 14 | import com.mongodb.DBCursor; 15 | 16 | import utils.SaveOpen; 17 | 18 | public class PageRank implements Serializable{ 19 | 20 | private static float H[][]; 21 | private static float A[][]; 22 | private static float I[][]; 23 | private static float p; 24 | private static float pocetniUslovi[]; 25 | private static float b[]; 26 | private static float x[]; 27 | final private static float d = (float) 0.85; 28 | private List pageRank = new ArrayList(); 29 | 30 | public static int n; //ideja je bila n = cursor.count(); ali outOfMemoryExeption 31 | 
32 | public PageRank(DBCollection coll, String loc, int no) { 33 | n = no; 34 | makeHMatrix(coll); 35 | 36 | saveHMatrix(loc + "H"); 37 | // H = openHMatrix("D:\\mongodb\\H.ha"); 38 | 39 | checkCollSum(H); 40 | 41 | 42 | makeOtherMatrixs(); 43 | System.out.println("Is matrix a diagonally dominant: " + isDiagonallyDominant(A)); 44 | 45 | 46 | 47 | x = methods.Jakobi.racunaj(A, b, pocetniUslovi, 1000, (float)0.000001); 48 | 49 | 50 | for (int i = 0; i < x.length; i++) { 51 | pageRank.add(new SortedPageRank(i, (int)(100000000*x[i]))); 52 | } 53 | 54 | Collections.sort(pageRank); 55 | Collections.reverse(pageRank); 56 | 57 | saveSortedPageRank(pageRank, loc + "pageRank"); 58 | 59 | 60 | 61 | } 62 | 63 | 64 | 65 | public static float[][] openHMatrix(String loc) { 66 | System.out.println("Opening H matrix from following location: " + loc); 67 | SaveOpen s = new SaveOpen(); 68 | return s.openMatrix(loc); 69 | } 70 | 71 | public static void saveHMatrix(String loc) { 72 | System.out.println("Saving H matrix to following location: " + loc + ".ha"); 73 | SaveOpen s = new SaveOpen(); 74 | s.saveMatrix(H, loc); 75 | } 76 | 77 | public static List openSortedPageRank(String loc) { 78 | System.out.println("Opening PageRank object from following location: " + loc); 79 | SaveOpen s = new SaveOpen(); 80 | return s.openSortedPageRank(loc); 81 | } 82 | 83 | public static void saveSortedPageRank(List o, String loc) { 84 | System.out.println("Saving PageRank object from following location: " + loc + ".pr"); 85 | SaveOpen s = new SaveOpen(); 86 | s.saveSortedPageRank(o, loc); 87 | } 88 | 89 | 90 | public void makeHMatrix(DBCollection coll) { 91 | DBCursor cursor; 92 | 93 | 94 | System.out.println("N[CursorCount] = " + n); 95 | 96 | 97 | //float matrix[][] = new float[n][n]; 98 | H = new float[n][n]; 99 | 100 | 101 | BasicDBObject keys = new BasicDBObject(); 102 | 103 | keys.put("vodiNa", true); 104 | keys.put("_id", false); 105 | final DBCursor usersCursor = coll.find(new BasicDBObject(), 
keys); 106 | 107 | 108 | 109 | 110 | long start = System.currentTimeMillis(); 111 | for (int i =0; i vodiNaList = (ArrayList) usersCursor.next().get("vodiNa"); 114 | ArrayList list = new ArrayList<>(); 115 | 116 | 117 | 118 | for (int j = 0; j < n; j++) { 119 | 120 | if (vodiNaList.contains(j)) { 121 | 122 | H[i][j] = 1; 123 | list.add(j); 124 | } 125 | 126 | } 127 | if (i % 1000 ==0 ) 128 | System.out.println("C LOOP: i: "+ i); 129 | } 130 | System.out.println("C done."); 131 | 132 | int sum; 133 | for (int i = 0; i < n; i++) { 134 | sum = 0; 135 | for (int j = 0; j < n; j++) { 136 | sum += H[j][i]; 137 | //System.out.println("H Loop 1 : i: "+ i + " \\ j: " + j); 138 | } 139 | for (int j = 0; j < n; j++) { 140 | if (H[j][i] == 1) 141 | H[j][i] = (float)1.0/sum; 142 | //System.out.println("H Loop 2 : i: "+ i + " \\ j: " + j); 143 | } 144 | if (i % 1000 ==0 ) 145 | System.out.println("H LOOP: i: "+ i); 146 | } 147 | System.out.println("H done."); 148 | 149 | 150 | long end = System.currentTimeMillis(); 151 | System.out.println("Time consumed for H matrix of " + n + " dimension: " + (end-start) + " ms."); 152 | //setH(matrix); 153 | 154 | 155 | 156 | //System.out.println(Arrays.deepToString(matrix)); 157 | } 158 | 159 | 160 | public void makeOtherMatrixs(){ 161 | p = (float)1.00000/ n; 162 | pocetniUslovi = new float[n]; 163 | b = new float[n]; 164 | I = new float[n][n]; 165 | A = new float[n][n]; 166 | System.out.println("Making A and b matrixs..."); 167 | long start = System.currentTimeMillis(); 168 | for(int i = 0; i< n ;i++) { 169 | 170 | pocetniUslovi[i] = 0; 171 | b[i] = (1 - d)*p; 172 | for(int j = 0; j< n;j++) 173 | { 174 | if(i==j) I[i][j] = 1; 175 | else I[i][j] = 0; 176 | 177 | A[i][j] = I[i][j] - d*H[i][j]; 178 | //System.out.println("A["+ i + "][" + j + "] = " + A[i][j]); 179 | } 180 | 181 | } 182 | long end = System.currentTimeMillis(); 183 | System.out.println("DONE with makeOtherMatrixs();"); 184 | System.out.println("Time consumed makeOtherMatrixs: " 
+ (end-start) + " ms."); 185 | } 186 | 187 | 188 | public static void print2D(float H[][]){ 189 | 190 | for (float[] row : H) 191 | System.out.println(Arrays.toString(row)); 192 | } 193 | 194 | public void setH(float mat[][]) { 195 | this.H = mat; 196 | } 197 | 198 | public float[][] getH(){ 199 | 200 | return this.H; 201 | } 202 | 203 | public void initI(){ 204 | 205 | 206 | } 207 | 208 | 209 | 210 | public static void checkCollSum(float H[][]) { 211 | int flag = 0; 212 | System.out.println("Checking column sums..." ); 213 | for (int i = 0; i < H.length; i++) { 214 | float sum = 0; 215 | for (int j = 0; j < H.length; j++) { 216 | sum += H[j][i]; 217 | 218 | } 219 | //System.out.println("Sum [" + i + "] = " + sum ); 220 | if ( sum < 0.99992 || sum > 1.00001) { 221 | System.out.println("Irregular sum of [" + i + "] column!" ); 222 | flag = 1; 223 | } 224 | 225 | } 226 | 227 | if (flag == 0) 228 | System.out.println("Sum of H matrix columns is just fine!"); 229 | else 230 | System.err.println("Sum of H matrix columns is corrupted! H matrix not well built. 
Again prick."); 231 | } 232 | 233 | 234 | public static void printPageRankObject(List o) { 235 | for (SortedPageRank element : o) { 236 | System.out.println("Value: " + element.value + " --- RBR: " + element.rbr); 237 | } 238 | 239 | } 240 | 241 | public static void printAll(DBCollection coll) { 242 | DBCursor cursor = coll.find(); 243 | try { 244 | while(cursor.hasNext()) 245 | System.out.println(cursor.next()); 246 | 247 | } finally { 248 | 249 | } 250 | 251 | } 252 | 253 | public List getPageRank(){ 254 | 255 | return pageRank; 256 | } 257 | 258 | public boolean isDiagonallyDominant(float[][] array) { 259 | int otherTotal = 0; 260 | 261 | // Loop through every row in the array 262 | for(int row = 0; row < array.length; row++) { 263 | otherTotal = 0; 264 | 265 | // Loop through every element in the row 266 | for(int column = 0; column < array[row].length; column++) { 267 | 268 | // If this element is NOT on the diagonal 269 | if(column != row) { 270 | 271 | // Add it to the running total 272 | otherTotal += Math.abs(array[row][column]); 273 | } 274 | } 275 | 276 | // If this diagonal element is LESS than the sum of the other ones... 277 | if(Math.abs(array[row][row]) < otherTotal) { 278 | 279 | // then the array isn't diagonally dominant and we can return. 280 | return false; 281 | } 282 | } 283 | return true; 284 | } 285 | 286 | } 287 | --------------------------------------------------------------------------------