├── .DS_Store ├── Inverted-Index ├── .DS_Store ├── .classpath ├── .project ├── InputFiles │ ├── .DS_Store │ ├── China.txt │ ├── India.txt │ ├── IndiaPakistanRelations.txt │ ├── IndiaUSChinaPakistanRelationship.txt │ ├── Russia.txt │ ├── USARussiaRelations.txt │ └── UnitedStates.txt ├── Resources for Input Files.txt ├── outputInvertedIndex │ ├── _SUCCESS │ └── part-r-00000 └── src │ ├── constants │ └── Constants.java │ └── invertedindex │ ├── InvertedIndex.java │ ├── InvertedIndexMapper.java │ └── InvertedIndexReducer.java ├── JAR_Files ├── .DS_Store ├── inverted-index.jar ├── matrix-multiplication.jar └── page-rank.jar ├── LICENSE.md ├── README.md ├── matrix-multiplication ├── .DS_Store ├── .classpath ├── .project ├── finalResult │ └── MatrixC.txt ├── input │ └── InputMatrices.txt ├── output │ ├── _SUCCESS │ └── part-r-00000 └── src │ ├── .DS_Store │ └── matrixmultiplication │ ├── .DS_Store │ ├── Constants.java │ ├── FileNames.java │ ├── MatrixCProcessing.java │ ├── MatrixMultiplication.java │ ├── MatrixMultiplicationCombiner.java │ ├── MatrixMultiplicationMapper.java │ └── MatrixMultiplicationReducer.java └── page-rank ├── .DS_Store ├── .classpath ├── .project ├── finalOutput └── finalOutput.txt ├── input ├── originalinput.txt └── pagerankinput.txt ├── outputs ├── output0 │ ├── _SUCCESS │ └── part-r-00000 ├── output1 │ ├── _SUCCESS │ └── part-r-00000 ├── output10 │ ├── _SUCCESS │ └── part-r-00000 ├── output2 │ ├── _SUCCESS │ └── part-r-00000 ├── output3 │ ├── _SUCCESS │ └── part-r-00000 ├── output4 │ ├── _SUCCESS │ └── part-r-00000 ├── output5 │ ├── _SUCCESS │ └── part-r-00000 ├── output6 │ ├── _SUCCESS │ └── part-r-00000 ├── output7 │ ├── _SUCCESS │ └── part-r-00000 ├── output8 │ ├── _SUCCESS │ └── part-r-00000 └── output9 │ ├── _SUCCESS │ └── part-r-00000 └── src ├── filepreprocess └── HadoopDFSFileReadWrite.java ├── finalpagerank └── FinalPageRankCalculator.java └── pagerank ├── PageRank.java ├── PageRankMapper.java └── PageRankReducer.java /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/.DS_Store -------------------------------------------------------------------------------- /Inverted-Index/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/Inverted-Index/.DS_Store -------------------------------------------------------------------------------- /Inverted-Index/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /Inverted-Index/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | Inverted-Index 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.apache.hadoop.eclipse.Nature 16 | org.eclipse.jdt.core.javanature 17 | 18 | 19 | -------------------------------------------------------------------------------- 
/Inverted-Index/InputFiles/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/Inverted-Index/InputFiles/.DS_Store -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/China.txt: -------------------------------------------------------------------------------- 1 | China officially the People Republic of China is a sovereign state located in East Asia 2 | It is the world most populous country 3 | The People Republic of China is a single party state governed by the Communist Party with its seat of government in the capital city of Beijing exercises jurisdiction many provinces five autonomous regions four direct controlled municipalities Beijing Tianjin Shanghai and Chongqing and two mostly self governing special administrative regions Hong Kong and Macau 4 | China also claims Taiwan which is controlled by the Republic of China a separate political entity as its extra province a claim which is controversial due to the complex political status of Taiwan -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/India.txt: -------------------------------------------------------------------------------- 1 | India officially the Republic of India is a country in South Asia 2 | It is the seventh largest country by area the second most populous country 3 | The most populous democracy in the world 4 | Bounded by the Indian Ocean on the south the Arabian Sea on the south west and the Bay of Bengal on the south east 5 | It shares land borders with Pakistan to the west China Nepal and Bhutan to the north east and Burma and Bangladesh to the east 6 | In the Indian Ocean India is in the vicinity of Sri Lanka and the Maldives 7 | In addition India Andaman and Nicobar Islands share a maritime border with Thailand and 
Indonesia -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/IndiaPakistanRelations.txt: -------------------------------------------------------------------------------- 1 | Relations between India and Pakistan have been strained by a number of historical and political issues and are defined by the violent partition of British India 2 | The Kashmir dispute and the numerous military conflicts fought between the two nations 3 | Consequently even though the two South Asian nations share historic cultural ethnic geographic and economic links their relationship has been plagued by hostility and suspicion 4 | After the dissolution of the British Raj two new sovereign nations were formed the Union of India and the Dominion of Pakistan 5 | The subsequent partition of the former British India displaced millions of people with estimates of loss of life varying from several hundred thousand to a million 6 | India emerged as a secular nation with a Hindu majority population and a large Muslim minority while Pakistan was established as an Islamic republic with an overwhelming Muslim majority population 7 | Soon after their independence India and Pakistan established diplomatic relations but the violent partition and numerous territorial disputes would overshadow their relationship 8 | Since their independence the two countries have fought three major wars one undeclared war and have been involved in numerous armed skirmishes and military standoffs 9 | The Kashmir dispute is the main centre point of all of these conflicts with the exception of the India Pakistan War and Bangladesh Liberation War which resulted in the secession of East Pakistan now Bangladesh -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/IndiaUSChinaPakistanRelationship.txt: -------------------------------------------------------------------------------- 1 | The India US China Pakistan strategic 
quadrilateral 2 | Although the disputed border between China and India is often highlighted as the major sticking point in Sino Indian relations in reality it has remained relatively peaceful since the end of a war the potential for overt military conflict in the region remains minimal 3 | Of much greater concern is the strategic quadrilateral relationship in South Asia involving China India the United States and Pakistan 4 | It has both regional and wider implications 5 | At the heart of this matter is the India Pakistan conflict over Kashmir and continuing US involvement in Afghanistan and Pakistan 6 | The relationships between these four actors are extremely complex 7 | China support for Pakistan in its conflict with India is a serious and ongoing source of tension in the Sino Indian relationship 8 | While the US relationship with Pakistan is looking increasingly fraught even as its relationship with India improves in the wake of a civilian nuclear deal 9 | Growing closeness between India and the United States has caused some concern in China about the possibility that the US may be establishing a policy of containment or encirclement 10 | This concern in turn affects China relationship with both the US and India 11 | Understanding this complex web of relationships is key to understanding the issues which are at the heart of China India relations and which affect markedly how these two countries interact in the region 12 | China still sees its South Asian interests as firmly linked with Pakistan a stance which is problematic not only for the Sino Indian relationship but also for the China US relationship because of significant US investment in Pakistan 13 | So although US relationships with India and Pakistan are more likely the result of America pursuing its national interests than an overt attempt to contain China the region volatility and its location in China traditional strategic backyard mean that any US attempt to befriend India or other South and 
Southeast Asian countries is often viewed with suspicion by the Chinese -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/Russia.txt: -------------------------------------------------------------------------------- 1 | Russia also officially known as the Russian Federation is a country in northern Eurasia 2 | It is a federal semi presidential republic 3 | Russia shares land borders with Norway Finland Estonia Latvia Lithuania and Poland Belarus Ukraine Georgia Azerbaijan Kazakhstan China Mongolia and North Korea 4 | It shares maritime borders with Japan by the Sea of Okhotsk 5 | The United States state of Alaska across the Bering Strait and Canada Arctic islands 6 | Russia is the largest country in the world covering more than one eighth of the Earth inhabited land area 7 | Russia is also the world ninth most populous nation 8 | Extending across the entirety of northern Asia and much of Eastern Europe Russia spans nine time zones and incorporates a wide range of environments and landforms -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/USARussiaRelations.txt: -------------------------------------------------------------------------------- 1 | Russia United States relations is the bilateral relationship between the Russian Federation and the United States of America and their predecessor states 2 | Relations between the United States and Russia has long been tense in the past 3 | In recent years the two countries have attempted to reset frosty relations and establish a growing partnership 4 | However as the two nations conflicting ideologies became more prominent hopes of a benevolent relationship between the two world powers have slowly deteriorated 5 | Despite this Russia and the United States still agree on supporting a bilateral agenda to confront several problems such as discarding stockpiles of nuclear weapons and combating the palpable threats 
of terrorism 6 | As unrest spread into eastern Ukraine relations between the USA and Russia became more strained 7 | Russian support for separatists fighting Ukrainian forces attracted USA sanctions 8 | After one bout of sanctions announced by President Obama Russian President Vladimir Putin said sanctions were driving Russia into a corner that could bring relations between the two countries to a dead end 9 | Relations between the two countries deteriorated further when Malaysia Airlines Flight was brought down by a surface to air missile in eastern Ukraine near the Russian border 10 | Obama said the missile was fired from an area controlled by Russian back separatists who he said were being supplied by Russia with sophisticated weapons training heavy arms and anti aircraft equipment -------------------------------------------------------------------------------- /Inverted-Index/InputFiles/UnitedStates.txt: -------------------------------------------------------------------------------- 1 | The United States of America or USA commonly referred to as the United States America and sometimes the States 2 | Is a federal republic consisting of many states and a federal district 3 | The contiguous states and Washington are in central North America between Canada and Mexico 4 | The state of Alaska is the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific 5 | The country also has five populated and nine unpopulated territories in the Pacific and the Caribbean 6 | The United States is third or fourth largest country by total area and third largest by population 7 | It is one of the most ethnically diverse and multicultural nations the product of large scale immigration from many countries 8 | The geography and climate of the United States is also extremely diverse and home to a wide variety of wildlife -------------------------------------------------------------------------------- /Inverted-Index/Resources for Input Files.txt: 
-------------------------------------------------------------------------------- 1 | http://en.wikipedia.org/wiki/United_States 2 | http://en.wikipedia.org/wiki/Russia 3 | http://en.wikipedia.org/wiki/China 4 | http://en.wikipedia.org/wiki/Russia–United_States_relations 5 | http://en.wikipedia.org/wiki/India–Pakistan_relations
http://en.wikipedia.org/wiki/Russia–United_States_relations 6 | http://www.eastasiaforum.org/2012/04/11/the-india-us-china-pakistan-strategic-quadrilateral/ 7 | http://en.wikipedia.org/wiki/India -------------------------------------------------------------------------------- /Inverted-Index/outputInvertedIndex/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/Inverted-Index/outputInvertedIndex/_SUCCESS -------------------------------------------------------------------------------- /Inverted-Index/outputInvertedIndex/part-r-00000: -------------------------------------------------------------------------------- 1 | a IndiaPakistanRelations UnitedStates Russia USARussiaRelations IndiaUSChinaPakistanRelationship China India 2 | about IndiaUSChinaPakistanRelationship 3 | across Russia 4 | actors IndiaUSChinaPakistanRelationship 5 | addition India 6 | administrative China 7 | affect IndiaUSChinaPakistanRelationship 8 | affects IndiaUSChinaPakistanRelationship 9 | afghanistan IndiaUSChinaPakistanRelationship 10 | after IndiaPakistanRelations USARussiaRelations 11 | agenda USARussiaRelations 12 | agree USARussiaRelations 13 | air USARussiaRelations 14 | aircraft USARussiaRelations 15 | airlines USARussiaRelations 16 | alaska UnitedStates Russia 17 | all IndiaPakistanRelations 18 | also UnitedStates Russia IndiaUSChinaPakistanRelationship China 19 | although IndiaUSChinaPakistanRelationship 20 | america UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship 21 | an IndiaPakistanRelations UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship 22 | and IndiaPakistanRelations UnitedStates Russia USARussiaRelations China India IndiaUSChinaPakistanRelationship 23 | andaman India 24 | announced USARussiaRelations 25 | anti USARussiaRelations 26 | any IndiaUSChinaPakistanRelationship 27 | arabian 
India 28 | archipelago UnitedStates 29 | arctic Russia 30 | are IndiaPakistanRelations UnitedStates IndiaUSChinaPakistanRelationship 31 | area UnitedStates USARussiaRelations Russia India 32 | armed IndiaPakistanRelations 33 | arms USARussiaRelations 34 | as IndiaPakistanRelations UnitedStates Russia USARussiaRelations IndiaUSChinaPakistanRelationship China 35 | asia Russia China India IndiaUSChinaPakistanRelationship 36 | asian IndiaPakistanRelations IndiaUSChinaPakistanRelationship 37 | at IndiaUSChinaPakistanRelationship 38 | attempt IndiaUSChinaPakistanRelationship 39 | attempted USARussiaRelations 40 | attracted USARussiaRelations 41 | autonomous China 42 | azerbaijan Russia 43 | back USARussiaRelations 44 | backyard IndiaUSChinaPakistanRelationship 45 | bangladesh IndiaPakistanRelations India 46 | bay India 47 | be IndiaUSChinaPakistanRelationship 48 | became USARussiaRelations 49 | because IndiaUSChinaPakistanRelationship 50 | been IndiaPakistanRelations USARussiaRelations 51 | befriend IndiaUSChinaPakistanRelationship 52 | beijing China 53 | being USARussiaRelations 54 | belarus Russia 55 | benevolent USARussiaRelations 56 | bengal India 57 | bering Russia 58 | between IndiaPakistanRelations UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship 59 | bhutan India 60 | bilateral USARussiaRelations 61 | border USARussiaRelations India IndiaUSChinaPakistanRelationship 62 | borders Russia India 63 | both IndiaUSChinaPakistanRelationship 64 | bounded India 65 | bout USARussiaRelations 66 | bring USARussiaRelations 67 | british IndiaPakistanRelations 68 | brought USARussiaRelations 69 | burma India 70 | but IndiaPakistanRelations IndiaUSChinaPakistanRelationship 71 | by IndiaPakistanRelations UnitedStates Russia USARussiaRelations China IndiaUSChinaPakistanRelationship India 72 | canada UnitedStates Russia 73 | capital China 74 | caribbean UnitedStates 75 | caused IndiaUSChinaPakistanRelationship 76 | central UnitedStates 77 | centre 
IndiaPakistanRelations 78 | china Russia China IndiaUSChinaPakistanRelationship India 79 | chinese IndiaUSChinaPakistanRelationship 80 | chongqing China 81 | city China 82 | civilian IndiaUSChinaPakistanRelationship 83 | claim China 84 | claims China 85 | climate UnitedStates 86 | closeness IndiaUSChinaPakistanRelationship 87 | combating USARussiaRelations 88 | commonly UnitedStates 89 | communist China 90 | complex IndiaUSChinaPakistanRelationship China 91 | concern IndiaUSChinaPakistanRelationship 92 | conflict IndiaUSChinaPakistanRelationship 93 | conflicting USARussiaRelations 94 | conflicts IndiaPakistanRelations 95 | confront USARussiaRelations 96 | consequently IndiaPakistanRelations 97 | consisting UnitedStates 98 | contain IndiaUSChinaPakistanRelationship 99 | containment IndiaUSChinaPakistanRelationship 100 | contiguous UnitedStates 101 | continuing IndiaUSChinaPakistanRelationship 102 | controlled USARussiaRelations China 103 | controversial China 104 | corner USARussiaRelations 105 | could USARussiaRelations 106 | countries IndiaPakistanRelations UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship 107 | country UnitedStates Russia India China 108 | covering Russia 109 | cultural IndiaPakistanRelations 110 | dead USARussiaRelations 111 | deal IndiaUSChinaPakistanRelationship 112 | defined IndiaPakistanRelations 113 | democracy India 114 | despite USARussiaRelations 115 | deteriorated USARussiaRelations 116 | diplomatic IndiaPakistanRelations 117 | direct China 118 | discarding USARussiaRelations 119 | displaced IndiaPakistanRelations 120 | dispute IndiaPakistanRelations 121 | disputed IndiaUSChinaPakistanRelationship 122 | disputes IndiaPakistanRelations 123 | dissolution IndiaPakistanRelations 124 | district UnitedStates 125 | diverse UnitedStates 126 | dominion IndiaPakistanRelations 127 | down USARussiaRelations 128 | driving USARussiaRelations 129 | due China 130 | earth Russia 131 | east IndiaPakistanRelations India China 132 | eastern 
USARussiaRelations Russia 133 | economic IndiaPakistanRelations 134 | eighth Russia 135 | emerged IndiaPakistanRelations 136 | encirclement IndiaUSChinaPakistanRelationship 137 | end USARussiaRelations IndiaUSChinaPakistanRelationship 138 | entirety Russia 139 | entity China 140 | environments Russia 141 | equipment USARussiaRelations 142 | establish USARussiaRelations 143 | established IndiaPakistanRelations 144 | establishing IndiaUSChinaPakistanRelationship 145 | estimates IndiaPakistanRelations 146 | estonia Russia 147 | ethnic IndiaPakistanRelations 148 | ethnically UnitedStates 149 | eurasia Russia 150 | europe Russia 151 | even IndiaPakistanRelations IndiaUSChinaPakistanRelationship 152 | exception IndiaPakistanRelations 153 | exercises China 154 | extending Russia 155 | extra China 156 | extremely UnitedStates IndiaUSChinaPakistanRelationship 157 | federal UnitedStates Russia 158 | federation USARussiaRelations Russia 159 | fighting USARussiaRelations 160 | finland Russia 161 | fired USARussiaRelations 162 | firmly IndiaUSChinaPakistanRelationship 163 | five UnitedStates China 164 | flight USARussiaRelations 165 | for USARussiaRelations IndiaUSChinaPakistanRelationship 166 | forces USARussiaRelations 167 | formed IndiaPakistanRelations 168 | former IndiaPakistanRelations 169 | fought IndiaPakistanRelations 170 | four China IndiaUSChinaPakistanRelationship 171 | fourth UnitedStates 172 | fraught IndiaUSChinaPakistanRelationship 173 | from IndiaPakistanRelations UnitedStates USARussiaRelations 174 | frosty USARussiaRelations 175 | further USARussiaRelations 176 | geographic IndiaPakistanRelations 177 | geography UnitedStates 178 | georgia Russia 179 | governed China 180 | governing China 181 | government China 182 | greater IndiaUSChinaPakistanRelationship 183 | growing USARussiaRelations IndiaUSChinaPakistanRelationship 184 | has IndiaPakistanRelations UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship 185 | have IndiaPakistanRelations 
USARussiaRelations 186 | hawaii UnitedStates 187 | he USARussiaRelations 188 | heart IndiaUSChinaPakistanRelationship 189 | heavy USARussiaRelations 190 | highlighted IndiaUSChinaPakistanRelationship 191 | hindu IndiaPakistanRelations 192 | historic IndiaPakistanRelations 193 | historical IndiaPakistanRelations 194 | home UnitedStates 195 | hong China 196 | hopes USARussiaRelations 197 | hostility IndiaPakistanRelations 198 | how IndiaUSChinaPakistanRelationship 199 | however USARussiaRelations 200 | hundred IndiaPakistanRelations 201 | ideologies USARussiaRelations 202 | immigration UnitedStates 203 | implications IndiaUSChinaPakistanRelationship 204 | improves IndiaUSChinaPakistanRelationship 205 | in IndiaPakistanRelations UnitedStates Russia USARussiaRelations China India IndiaUSChinaPakistanRelationship 206 | incorporates Russia 207 | increasingly IndiaUSChinaPakistanRelationship 208 | independence IndiaPakistanRelations 209 | india IndiaPakistanRelations India IndiaUSChinaPakistanRelationship 210 | indian IndiaUSChinaPakistanRelationship India 211 | indonesia India 212 | inhabited Russia 213 | interact IndiaUSChinaPakistanRelationship 214 | interests IndiaUSChinaPakistanRelationship 215 | into USARussiaRelations 216 | investment IndiaUSChinaPakistanRelationship 217 | involved IndiaPakistanRelations 218 | involvement IndiaUSChinaPakistanRelationship 219 | involving IndiaUSChinaPakistanRelationship 220 | is IndiaPakistanRelations UnitedStates Russia USARussiaRelations India China IndiaUSChinaPakistanRelationship 221 | islamic IndiaPakistanRelations 222 | islands Russia India 223 | issues IndiaPakistanRelations IndiaUSChinaPakistanRelationship 224 | it UnitedStates Russia India IndiaUSChinaPakistanRelationship China 225 | its IndiaUSChinaPakistanRelationship China 226 | japan Russia 227 | jurisdiction China 228 | kashmir IndiaPakistanRelations IndiaUSChinaPakistanRelationship 229 | kazakhstan Russia 230 | key IndiaUSChinaPakistanRelationship 231 | known Russia 
232 | kong China 233 | korea Russia 234 | land Russia India 235 | landforms Russia 236 | lanka India 237 | large IndiaPakistanRelations UnitedStates 238 | largest UnitedStates Russia India 239 | latvia Russia 240 | liberation IndiaPakistanRelations 241 | life IndiaPakistanRelations 242 | likely IndiaUSChinaPakistanRelationship 243 | linked IndiaUSChinaPakistanRelationship 244 | links IndiaPakistanRelations 245 | lithuania Russia 246 | located China 247 | location IndiaUSChinaPakistanRelationship 248 | long USARussiaRelations 249 | looking IndiaUSChinaPakistanRelationship 250 | loss IndiaPakistanRelations 251 | macau China 252 | main IndiaPakistanRelations 253 | major IndiaPakistanRelations IndiaUSChinaPakistanRelationship 254 | majority IndiaPakistanRelations 255 | malaysia USARussiaRelations 256 | maldives India 257 | many UnitedStates China 258 | maritime Russia India 259 | markedly IndiaUSChinaPakistanRelationship 260 | matter IndiaUSChinaPakistanRelationship 261 | may IndiaUSChinaPakistanRelationship 262 | mean IndiaUSChinaPakistanRelationship 263 | mexico UnitedStates 264 | mid-pacific UnitedStates 265 | military IndiaPakistanRelations IndiaUSChinaPakistanRelationship 266 | million IndiaPakistanRelations 267 | millions IndiaPakistanRelations 268 | minimal IndiaUSChinaPakistanRelationship 269 | minority IndiaPakistanRelations 270 | missile USARussiaRelations 271 | mongolia Russia 272 | more USARussiaRelations Russia IndiaUSChinaPakistanRelationship 273 | most UnitedStates Russia China India 274 | mostly China 275 | much Russia IndiaUSChinaPakistanRelationship 276 | multicultural UnitedStates 277 | municipalities China 278 | muslim IndiaPakistanRelations 279 | nation IndiaPakistanRelations Russia 280 | national IndiaUSChinaPakistanRelationship 281 | nations IndiaPakistanRelations UnitedStates USARussiaRelations 282 | near USARussiaRelations 283 | nepal India 284 | new IndiaPakistanRelations 285 | nicobar India 286 | nine UnitedStates Russia 287 | ninth Russia 
288 | north UnitedStates Russia India 289 | northern Russia 290 | northwestern UnitedStates 291 | norway Russia 292 | not IndiaUSChinaPakistanRelationship 293 | now IndiaPakistanRelations 294 | nuclear USARussiaRelations IndiaUSChinaPakistanRelationship 295 | number IndiaPakistanRelations 296 | numerous IndiaPakistanRelations 297 | obama USARussiaRelations 298 | ocean India 299 | of IndiaPakistanRelations UnitedStates USARussiaRelations Russia China IndiaUSChinaPakistanRelationship India 300 | officially Russia China India 301 | often IndiaUSChinaPakistanRelationship 302 | okhotsk Russia 303 | on USARussiaRelations India 304 | one IndiaPakistanRelations UnitedStates Russia USARussiaRelations 305 | ongoing IndiaUSChinaPakistanRelationship 306 | only IndiaUSChinaPakistanRelationship 307 | or UnitedStates IndiaUSChinaPakistanRelationship 308 | other IndiaUSChinaPakistanRelationship 309 | over IndiaUSChinaPakistanRelationship 310 | overshadow IndiaPakistanRelations 311 | overt IndiaUSChinaPakistanRelationship 312 | overwhelming IndiaPakistanRelations 313 | pacific UnitedStates 314 | pakistan IndiaPakistanRelations India IndiaUSChinaPakistanRelationship 315 | palpable USARussiaRelations 316 | part UnitedStates 317 | partition IndiaPakistanRelations 318 | partnership USARussiaRelations 319 | party China 320 | past USARussiaRelations 321 | peaceful IndiaUSChinaPakistanRelationship 322 | people IndiaPakistanRelations China 323 | plagued IndiaPakistanRelations 324 | point IndiaPakistanRelations IndiaUSChinaPakistanRelationship 325 | poland Russia 326 | policy IndiaUSChinaPakistanRelationship 327 | political IndiaPakistanRelations China 328 | populated UnitedStates 329 | population IndiaPakistanRelations UnitedStates 330 | populous Russia China India 331 | possibility IndiaUSChinaPakistanRelationship 332 | potential IndiaUSChinaPakistanRelationship 333 | powers USARussiaRelations 334 | predecessor USARussiaRelations 335 | president USARussiaRelations 336 | presidential 
Russia 337 | problematic IndiaUSChinaPakistanRelationship 338 | problems USARussiaRelations 339 | product UnitedStates 340 | prominent USARussiaRelations 341 | province China 342 | provinces China 343 | pursuing IndiaUSChinaPakistanRelationship 344 | putin USARussiaRelations 345 | quadrilateral IndiaUSChinaPakistanRelationship 346 | raj IndiaPakistanRelations 347 | range Russia 348 | reality IndiaUSChinaPakistanRelationship 349 | recent USARussiaRelations 350 | referred UnitedStates 351 | region IndiaUSChinaPakistanRelationship 352 | regional IndiaUSChinaPakistanRelationship 353 | regions China 354 | relations IndiaPakistanRelations USARussiaRelations IndiaUSChinaPakistanRelationship 355 | relationship IndiaPakistanRelations USARussiaRelations IndiaUSChinaPakistanRelationship 356 | relationships IndiaUSChinaPakistanRelationship 357 | relatively IndiaUSChinaPakistanRelationship 358 | remained IndiaUSChinaPakistanRelationship 359 | remains IndiaUSChinaPakistanRelationship 360 | republic IndiaPakistanRelations UnitedStates Russia India China 361 | reset USARussiaRelations 362 | result IndiaUSChinaPakistanRelationship 363 | resulted IndiaPakistanRelations 364 | russia Russia USARussiaRelations 365 | russian USARussiaRelations Russia 366 | said USARussiaRelations 367 | sanctions USARussiaRelations 368 | scale UnitedStates 369 | sea Russia India 370 | seat China 371 | secession IndiaPakistanRelations 372 | second India 373 | secular IndiaPakistanRelations 374 | sees IndiaUSChinaPakistanRelationship 375 | self China 376 | semi Russia 377 | separate China 378 | separatists USARussiaRelations 379 | serious IndiaUSChinaPakistanRelationship 380 | seventh India 381 | several IndiaPakistanRelations USARussiaRelations 382 | shanghai China 383 | share IndiaPakistanRelations India 384 | shares Russia India 385 | significant IndiaUSChinaPakistanRelationship 386 | since IndiaPakistanRelations IndiaUSChinaPakistanRelationship 387 | single China 388 | sino 
IndiaUSChinaPakistanRelationship 389 | skirmishes IndiaPakistanRelations 390 | slowly USARussiaRelations 391 | so IndiaUSChinaPakistanRelationship 392 | some IndiaUSChinaPakistanRelationship 393 | sometimes UnitedStates 394 | soon IndiaPakistanRelations 395 | sophisticated USARussiaRelations 396 | source IndiaUSChinaPakistanRelationship 397 | south IndiaPakistanRelations India IndiaUSChinaPakistanRelationship 398 | southeast IndiaUSChinaPakistanRelationship 399 | sovereign IndiaPakistanRelations China 400 | spans Russia 401 | special China 402 | spread USARussiaRelations 403 | sri India 404 | stance IndiaUSChinaPakistanRelationship 405 | standoffs IndiaPakistanRelations 406 | state UnitedStates Russia China 407 | states UnitedStates USARussiaRelations Russia IndiaUSChinaPakistanRelationship 408 | status China 409 | sticking IndiaUSChinaPakistanRelationship 410 | still USARussiaRelations IndiaUSChinaPakistanRelationship 411 | stockpiles USARussiaRelations 412 | strained IndiaPakistanRelations USARussiaRelations 413 | strait Russia 414 | strategic IndiaUSChinaPakistanRelationship 415 | subsequent IndiaPakistanRelations 416 | such USARussiaRelations 417 | supplied USARussiaRelations 418 | support USARussiaRelations IndiaUSChinaPakistanRelationship 419 | supporting USARussiaRelations 420 | surface USARussiaRelations 421 | suspicion IndiaPakistanRelations IndiaUSChinaPakistanRelationship 422 | taiwan China 423 | tense USARussiaRelations 424 | tension IndiaUSChinaPakistanRelationship 425 | territorial IndiaPakistanRelations 426 | territories UnitedStates 427 | terrorism USARussiaRelations 428 | thailand India 429 | than Russia IndiaUSChinaPakistanRelationship 430 | that USARussiaRelations IndiaUSChinaPakistanRelationship 431 | the IndiaPakistanRelations UnitedStates USARussiaRelations Russia India China IndiaUSChinaPakistanRelationship 432 | their IndiaPakistanRelations USARussiaRelations 433 | these IndiaPakistanRelations IndiaUSChinaPakistanRelationship 434 | third 
UnitedStates 435 | this USARussiaRelations IndiaUSChinaPakistanRelationship 436 | though IndiaPakistanRelations 437 | thousand IndiaPakistanRelations 438 | threats USARussiaRelations 439 | three IndiaPakistanRelations 440 | tianjin China 441 | time Russia 442 | to IndiaPakistanRelations UnitedStates USARussiaRelations IndiaUSChinaPakistanRelationship India China 443 | total UnitedStates 444 | traditional IndiaUSChinaPakistanRelationship 445 | training USARussiaRelations 446 | turn IndiaUSChinaPakistanRelationship 447 | two IndiaPakistanRelations USARussiaRelations China IndiaUSChinaPakistanRelationship 448 | ukraine USARussiaRelations Russia 449 | ukrainian USARussiaRelations 450 | undeclared IndiaPakistanRelations 451 | understanding IndiaUSChinaPakistanRelationship 452 | union IndiaPakistanRelations 453 | united UnitedStates Russia USARussiaRelations IndiaUSChinaPakistanRelationship 454 | unpopulated UnitedStates 455 | unrest USARussiaRelations 456 | us IndiaUSChinaPakistanRelationship 457 | usa UnitedStates USARussiaRelations 458 | variety UnitedStates 459 | varying IndiaPakistanRelations 460 | vicinity India 461 | viewed IndiaUSChinaPakistanRelationship 462 | violent IndiaPakistanRelations 463 | vladimir USARussiaRelations 464 | volatility IndiaUSChinaPakistanRelationship 465 | wake IndiaUSChinaPakistanRelationship 466 | war IndiaPakistanRelations IndiaUSChinaPakistanRelationship 467 | wars IndiaPakistanRelations 468 | was IndiaPakistanRelations USARussiaRelations 469 | washington UnitedStates 470 | weapons USARussiaRelations 471 | web IndiaUSChinaPakistanRelationship 472 | were IndiaPakistanRelations USARussiaRelations 473 | west India 474 | when USARussiaRelations 475 | which IndiaPakistanRelations China IndiaUSChinaPakistanRelationship 476 | while IndiaPakistanRelations IndiaUSChinaPakistanRelationship 477 | who USARussiaRelations 478 | wide UnitedStates Russia 479 | wider IndiaUSChinaPakistanRelationship 480 | wildlife UnitedStates 481 | with 
IndiaPakistanRelations USARussiaRelations Russia India IndiaUSChinaPakistanRelationship China 482 | world USARussiaRelations Russia China India 483 | would IndiaPakistanRelations 484 | years USARussiaRelations 485 | zones Russia 486 | 
--------------------------------------------------------------------------------
/Inverted-Index/src/constants/Constants.java:
--------------------------------------------------------------------------------
package constants;

/**
 * HDFS locations used by the inverted-index job.
 */
public interface Constants {
    /** HDFS directory holding the documents to index. */
    final String inputPath = "/invertedIndex/inputDocuments";
    /** HDFS directory the job writes the inverted index to. */
    final String outputPath = "/invertedIndex/outputInvertedIndex";
}
--------------------------------------------------------------------------------
/Inverted-Index/src/invertedindex/InvertedIndex.java:
--------------------------------------------------------------------------------
package invertedindex;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import constants.Constants;

/**
 * Driver for the inverted-index MapReduce job: clears the previous output
 * directory, configures the mapper/reducer and runs the job.
 */
public class InvertedIndex {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(invertedindex.InvertedIndex.class);

        /*
         * Code can be inserted to do the Data Munging
         * That can include removing stop words, punctuation, etc.
         * But that is not the scope of this Project
         */

        // Delete Output directory if it already exists
        deleteFolder(conf, Constants.outputPath);

        myMapReduceTask(job, Constants.inputPath, Constants.outputPath);
    }

    /**
     * Recursively deletes {@code folderPath} from the configured file system
     * if it exists; does nothing otherwise.
     *
     * @param conf       Hadoop configuration used to resolve the file system
     * @param folderPath path of the folder to remove
     * @throws IOException if the file system cannot be reached or the delete fails
     */
    public static void deleteFolder(Configuration conf, String folderPath)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(folderPath);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }

    /**
     * Configures {@code job} as the inverted-index job and blocks until it finishes.
     *
     * @param job        job to configure and run
     * @param inputPath  directory containing the documents to index
     * @param outputPath directory to write the index to (must not exist yet)
     */
    public static void myMapReduceTask(Job job, String inputPath, String outputPath) throws
            IllegalArgumentException,
            IOException,
            ClassNotFoundException,
            InterruptedException {
        // Set Mapper Class
        job.setMapperClass(InvertedIndexMapper.class);

        // Set Mapper Output Types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Set Reducer Class
        job.setReducerClass(InvertedIndexReducer.class);

        // Set the Reducer Output Types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Honour the caller-supplied directories instead of silently
        // falling back to the Constants values.
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Wait for the Mapper and Reducer to finish their execution
        if (!job.waitForCompletion(true)) {
            System.err.println("Inverted index job failed; output under "
                    + outputPath + " may be incomplete");
        }
    }
}
--------------------------------------------------------------------------------
/Inverted-Index/src/invertedindex/InvertedIndexMapper.java:
--------------------------------------------------------------------------------
package invertedindex;

import java.io.IOException;
import java.util.Locale;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Emits one {@code <word, fileName>} pair for every whitespace-separated
 * token of every input line.
 */
public class InvertedIndexMapper extends Mapper<Object, Text, Text, Text> {
    // Reused across calls; context.write serialises the contents immediately.
    private final Text word = new Text();
    private final Text document = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {

        // To know which document I am currently reading
        String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
        document.set(fileName);

        // Get each key word in the Document that is being read
        String[] indexKeys = value.toString().split("\\s+");

        for (String indexKey : indexKeys) {
            // split() yields an empty first token for blank or indented
            // lines; an empty word must not become an index key.
            if (indexKey.isEmpty()) {
                continue;
            }
            // Emit the <word, fileName> pair
            word.set(indexKey.toLowerCase(Locale.ROOT));
            context.write(word, document);
        }
    }
}
--------------------------------------------------------------------------------
/Inverted-Index/src/invertedindex/InvertedIndexReducer.java:
--------------------------------------------------------------------------------
package invertedindex;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Collapses all document names received for a word into a single
 * space-separated, duplicate-free list.
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
    private static final String TEXT_EXTENSION = ".txt";

    @Override
    public void reduce(Text keyIndexWord, Iterable<Text> valuesDocumentNames, Context context)
            throws IOException, InterruptedException {
        // Names of the documents in which the key word appeared (no duplicates)
        Set<String> documentNames = new HashSet<String>();
        for (Text valueDocumentName : valuesDocumentNames) {
            documentNames.add(valueDocumentName.toString());
        }

        StringBuilder indexString = new StringBuilder();
        for (String documentName : documentNames) {
            if (indexString.length() > 0) {
                indexString.append(' ');
            }
            // Literal replace: replaceAll(".txt", "") would treat '.' as a
            // regex wildcard and also strip e.g. "atxt".
            indexString.append(documentName.replace(TEXT_EXTENSION, ""));
        }

        context.write(keyIndexWord, new Text(indexString.toString().trim()));
    }
}
--------------------------------------------------------------------------------
/JAR_Files/.DS_Store:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/JAR_Files/.DS_Store -------------------------------------------------------------------------------- /JAR_Files/inverted-index.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/JAR_Files/inverted-index.jar -------------------------------------------------------------------------------- /JAR_Files/matrix-multiplication.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/JAR_Files/matrix-multiplication.jar -------------------------------------------------------------------------------- /JAR_Files/page-rank.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/JAR_Files/page-rank.jar -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Ankit Sarraf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright 
notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Algorithm-Implementation-Using-Map-Reduce 2 | ========================================= 3 | 4 | Realized 3 popular applications of Map Reduce in this pet project 5 | 1) Implemented the Page Rank algorithm to estimate the page rank of all nodes given a unidirectional connected graph represented in the form of an adjacency matrix as input 6 | 2) Constructed an Inverted Index for all the words occurring in a given set of documents 7 | 3) Calculated Matrix (Vector) Product of two 3 X 3 matrices 8 | 9 | PAGE RANK ALGORITHM: Project to find the page rank of nodes in a unidirectional connected graph. 10 | 1. A file containing the Adjacency Matrix for the graph was fetched from the Hadoop Distributed File System (HDFS) as an input to the Map Reduce task. 11 | 2. The output, the final page rank (in the form of the probability of reaching the Node), was stored back to a file on the Hadoop Distributed File System (HDFS). 12 | 13 | INVERTED INDEX: Project to read documents as input and construct an inverted index for each word occurring in those documents. 14 | 1. 
A number of documents containing text related to various countries were fetched from the Hadoop Distributed File System (HDFS) as an input for the Map Reduce task. 15 | 2. The output representing an inverted index (with the words as keys and the names of the documents in which each word appears as values) was stored back to a file on the Hadoop Distributed File System (HDFS). 16 | 17 | MATRIX MULTIPLICATION: Project to Read a file containing two 3 X 3 Matrices and calculate their Vector Product. 18 | 1. A file containing two Matrices - MatrixA and MatrixB, was fetched from the Hadoop Distributed File System (HDFS) as an input for the Map Reduce task. 19 | 2. The output was the Vector Product (MatrixC = MatrixA X MatrixB) that was stored back to a file on the Hadoop Distributed File System (HDFS). 20 | -------------------------------------------------------------------------------- /matrix-multiplication/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/matrix-multiplication/.DS_Store -------------------------------------------------------------------------------- /matrix-multiplication/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 
128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /matrix-multiplication/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | matrix-multiplication 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.apache.hadoop.eclipse.Nature 16 | org.eclipse.jdt.core.javanature 17 | 18 | 19 | -------------------------------------------------------------------------------- /matrix-multiplication/finalResult/MatrixC.txt: -------------------------------------------------------------------------------- 1 | Matrix C 2 | 5 14 23 3 | 14 50 86 4 | 23 86 149 5 | -------------------------------------------------------------------------------- /matrix-multiplication/input/InputMatrices.txt: -------------------------------------------------------------------------------- 1 | MatrixA 2 | 0 1 2 3 | 3 4 5 4 | 6 7 8 5 | MatrixB 6 | 0 3 6 7 | 1 4 7 8 | 2 5 8 -------------------------------------------------------------------------------- /matrix-multiplication/output/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/matrix-multiplication/output/_SUCCESS -------------------------------------------------------------------------------- /matrix-multiplication/output/part-r-00000: -------------------------------------------------------------------------------- 1 | (0,0) 5 2 | (0,1) 14 3 | (0,2) 23 4 | (1,0) 14 5 | (1,1) 50 6 | (1,2) 86 7 | (2,0) 23 8 | (2,1) 86 9 | (2,2) 149 10 | -------------------------------------------------------------------------------- /matrix-multiplication/src/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/matrix-multiplication/src/.DS_Store
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/matrix-multiplication/src/matrixmultiplication/.DS_Store
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/Constants.java:
--------------------------------------------------------------------------------
// Interface that stores Constants to be used in the project

package matrixmultiplication;

public interface Constants {
    /** HDFS directory holding the input matrices file. */
    final String inputFilePath = "/matrixMultiplication/input";
    /** HDFS directory the MapReduce job writes its raw output to. */
    final String outputFilePath = "/matrixMultiplication/output";
    /** HDFS file the formatted result matrix is written to. */
    final String finalResultPath = "/matrixMultiplication/finalResult/MatrixC.txt";
    /** Number of rows and of columns of each (square) matrix. */
    final int DIMENSIONS = 3;
}
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/FileNames.java:
--------------------------------------------------------------------------------
package matrixmultiplication;

/** HDFS input/output directories; the same values also appear in {@link Constants}. */
public interface FileNames {
    final String inputFilePath = "/matrixMultiplication/input";
    final String outputFilePath = "/matrixMultiplication/output";
}
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/MatrixCProcessing.java:
--------------------------------------------------------------------------------
// Java Program to process the output generated by the Map Reduce Job
// To obtain the final Matrix C = A X B

package matrixmultiplication;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MatrixCProcessing {
    private int[][] MatrixC;

    MatrixCProcessing() {
        MatrixC = new int[Constants.DIMENSIONS][Constants.DIMENSIONS];
    }

    void usage() {
        System.out.println("Usage : HadoopDFSFileReadWrite ");
        System.exit(1);
    }

    void printErrorMessage(String str) {
        System.out.println(str);
    }

    /**
     * Reads the reducer output (one {@code (i,j) value} entry per line) and
     * writes the result as a tab-separated matrix preceded by a title line.
     *
     * @param inputFileName  reducer output file, e.g. {@code .../part-r-00000}
     * @param outputFileName file to write the formatted matrix to; replaced if present
     * @throws IOException if the input is missing, is not a file, contains a
     *                     malformed line, or any file-system operation fails
     */
    public void getMatrixC(String inputFileName, String outputFileName) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hadoop DFS deals with Path
        Path inFile = new Path(inputFileName);
        Path outFile = new Path(outputFileName);

        // Check if input/output are valid
        if (!fs.exists(inFile)) {
            printErrorMessage("Input file not found");
            throw new IOException("Input file not found: " + inputFileName);
        }
        if (!fs.isFile(inFile)) {
            printErrorMessage("Input should be a file");
            throw new IOException("Input should be a file: " + inputFileName);
        }
        if (fs.exists(outFile)) {
            printErrorMessage("Final result file exists. Deleting it");
            fs.delete(outFile, true);
        }

        // Parse everything before creating the output so that a malformed
        // input never leaves a half-written result file behind.
        readReducerOutput(fs, inFile);
        writeMatrix(fs, outFile);
    }

    // Fills MatrixC from the "(i,j) value" lines of the reducer output.
    private void readReducerOutput(FileSystem fs, Path inFile) throws IOException {
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(fs.open(inFile), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                String entry = line.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                try {
                    String[] fields = entry.split("\\s+");
                    // Drop the surrounding brackets of "(i,j)"
                    String index = fields[0].substring(1, fields[0].length() - 1);
                    String[] coordinates = index.split(",");
                    int i = Integer.parseInt(coordinates[0]);
                    int j = Integer.parseInt(coordinates[1]);
                    MatrixC[i][j] = Integer.parseInt(fields[1]);
                } catch (NumberFormatException | IndexOutOfBoundsException e) {
                    // Propagate instead of swallowing: the caller must not
                    // report success for a matrix that was never filled in.
                    throw new IOException("Malformed reducer output line: '" + line + "'", e);
                }
            }
        }
    }

    // Writes the title line followed by one tab-separated row per matrix row.
    private void writeMatrix(FileSystem fs, Path outFile) throws IOException {
        try (FSDataOutputStream out = fs.create(outFile)) {
            out.writeBytes("Matrix C\n");
            for (int i = 0; i < Constants.DIMENSIONS; i++) {
                for (int j = 0; j < Constants.DIMENSIONS; j++) {
                    out.writeBytes(MatrixC[i][j] + "\t");
                }
                out.writeBytes("\n");
            }
        }
    }
}
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/MatrixMultiplication.java:
--------------------------------------------------------------------------------
/**
 * DEVELOPER: ANKIT SARRAF
 * ABOUT: This project finds the Matrix Multiplication of 3 X 3 Integer Matrices
 */

package matrixmultiplication;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MatrixMultiplication {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");

        try {
            // Delete the output folder if it already exists
            deleteFolder(conf, Constants.outputFilePath);
            deleteFolder(conf, Constants.finalResultPath);

            // Method to call the Map, Combine and Reduce Task
            myMCRTask(job);

            new MatrixCProcessing().getMatrixC(
                    Constants.outputFilePath + "/part-r-00000", Constants.finalResultPath);

            System.out.println("OPERATION SUCCESSFUL");
            System.out.println("FINAL OUTPUT WRITTEN TO FILE: " + Constants.finalResultPath);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt status for whoever runs this driver.
                Thread.currentThread().interrupt();
            }
            System.out.println("Cannot Continue ! Some Issue");
            System.out.println("Reason => " + e.getMessage());
        }
    }

    /**
     * Configures the map/combine/reduce classes on {@code job}, runs it and
     * waits for completion.
     *
     * @throws IOException if the job cannot be submitted or finishes unsuccessfully
     */
    private static void myMCRTask(Job job)
            throws IllegalArgumentException,
            IOException,
            ClassNotFoundException,
            InterruptedException {
        job.setJarByClass(matrixmultiplication.MatrixMultiplication.class);

        // Define the MCR Classes
        job.setMapperClass(MatrixMultiplicationMapper.class);
        job.setCombinerClass(MatrixMultiplicationCombiner.class);
        job.setReducerClass(MatrixMultiplicationReducer.class);

        // Mapper (and therefore Combiner) Output Key Value Types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer Output Key Value Types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(Constants.inputFilePath));
        FileOutputFormat.setOutputPath(job, new Path(Constants.outputFilePath));

        if (!job.waitForCompletion(true)) {
            // Without this the driver would go on to post-process output
            // that was never produced.
            throw new IOException("Matrix multiplication MapReduce job failed");
        }
    }

    private static void deleteFolder(Configuration conf, String folderPath) throws IOException {
        // Delete the Folder
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(folderPath);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }
}

--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/MatrixMultiplicationCombiner.java:
--------------------------------------------------------------------------------
// Developer: ANKIT SARRAF
// Combiner Class

package matrixmultiplication;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * For one output cell, multiplies the two operands that share the same inner
 * index j and emits each product as a plain integer string.
 *
 * Combiner must have the same inputs/outputs as the Mapper output. Hadoop may
 * run a combiner zero, one or several times, so this class never assumes it
 * sees every operand: unpaired {@code (j,value)} operands and already-computed
 * products are forwarded unchanged.
 */
public class MatrixMultiplicationCombiner extends
        Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // operands.get(j) holds the values received so far for inner index j
        List<List<Integer>> operands = new ArrayList<List<Integer>>();
        for (int j = 0; j < Constants.DIMENSIONS; j++) {
            operands.add(new ArrayList<Integer>());
        }

        for (Text val : values) {
            String raw = val.toString();
            if (!raw.startsWith("(")) {
                // A product emitted by an earlier combine pass
                context.write(key, new Text(raw));
                continue;
            }

            // Ignore Brackets from the val, then read the j index and the value
            String[] pair = raw.substring(1, raw.length() - 1).split(",");
            int j = Integer.parseInt(pair[0]);
            operands.get(j).add(Integer.parseInt(pair[1]));
        }

        for (int j = 0; j < Constants.DIMENSIONS; j++) {
            List<Integer> seen = operands.get(j);
            if (seen.size() == 2) {
                int product = seen.get(0) * seen.get(1);
                System.out.println("Combiner Emiting : <" + key + "," + product + ">");
                context.write(key, new Text("" + product));
            } else {
                // Not exactly one operand from each matrix: leave the pairing
                // to a later pass rather than multiplying with stale data.
                for (Integer operand : seen) {
                    context.write(key, new Text("(" + j + "," + operand + ")"));
                }
            }
        }
    }
}
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/MatrixMultiplicationMapper.java:
--------------------------------------------------------------------------------
// ANKIT SARRAF
// Mapper Class

package matrixmultiplication;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Buffers the rows of MatrixA and MatrixB as they are read and, once a matrix
 * is complete, emits for every output cell (i,k) the operands it needs as
 * {@code <"(i,k)", "(j,value)">} pairs, where j is the inner index.
 *
 * NOTE(review): the row buffering assumes a single mapper sees the header and
 * all rows of a matrix in file order; confirm the input is never split.
 */
public class MatrixMultiplicationMapper extends
        Mapper<Object, Text, Text, Text> {
    private List<List<Integer>> matrixA = new ArrayList<List<Integer>>();
    private List<List<Integer>> matrixB = new ArrayList<List<Integer>>();

    private int matAi = 0;
    private int matBj = 0;

    private String currentMatrix = "";

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // A blank line would otherwise reach Integer.parseInt("")
            return;
        }

        String[] matrixValues = line.split("\\s+");

        // Header lines only switch the matrix the following rows belong to
        if (matrixValues[0].equals("MatrixA") || matrixValues[0].equals("MatrixB")) {
            currentMatrix = matrixValues[0];
            return;
        }

        List<Integer> eachRow = new ArrayList<Integer>();
        for (String matrixValue : matrixValues) {
            eachRow.add(Integer.parseInt(matrixValue));
        }

        if (currentMatrix.equals("MatrixA")) {
            matrixA.add(eachRow);
            matAi++;
            if (matAi == Constants.DIMENSIONS) {
                emitMatrixA(context);
            }
        } else if (currentMatrix.equals("MatrixB")) {
            matrixB.add(eachRow);
            matBj++;
            if (matBj == Constants.DIMENSIONS) {
                emitMatrixB(context);
            }
        }
    }

    // A[i][j] contributes to every cell (i,k) of row i of the result.
    private void emitMatrixA(Context context) throws IOException, InterruptedException {
        for (int i = 0; i < Constants.DIMENSIONS; i++) {
            for (int j = 0; j < Constants.DIMENSIONS; j++) {
                for (int k = 0; k < Constants.DIMENSIONS; k++) {
                    String matAKey = "(" + i + "," + k + ")";
                    String matAValue = "(" + j + "," + matrixA.get(i).get(j) + ")";

                    System.out.println("Emiting A : <" + matAKey + "," + matAValue + ">");
                    context.write(new Text(matAKey), new Text(matAValue));
                }
            }
        }
    }

    // B[j][k] contributes to every cell (i,k) of column k of the result.
    private void emitMatrixB(Context context) throws IOException, InterruptedException {
        for (int j = 0; j < Constants.DIMENSIONS; j++) {
            for (int k = 0; k < Constants.DIMENSIONS; k++) {
                for (int i = 0; i < Constants.DIMENSIONS; i++) {
                    String matBKey = "(" + i + "," + k + ")";
                    String matBValue = "(" + j + "," + matrixB.get(j).get(k) + ")";

                    System.out.println("Emiting B : <" + matBKey + "," + matBValue + ">");
                    context.write(new Text(matBKey), new Text(matBValue));
                }
            }
        }
    }
}
--------------------------------------------------------------------------------
/matrix-multiplication/src/matrixmultiplication/MatrixMultiplicationReducer.java:
--------------------------------------------------------------------------------
// ANKIT SARRAF
// Reducer Class

package matrixmultiplication;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Computes one cell of the result matrix by summing the products received
 * for that cell.
 *
 * Values are either plain integers (products already formed by the combiner)
 * or raw {@code (j,value)} operands. Raw operands arrive when the framework
 * did not run the combiner, which Hadoop is free to do; they are paired by
 * inner index j and multiplied here.
 */
public class MatrixMultiplicationReducer extends
        Reducer<Text, Text, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Summation of all the received Values for a particular Index
        int sum = 0;
        // First operand seen for each inner index j, waiting for its partner
        Map<Integer, Integer> pending = new HashMap<Integer, Integer>();

        for (Text val : values) {
            String raw = val.toString();
            if (!raw.startsWith("(")) {
                sum += Integer.parseInt(raw);
                continue;
            }

            String[] pair = raw.substring(1, raw.length() - 1).split(",");
            Integer j = Integer.valueOf(pair[0]);
            int operand = Integer.parseInt(pair[1]);

            Integer partner = pending.remove(j);
            if (partner == null) {
                pending.put(j, operand);
            } else {
                sum += partner * operand;
            }
        }

        if (!pending.isEmpty()) {
            // An operand without a partner means the sum would be silently wrong
            throw new IOException("Unpaired operands for cell " + key
                    + " at inner indices " + pending.keySet());
        }

        System.out.println("Reducer Emiting : <" + key + "," + sum + ">");
        context.write(key, new IntWritable(sum));
    }
}
--------------------------------------------------------------------------------
/page-rank/.DS_Store:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/.DS_Store -------------------------------------------------------------------------------- /page-rank/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /page-rank/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | page-rank 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.apache.hadoop.eclipse.Nature 16 | org.eclipse.jdt.core.javanature 17 | 18 | 19 | -------------------------------------------------------------------------------- /page-rank/finalOutput/finalOutput.txt: -------------------------------------------------------------------------------- 1 | 0 0.08951327267661183 2 | 1 0.16418882351680386 3 | 2 0.26865107113768866 4 | 3 0.17913779846107686 5 | 4 0.298509034207819 6 | -------------------------------------------------------------------------------- 
/page-rank/input/originalinput.txt: -------------------------------------------------------------------------------- 1 | 1 0:1:1:1:0: 2 | 2 0:0:0:0:1: 3 | 3 0:1:0:0:1: 4 | 4 1:0:1:0:0: 5 | 5 0:0:1:1:0: -------------------------------------------------------------------------------- /page-rank/input/pagerankinput.txt: -------------------------------------------------------------------------------- 1 | 1 0.0:0.06666666666666667:0.06666666666666667:0.06666666666666667:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.2: 3 | 3 0.0:0.1:0.0:0.0:0.1: 4 | 4 0.1:0.0:0.1:0.0:0.0: 5 | 5 0.0:0.0:0.1:0.1:0.0: -------------------------------------------------------------------------------- /page-rank/outputs/output0/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output0/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output0/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.03333333333333333:0.03333333333333333:0.03333333333333333:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16666666666666669: 3 | 3 0.0:0.13333333333333336:0.0:0.0:0.13333333333333336: 4 | 4 0.08333333333333334:0.0:0.08333333333333334:0.0:0.0: 5 | 5 0.0:0.0:0.15000000000000002:0.15000000000000002:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output1/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output1/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output1/part-r-00000: 
-------------------------------------------------------------------------------- 1 | 1 0.0:0.02777777777777778:0.02777777777777778:0.02777777777777778:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16666666666666669: 3 | 3 0.0:0.13333333333333336:0.0:0.0:0.13333333333333336: 4 | 4 0.09166666666666667:0.0:0.09166666666666667:0.0:0.0: 5 | 5 0.0:0.0:0.15000000000000002:0.15000000000000002:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output10/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output10/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output10/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.029870220336076822:0.029870220336076822:0.029870220336076822:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16419043102709194: 3 | 3 0.0:0.13431860318072705:0.0:0.0:0.13431860318072705: 4 | 4 0.08951327267661183:0.0:0.08951327267661183:0.0:0.0: 5 | 5 0.0:0.0:0.14926757812500002:0.14926757812500002:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output2/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output2/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output2/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.030555555555555558:0.030555555555555558:0.030555555555555558:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16111111111111115: 3 | 3 0.0:0.13472222222222224:0.0:0.0:0.13472222222222224: 4 | 4 
0.0888888888888889:0.0:0.0888888888888889:0.0:0.0: 5 | 5 0.0:0.0:0.15000000000000002:0.15000000000000002:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output3/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output3/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output3/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.029629629629629634:0.029629629629629634:0.029629629629629634:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.1652777777777778: 3 | 3 0.0:0.13472222222222224:0.0:0.0:0.13472222222222224: 4 | 4 0.09027777777777779:0.0:0.09027777777777779:0.0:0.0: 5 | 5 0.0:0.0:0.1479166666666667:0.1479166666666667:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output4/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output4/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output4/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.030092592592592598:0.030092592592592598:0.030092592592592598:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.1643518518518519: 3 | 3 0.0:0.13391203703703705:0.0:0.0:0.13391203703703705: 4 | 4 0.08877314814814817:0.0:0.08877314814814817:0.0:0.0: 5 | 5 0.0:0.0:0.15000000000000002:0.15000000000000002:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output5/_SUCCESS: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output5/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output5/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.029591049382716058:0.029591049382716058:0.029591049382716058:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16400462962962964: 3 | 3 0.0:0.13443287037037038:0.0:0.0:0.13443287037037038: 4 | 4 0.0900462962962963:0.0:0.0900462962962963:0.0:0.0: 5 | 5 0.0:0.0:0.14913194444444447:0.14913194444444447:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output6/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output6/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output6/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.030015432098765435:0.030015432098765435:0.030015432098765435:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16402391975308644: 3 | 3 0.0:0.13438464506172842:0.0:0.0:0.13438464506172842: 4 | 4 0.08936149691358027:0.0:0.08936149691358027:0.0:0.0: 5 | 5 0.0:0.0:0.14921875:0.14921875:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output7/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output7/_SUCCESS 
-------------------------------------------------------------------------------- /page-rank/outputs/output7/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.02978716563786009:0.02978716563786009:0.02978716563786009:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16440007716049387: 3 | 3 0.0:0.13429783950617286:0.0:0.0:0.13429783950617286: 4 | 4 0.08961709104938273:0.0:0.08961709104938273:0.0:0.0: 5 | 5 0.0:0.0:0.14920428240740743:0.14920428240740743:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output8/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output8/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output8/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 0.0:0.029872363683127578:0.029872363683127578:0.029872363683127578:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.16408500514403296: 3 | 3 0.0:0.13430426954732513:0.0:0.0:0.13430426954732513: 4 | 4 0.08949572402263375:0.0:0.08949572402263375:0.0:0.0: 5 | 5 0.0:0.0:0.14934895833333336:0.14934895833333336:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/outputs/output9/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asarraf/Algorithm-Implementation-Using-Map-Reduce/a86463ba2159508b02666a43a2289562d84c845f/page-rank/outputs/output9/_SUCCESS -------------------------------------------------------------------------------- /page-rank/outputs/output9/part-r-00000: -------------------------------------------------------------------------------- 1 | 1 
0.0:0.029831908007544584:0.029831908007544584:0.029831908007544584:0.0: 2 | 2 0.0:0.0:0.0:0.0:0.1641766332304527: 3 | 3 0.0:0.13435852301954734:0.0:0.0:0.13435852301954734: 4 | 4 0.08961066100823047:0.0:0.08961066100823047:0.0:0.0: 5 | 5 0.0:0.0:0.14919463734567906:0.14919463734567906:0.0: 6 | -------------------------------------------------------------------------------- /page-rank/src/filepreprocess/HadoopDFSFileReadWrite.java: -------------------------------------------------------------------------------- 1 | package filepreprocess; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataOutputStream; 9 | import org.apache.hadoop.fs.FileSystem; 10 | import org.apache.hadoop.fs.Path; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import pagerank.PageRank; 15 | 16 | public class HadoopDFSFileReadWrite { 17 | private static final transient Logger LOG = LoggerFactory.getLogger(PageRank.class); 18 | 19 | void usage () { 20 | System.out.println("Usage : HadoopDFSFileReadWrite "); 21 | System.exit(1); 22 | } 23 | 24 | void printErrorMessage(String str) { 25 | LOG.info(str); 26 | return; 27 | } 28 | 29 | public void preprocess(String inputFileName, String outputFileName) throws IOException { 30 | Configuration conf = new Configuration(); 31 | FileSystem fs = FileSystem.get(conf); 32 | 33 | // Hadoop DFS deals with Path 34 | Path inFile = new Path(inputFileName); 35 | Path outFile = new Path(outputFileName); 36 | 37 | // Check if input/output are valid 38 | if (!fs.exists(inFile)) { 39 | printErrorMessage("Input file not found"); 40 | throw new IOException(); 41 | } if (!fs.isFile(inFile)) { 42 | printErrorMessage("Input should be a file"); 43 | throw new IOException(); 44 | } if (fs.exists(outFile)) { 45 | printErrorMessage("Page Rank Input File Exists. 
Deleting it"); 46 | fs.delete(outFile, true); 47 | } 48 | 49 | BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(inFile))); 50 | 51 | FSDataOutputStream out = fs.create(outFile); 52 | LOG.info("Opening the output file successful"); 53 | 54 | double [][] tempAdjacency = null; 55 | String [] stringAdjacency = null; 56 | int counter = 0; 57 | 58 | try{ 59 | String line; 60 | while((line = in.readLine()) != null) { 61 | 62 | // Line has the pre-processed output 63 | line = (line.split("\\s+"))[1]; 64 | 65 | if(tempAdjacency == null || stringAdjacency == null) { 66 | tempAdjacency = new double[line.split(":").length][line.split(":").length]; 67 | stringAdjacency = new String[line.split(":").length]; 68 | } 69 | 70 | stringAdjacency[counter] = line; 71 | counter++; 72 | } 73 | 74 | LOG.info("COUNTER = " + counter); 75 | 76 | // This takes O(n^2) : Need a way to reduce the time 77 | for(int i = 0 ; i < counter ; i++) { 78 | String [] stringTempAdjacency = stringAdjacency[i].split(":"); 79 | 80 | int onesInThisRow = 0; 81 | stringAdjacency[i] = new String(""); 82 | 83 | for(int j = 0 ; j < stringTempAdjacency.length ; j++) { 84 | //LOG.info("[" + i + ", " + j + "] : " + stringTempAdjacency[j]); 85 | 86 | if(stringTempAdjacency[j].equals("1")) { 87 | onesInThisRow++; 88 | } 89 | 90 | tempAdjacency[i][j] = Double.parseDouble(stringTempAdjacency[j]); 91 | tempAdjacency[i][j] = tempAdjacency[i][j] / counter; 92 | } 93 | 94 | for(int j = 0 ; j < stringTempAdjacency.length ; j++) { 95 | tempAdjacency[i][j] = tempAdjacency[i][j] / (double) onesInThisRow; 96 | stringAdjacency[i] = new String( 97 | stringAdjacency[i].concat(tempAdjacency[i][j] + ":")); 98 | } 99 | } 100 | 101 | for(int i = 0 ; i < counter ; i++) { 102 | out.writeBytes((i+1) + "\t" + stringAdjacency[i]); 103 | if(i != (counter - 1)) { 104 | out.writeBytes("\n"); 105 | } 106 | } 107 | } catch(Exception e) { 108 | LOG.info("Un-understandable Exception : " + e.getMessage()); 109 | } finally { 110 | 
in.close(); 111 | out.close(); 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /page-rank/src/finalpagerank/FinalPageRankCalculator.java: -------------------------------------------------------------------------------- 1 | package finalpagerank; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataOutputStream; 9 | import org.apache.hadoop.fs.FileSystem; 10 | import org.apache.hadoop.fs.Path; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import pagerank.PageRank; 15 | 16 | public class FinalPageRankCalculator { 17 | private static final transient Logger LOG = LoggerFactory.getLogger(PageRank.class); 18 | 19 | void usage () { 20 | System.out.println("Usage : HadoopDFSFileReadWrite "); 21 | System.exit(1); 22 | } 23 | 24 | void printErrorMessage(String str) { 25 | LOG.info(str); 26 | return; 27 | } 28 | 29 | public void getFinalPageRank(String inputFileName, String outputFileName) throws IOException { 30 | Configuration conf = new Configuration(); 31 | FileSystem fs = FileSystem.get(conf); 32 | 33 | // Hadoop DFS deals with Path 34 | Path inFile = new Path(inputFileName); 35 | Path outFile = new Path(outputFileName); 36 | 37 | // Check if input/output are valid 38 | if (!fs.exists(inFile)) { 39 | printErrorMessage("Input file not found"); 40 | throw new IOException(); 41 | } if (!fs.isFile(inFile)) { 42 | printErrorMessage("Input should be a file"); 43 | throw new IOException(); 44 | } if (fs.exists(outFile)) { 45 | printErrorMessage("Final Page Rank Input File Exists. 
Deleting it"); 46 | fs.delete(outFile, true); 47 | } 48 | 49 | BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(inFile))); 50 | FSDataOutputStream out = fs.create(outFile); 51 | 52 | double [][] tempAdjacency = null; 53 | String [] stringAdjacency = null; 54 | int counter = 0; 55 | 56 | try{ 57 | String line; 58 | while((line = in.readLine()) != null) { 59 | // Line has the pre-processed output 60 | line = (line.split("\\s+"))[1]; 61 | 62 | if(tempAdjacency == null || stringAdjacency == null) { 63 | tempAdjacency = new double[line.split(":").length][line.split(":").length]; 64 | stringAdjacency = new String[line.split(":").length]; 65 | } 66 | 67 | stringAdjacency[counter] = line; 68 | counter++; 69 | } 70 | 71 | LOG.info("COUNTER = " + counter); 72 | 73 | // This takes O(n^2) : Need a way to reduce the time 74 | for(int i = 0 ; i < counter ; i++) { 75 | String [] stringTempAdjacency = stringAdjacency[i].split(":"); 76 | for(int j = 0 ; j < stringTempAdjacency.length ; j++) { 77 | tempAdjacency[i][j] = Double.parseDouble(stringTempAdjacency[j]); 78 | } 79 | } 80 | 81 | double [] sum = new double[counter]; 82 | 83 | for(int i = 0 ; i < counter ; i++) { 84 | for(int j = 0 ; j < counter ; j++) { 85 | sum[i] += tempAdjacency[j][i]; 86 | } 87 | } 88 | 89 | // Now add to this to the final Page Rank File 90 | for(int i = 0 ; i < counter ; i++) { 91 | out.writeBytes(i + "\t" + sum[i] + "\n"); 92 | } 93 | } catch(Exception e) { 94 | LOG.info("Un-understandable Exception : " + e.getMessage()); 95 | } finally { 96 | in.close(); 97 | out.close(); 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /page-rank/src/pagerank/PageRank.java: -------------------------------------------------------------------------------- 1 | // ANKIT SARRAF 2 | // Page Rank Algorithm Implementation using Hadoop Map Reduce 3 | 4 | package pagerank; 5 | 6 | import java.io.IOException; 7 | 8 | import 
org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.FileSystem; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.hadoop.io.IntWritable; 12 | import org.apache.hadoop.io.Text; 13 | import org.apache.hadoop.mapreduce.Counter; 14 | import org.apache.hadoop.mapreduce.Job; 15 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 16 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | 20 | import filepreprocess.HadoopDFSFileReadWrite; 21 | import finalpagerank.FinalPageRankCalculator; 22 | 23 | public class PageRank { 24 | private static final transient Logger LOG = LoggerFactory.getLogger(PageRank.class); 25 | private static volatile int roundNumber = 0; 26 | 27 | // Consider there are only 5 nodes 28 | public static final int TOTALNODES = 5; 29 | 30 | // Counter 31 | public enum MyCounter { 32 | COUNTER; 33 | } 34 | 35 | // Some Pre - Processing of the Input file 36 | static { 37 | HadoopDFSFileReadWrite preprocessor = new HadoopDFSFileReadWrite(); 38 | String originalInputFile = "/pageRank/input/originalinput.txt"; 39 | String newInputFile = "/pageRank/input/pagerankinput.txt"; 40 | try{ 41 | preprocessor.preprocess(originalInputFile, newInputFile); 42 | } catch(Exception e) { 43 | LOG.info("Some Error In Reading the Input File"); 44 | LOG.info(e.getMessage()); 45 | System.exit(0); 46 | } finally { 47 | LOG.info("No Error In Reading the Input File"); 48 | // Proceed to the Map Reduce Job 49 | } 50 | } 51 | 52 | public static void main(String[] args) throws Exception { 53 | String inputPath = "/pageRank/input/pagerankinput.txt"; 54 | String outputPath = "/pageRank/outputs/output"; 55 | String finalPath = "/pageRank/finalOutput/finalOutput.txt"; 56 | Counter counter; 57 | 58 | do { 59 | Configuration conf = new Configuration(); 60 | Job job = Job.getInstance(conf); 61 | 62 | deleteFolder(conf, outputPath + roundNumber); 63 | 64 | LOG.info("Input 
: " + inputPath + " :: Output : " + outputPath); 65 | 66 | myMapReduceTask(job, inputPath, outputPath + roundNumber); 67 | inputPath = outputPath + roundNumber + "/part-r-00000"; 68 | roundNumber++; 69 | 70 | // Configure the Counter 71 | counter = job.getCounters().findCounter(MyCounter.COUNTER); 72 | 73 | LOG.info("Counter Value : " + counter.getValue()); 74 | } while(counter.getValue() > 0); 75 | // The above loop executes til the time the Page ranks Stabilize 76 | 77 | // Now calculate the sum of In Links for each node 78 | FinalPageRankCalculator finalPageRankCalculator = new FinalPageRankCalculator(); 79 | finalPageRankCalculator.getFinalPageRank( 80 | outputPath + (roundNumber - 1) + "/part-r-00000", finalPath); 81 | 82 | LOG.info("Final Page Rank File Created"); 83 | LOG.info("Check the Final Output in the path /pageRank/finalOutput/finalOutput.txt"); 84 | } 85 | 86 | private static void myMapReduceTask(Job job, String inputPath, String outputPath) 87 | throws IOException, ClassNotFoundException, InterruptedException { 88 | job.setJarByClass(PageRank.class); 89 | 90 | // Set the Mapper Class 91 | job.setMapperClass(PageRankMapper.class); 92 | job.setMapOutputKeyClass(IntWritable.class); 93 | job.setMapOutputValueClass(Text.class); 94 | 95 | // Set the Reducer Class 96 | job.setReducerClass(PageRankReducer.class); 97 | job.setOutputKeyClass(IntWritable.class); 98 | job.setOutputValueClass(Text.class); 99 | 100 | // Specify input and output Directories 101 | FileInputFormat.addInputPath(job, new Path(inputPath)); 102 | FileOutputFormat.setOutputPath(job, new Path(outputPath)); 103 | 104 | // Condition to wait for the completion of MR Job 105 | 106 | while(!job.waitForCompletion(true)) {} 107 | 108 | return; 109 | } 110 | 111 | private static void deleteFolder(Configuration conf, String folderPath ) throws IOException { 112 | // Delete the Folder 113 | FileSystem fs = FileSystem.get(conf); 114 | Path path = new Path(folderPath); 115 | if(fs.exists(path)) { 116 
| fs.delete(path,true); 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /page-rank/src/pagerank/PageRankMapper.java: -------------------------------------------------------------------------------- 1 | package pagerank; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Mapper; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | public class PageRankMapper extends 12 | Mapper { 13 | 14 | private static final transient Logger LOG = LoggerFactory.getLogger(PageRank.class); 15 | 16 | public void map(Object ikey, Text adjList, Context context) 17 | throws IOException, InterruptedException { 18 | 19 | LOG.info("Start Mapper Activity"); 20 | 21 | // Split the strings along Space 22 | String [] inputAdj = adjList.toString().split("\\s+"); 23 | 24 | IntWritable keyNode = new IntWritable(Integer.parseInt(inputAdj[0])); 25 | 26 | // Display the Key Value Pair Emitted 27 | LOG.info("<" + keyNode.get() + " , " + inputAdj[1] + ">"); 28 | 29 | // Emit the Key Value Pair 30 | context.write(keyNode, new Text(inputAdj[1])); 31 | 32 | // Split the second part of the Adjacency List along ":" 33 | String [] inLinks = inputAdj[1].split(":"); 34 | 35 | for(int i = 0 ; i < inLinks.length ; i++) { 36 | // Display the Key Value pair Emitted 37 | LOG.info("<" + (i + 1) + " , " + inLinks[i] + ">"); 38 | 39 | // Emit Key Value Pairs 40 | context.write(new IntWritable(i + 1), new Text(inLinks[i])); 41 | } 42 | } 43 | } -------------------------------------------------------------------------------- /page-rank/src/pagerank/PageRankReducer.java: -------------------------------------------------------------------------------- 1 | package pagerank; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.Text; 7 | import 
org.apache.hadoop.mapreduce.Reducer; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import pagerank.PageRank.MyCounter; 12 | 13 | public class PageRankReducer extends Reducer { 14 | private static final transient Logger LOG = LoggerFactory.getLogger(PageRank.class); 15 | 16 | public void reduce(IntWritable keyNode, Iterable nodeDistances, Context context) 17 | throws IOException, InterruptedException { 18 | 19 | LOG.info("Start Reducer Activity"); 20 | 21 | // Find the min distance per reducer 22 | double sumOfInLinks = 0.0; 23 | 24 | // The String storing the Adjacency List 25 | String [] adjList = new String[PageRank.TOTALNODES]; 26 | 27 | for (Text nodeDistance : nodeDistances) { 28 | if(nodeDistance.toString().contains(":")) { 29 | // It is the Adjacency List (The complex Data Structure) 30 | // In the form "OutLinkRank1:OutLinkRank2:...:OutLinkRankN:" 31 | adjList = nodeDistance.toString().split(":"); 32 | } else { 33 | sumOfInLinks += Double.parseDouble(nodeDistance.toString()); 34 | } 35 | } 36 | 37 | // Count number of the Nodes for which there is a outLink from this node 38 | int count = 0; 39 | for(String outLinks : adjList) { 40 | if(!outLinks.equals("0.0")) { 41 | count++; 42 | } 43 | } 44 | 45 | // Replace all the non-zero Out Links with the Page Ranks 46 | String [] newAdjList = new String[PageRank.TOTALNODES]; 47 | 48 | for(int i = 0 ; i < adjList.length ; i++) { 49 | if(adjList[i].equals("0.0")) { 50 | newAdjList[i] = "0.0"; 51 | } else { 52 | double newRank = sumOfInLinks / count; 53 | newAdjList[i] = new String(newRank + ""); 54 | } 55 | } 56 | 57 | String finalAdjList = new String(""); 58 | for(String aList : newAdjList) { 59 | finalAdjList = new String(finalAdjList.concat(aList.concat(":"))); 60 | } 61 | 62 | for(int i = 0 ; i < adjList.length ; i++) { 63 | if((Double.parseDouble(adjList[i]) - Double.parseDouble(newAdjList[i])) > 0.0001) { 64 | // If the change in Rank is high (Greater than 0.0001) 65 | 
context.getCounter(MyCounter.COUNTER).increment(1); 66 | break; 67 | } 68 | } 69 | 70 | LOG.info("REDUCER EMITS : <" + keyNode.get() + " , " + finalAdjList + ">"); 71 | 72 | context.write(keyNode, new Text(finalAdjList)); 73 | } 74 | } 75 | --------------------------------------------------------------------------------