├── .gitignore ├── Gruntfile.js ├── LICENSE ├── Procfile ├── README.md ├── app ├── __init__.py ├── looper.py ├── static │ ├── script.js │ └── styles.css ├── templates │ └── index.html ├── utils.py └── views.py ├── package.json ├── requirements.txt └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv 3 | .sass-cache/ 4 | node_modules/ 5 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | module.exports = function(grunt) { 2 | grunt.initConfig({ 3 | pkg: grunt.file.readJSON('package.json'), 4 | sass: { 5 | dist: { 6 | files: { 7 | 'app/static/styles.css': 'app/static/styles.scss' 8 | } 9 | } 10 | }, 11 | cssmin: { 12 | css:{ 13 | src: 'app/static/styles.css', 14 | dest: 'app/static/styles.min.css' 15 | } 16 | }, 17 | uglify: { 18 | js: { 19 | files: { 20 | 'app/static/looper.min.js': ['app/static/looper.js'] 21 | } 22 | } 23 | }, 24 | watch: { 25 | files: ['app/static/looper.js', 'app/static/styles.scss'], 26 | tasks: ['sass', 'cssmin', 'uglify'] 27 | } 28 | }); 29 | 30 | grunt.loadNpmTasks('grunt-contrib-uglify'); 31 | grunt.loadNpmTasks('grunt-contrib-cssmin'); 32 | grunt.loadNpmTasks('grunt-contrib-watch'); 33 | grunt.loadNpmTasks('grunt-contrib-sass'); 34 | 35 | grunt.registerTask('default', ['sass', 'cssmin:css', 'uglify:js']); 36 | }; 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) [2015] [Kartik Anand] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app --log-file=- 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wikilooper 2 | Wikilooper web version 3 | 4 | ### View Demo at 5 | [Wikilooper](http://wikilooper.herokuapp.com/) 6 | 7 | ### Description 8 | Clicking on the first link in the main text of a Wikipedia article, and then repeating the process for subsequent articles, one would eventually get to the Philosophy article. 9 | Unless there is an article with no wikilinks or with links to pages that do not exist, or gets stuck in a loop. By entering the first article, wikilooper will take you through the links one would encounter until it reaches Philosophy or gets stuck in a loop. 10 | 11 | ### XKCD 12 | Relevant XKCD article -> [Extended Mind](http://xkcd.com/903/) 13 | 14 | ### Requirements 15 | - beautifulsoup4==4.3.2 16 | - Flask==0.10.1 17 | - gunicorn==19.3.0 18 | - itsdangerous==0.24 19 | - Jinja2==2.7.3 20 | - MarkupSafe==0.23 21 | - requests==2.6.0 22 | - Werkzeug==0.10.4 23 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from flask import Flask 4 | 5 | app = Flask(__name__) 6 | 7 | app.logger.addHandler(logging.StreamHandler(sys.stdout)) 8 | app.logger.setLevel(logging.ERROR) 9 | 10 | from app import views 11 | -------------------------------------------------------------------------------- /app/looper.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import requests 3 | import re 4 | 5 | try: 6 | import urllib.parse as urllib 7 | except ImportError: 8 | import urllib 9 | 10 | from bs4 import BeautifulSoup 11 | from .utils import lru_cache 12 | 13 | def getLinkInPara(para_list, skips): 14 | count = 0 15 | for para in para_list: 16 | bracket = 0 17 | for tag in para.children: 18 | if tag.name is None and tag.string is not None: 19 | if '(' in tag.string.strip(): 20 | bracket += 1 21 | if ')' in tag.string.strip(): 22 | bracket -= 1 23 | 24 | if tag.name == 'a' and bracket == 0: 25 | # if we've already found a skips 26 | # get the next link instead 27 | if skips: 28 | skips -= 1 29 | continue 30 | 31 | next_link = tag['href'] 32 | next_link = re.sub(r"#.*", "", next_link) 33 | 34 | return next_link 35 | 36 | return None 37 | 38 | @lru_cache(maxsize=65536) 39 | def getNextLink(link, skips): 40 | wiki_url = "http://en.wikipedia.org/wiki/" 41 | 42 | r = requests.get(wiki_url + link) 43 | if r.status_code != 200: 44 | print(link + "Not a valid wiki link") 45 | return "--INVALID--" 46 | 47 | data = r.text 48 | soup = BeautifulSoup(data) 49 | div = soup.find_all(id="mw-content-text")[0] 50 | para_list = [child for child in div.children if child.name == 'p'] 51 | 52 | if len(para_list) == 0: 53 | div = div.find_all('div', class_="mw-parser-output")[0] 54 | para_list = [child for child in div.children if child.name == 'p'] 55 | 56 | next_link = getLinkInPara(para_list, skips) 57 | 58 | if next_link is None: 59 | for child in div.children: 60 | if child.name == 'ul': 61 | ul = child 62 | break 63 | else: 64 | return "--ERROR--" 65 | 66 | try: 67 | next_link = ul.li.a['href'] 68 | if next_link is None: # this should never happen 69 | return "--ERROR--" 70 | except: 71 | return "--ERROR--" 72 | 73 | args = next_link.split("/")[-1] 74 | args = urllib.unquote(str(args)) 75 | try: 76 | print(args) 77 | except UnicodeEncodeError: 78 | print("unprintable Unicode args") 79 | return args 80 | -------------------------------------------------------------------------------- /app/static/script.js: -------------------------------------------------------------------------------- 1 | let list = null; 2 | let running = false; 3 | 4 | 5 | window.onload = function() { 6 | const btn = document.querySelector('#js-start'); 7 | btn.addEventListener('click', (ev) => { 8 | // prevent any default triggers 9 | ev.preventDefault(); 10 | 11 | const currentLink = document.getElementById("wiki-form-search-box").value; 12 | if (currentLink == '') { 13 | removeElementsfromList(); 14 | addToList("Enter Something!"); 15 | 16 | return; 17 | } 18 | 19 | // reset everything 20 | hideAbout(); 21 | removeElementsfromList(); 22 | list = []; 23 | 24 | wikiLoop(currentLink); 25 | }) 26 | }; 27 | 28 | 29 | function wikiLoop(link) { 30 | if (link != 'Philosophy') { 31 | if (link == '--INVALID--') { 32 | addToList('Not a valid Wiki Link'); 33 | return; 34 | } else if (link == '--ERROR--') { 35 | addToList('Something went wrong'); 36 | return; 37 | } else if (link == '--LOOP--') { 38 | addToList('Too many loops'); 39 | return; 40 | } 41 | 42 | addToList(link); 43 | list.push(link); 44 | 45 | link = getNextLink(link, 0); 46 | } else { 47 | addToList('Philosophy'); 48 | addToList('STOP'); 49 | } 50 | } 51 | 52 | 53 | function getNextLink(link, skips) { 54 | // don't handle too many loops 55 | if (skips == 5) { 56 | wikiLoop('--LOOP--'); 57 | 58 | return; 59 | } 60 | 61 | let xmlHttp = new XMLHttpRequest(); 62 | xmlHttp.onreadystatechange = function () { 63 | if (xmlHttp.readyState === 4) { 64 | if (xmlHttp.status === 200) { 65 | nextLink = xmlHttp.responseText; 66 | // check if next link already exists in the list 67 | if (nextLink == '' || list.indexOf(nextLink) > -1) { 68 | getNextLink(link, skips + 1); 69 | } else { 70 | wikiLoop(nextLink); 71 | } 72 | } 73 | } 74 | }; 75 | 76 | xmlHttp.open("GET", "/loop?link=" + link + "&skips=" + skips, true); 77 | xmlHttp.send(null); 78 | } 79 | 80 | 81 | function toggleAbout() { 82 | var about = document.getElementById("wiki-loop-about"); 83 | var disp = about.style.display; 84 | if (disp == "") { 85 | about.style.display = "block"; 86 | } 87 | else { 88 | about.style.display = ""; 89 | } 90 | } 91 | 92 | 93 | function hideAbout() { 94 | var about = document.getElementById("wiki-loop-about"); 95 | about.style.display = "none"; 96 | } 97 | 98 | 99 | function removeClassActive(list) { 100 | var items = Array.prototype.slice.call(list.childNodes); 101 | 102 | if (items.length > 0) { 103 | var tail = items[items.length - 1]; 104 | tail.className = tail.className.replace('active', ''); 105 | } 106 | } 107 | 108 | 109 | function addToList(nextLink) { 110 | if (nextLink) { 111 | var list = document.getElementById("wiki-loop-list"), 112 | entry = document.createElement("li"), 113 | anchor = document.createElement("a"); 114 | 115 | removeClassActive(list); 116 | 117 | anchor.appendChild(document.createTextNode(nextLink)); 118 | anchor.setAttribute('href', 'http://en.wikipedia.org/wiki/' + nextLink); 119 | entry.className = "wiki-loop-list-item big-font active"; 120 | entry.appendChild(anchor); 121 | list.appendChild(entry); 122 | } 123 | } 124 | 125 | 126 | function removeElementsfromList() { 127 | var list = document.getElementById('wiki-loop-list'), 128 | items = Array.prototype.slice.call(list.childNodes), 129 | item; 130 | 131 | while ((item = items.pop())) { 132 | list.removeChild(item); 133 | } 134 | } 135 | 136 | -------------------------------------------------------------------------------- /app/static/styles.css: -------------------------------------------------------------------------------- 1 | @import url(//fonts.googleapis.com/css?family=Poiret+One); 2 | body { 3 | font-size: 100%; 4 | background-color: #171717; } 5 | 6 | .heading { 7 | color: #FBC93D; 8 | font-weight: bold; 9 | padding: 0; 10 | text-align: center; 11 | font-family: 'Poiret One'; } 12 | 13 | .container { 14 | max-width: 710px; 15 | margin: 0 auto; 16 | text-align: center; } 17 | .container button, .container ul li, .container input, .container label { 18 | font-size: 1.5em; } 19 | .container button { 20 | margin: 0.5em; 21 | background-color: white; 22 | border-radius: 5px; 23 | padding-top: 0.15em; 24 | padding-bottom: 0.15em; } 25 | .container ul { 26 | list-style: none; 27 | padding-left: 0; } 28 | .container input { 29 | margin-top: 0.5em; } 30 | .container label { 31 | color: #ECECEC; } 32 | 33 | #wiki-loop-about { 34 | color: #FBC93D; 35 | display: none; } 36 | 37 | a, a:visited { 38 | text-decoration: none; 39 | color: #fb4f4f; } 40 | 41 | .active { 42 | opacity: 0.5; } 43 | 44 | /*# sourceMappingURL=styles.css.map */ 45 | -------------------------------------------------------------------------------- /app/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 |25 | Clicking on the first link in the main text of a Wikipedia article, and then repeating the process for subsequent articles, one would eventually get to the Philosophy article. Unless there is an article with no wikilinks or with links to pages that do not exist, or gets stuck in a loop. By entering the first article, this website will take you through the links one would encounter until it reaches Philosophy or gets stuck in a loop. 26 |
27 |