├── .gitignore ├── .ipynb_checkpoints ├── Heat Model-checkpoint.ipynb └── texas-checkpoint.ipynb ├── Analysis ├── .ipynb_checkpoints │ ├── HousingVisualization-checkpoint.ipynb │ ├── ListingPricePrediction-checkpoint.ipynb │ ├── Visualize (no outlier)-checkpoint.ipynb │ └── Visualize-checkpoint.ipynb ├── HousingVisualization.ipynb ├── ListingPricePrediction.ipynb ├── Visualize (no outlier).ipynb ├── final_no_outlier.csv └── listings.csv ├── Data ├── Clean │ ├── .ipynb_checkpoints │ │ ├── Clean-checkpoint.ipynb │ │ ├── Distance-checkpoint.ipynb │ │ └── data_clean-checkpoint.ipynb │ ├── Clean.ipynb │ ├── Distance.ipynb │ ├── data_clean.ipynb │ ├── rent.csv │ ├── restaurant.csv │ └── summary.csv ├── Final │ ├── .ipynb_checkpoints │ │ ├── Distance-checkpoint.ipynb │ │ ├── Outlier-checkpoint.ipynb │ │ ├── Visualize+edge_dis-checkpoint.ipynb │ │ └── Visualize-checkpoint.ipynb │ ├── Outlier.ipynb │ ├── README.txt │ ├── Visualize+edge_dis.ipynb │ ├── distance.csv │ ├── final.csv │ ├── final_edge_distance.csv │ ├── final_no_outlier │ ├── food.csv │ └── internal.csv └── Raw │ ├── .ipynb_checkpoints │ ├── VisualizeRentCollectionData-checkpoint.ipynb │ └── clean-checkpoint.ipynb │ ├── VisualizeRentCollectionData.ipynb │ ├── address.csv │ ├── addresses.json │ ├── cleanlistings.json │ ├── craiglist.json │ ├── generateRentCollectionCSV.py │ ├── rentCollectionData.json │ ├── rent_collection_board.csv │ ├── rent_collection_board_avg_ppr.csv │ ├── restaurants.json │ ├── yelp.json │ └── yelp_clean2.json ├── Map ├── .ipynb_checkpoints │ ├── Area Divider-checkpoint.ipynb │ ├── Distance Divider-checkpoint.ipynb │ ├── HeatMap-checkpoint.ipynb │ └── Selection Box-checkpoint.ipynb ├── Area Divider │ ├── Area Divider.ipynb │ ├── Area Divider.kml │ ├── Distance Divider.ipynb │ └── Distance Divider.kml ├── AreaMap.txt ├── Heap Map │ ├── HeatMap.ipynb │ └── gmap_plot.html ├── HeatMap.py ├── MISC │ ├── .ipynb_checkpoints │ │ └── texas-checkpoint.ipynb │ ├── Heat Model.ipynb │ ├── Screen Shot 2016-04-14 at 12.42.44 AM.png │ ├── Screen Shot 2016-04-14 at 12.44.11 AM.png │ ├── Screen Shot 2016-04-14 at 12.45.47 AM.png │ └── texas.ipynb └── final_no_outlier ├── README.rtfd ├── Screen Shot 2016-04-14 at 12.42.44 AM.png ├── Screen Shot 2016-04-14 at 12.44.11 AM.png ├── Screen Shot 2016-04-14 at 12.45.47 AM.png └── TXT.rtf ├── Scraper ├── CraigslistHousingScraper │ ├── CraigslistHousingScraper │ │ ├── CustomDupeFilter.py │ │ ├── __init__.py │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── housingScraper.py │ └── scrapy.cfg ├── RentBoardDataCollection │ ├── dataCollection.js │ └── scrapeDataFromApts.py └── YelpScraper │ ├── .ipynb_checkpoints │ ├── get-checkpoint.ipynb │ ├── get_data-checkpoint.ipynb │ ├── get_yelp-checkpoint.ipynb │ └── get_yelp2-checkpoint.ipynb │ ├── .python-version │ ├── clean.py │ ├── get_yelp1.ipynb │ ├── get_yelp2.ipynb │ ├── requirements.txt │ ├── sample.py │ ├── scrape_area.py │ └── scrape_loc.py ├── Server ├── input-text-styles-source │ ├── css │ │ ├── base.css │ │ ├── font-awesome │ │ │ ├── FontAwesome.otf │ │ │ ├── font-awesome.css │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ └── fontawesome-webfont.woff │ │ └── style.css │ ├── img │ │ └── core │ │ │ ├── logo-alt.png │ │ │ ├── logo-alt.svg │ │ │ ├── logo.png │ │ │ ├── logo.svg │ │ │ ├── social-dark.png │ │ │ ├── social-dark.svg │ │ │ ├── social-light.png │ │ │ └── social-light.svg │ ├── index.html │ └── js │ │ └── html5shiv.min.js 
├── listingPrediction.py ├── listings.json ├── predictionServer.py └── training_data.csv ├── listings.json └── training_data.csv /.gitignore: -------------------------------------------------------------------------------- 1 | venv/* 2 | RentBoardDataCollection/node_modules/* 3 | RentBoardDataCollection/key.js 4 | *.pyc 5 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Heat Model-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /Analysis/.ipynb_checkpoints/Visualize (no outlier)-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /Analysis/HousingVisualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 69, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | }, 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "WARNING: pylab import has clobbered these variables: ['plt']\n", 22 | "`%matplotlib` prevents importing * from pylab and numpy\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import numpy as np\n", 28 | "import matplotlib as plt\n", 29 | "import csv\n", 30 | "import json\n", 31 | "import pandas as pd\n", 32 | "import pymongo\n", 33 | "%pylab inline " 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Which visualizations/tools do we want for this?\n", 41 | "\n", 42 | "1. A price estimater, where you enter in desired qualities of an apartment, and we return an estimated price for this apartment.\n", 43 | "\n", 44 | "2. A tool to determine whether listings are over or underpriced, based upon the factors you enter (actually, just enter a craigslist listing, and we return a determination of over/underpricing, and the degree.\n", 45 | "\n", 46 | "3. An analysis of what factors influence prices: of when you control for other things, how much does distance to campus affect pricing? Square footage? Time since original posting? Number of images in the listing? Which of these factors is the most influentials (how to measure that - research? 
\n", 47 | "\n", 48 | "\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 27, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "def generateCurrentCSV():\n", 60 | " collection = pymongo.MongoClient().HousingListings.listings\n", 61 | " listings = [listing for listing in collection.find()]\n", 62 | " for el in listings:\n", 63 | " del el['_id']\n", 64 | " del el['link']\n", 65 | " del el['description']\n", 66 | " \n", 67 | " with open(\"listings.csv\", 'w') as csvfile:\n", 68 | " writer = csv.DictWriter(csvfile, fieldnames=listings[0].keys())\n", 69 | " writer.writeheader()\n", 70 | " for el in listings:\n", 71 | " writer.writerow(el)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 52, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "generateCurrentCSV()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 62, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "1555" 96 | ] 97 | }, 98 | "execution_count": 62, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "cv = pd.read_csv(\"listings.csv\")\n", 105 | "cv['price'].as_matrix()\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 94, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "\n" 120 | ] 121 | }, 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "(array([ 2.06400000e+03, 0.00000000e+00, 0.00000000e+00,\n", 126 | " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", 127 | " 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,\n", 128 | " 1.00000000e+00]),\n", 129 | " array([ 5.00000000e+01, 3.60044500e+06, 7.20084000e+06,\n", 130 | " 1.08012350e+07, 1.44016300e+07, 1.80020250e+07,\n", 131 | " 2.16024200e+07, 2.52028150e+07, 2.88032100e+07,\n", 132 | " 3.24036050e+07, 3.60040000e+07]),\n", 133 | " )" 134 | ] 135 | }, 136 | "execution_count": 94, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | }, 140 | { 141 | "data": { 142 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAENCAYAAAAG6bK5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEUtJREFUeJzt3X+sZGV9x/H3RxYsFsyW0CzLsi2krD+2oYVQWSu13rQp\nWU0LNCGAViWGGtKtYmzSFExT1n+sNtEKbSBtRFn8sc2qKYEIlB9yU5sGNlh+LF5W2YRNuLfskrRW\noP4D8u0fc1aG27t35v7YmVmf9yuZ8MxznnPOdx5gPnPOmTM3VYUkqT2vG3cBkqTxMAAkqVEGgCQ1\nygCQpEYZAJLUKANAkhq1aAAk2ZjkgSTfS/JEkqu7/u1JZpM80j3e3bfOtUmeSrI3yQV9/ecm2dMt\nu/7IvSRJ0jCy2H0ASU4BTqmqR5OcAHwXuBi4FHihqj43b/xm4GvA24ANwH3ApqqqJLuBj1TV7iR3\nAjdU1d1H5FVJkgZa9Aigqg5U1aNd+0XgSXpv7ABZYJWLgJ1V9VJV7Qf2AVuSrAdOrKrd3bhb6QWJ\nJGlMhr4GkOR04Bzgwa7ro0keS3JzkrVd36nAbN9qs/QCY37/HK8GiSRpDIYKgO70zzeAj3VHAjcB\nZwBnA88Cnz1iFUqSjog1gwYkORb4JvCVqroNoKqe61v+BeCO7ukcsLFv9dPoffKf69r9/XML7Msf\nJpKkZaiqhU7LL2rQt4AC3AzMVNXn+/rX9w37Q2BP174duDzJcUnOADYBu6vqAPB8ki3dNj8A3HaY\nFzHxj+uuu27sNVindR6tNVrn6j+Wa9ARwPnA+4HHkzzS9X0CeG+Ss4ECngau6t68Z5LsAmaAl4Ft\n9Wp124BbgOOBO8tvAEnSWC0aAFX1byx8lHDXIut8CvjUAv3fBc5aaoGSpCPDO4GXYWpqatwlDMU6\nV9fRUOfRUCNY56RY9EawUUtSk1SPJB0NklCrfRFYkvSzywCQpEYZAJLUKANAkhplAEhSowwASWqU\nASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkA\nktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJ\njTIAJKlRa8ZdwHxr1rx+5Ps888y3sHfvYyPfrySN08QFwE9+8vyI9/gkL774vhHvU5LGb9FTQEk2\nJnkgyfeSPJHk6q7/pCT3JvlBknuSrO1b59okTyXZm+SCvv5zk+zpll1/+L2+fsSP44abKUn6GTPo\nGsBLwMer6leBtwN/muStwDXAvVX1JuD+7jlJNgOXAZuBrcCNSdJt6ybgyqraBGxKsnXVX40kaWiL\nBkBVHaiqR7v2i8CTwAbgQmBHN2wHcHHXvgjYWVUvVdV+YB+wJcl64MSq2t2Nu7VvHUnSGAz9LaAk\npwPnAA8B66rqYLfoILCua58KzPatNksvMOb3z3X9kqQxGeoicJITgG8CH6uqF149qwNVVUlq9Ura\n3tee6h6SpEOmp6eZnp5e8XYGBkCSY+m9+X+5qm7rug8mOaWqDnSnd57r+ueAjX2rn0bvk/9c1+7v\nn1t4j9uXUL4ktWdqaoqpqamfPv/kJz+5rO0M+hZQgJuBmar6fN+i24EruvYVwG19/ZcnOS7JGcAm\nYHdVHQCeT7Kl2+YH+taRJI3BoCOA84H3A48neaTruxb4NLAryZXAfuBSgKqaSbILmAFeBrZV1aHT\nQ9uAW4DjgTur6u5VfB2SpCXKq+/P49e7ljDqembYsOESZmdnRrxfSVodSaiqDB75Wv4WkCQ1ygCQ\npEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElq\nlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZ\nAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1KiBAZDki0kOJtnT17c9yWyS\nR7rHu/uWXZvkqSR7k1zQ139ukj3dsutX/6VIkpZimCOALwFb5/UV8LmqOqd73AWQZDNwGbC5W+fG\nJOnWuQm4sqo2AZuSzN+mJGmEBgZAVX0H+OECi7JA30XAzqp6qar2A/uALUnWAydW1e5u3K3Axcsr\nWZK0GlZyDeCjSR5LcnOStV3fqcBs35hZYMMC/XNdvyRpTJYbADcBZwBnA88Cn121iiRJI7FmOStV\n1XOH2km+ANzRPZ0DNvYNPY3eJ/+5rt3fP7fw1rf3tae6hyTpkOnpaaanp1e8nVTV4EHJ6cAdVXVW\n93x9VT3btT8OvK2q3tddBP4acB69Uzz3AWdWVSV5CLga2A18C7ihqu6et5/qXV8epRk2bLiE2dmZ\nEe9XklZHEqpqoeuyixp4BJBkJ/Au4OQkzwDXAVNJzqb3bv00cBVAVc0k2QXMAC8D2+rVhNkG3AIc\nD9w5/81fkjRaQx0BjIpHAJK0dMs9AvBOYElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUA\nSFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAk\nNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKj\nDABJapQBIEmNMgAkqVEGgCQ1amAAJPlikoNJ9vT1nZTk3iQ/SHJPkrV9y65N8lSSvUku6Os/N8me\nbtn1q/9SJElLMcwRwJeArfP6rgHurao3Afd3z0myGbgM2Nytc2OSdOvcBFxZVZuATUnmb1OSNEID\nA6CqvgP8cF73hcCOrr0DuLhrXwTsrKqXqmo/sA/YkmQ9cGJV7e7G3dq3jiRpDJZ7DWBdVR3s2geB\ndV37VGC2b9wssGGB/rmuX5I0Jiu+CFxVBdQq1CJJGqE1y1zvYJJTqupAd3rnua5/DtjYN+40ep/8\n57p2f//cwpve3tee6h6SpEOmp6eZnp5e8XbS+wA/YFByOnBHVZ3VPf8b4L+q6jNJrgHWVtU13UXg\nrwHn0TvFcx9wZlVVkoeAq4HdwLeAG6rq7nn7qdEfTMywYcMlzM7OjHi/krQ6klBVGTzytQYeASTZ\nCbwLODnJM8BfAZ8GdiW5EtgPXApQVTNJdgEzwMvAtno1YbYBtwDHA3fOf/OXJI3WUEcAo+IRgCQt\n3XKPALwTWJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQB\nIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS\n1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCk
RhkAktQoA0CSGmUASFKjDABJapQBIEmN\nWlEAJNmf5PEkjyTZ3fWdlOTeJD9Ick+StX3jr03yVJK9SS5YafGSpOVb6RFAAVNVdU5Vndf1XQPc\nW1VvAu7vnpNkM3AZsBnYCtyYxCMQSRqT1XgDzrznFwI7uvYO4OKufRGws6peqqr9wD7gPCRJY7Ea\nRwD3JXk4yYe7vnVVdbBrHwTWde1Tgdm+dWeBDSvcvyRpmdascP3zq+rZJL8I3Jtkb//Cqqoktcj6\nCyzb3tee6h6SpEOmp6eZnp5e8XZStdj78xI2lFwHvAh8mN51gQNJ1gMPVNVbklwDUFWf7sbfDVxX\nVQ/1baMWzIQjaoYNGy5hdnZmxPuVpNWRhKqafzp+oGWfAkryhiQndu2fBy4A9gC3A1d0w64Abuva\ntwOXJzkuyRnAJmD3cvcvSVqZlZwCWgf8c5JD2/lqVd2T5GFgV5Irgf3ApQBVNZNkFzADvAxsq9U6\n/JAkLdmqnQJaDZ4CkqSlG/kpIEnS0c0AkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaA\nJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhS\nowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXK\nAJCkRhkAktSokQZAkq1J9iZ5KslfjHLfkqTXGlkAJDkG+HtgK7AZeG+St45q/6tpenp63CUMxTpX\n19FQ59FQI1jnpBjlEcB5wL6q2l9VLwH/BFw0wv2vmqPlPwrrXF1HQ51HQ41gnZNilAGwAXim7/ls\n1ydJGoM1I9xXDTPojW/8gyNdx2u88soLHHPMSHcpSRMhVUO9L698R8nbge1VtbV7fi3wSlV9pm/M\naIqRpJ8xVZWlrjPKAFgDfB/4XeA/gd3Ae6vqyZEUIEl6jZGdAqqql5N8BPgX4BjgZt/8JWl8RnYE\nIEmaLGO5E3iYG8KS3NAtfyzJOaOusath0TqTTCX5UZJHusdfjqHGLyY5mGTPImMmYS4XrXNC5nJj\nkgeSfC/JE0muPsy4sc7nMHVOyHz+XJKHkjyaZCbJXx9m3Ljnc2CdkzCffbUc09Vwx2GWDz+fVTXS\nB73TP/uA04FjgUeBt84b8x7gzq69BXhwQuucAm4fdW3zangncA6w5zDLxz6XQ9Y5CXN5CnB21z6B\n3jWrSfxvc5g6xz6fXR1v6P65BngQ+K1Jm88h65yI+exq+TPgqwvVs9T5HMcRwDA3hF0I7ACoqoeA\ntUnWjbbMoW9cW/KV99VUVd8BfrjIkEmYy2HqhPHP5YGqerRrvwg8CZw6b9jY53PIOmHM8wlQVT/u\nmsfR+1D13/OGjH0+u30PqhMmYD6TnEbvTf4LLFzPkuZzHAEwzA1hC4057QjXNd8wdRbwju5Q684k\nm0dW3fAmYS6HMVFzmeR0ekcsD81bNFHzuUidEzGfSV6X5FHgIPBAVc3MGzIR8zlEnRMxn8DfAn8O\nvHKY5Uuaz3EEwLBXneen26ivVg+zv/8ANlbVrwN/B9x2ZEtatnHP5TAmZi6TnAB8A/hY9wn7/w2Z\n93ws8zmgzomYz6p6parOpvcm9NtJphYYNvb5HKLOsc9nkt8HnquqR1j8aGTo+RxHAMwBG/ueb6SX\nUouNOa3rG6WBdVbVC4cOHavqLuDYJCeNrsShTMJcDjQpc5nkWOCbwFeqaqH/ySdiPgfVOSnz2VfP\nj4BvAb8xb9FEzOchh6tzQubzHcCFSZ4GdgK/k+TWeWOWNJ/jCICHgU1JTk9yHHAZcPu8MbcDH4Sf\n3kH8P1V1cLRlDq4zybok6drn0fta7ULnDsdpEuZyoEmYy27/NwMzVfX5wwwb+3wOU+eEzOfJSdZ2\n7eOB3wMemTdsEuZzYJ2TMJ9V9Ymq2lhVZwCXA9+uqg/OG7ak+RzlbwEBh78hLMlV3fJ/qKo7k7wn\nyT7gf4EPTWKdwCXAnyR5GfgxvX8pI5VkJ/Au4OQkzwDX0fvW0sTM5TB1MgFzCZwPvB94PMmhN4BP\nAL90qM4Jmc+BdTIZ87ke2JHkdfQ+bH65qu6ftP/Xh6mTyZjP+QpgJfPpjWCS1Cj/JKQkNcoAkKRG\nGQCS1CgDQJIaZQBI0hhliB907Bv7ub4fpPt+kkE/r7L49vwWkCSNT5J3Ai8Ct1bVWUtY7yP0fhTw\nj5e7b48AJGmMFvqhxCS/kuSuJA8n+dckb15g1ffRuyN42UZ+I5gkaaB/BK6qqn1JtgA30vtzugAk\n+WV6P1X/7ZXsxACQpAnS/cjfbwJf7359Ano/U93vcuDrtcJz+AaAJE2W19H7DZ/F/prXZcC21diR\nJGlCVNXzwNNJLoHej/8l+bVDy5O8BfiFqnpwpfsyACRpjLofSvx34M1JnknyIeCPgCu7P1LzBL2/\n9HXIZazw4u9P9+3XQCWpTR4BSFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhr1f0kH\nOeeXBr9NAAAAAElFTkSuQmCC\n", 143 | "text/plain": [ 144 | "" 145 | ] 146 | }, 147 | "metadata": {}, 148 | "output_type": "display_data" 149 | } 150 | ], 151 | "source": [ 152 | "def histogramOfPrices():\n", 153 | " cv = pd.read_csv(\"listings.csv\")\n", 154 | " #f = [int(price) for price in cv['price'].tolist()]\n", 155 | " curr = cv['price'].as_matrix()\n", 156 | " l = np.array([el for el in curr])\n", 157 | " print(type(l))\n", 158 | " return plt.hist(l)#cv['price'].as_matrix())\n", 159 | " \n", 160 | "histogramOfPrices()\n", 161 | "\n" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 93, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "\n" 176 | ] 177 | }, 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "(array([ 5., 5., 9., 8., 16., 21., 29., 37., 42.,\n", 182 | " 53., 76., 104., 124., 
191., 180., 253., 276., 336.,\n", 183 | " 369., 388., 427., 461., 493., 513., 554., 517., 507.,\n", 184 | " 457., 468., 439., 429., 356., 328., 270., 274., 205.,\n", 185 | " 149., 159., 111., 94., 75., 54., 42., 34., 21.,\n", 186 | " 16., 10., 6., 7., 2.]),\n", 187 | " array([ 50.28597794, 52.26617655, 54.24637516, 56.22657377,\n", 188 | " 58.20677238, 60.186971 , 62.16716961, 64.14736822,\n", 189 | " 66.12756683, 68.10776544, 70.08796405, 72.06816266,\n", 190 | " 74.04836127, 76.02855988, 78.00875849, 79.9889571 ,\n", 191 | " 81.96915572, 83.94935433, 85.92955294, 87.90975155,\n", 192 | " 89.88995016, 91.87014877, 93.85034738, 95.83054599,\n", 193 | " 97.8107446 , 99.79094321, 101.77114183, 103.75134044,\n", 194 | " 105.73153905, 107.71173766, 109.69193627, 111.67213488,\n", 195 | " 113.65233349, 115.6325321 , 117.61273071, 119.59292932,\n", 196 | " 121.57312794, 123.55332655, 125.53352516, 127.51372377,\n", 197 | " 129.49392238, 131.47412099, 133.4543196 , 135.43451821,\n", 198 | " 137.41471682, 139.39491543, 141.37511404, 143.35531266,\n", 199 | " 145.33551127, 147.31570988, 149.29590849]),\n", 200 | " )" 201 | ] 202 | }, 203 | "execution_count": 93, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | }, 207 | { 208 | "data": { 209 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAEACAYAAABS29YJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE6NJREFUeJzt3VusXNd93/Hvz5QVWw5qmnBBSZQKE4kIm2mcWohl95Jm\nkiqKEASkniQFjcHaah4qNHWBNjHppCb74sjuJQ1Q6CWJBcKI2BCuI1CtY5FSPGiCoJLiSLYimhXZ\nlq2PEx7JrpPGTtuQ4L8PsyVNDs9lzmXOXNb3Axxoz541M2uJs3+zZu2116SqkCTNtzdMugKSpPEz\n7CWpAYa9JDXAsJekBhj2ktQAw16SGjBS2CfZmeQzSb6S5GyS9yXZleRMkpeSnE6yc6j8kSTnk5xL\nctf4qi9JGsWoPftfAj5XVe8C3g2cAw4DZ6pqH/BUd5sk+4H7gP3A3cDDSfwGIUkTtGYIJ3kr8ANV\n9SmAqrpSVX8CHACOd8WOA/d02weBE1V1uaouAheAO7a64pKk0Y3S494LvJLkkSS/n+SXk7wF2F1V\ni12ZRWB3t30zsDD0+AVgz5bVWJK0bqOE/XXA7cDDVXU78G26IZtX1WDNhdXWXXBNBkmaoOtGKLMA\nLFTVs93tzwBHgEtJbqyqS0luAl7u7v8acOvQ42/p9r0mieEvSRtQVdnI49bs2VfVJeCrSfZ1u+4E\nXgQeBw51+w4Bj3Xbp4D7k1yfZC9wG/DMMs87t39Hjx6deB1sn+1rrW0ttG8zRunZA/w08GtJrgf+\nK/BBYAdwMskDwEXg3i7EzyY5CZwFrgAP1mZrKUnalJHCvqq+BLx3mbvuXKH8x4GPb6JekqQt5Pz3\nMej1epOuwljZvtk1z22D+W/fZmQSIyxJHNmRpHVKQo3rBK0kafYZ9pLUAMNekhpg2EtSAwx7SWqA\nYS9JDTDsJakBhr0kNcCwl6QGGPaS1ADDXpIaMOoSx9JcSVZeXsR1mzSPDHs1bLlQ39AaU9LUcxhH\nkhpg2EtSAwx7SWqAYS9JDTDsJakBhr0kNcCwl6QGGPaS1ADDXpIaYNhLUgMMe0lqgGvjSEsst0ia\ni6Np1hn20jWWBruLo2n2jTSMk+Riki8neS7JM92+XUnOJHkpyekkO4fKH0lyPsm5JHeNq/LSKJJc\n8ye1ZtQx+wJ6VfWeqrqj23cYOFNV+4Cnutsk2Q/cB+wH7gYeTuK5AU1YLfmT2rKeEF7aHToAHO+2\njwP3dNsHgRNVdbmqLgIXgDuQZthy3w78hqBZsp6e/ZNJfi/JT3X7dlfVYre9COzutm8GFoYeuwDs\n2XRNpYla+s3AbweaLaOeoP2bVfVHSf4ycCbJueE7q6qSrPbu98iQpAkaKeyr6o+6/76S5DcYDMss\nJrmxqi4luQl4uSv+NeDWoYff0u37C44dO/badq/Xo9frbaT+kjS3+v0+/X5/S54ra80fTnIDsKOq\n/jTJW4DTwD8H7gS+UVWfSHIY2FlVh7sTtI8y+EDYAzwJfHcNvVCSct6ytstgbH256ZQr/Qbt6GV9\nH2s7JaGqNnSyaJSe/W7gN7qTUdcBv1ZVp5P8HnAyyQPAReBegKo6m+QkcBa4Ajxosms7eMJUWtma\nPfuxvKg9e43B8j14WG9v3Z69ptVmevbOf5ekBhj2ktQAw16SGmDYS1IDDHtJaoBhL0kNMOwlqQGG\nvSQ1wF+qkrbYSlfyegGWJsmwl8bCnzbUdDHsNZNcB0daH8NeM8zeszQqw17aBL9haFYY9tKmrLRy\npjRdnHopSQ0w7CWpAYa9JDXAsJekBniCVtomXlmrSTLspW3jzB1NjsM4ktQAw16SGuAwjjRhy43l\nO46vrWbYa6q1sRyBa/xo/Ax7zQBPbEqb5Zi9JDXAsJekBhj2ktQAw16SGjBS2CfZkeS5JI93t3cl\nOZPkpSSnk+wcKnskyfkk55LcNa6KS5JGN2rP/sPAWV6fFnEYOFNV+4Cnutsk2Q/cB+wH7gYeTuK3\nB0masDWDOMktwI8Bv8Lr890OAMe77ePAPd32QeBEVV2uqovABeCOraywJGn9Rul1/yLwM8DVoX27\nq2qx214EdnfbNwMLQ+UWgD2braQkaXNWvagqyY8DL1fVc0l6y5Wpqkqy2rXdy9537Nix17Z7vR69\n3rJPL0nN6vf79Pv9LXmurLYGR5KPAx8ArgBvAv4S8FngvUCvqi4luQn4QlW9M8lhgKp6qHv854Gj\nVfX0kuct
me22/cIgwv+hi3bniT7gfsY\nXBx4N/BwkmlfKXa59p0Gvqeqvg94CTgCc9U+ktwK/AjwP4b2zVr7rmlbkh8CDgDvrqq/CvzLbv+6\n2zb2hie5Bfgx4Fd4fW7TAeB4t30cuGfc9RiHrof0A1X1KRicr6iqP2FO2gf8b+AycEN34v0GBifd\nZ7Z9VfXbwDeX7F6pPQeBE1V1ubtI8AKDiwan1nLtq6ozVXW1u/k0cEu3PRft6/xr4GeX7Jup9q3Q\ntn8A/EJVXe7KvNLtX3fbtuNT7heBnwGuDu3bXVWL3fYisHsb6jEOe4FXkjyS5PeT/HKStzAn7auq\n/wX8K+B/Mgj5P66qM8xJ+4as1J6bGVwU+KoFBhcNzrIPAZ/rtueifUkOAgtV9eUld81D+24D/naS\n/5ykn+T7u/3rbttYwz7JjwMvV9VzrHDFQg3OEM/qWeLrgNuBh6vqduDbLBnSmOX2Jfku4B8D72Dw\n5vrOJD85XGaW27ecEdozs21N8nPAn1fVo6sUm6n2JbkB+ChwdHj3Kg+ZqfYxyJi3VdX7GXSaT65S\ndtW2jbtn/zeAA0n+O3AC+OEknwYWk9wIkOQm4OUx12NcFhj0KJ7tbn+GQfhfmpP2fT/wu1X1jaq6\nAnwW+OvMT/tetdL78WvArUPlbun2zZwkf4/BcOrfHdo9D+37LgadkS91OXML8MUku5mP9i0wOO7o\ncuZqkrezgbaNNeyr6qNVdWtV7WVwYu+3quoDwCngUFfsEPDYOOsxLlV1Cfhqkn3drjuBF4HHmYP2\nMTjZ/P4kb87gevE7GZxon5f2vWql9+Mp4P4k1yfZy+Ar9TMTqN+mJLmbQa/wYFX936G7Zr59VfVC\nVe2uqr1dziwwmFCwyBy0j8F78YcBupy5vqq+zkbaVlXb8gf8IHCq294FPMlgZsBpYOd21WMM7fo+\n4FngSww+gd86Z+37WQYfYC8wOHn5xlluH4NvmH8I/DmDBfs+uFp7GAwRXGDwwfejk67/Btr3IeA8\ng1kqz3V/D89B+/7fq/9+S+7/b8CuWWzfcm3rjrdPd8ffF4HeRtvmRVWS1IBpnnMqSdoihr0kNcCw\nl6QGGPaS1ADDXpIaYNhLUgMMe0lqgGEvSQ34/2aNPC1QjlmSAAAAAElFTkSuQmCC\n", 210 | "text/plain": [ 211 | "" 212 | ] 213 | }, 214 | "metadata": {}, 215 | "output_type": "display_data" 216 | } 217 | ], 218 | "source": [ 219 | "mu, sigma = 100, 15\n", 220 | "x = mu + sigma * np.random.randn(10000)\n", 221 | "print(type(x))\n", 222 | "plt.hist(x, 50)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.4.4" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 0 265 | } 266 | -------------------------------------------------------------------------------- /Data/Clean/.ipynb_checkpoints/Clean-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json\n", 12 | "with open('rentCollectionData.json') as d:\n", 13 | " raw = d.read()\n", 14 | "raw = '[' + raw + ']'\n", 15 | "rentCollect = json.loads(raw)\n", 16 | "\n", 17 | "with open('addresses.json') as d:\n", 18 | " raw2 = d.read()\n", 19 | "raw2 = '[' + raw2 + ']'\n", 20 | "address = json.loads(raw2)\n", 21 | "\n", 22 | "with open('cleanlistings.json') as d:\n", 23 | " raw3 = d.read()\n", 24 | "raw3 = '[' + raw3 + ']'\n", 25 | "craig = json.loads(raw3)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "rentCollect = rentCollect[0]['housing']\n", 37 | "address = address[0]['addrs']\n", 38 | "craig = craig[0]['listings']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | 
"metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "['2171', 'Allston Way', 37.8698901, -122.2661499]" 52 | ] 53 | }, 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "address[0]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# clean rentCollect\n", 72 | "rent = []\n", 73 | "for i in range(len(rentCollect)):\n", 74 | " temp = {}\n", 75 | " temp['number'] = rentCollect[i]['number']\n", 76 | " temp['street'] = rentCollect[i]['name'] \n", 77 | " temp['avg0'] = rentCollect[i]['avg0']\n", 78 | " temp['avg1'] = rentCollect[i]['avg1']\n", 79 | " temp['avg2'] = rentCollect[i]['avg2']\n", 80 | " temp['avg3'] = rentCollect[i]['avg3']\n", 81 | " temp['avg4plus'] = rentCollect[i]['avg4plus']\n", 82 | " rent.append(temp)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# clean address\n", 94 | "loc = []\n", 95 | "for i in range(len(address)):\n", 96 | " temp = {}\n", 97 | " temp['number'] = address[i][0]\n", 98 | " temp['street'] = address[i][1] \n", 99 | " temp['latitude'] = address[i][2]\n", 100 | " temp['longitude'] = address[i][3]\n", 101 | " loc.append(temp)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "import pandas as pd\n", 113 | "rent_df = pd.DataFrame(rent)\n", 114 | "loc_df = pd.DataFrame(loc)\n", 115 | "craig_df = pd.DataFrame(craig)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "metadata": { 122 | "collapsed": false, 123 | "scrolled": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "df = pd.merge(rent_df, loc_df)\n", 128 | "craig_df = craig_df.drop(['_id', 'link', 'numImages', 'postingDate'], 1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "df1 = df.drop_duplicates(subset = 'latitude')\n", 140 | "df2 = craig_df.drop_duplicates('latitude')" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 9, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "result = pd.concat([df1, df2])\n", 152 | "result = result.drop_duplicates('latitude')" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "result = result.reset_index(drop = True)\n", 164 | "result = result.fillna(value = 0)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 12, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stderr", 176 | "output_type": "stream", 177 | "text": [ 178 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: \n", 179 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 180 | "\n", 181 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "# address\n", 187 | "for i in 
range(2291):\n", 188 | " if result['address'][i] == 0:\n", 189 | " result['address'][i] = str(result['number'][i]) + ' ' + str(result['street'][i])\n", 190 | " " 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 13, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "result = result.drop(['number', 'street'], 1)\n", 202 | "result['price per room'] = pd.Series(0, index=result.index)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 14, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [ 212 | { 213 | "name": "stderr", 214 | "output_type": "stream", 215 | "text": [ 216 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:23: SettingWithCopyWarning: \n", 217 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 218 | "\n", 219 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 220 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: \n", 221 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 222 | "\n", 223 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 224 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: \n", 225 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 226 | "\n", 227 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "# price\n", 233 | "\n", 234 | "for i in range(2291):\n", 235 | " if result['title'][i] != 0:\n", 236 | " if result['bedrooms'][i] == 0:\n", 237 | " result['bedrooms'][i] = 1\n", 238 | " result['price per room'][i] = result['price'][i] / result['bedrooms'][i]\n", 239 | " else:\n", 240 | " total = 0\n", 241 | " num = 0\n", 242 | " if result['avg0'][i] != 0:\n", 243 | " total += result['avg0'][i]\n", 244 | " num += 1\n", 245 | " if result['avg1'][i] != 0:\n", 246 | " total += result['avg1'][i]\n", 247 | " num += 1\n", 248 | " if result['avg2'][i] != 0:\n", 249 | " total += result['avg2'][i]\n", 250 | " num += 1\n", 251 | " if result['avg3'][i] != 0:\n", 252 | " total += result['avg3'][i]\n", 253 | " num += 1\n", 254 | " result['price per room'][i] = total / num" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 17, 260 | "metadata": { 261 | "collapsed": false 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "result = result.drop(['avg0', 'avg1', 'avg2', 'avg3', 'avg4plus', 'price'], 1)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 18, 271 | "metadata": { 272 | "collapsed": true 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "result.to_csv(\"clean_rent2.csv\")" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": true 284 | }, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python 3", 292 | "language": "python", 293 | "name": "python3" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": 
"python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.5.1" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 0 310 | } 311 | -------------------------------------------------------------------------------- /Data/Clean/.ipynb_checkpoints/data_clean-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json\n", 12 | "with open('restaurants.json') as d:\n", 13 | " RAW = d.read()\n", 14 | "RAW = '[' + RAW.replace('}{', '},{') + ']'\n", 15 | "rest = json.loads(RAW)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "[u'is_claimed',\n", 29 | " u'rating',\n", 30 | " u'mobile_url',\n", 31 | " u'rating_img_url',\n", 32 | " u'review_count',\n", 33 | " u'name',\n", 34 | " u'snippet_image_url',\n", 35 | " u'rating_img_url_small',\n", 36 | " u'url',\n", 37 | " u'location',\n", 38 | " u'menu_date_updated',\n", 39 | " u'phone',\n", 40 | " u'snippet_text',\n", 41 | " u'image_url',\n", 42 | " u'categories',\n", 43 | " u'display_phone',\n", 44 | " u'rating_img_url_large',\n", 45 | " u'menu_provider',\n", 46 | " u'id',\n", 47 | " u'is_closed',\n", 48 | " u'distance']" 49 | ] 50 | }, 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "#keys in each dic\n", 58 | "list(rest[0].keys())\n", 59 | "#keys in businesses\n", 60 | "list(rest[0]['businesses'][0].keys())" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 29, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "## Target format: [{'latitude': xxx, 'longtitude': xxx,\n", 72 | "## 'businesses': [{'categories': xxx, 'name': xxx, 'rating': xxx, 'neighborhoods': xxx}, {}, {}...]}, {}, {} ...]\n", 73 | "\n", 74 | "# import pdb; pdb.set_trace()\n", 75 | "new = []\n", 76 | "num_loc = len(rest)\n", 77 | "\n", 78 | "for i in range(num_loc):\n", 79 | " one_loc = {}\n", 80 | " businesses = []\n", 81 | " one_loc['latitude'] = rest[i]['region']['center']['latitude']\n", 82 | " one_loc['longitude'] = rest[i]['region']['center']['longitude']\n", 83 | " # businesses\n", 84 | " num_bus = len(rest[i]['businesses'])\n", 85 | " for j in range(num_bus): \n", 86 | " try:\n", 87 | " one_bus = rest[i]['businesses'][j]\n", 88 | " except:\n", 89 | " import pdb; pdb.set_trace()\n", 90 | " new_bus = {}\n", 91 | " new_bus['categories'] = one_bus['categories'][0][0]\n", 92 | " new_bus['name'] = one_bus['name']\n", 93 | " new_bus['rating'] = one_bus['rating']\n", 94 | " businesses.append(new_bus)\n", 95 | " one_loc['businesses'] = businesses\n", 96 | " new.append(one_loc)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 30, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "json_str = json.dumps(new)\n", 108 | "with open('clean_restaurant.json', 'w') as d:\n", 109 | " json.dump(json_str, d)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 52, 115 | "metadata": { 116 | "collapsed": true 117 | }, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "{'businesses': [{'categories': u'Brazilian',\n", 123 | " 'name': u'Brazil Fresh Squeeze Cafe',\n", 124 | " 
'rating': 4.5},\n", 125 | " {'categories': u'Japanese', 'name': u'Simply Bowl', 'rating': 4.0},\n", 126 | " {'categories': u'Cajun/Creole',\n", 127 | " 'name': u\"Angeline's Louisiana Kitchen\",\n", 128 | " 'rating': 4.0},\n", 129 | " {'categories': u'Asian Fusion', 'name': u'Toss Noodle Bar', 'rating': 4.0},\n", 130 | " {'categories': u'Pizza', 'name': u'Sliver Pizzeria', 'rating': 4.5},\n", 131 | " {'categories': u'Vegan', 'name': u'Cinnaholic', 'rating': 4.5},\n", 132 | " {'categories': u'Japanese', 'name': u'Sushinista', 'rating': 4.5},\n", 133 | " {'categories': u'African',\n", 134 | " 'name': u'Suya African Caribbean Grill',\n", 135 | " 'rating': 4.0},\n", 136 | " {'categories': u'Burgers', 'name': u'Eureka!', 'rating': 4.0},\n", 137 | " {'categories': u'Thai', 'name': u'Imm Thai Street Food', 'rating': 4.0},\n", 138 | " {'categories': u'Japanese', 'name': u'Sushi Secrets', 'rating': 4.0},\n", 139 | " {'categories': u'Sushi Bars', 'name': u'Sumo Roll', 'rating': 4.5},\n", 140 | " {'categories': u'Sandwiches', 'name': u'The Sandwich Spot', 'rating': 4.0},\n", 141 | " {'categories': u'American (New)', 'name': u'Gather', 'rating': 4.0},\n", 142 | " {'categories': u'Chinese', 'name': u'Great China', 'rating': 4.0},\n", 143 | " {'categories': u'Japanese', 'name': u'Tamon Tea', 'rating': 4.5},\n", 144 | " {'categories': u'Pubs', 'name': u'Jupiter', 'rating': 4.0},\n", 145 | " {'categories': u'Latin American', 'name': u'Platano', 'rating': 4.0},\n", 146 | " {'categories': u'Japanese', 'name': u'Ippuku', 'rating': 4.0},\n", 147 | " {'categories': u'Italian', 'name': u'PIQ Bakery', 'rating': 4.0}],\n", 148 | " 'latitude': 37.8701173693928,\n", 149 | " 'longitude': -122.267956974363}" 150 | ] 151 | }, 152 | "execution_count": 52, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "new[0]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 54, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "# Average rating & #Cafes & #Pubs\n", 170 | "analysis = []\n", 171 | "for i in range(len(new)):\n", 172 | " loc = {}\n", 173 | " loc['latitude'] = new[i]['latitude']\n", 174 | " loc['longitude'] = new[i]['longitude']\n", 175 | " total = 0\n", 176 | " num_bus = len(new[i]['businesses'])\n", 177 | " for j in range(num_bus):\n", 178 | " total += new[i]['businesses'][j]['rating']\n", 179 | " loc['avg_rating'] = float(total) / float(num_bus)\n", 180 | " \n", 181 | " cafe = 0\n", 182 | " pub = 0\n", 183 | " num_bus = len(new[i]['businesses'])\n", 184 | " for j in range(num_bus):\n", 185 | " if ('Coffee' in new[i]['businesses'][j]['categories']) or ('Cafe' in new[i]['businesses'][j]['name']):\n", 186 | " cafe += 1\n", 187 | " if ('Pubs' in new[i]['businesses'][j]['categories']) or ('Bar' in new[i]['businesses'][j]['name']):\n", 188 | " pub += 1\n", 189 | " loc['#cafes'] = cafe\n", 190 | " loc['#pubs'] = pub\n", 191 | " \n", 192 | " analysis.append(loc)\n", 193 | " " 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 56, 199 | "metadata": { 200 | "collapsed": true 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "summary = json.dumps(analysis)\n", 205 | "with open('analysis.json', 'w') as d:\n", 206 | " json.dump(summary, d)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 57, 212 | "metadata": { 213 | "collapsed": true 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "# CSV\n", 218 | "import csv\n", 219 | "import json\n", 220 
| "\n", 221 | "x = json.loads(summary)\n", 222 | "f = csv.writer(open(\"restaurant.csv\", \"wb+\"))\n", 223 | "f.writerow(['latitude', 'longitude', 'avg_rating', '#cafes', '#pubs'])\n", 224 | "\n", 225 | "for x in x:\n", 226 | " f.writerow([x['latitude'], \n", 227 | " x['longitude'], \n", 228 | " x['avg_rating'], \n", 229 | " x['#cafes'],\n", 230 | " x['#pubs']])" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 1, 236 | "metadata": { 237 | "collapsed": false 238 | }, 239 | "outputs": [ 240 | { 241 | "ename": "NameError", 242 | "evalue": "name 'analysis' is not defined", 243 | "output_type": "error", 244 | "traceback": [ 245 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 246 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 247 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0manalysis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 248 | "\u001b[0;31mNameError\u001b[0m: name 'analysis' is not defined" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "len(analysis)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": true 261 | }, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 2", 269 | "language": "python", 270 | "name": "python2" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 2 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython2", 282 | "version": "2.7.11" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 0 287 | } 288 | -------------------------------------------------------------------------------- /Data/Clean/Clean.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json\n", 12 | "with open('rentCollectionData.json') as d:\n", 13 | " raw = d.read()\n", 14 | "raw = '[' + raw + ']'\n", 15 | "rentCollect = json.loads(raw)\n", 16 | "\n", 17 | "with open('addresses.json') as d:\n", 18 | " raw2 = d.read()\n", 19 | "raw2 = '[' + raw2 + ']'\n", 20 | "address = json.loads(raw2)\n", 21 | "\n", 22 | "with open('cleanlistings.json') as d:\n", 23 | " raw3 = d.read()\n", 24 | "raw3 = '[' + raw3 + ']'\n", 25 | "craig = json.loads(raw3)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "rentCollect = rentCollect[0]['housing']\n", 37 | "address = address[0]['addrs']\n", 38 | "craig = craig[0]['listings']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "['2171', 'Allston Way', 37.8698901, -122.2661499]" 52 | ] 53 | }, 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "address[0]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | 
"outputs": [], 70 | "source": [ 71 | "# clean rentCollect\n", 72 | "rent = []\n", 73 | "for i in range(len(rentCollect)):\n", 74 | " temp = {}\n", 75 | " temp['number'] = rentCollect[i]['number']\n", 76 | " temp['street'] = rentCollect[i]['name'] \n", 77 | " temp['avg0'] = rentCollect[i]['avg0']\n", 78 | " temp['avg1'] = rentCollect[i]['avg1']\n", 79 | " temp['avg2'] = rentCollect[i]['avg2']\n", 80 | " temp['avg3'] = rentCollect[i]['avg3']\n", 81 | " temp['avg4plus'] = rentCollect[i]['avg4plus']\n", 82 | " rent.append(temp)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# clean address\n", 94 | "loc = []\n", 95 | "for i in range(len(address)):\n", 96 | " temp = {}\n", 97 | " temp['number'] = address[i][0]\n", 98 | " temp['street'] = address[i][1] \n", 99 | " temp['latitude'] = address[i][2]\n", 100 | " temp['longitude'] = address[i][3]\n", 101 | " loc.append(temp)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "import pandas as pd\n", 113 | "rent_df = pd.DataFrame(rent)\n", 114 | "loc_df = pd.DataFrame(loc)\n", 115 | "craig_df = pd.DataFrame(craig)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "metadata": { 122 | "collapsed": false, 123 | "scrolled": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "df = pd.merge(rent_df, loc_df)\n", 128 | "craig_df = craig_df.drop(['_id', 'link', 'numImages', 'postingDate'], 1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "df1 = df.drop_duplicates(subset = 'latitude')\n", 140 | "df2 = craig_df.drop_duplicates('latitude')" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 9, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "result = pd.concat([df1, df2])\n", 152 | "result = result.drop_duplicates('latitude')" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "result = result.reset_index(drop = True)\n", 164 | "result = result.fillna(value = 0)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 12, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stderr", 176 | "output_type": "stream", 177 | "text": [ 178 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: \n", 179 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 180 | "\n", 181 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "# address\n", 187 | "for i in range(2291):\n", 188 | " if result['address'][i] == 0:\n", 189 | " result['address'][i] = str(result['number'][i]) + ' ' + str(result['street'][i])\n", 190 | " " 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 13, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "result = result.drop(['number', 'street'], 1)\n", 202 | "result['price per room'] = pd.Series(0, 
index=result.index)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 14, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [ 212 | { 213 | "name": "stderr", 214 | "output_type": "stream", 215 | "text": [ 216 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:23: SettingWithCopyWarning: \n", 217 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 218 | "\n", 219 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 220 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: \n", 221 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 222 | "\n", 223 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 224 | "/Users/yikaluo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: \n", 225 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 226 | "\n", 227 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "# price\n", 233 | "\n", 234 | "for i in range(2291):\n", 235 | " if result['title'][i] != 0:\n", 236 | " if result['bedrooms'][i] == 0:\n", 237 | " result['bedrooms'][i] = 1\n", 238 | " result['price per room'][i] = result['price'][i] / result['bedrooms'][i]\n", 239 | " else:\n", 240 | " total = 0\n", 241 | " num = 0\n", 242 | " if result['avg0'][i] != 0:\n", 243 | " total += result['avg0'][i]\n", 244 | " num += 1\n", 245 | " if result['avg1'][i] != 0:\n", 246 | " total += result['avg1'][i]\n", 247 | " num += 1\n", 248 | " if result['avg2'][i] != 0:\n", 249 | " total += result['avg2'][i]\n", 250 | " num += 1\n", 251 | " if result['avg3'][i] != 0:\n", 252 | " total += result['avg3'][i]\n", 253 | " num += 1\n", 254 | " result['price per room'][i] = total / num" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 17, 260 | "metadata": { 261 | "collapsed": false 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "result = result.drop(['avg0', 'avg1', 'avg2', 'avg3', 'avg4plus', 'price'], 1)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 18, 271 | "metadata": { 272 | "collapsed": true 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "result.to_csv(\"clean_rent2.csv\")" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": true 284 | }, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python 2", 292 | "language": "python", 293 | "name": "python2" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 2 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython2", 305 | "version": "2.7.10" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 0 310 | } 311 | -------------------------------------------------------------------------------- /Data/Clean/data_clean.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | 
"collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json\n", 12 | "with open('restaurants.json') as d:\n", 13 | " RAW = d.read()\n", 14 | "RAW = '[' + RAW.replace('}{', '},{') + ']'\n", 15 | "rest = json.loads(RAW)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "[u'is_claimed',\n", 29 | " u'rating',\n", 30 | " u'mobile_url',\n", 31 | " u'rating_img_url',\n", 32 | " u'review_count',\n", 33 | " u'name',\n", 34 | " u'snippet_image_url',\n", 35 | " u'rating_img_url_small',\n", 36 | " u'url',\n", 37 | " u'location',\n", 38 | " u'menu_date_updated',\n", 39 | " u'phone',\n", 40 | " u'snippet_text',\n", 41 | " u'image_url',\n", 42 | " u'categories',\n", 43 | " u'display_phone',\n", 44 | " u'rating_img_url_large',\n", 45 | " u'menu_provider',\n", 46 | " u'id',\n", 47 | " u'is_closed',\n", 48 | " u'distance']" 49 | ] 50 | }, 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "#keys in each dic\n", 58 | "list(rest[0].keys())\n", 59 | "#keys in businesses\n", 60 | "list(rest[0]['businesses'][0].keys())" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 29, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "## Target format: [{'latitude': xxx, 'longtitude': xxx,\n", 72 | "## 'businesses': [{'categories': xxx, 'name': xxx, 'rating': xxx, 'neighborhoods': xxx}, {}, {}...]}, {}, {} ...]\n", 73 | "\n", 74 | "# import pdb; pdb.set_trace()\n", 75 | "new = []\n", 76 | "num_loc = len(rest)\n", 77 | "\n", 78 | "for i in range(num_loc):\n", 79 | " one_loc = {}\n", 80 | " businesses = []\n", 81 | " one_loc['latitude'] = rest[i]['region']['center']['latitude']\n", 82 | " one_loc['longitude'] = rest[i]['region']['center']['longitude']\n", 83 | " # businesses\n", 84 | " num_bus = len(rest[i]['businesses'])\n", 85 | " for j in range(num_bus): \n", 86 | " try:\n", 87 | " one_bus = rest[i]['businesses'][j]\n", 88 | " except:\n", 89 | " import pdb; pdb.set_trace()\n", 90 | " new_bus = {}\n", 91 | " new_bus['categories'] = one_bus['categories'][0][0]\n", 92 | " new_bus['name'] = one_bus['name']\n", 93 | " new_bus['rating'] = one_bus['rating']\n", 94 | " businesses.append(new_bus)\n", 95 | " one_loc['businesses'] = businesses\n", 96 | " new.append(one_loc)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 30, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "json_str = json.dumps(new)\n", 108 | "with open('clean_restaurant.json', 'w') as d:\n", 109 | " json.dump(json_str, d)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 52, 115 | "metadata": { 116 | "collapsed": true 117 | }, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "{'businesses': [{'categories': u'Brazilian',\n", 123 | " 'name': u'Brazil Fresh Squeeze Cafe',\n", 124 | " 'rating': 4.5},\n", 125 | " {'categories': u'Japanese', 'name': u'Simply Bowl', 'rating': 4.0},\n", 126 | " {'categories': u'Cajun/Creole',\n", 127 | " 'name': u\"Angeline's Louisiana Kitchen\",\n", 128 | " 'rating': 4.0},\n", 129 | " {'categories': u'Asian Fusion', 'name': u'Toss Noodle Bar', 'rating': 4.0},\n", 130 | " {'categories': u'Pizza', 'name': u'Sliver Pizzeria', 'rating': 4.5},\n", 131 | " {'categories': u'Vegan', 'name': u'Cinnaholic', 'rating': 4.5},\n", 132 
| " {'categories': u'Japanese', 'name': u'Sushinista', 'rating': 4.5},\n", 133 | " {'categories': u'African',\n", 134 | " 'name': u'Suya African Caribbean Grill',\n", 135 | " 'rating': 4.0},\n", 136 | " {'categories': u'Burgers', 'name': u'Eureka!', 'rating': 4.0},\n", 137 | " {'categories': u'Thai', 'name': u'Imm Thai Street Food', 'rating': 4.0},\n", 138 | " {'categories': u'Japanese', 'name': u'Sushi Secrets', 'rating': 4.0},\n", 139 | " {'categories': u'Sushi Bars', 'name': u'Sumo Roll', 'rating': 4.5},\n", 140 | " {'categories': u'Sandwiches', 'name': u'The Sandwich Spot', 'rating': 4.0},\n", 141 | " {'categories': u'American (New)', 'name': u'Gather', 'rating': 4.0},\n", 142 | " {'categories': u'Chinese', 'name': u'Great China', 'rating': 4.0},\n", 143 | " {'categories': u'Japanese', 'name': u'Tamon Tea', 'rating': 4.5},\n", 144 | " {'categories': u'Pubs', 'name': u'Jupiter', 'rating': 4.0},\n", 145 | " {'categories': u'Latin American', 'name': u'Platano', 'rating': 4.0},\n", 146 | " {'categories': u'Japanese', 'name': u'Ippuku', 'rating': 4.0},\n", 147 | " {'categories': u'Italian', 'name': u'PIQ Bakery', 'rating': 4.0}],\n", 148 | " 'latitude': 37.8701173693928,\n", 149 | " 'longitude': -122.267956974363}" 150 | ] 151 | }, 152 | "execution_count": 52, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "new[0]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 54, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "# Average rating & #Cafes & #Pubs\n", 170 | "analysis = []\n", 171 | "for i in range(len(new)):\n", 172 | " loc = {}\n", 173 | " loc['latitude'] = new[i]['latitude']\n", 174 | " loc['longitude'] = new[i]['longitude']\n", 175 | " total = 0\n", 176 | " num_bus = len(new[i]['businesses'])\n", 177 | " for j in range(num_bus):\n", 178 | " total += new[i]['businesses'][j]['rating']\n", 179 | " loc['avg_rating'] = float(total) / float(num_bus)\n", 180 | " \n", 181 | " cafe = 0\n", 182 | " pub = 0\n", 183 | " num_bus = len(new[i]['businesses'])\n", 184 | " for j in range(num_bus):\n", 185 | " if ('Coffee' in new[i]['businesses'][j]['categories']) or ('Cafe' in new[i]['businesses'][j]['name']):\n", 186 | " cafe += 1\n", 187 | " if ('Pubs' in new[i]['businesses'][j]['categories']) or ('Bar' in new[i]['businesses'][j]['name']):\n", 188 | " pub += 1\n", 189 | " loc['#cafes'] = cafe\n", 190 | " loc['#pubs'] = pub\n", 191 | " \n", 192 | " analysis.append(loc)\n", 193 | " " 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 56, 199 | "metadata": { 200 | "collapsed": true 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "summary = json.dumps(analysis)\n", 205 | "with open('analysis.json', 'w') as d:\n", 206 | " json.dump(summary, d)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 57, 212 | "metadata": { 213 | "collapsed": true 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "# CSV\n", 218 | "import csv\n", 219 | "import json\n", 220 | "\n", 221 | "x = json.loads(summary)\n", 222 | "f = csv.writer(open(\"restaurant.csv\", \"wb+\"))\n", 223 | "f.writerow(['latitude', 'longitude', 'avg_rating', '#cafes', '#pubs'])\n", 224 | "\n", 225 | "for x in x:\n", 226 | " f.writerow([x['latitude'], \n", 227 | " x['longitude'], \n", 228 | " x['avg_rating'], \n", 229 | " x['#cafes'],\n", 230 | " x['#pubs']])" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 1, 236 | "metadata": { 237 | 
"collapsed": false 238 | }, 239 | "outputs": [ 240 | { 241 | "ename": "NameError", 242 | "evalue": "name 'analysis' is not defined", 243 | "output_type": "error", 244 | "traceback": [ 245 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 246 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 247 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0manalysis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 248 | "\u001b[0;31mNameError\u001b[0m: name 'analysis' is not defined" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "len(analysis)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": true 261 | }, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 2", 269 | "language": "python", 270 | "name": "python2" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 2 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython2", 282 | "version": "2.7.11" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 0 287 | } 288 | -------------------------------------------------------------------------------- /Data/Final/.ipynb_checkpoints/Outlier-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "df = pd.read_csv(\"final_edge_distance.csv\")\n", 12 | "df = df[(df['latitude'] > 37.84) \n", 13 | " & (df['latitude'] < 37.9) \n", 14 | " & (df['longitude'] < -122.24) \n", 15 | " & (df['longitude'] > -122.3)\n", 16 | " & (df['price per room'] > 400)\n", 17 | " & (df['price per room'] < 3500)]\n", 18 | "df['sqft per room'] = df['sqft'] / df['bedrooms']\n", 19 | "\n", 20 | "df = df[['address', 'price per room', 'latitude', 'longitude', 'edge_distance', \n", 21 | " 'sqft per room', 'avg_rating', '#cafes', '#pubs', 'bedrooms', 'bathrooms']]\n", 22 | "df['sqft per room'] = df['sqft per room'].round()\n", 23 | "df['edge_distance'] = df['edge_distance'].round(2)\n", 24 | "df.to_csv(\"final_no_outlier\")" 25 | ] 26 | } 27 | ], 28 | "metadata": { 29 | "kernelspec": { 30 | "display_name": "Python 3", 31 | "language": "python", 32 | "name": "python3" 33 | }, 34 | "language_info": { 35 | "codemirror_mode": { 36 | "name": "ipython", 37 | "version": 3 38 | }, 39 | "file_extension": ".py", 40 | "mimetype": "text/x-python", 41 | "name": "python", 42 | "nbconvert_exporter": "python", 43 | "pygments_lexer": "ipython3", 44 | "version": "3.5.1" 45 | } 46 | }, 47 | "nbformat": 4, 48 | "nbformat_minor": 0 49 | } 50 | -------------------------------------------------------------------------------- /Data/Final/Outlier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "df = pd.read_csv(\"final_edge_distance.csv\")\n", 12 | "df = df[(df['latitude'] > 37.84) \n", 13 | " & (df['latitude'] < 37.9) \n", 14 | " & 
(df['longitude'] < -122.24) \n", 15 | " & (df['longitude'] > -122.3)\n", 16 | " & (df['price per room'] > 400)\n", 17 | " & (df['price per room'] < 3500)]\n", 18 | "df['sqft per room'] = df['sqft'] / df['bedrooms']\n", 19 | "\n", 20 | "df = df[['address', 'price per room', 'latitude', 'longitude', 'edge_distance', \n", 21 | " 'sqft per room', 'avg_rating', '#cafes', '#pubs', 'bedrooms', 'bathrooms']]\n", 22 | "df['sqft per room'] = df['sqft per room'].round()\n", 23 | "df['edge_distance'] = df['edge_distance'].round(2)\n", 24 | "df.to_csv(\"final_no_outlier\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 2", 40 | "language": "python", 41 | "name": "python2" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 2 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython2", 53 | "version": "2.7.10" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 0 58 | } 59 | -------------------------------------------------------------------------------- /Data/Final/README.txt: -------------------------------------------------------------------------------- 1 | final.csv: 2 | containing all the features we need 3 | a) address, latitude, longitude 4 | b) bathrooms/bedrooms/total sqft 5 | c) distance(miles): from the central point of campus 6 | d) avg_rating(5 stars)/cafes/pubs 7 | e) price per room 8 | 9 | distance.csv: 10 | compare distance with price 11 | 12 | internal.csv: 13 | compare bathroom/bedroom/sqft with price 14 | calculated sqft per room 15 | filtered out all the bathroom/bedroom/sqft w 0 value 16 | 17 | food.csv: 18 | compare restaurant rating/cafe/pub with price -------------------------------------------------------------------------------- /Data/Final/internal.csv: -------------------------------------------------------------------------------- 1 | bedrooms,sqft,price per room,sqft/room 2 | 4,3400,1875,850.0 3 | 3,3009,1966,1003.0 4 | 5,3000,1700,600.0 5 | 4,2500,1550,625.0 6 | 3,2464,1500,821.3 7 | 3,2400,1500,800.0 8 | 4,2300,1587,575.0 9 | 4,2200,1237,550.0 10 | 5,2100,1650,420.0 11 | 5,2100,1650,420.0 12 | 3,2100,1400,700.0 13 | 3,2036,1700,678.7 14 | 4,2000,2562,500.0 15 | 4,2000,1487,500.0 16 | 4,1980,1375,495.0 17 | 3,1850,1198,616.7 18 | 3,1850,1199,616.7 19 | 3,1850,1265,616.7 20 | 3,1842,1558,614.0 21 | 4,1806,1150,451.5 22 | 5,1800,1080,360.0 23 | 4,1800,1237,450.0 24 | 2,1800,1600,900.0 25 | 6,1765,1350,294.2 26 | 4,1750,1623,437.5 27 | 3,1750,1433,583.3 28 | 3,1710,1400,570.0 29 | 3,1700,186,566.7 30 | 3,1700,1199,566.7 31 | 3,1670,1766,556.7 32 | 3,1648,1165,549.3 33 | 3,1608,1500,536.0 34 | 3,1600,1266,533.3 35 | 3,1575,1233,525.0 36 | 2,1560,2197,780.0 37 | 1,1518,3450,1518.0 38 | 3,1500,1900,500.0 39 | 3,1500,1933,500.0 40 | 2,1500,1497,750.0 41 | 2,1500,2100,750.0 42 | 2,1500,1875,750.0 43 | 4,1450,1400,362.5 44 | 2,1444,1650,722.0 45 | 2,1434,1900,717.0 46 | 3,1412,1200,470.7 47 | 3,1400,998,466.7 48 | 3,1400,1120,466.7 49 | 2,1400,2125,700.0 50 | 1,1400,5000,1400.0 51 | 2,1369,1990,684.5 52 | 3,1350,1400,450.0 53 | 2,1340,1800,670.0 54 | 2,1339,1600,669.5 55 | 3,1300,950,433.3 56 | 3,1300,1150,433.3 57 | 2,1300,1600,650.0 58 | 2,1220,1275,610.0 59 | 2,1200,900,600.0 60 | 2,1200,1775,600.0 61 | 2,1200,1422,600.0 
62 | 2,1200,1750,600.0 63 | 2,1200,2250,600.0 64 | 1,1200,1500,1200.0 65 | 2,1170,1625,585.0 66 | 2,1170,1625,585.0 67 | 2,1170,1625,585.0 68 | 2,1155,1997,577.5 69 | 2,1155,1997,577.5 70 | 1,1155,3995,1155.0 71 | 4,1150,1375,287.5 72 | 2,1150,1300,575.0 73 | 2,1144,2000,572.0 74 | 2,1130,1800,565.0 75 | 2,1113,1600,556.5 76 | 3,1100,816,366.7 77 | 2,1100,1397,550.0 78 | 2,1100,1600,550.0 79 | 2,1100,2000,550.0 80 | 2,1100,1560,550.0 81 | 2,1100,1560,550.0 82 | 2,1100,1560,550.0 83 | 1,1100,2850,1100.0 84 | 1,1100,2285,1100.0 85 | 2,1090,2250,545.0 86 | 2,1068,1500,534.0 87 | 2,1050,1750,525.0 88 | 2,1050,1700,525.0 89 | 1,1050,3375,1050.0 90 | 2,1032,1625,516.0 91 | 2,1015,1747,507.5 92 | 3,1000,933,333.3 93 | 3,1000,966,333.3 94 | 2,1000,1125,500.0 95 | 2,1000,1300,500.0 96 | 2,1000,1300,500.0 97 | 2,1000,1400,500.0 98 | 2,1000,1597,500.0 99 | 2,1000,1625,500.0 100 | 2,1000,1650,500.0 101 | 2,1000,1700,500.0 102 | 1,1000,2700,1000.0 103 | 1,1000,3500,1000.0 104 | 1,1000,2285,1000.0 105 | 1,1000,2285,1000.0 106 | 1,1000,2285,1000.0 107 | 1,980,2650,980.0 108 | 2,975,1650,487.5 109 | 1,965,2495,965.0 110 | 2,960,1350,480.0 111 | 2,950,1125,475.0 112 | 2,950,1394,475.0 113 | 1,950,2200,950.0 114 | 1,950,2200,950.0 115 | 2,900,1750,450.0 116 | 2,900,1137,450.0 117 | 2,900,1200,450.0 118 | 2,900,1225,450.0 119 | 2,900,1250,450.0 120 | 2,900,1487,450.0 121 | 2,900,1487,450.0 122 | 2,900,1487,450.0 123 | 2,900,1550,450.0 124 | 2,900,1550,450.0 125 | 2,900,1600,450.0 126 | 2,900,1600,450.0 127 | 2,900,1650,450.0 128 | 2,900,1650,450.0 129 | 1,900,2000,900.0 130 | 1,900,2300,900.0 131 | 2,898,1964,449.0 132 | 2,890,1400,445.0 133 | 2,880,1475,440.0 134 | 2,875,1625,437.5 135 | 3,870,1200,290.0 136 | 1,864,3150,864.0 137 | 2,860,1700,430.0 138 | 3,851,1165,283.7 139 | 1,851,2395,851.0 140 | 1,851,2875,851.0 141 | 2,850,875,425.0 142 | 2,850,1350,425.0 143 | 2,850,1425,425.0 144 | 1,850,2100,850.0 145 | 1,850,2500,850.0 146 | 1,850,2550,850.0 147 | 1,850,2950,850.0 148 | 2,840,2075,420.0 149 | 2,825,1462,412.5 150 | 2,824,1640,412.0 151 | 2,820,1550,410.0 152 | 3,810,1100,270.0 153 | 3,800,1166,266.7 154 | 2,800,1650,400.0 155 | 2,800,750,400.0 156 | 2,800,975,400.0 157 | 2,800,1375,400.0 158 | 1,800,1900,800.0 159 | 1,800,2390,800.0 160 | 1,800,2500,800.0 161 | 1,800,2600,800.0 162 | 1,800,2750,800.0 163 | 2,790,1200,395.0 164 | 2,790,1717,395.0 165 | 2,789,1915,394.5 166 | 1,788,2595,788.0 167 | 2,784,1497,392.0 168 | 2,782,1247,391.0 169 | 2,780,1325,390.0 170 | 2,775,2000,387.5 171 | 1,773,2400,773.0 172 | 1,770,1400,770.0 173 | 1,768,2100,768.0 174 | 1,765,3257,765.0 175 | 2,760,1247,380.0 176 | 1,756,2950,756.0 177 | 2,750,1047,375.0 178 | 2,750,1050,375.0 179 | 2,750,1100,375.0 180 | 2,750,1197,375.0 181 | 2,750,1197,375.0 182 | 2,750,1197,375.0 183 | 2,750,1225,375.0 184 | 2,750,1400,375.0 185 | 1,750,1950,750.0 186 | 1,750,2000,750.0 187 | 1,750,2175,750.0 188 | 1,750,2200,750.0 189 | 1,750,2485,750.0 190 | 1,750,3250,750.0 191 | 1,750,2250,750.0 192 | 1,748,2250,748.0 193 | 1,745,2350,745.0 194 | 1,734,2875,734.0 195 | 1,734,2910,734.0 196 | 1,728,2250,728.0 197 | 1,726,2995,726.0 198 | 2,725,975,362.5 199 | 1,725,2050,725.0 200 | 1,710,2095,710.0 201 | 1,709,2800,709.0 202 | 2,707,1783,353.5 203 | 2,707,1815,353.5 204 | 2,700,1300,350.0 205 | 2,700,987,350.0 206 | 2,700,1050,350.0 207 | 2,700,1050,350.0 208 | 2,700,1097,350.0 209 | 2,700,1200,350.0 210 | 1,700,1750,700.0 211 | 1,700,2000,700.0 212 | 1,700,2200,700.0 213 | 2,680,1475,340.0 214 | 2,677,1858,338.5 215 | 1,675,2185,675.0 216 
| 1,675,2595,675.0 217 | 1,672,2300,672.0 218 | 2,668,1742,334.0 219 | 2,668,1742,334.0 220 | 1,667,2095,667.0 221 | 1,660,2400,660.0 222 | 2,650,1250,325.0 223 | 2,650,1300,325.0 224 | 2,650,1300,325.0 225 | 2,650,1300,325.0 226 | 2,650,1300,325.0 227 | 2,650,1300,325.0 228 | 1,650,1795,650.0 229 | 1,650,1999,650.0 230 | 1,650,2000,650.0 231 | 1,650,2150,650.0 232 | 1,650,2400,650.0 233 | 1,650,2495,650.0 234 | 1,650,2695,650.0 235 | 1,650,1425,650.0 236 | 2,645,1500,322.5 237 | 2,644,1397,322.0 238 | 1,643,2990,643.0 239 | 2,640,1247,320.0 240 | 1,640,1780,640.0 241 | 1,640,2100,640.0 242 | 1,632,2398,632.0 243 | 2,629,1700,314.5 244 | 3,625,1000,208.3 245 | 2,625,1350,312.5 246 | 1,625,2400,625.0 247 | 2,620,1350,310.0 248 | 2,620,1350,310.0 249 | 2,620,1350,310.0 250 | 2,620,1350,310.0 251 | 2,610,1350,305.0 252 | 2,610,1350,305.0 253 | 3,600,1100,200.0 254 | 2,600,1000,300.0 255 | 2,600,1150,300.0 256 | 2,600,1347,300.0 257 | 1,600,1500,600.0 258 | 1,600,1750,600.0 259 | 1,600,1850,600.0 260 | 1,600,2641,600.0 261 | 1,600,1850,600.0 262 | 3,585,1165,195.0 263 | 1,580,1550,580.0 264 | 1,580,1880,580.0 265 | 1,565,2300,565.0 266 | 1,565,2300,565.0 267 | 1,554,2395,554.0 268 | 2,550,1150,275.0 269 | 1,550,1250,550.0 270 | 1,550,1798,550.0 271 | 1,550,1800,550.0 272 | 1,550,1895,550.0 273 | 1,548,2150,548.0 274 | 1,540,2484,540.0 275 | 1,525,1750,525.0 276 | 1,508,2723,508.0 277 | 2,500,950,250.0 278 | 1,500,1600,500.0 279 | 1,500,1795,500.0 280 | 1,500,1795,500.0 281 | 1,500,1795,500.0 282 | 1,500,1795,500.0 283 | 1,500,1795,500.0 284 | 1,500,1800,500.0 285 | 1,500,1900,500.0 286 | 1,500,1945,500.0 287 | 1,490,1600,490.0 288 | 1,450,1495,450.0 289 | 1,450,1700,450.0 290 | 1,450,2100,450.0 291 | 1,432,2100,432.0 292 | 1,420,1500,420.0 293 | 1,408,1250,408.0 294 | 1,400,1450,400.0 295 | 1,400,1500,400.0 296 | 1,400,1550,400.0 297 | 1,400,1550,400.0 298 | 1,400,1595,400.0 299 | 1,400,1650,400.0 300 | 1,400,1675,400.0 301 | 1,400,1850,400.0 302 | 1,400,2000,400.0 303 | 1,400,2350,400.0 304 | 1,386,1250,386.0 305 | 1,374,1595,374.0 306 | 1,350,1600,350.0 307 | 1,330,1795,330.0 308 | 1,300,1850,300.0 309 | 1,300,850,300.0 310 | 1,250,900,250.0 311 | 1,230,1375,230.0 312 | 1,225,1295,225.0 313 | 1,220,1050,220.0 314 | 1,200,870,200.0 315 | 1,200,1250,200.0 316 | 1,160,1300,160.0 317 | 1,153,1295,153.0 -------------------------------------------------------------------------------- /Data/Raw/generateRentCollectionCSV.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | from datetime import datetime 4 | import time 5 | 6 | """ 7 | Use rentCollectionData.json to generate a CSV that has the following fields: 8 | address, price, move-in timestamp, move-in date, bedrooms 9 | """ 10 | 11 | def genTotalCSV(jsonFile): 12 | with open(jsonFile, 'r') as raw_rent: 13 | rent_json = json.load(raw_rent) 14 | with open('rent_collection_board.csv', 'w') as rent_csv: 15 | csv_writer = csv.DictWriter(rent_csv, fieldnames=['address', 'price', 'movein_t', 'movein_d', 'bedrooms']) 16 | csv_writer.writeheader() 17 | for building in rent_json['housing']: 18 | address = building['number'] + " " + building['name'] 19 | for unit in [key for key in building.keys() if key.isdigit()]: 20 | write_dict = { 21 | "address": address, 22 | "movein_d": building[unit]['started'], 23 | "movein_t": time.mktime(datetime.strptime(building[unit]['started'], "%Y-%m-%d").timetuple()), 24 | "price": building[unit]['price'], 25 | "bedrooms": building[unit]['bedrooms'] 26 | } 
27 | csv_writer.writerow(write_dict) 28 | 29 | def genAverageCSV(jsonFile): 30 | with open(jsonFile, 'r') as raw_rent: 31 | rent_json = json.load(raw_rent) 32 | with open('rent_collection_board_avg_ppr.csv', 'w') as rent_csv: 33 | csv_writer = csv.DictWriter(rent_csv, fieldnames=['address', 'avg_ppr']) 34 | csv_writer.writeheader() 35 | for building in rent_json['housing']: 36 | address = building['number'] + " " + building['name'] 37 | avg_ppr = [] 38 | for unit in [key for key in building.keys() if key.isdigit()]: 39 | dividend = building[unit]['bedrooms'] if building[unit]['bedrooms'] > 0 else 1 40 | avg_ppr += [float(building[unit]['price'])/dividend] 41 | write_dict = { 42 | "address": address, 43 | "avg_ppr": float(sum(avg_ppr))/len(avg_ppr) 44 | } 45 | csv_writer.writerow(write_dict) 46 | 47 | if __name__ == "__main__": 48 | genTotalCSV('rentCollectionData.json') 49 | genAverageCSV('rentCollectionData.json') 50 | -------------------------------------------------------------------------------- /Map/.ipynb_checkpoints/Distance Divider-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | "\n", 14 | "
\n", 15 | " \n", 16 | " Loading BokehJS ...\n", 17 | "
" 18 | ] 19 | }, 20 | "metadata": {}, 21 | "output_type": "display_data" 22 | }, 23 | { 24 | "data": { 25 | "application/javascript": [ 26 | "\n", 27 | "(function(global) {\n", 28 | " function now() {\n", 29 | " return new Date();\n", 30 | " }\n", 31 | "\n", 32 | " if (typeof (window._bokeh_onload_callbacks) === \"undefined\") {\n", 33 | " window._bokeh_onload_callbacks = [];\n", 34 | " }\n", 35 | "\n", 36 | " function run_callbacks() {\n", 37 | " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", 38 | " delete window._bokeh_onload_callbacks\n", 39 | " console.info(\"Bokeh: all callbacks have finished\");\n", 40 | " }\n", 41 | "\n", 42 | " function load_libs(js_urls, callback) {\n", 43 | " window._bokeh_onload_callbacks.push(callback);\n", 44 | " if (window._bokeh_is_loading > 0) {\n", 45 | " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", 46 | " return null;\n", 47 | " }\n", 48 | " if (js_urls == null || js_urls.length === 0) {\n", 49 | " run_callbacks();\n", 50 | " return null;\n", 51 | " }\n", 52 | " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", 53 | " window._bokeh_is_loading = js_urls.length;\n", 54 | " for (var i = 0; i < js_urls.length; i++) {\n", 55 | " var url = js_urls[i];\n", 56 | " var s = document.createElement('script');\n", 57 | " s.src = url;\n", 58 | " s.async = false;\n", 59 | " s.onreadystatechange = s.onload = function() {\n", 60 | " window._bokeh_is_loading--;\n", 61 | " if (window._bokeh_is_loading === 0) {\n", 62 | " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", 63 | " run_callbacks()\n", 64 | " }\n", 65 | " };\n", 66 | " s.onerror = function() {\n", 67 | " console.warn(\"failed to load library \" + url);\n", 68 | " };\n", 69 | " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", 70 | " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", 71 | " }\n", 72 | " };\n", 73 | "\n", 74 | " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-compiler-0.11.1.min.js'];\n", 75 | "\n", 76 | " var inline_js = [\n", 77 | " function(Bokeh) {\n", 78 | " Bokeh.set_log_level(\"info\");\n", 79 | " },\n", 80 | " \n", 81 | " function(Bokeh) {\n", 82 | " Bokeh.$(\"#91803166-9714-4e14-8458-b3bddac7415c\").text(\"BokehJS successfully loaded\");\n", 83 | " },\n", 84 | " function(Bokeh) {\n", 85 | " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", 86 | " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", 87 | " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", 88 | " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", 89 | " }\n", 90 | " ];\n", 91 | "\n", 92 | " function run_inline_js() {\n", 93 | " for (var i = 0; i < inline_js.length; i++) {\n", 94 | " inline_js[i](window.Bokeh);\n", 95 | " }\n", 96 | " }\n", 97 | "\n", 98 | " if (window._bokeh_is_loading === 0) {\n", 99 | " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", 100 | " run_inline_js();\n", 101 | " } else {\n", 102 | " load_libs(js_urls, function() {\n", 103 | " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", 104 | " run_inline_js();\n", 105 | " });\n", 106 | " }\n", 107 | "}(this));" 108 | ] 109 | }, 110 | 
"metadata": {}, 111 | "output_type": "display_data" 112 | } 113 | ], 114 | "source": [ 115 | "from bokeh.plotting import figure, output_file, show, ColumnDataSource\n", 116 | "from bokeh.models import HoverTool\n", 117 | "import pandas as pd\n", 118 | "from bokeh.io import output_notebook\n", 119 | "import numpy as np\n", 120 | "from bokeh.models import (\n", 121 | " GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool\n", 122 | ")\n", 123 | "from bokeh.io import output_notebook\n", 124 | "output_notebook()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 2, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "df = pd.read_csv(\"final_no_outlier\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 16, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "0.13257575757575757" 149 | ] 150 | }, 151 | "execution_count": 16, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "maxDis = df[\"edge_distance\"].max() * 5280\n", 158 | "n = 10\n", 159 | "\n", 160 | "d = 700\n", 161 | "i = 1\n", 162 | "\n", 163 | "D1 = df[df[\"edge_distance\"] <= (700/5280)]\n", 164 | "D2 = df[(df[\"edge_distance\"] <= (700*2/5280)) & (df[\"edge_distance\"] >= (700/5280))]\n", 165 | "D1400 = df[(df[\"edge_distance\"] <= (700*2/5280)) & (df[\"edge_distance\"] >= (700/5280))]\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.5.1" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 0 199 | } 200 | -------------------------------------------------------------------------------- /Map/.ipynb_checkpoints/Selection Box-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /Map/Area Divider/Area Divider.kml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Area Divider 5 | 6 | NorthSide 7 | ($173 more than the Berkeley average)
* Room Size: 470.0 sqft
* Distance to Campus: 0.61 miles
* Number of Bedrooms: 1.74
* Restaurant Ratings: 4.05 out of 5 stars
* 4.42 Cafes around
* 1.52 Pubs around]]>
8 | #poly-F4EB37-1-76 9 | 10 | 11 | 12 | 1 13 | -122.283501,37.880567,0.0 -122.2822952,37.8721433,0.0 -122.25646019999999,37.8757341,0.0 -122.26349100000002,37.883039,0.0 -122.283501,37.880567,0.0 14 | 15 | 16 | 17 |
18 | 19 | Far NorthSide 20 | ($52 less than the Berkeley campus average)
* Room Size: 537.5 sqft
* Distance to Campus: 1.67 miles
* Number of Bedrooms: 2.08
* Restaurant Ratings: 4.05 out of 5 stars
* 3.28 Cafes around
* 1.48 Pubs around]]>
21 | #poly-C6A4CF-0-143 22 | 23 | 24 | 25 | 1 26 | -122.30323790000001,37.8779699,0.0 -122.26349100000002,37.883039,0.0 -122.26456399999998,37.891353,0.0 -122.30581279999998,37.887454,0.0 -122.30323790000001,37.8779699,0.0 27 | 28 | 29 | 30 |
31 | 32 | Far SouthSide 33 | ($67 less than the Berkeley average)
* Room Size: 500.0 sqft
* Distance to Campus: 2.35 miles
* Number of Bedrooms: 1.84
* Restaurant Ratings: 4.0 out of 5 stars
* 3.72 Cafes around
* 1.36 Pubs around]]>
34 | #poly-62AF44-0-122 35 | 36 | 37 | 38 | 1 39 | -122.29482649999999,37.850594599999994,0.0 -122.2916508,37.83886900000001,0.0 -122.24255559999999,37.84679920000001,0.0 -122.24540999999999,37.85843700000001,0.0 -122.29482649999999,37.850594599999994,0.0 40 | 41 | 42 | 43 |
44 | 45 | South West 46 | ($36 less than the Berkeley average)
* Room Size: 475.0 sqft
* Distance to Campus: 1.27 miles
* Number of Bedrooms: 1.7
* Restaurant Ratings: 4.075 out of 5 stars
* 2.58 Cafes around
* 1.66 Pubs around]]>
47 | #poly-A61B4A-0-125 48 | 49 | 50 | 51 | 1 52 | -122.28126530000002,37.866113,0.0 -122.2790337,37.8531361,0.0 -122.265888,37.855198,0.0 -122.266416,37.867565,0.0 -122.28126530000002,37.866113,0.0 53 | 54 | 55 | 56 |
57 | 58 | SouthSide 59 | ($88.5 more than the Berkeley average)
* Room Size: 438.0 sqft
* Distance to Campus: 0.69 miles
* Number of Bedrooms: 1.97
* Restaurant Ratings: 4.025 out of 5 stars
* 1.51 Cafes around
* 1.5 Pubs around]]>
60 | #poly-F8971B-0-120 61 | 62 | 63 | 64 | 1 65 | -122.266416,37.867565,0.0 -122.265888,37.855198,0.0 -122.24540999999999,37.85843700000001,0.0 -122.2507095,37.8699752,0.0 -122.266416,37.867565,0.0 66 | 67 | 68 | 69 |
70 | 71 | WestSide 72 | ($107 less than the Berkeley average)
* Room Size: 375.0 sqft
* Distance to Campus: 0.75 miles
* Number of Bedrooms: 1.42
* Restaurant Ratings: 3.975 out of 5 stars
* 2.98 Cafes around
* 1.73 Pubs around]]>
73 | #poly-0BA9CC-1-105 74 | 75 | 76 | 77 | 1 78 | -122.2822952,37.8721433,0.0 -122.28126530000002,37.866113,0.0 -122.2657299,37.8677392,0.0 -122.26628900000001,37.87418,0.0 -122.2822952,37.8721433,0.0 79 | 80 | 81 | 82 |
83 | 84 | Far WestSide 85 | ($127 less than the Berkeley average)
* Room Size: 494.0 sqft
* Distance to Campus: 2.23 miles
* Number of Bedrooms: 1.55
* Restaurant Ratings: 4.025 out of 5 stars
* 3.97 Cafes around
* 1.52 Pubs around]]>
86 | #poly-795046-0-112 87 | 88 | 89 | 90 | 1 91 | -122.30323790000001,37.8779699,0.0 -122.29482649999999,37.850594599999994,0.0 -122.2790337,37.8531361,0.0 -122.283501,37.880567,0.0 -122.30323790000001,37.8779699,0.0 92 | 93 | 94 | 95 |
96 | 107 | 118 | 119 | 120 | normal 121 | #poly-0BA9CC-1-105-normal 122 | 123 | 124 | highlight 125 | #poly-0BA9CC-1-105-highlight 126 | 127 | 128 | 139 | 150 | 151 | 152 | normal 153 | #poly-62AF44-0-122-normal 154 | 155 | 156 | highlight 157 | #poly-62AF44-0-122-highlight 158 | 159 | 160 | 171 | 182 | 183 | 184 | normal 185 | #poly-795046-0-112-normal 186 | 187 | 188 | highlight 189 | #poly-795046-0-112-highlight 190 | 191 | 192 | 203 | 214 | 215 | 216 | normal 217 | #poly-A61B4A-0-125-normal 218 | 219 | 220 | highlight 221 | #poly-A61B4A-0-125-highlight 222 | 223 | 224 | 235 | 246 | 247 | 248 | normal 249 | #poly-C6A4CF-0-143-normal 250 | 251 | 252 | highlight 253 | #poly-C6A4CF-0-143-highlight 254 | 255 | 256 | 267 | 278 | 279 | 280 | normal 281 | #poly-F4EB37-1-76-normal 282 | 283 | 284 | highlight 285 | #poly-F4EB37-1-76-highlight 286 | 287 | 288 | 299 | 310 | 311 | 312 | normal 313 | #poly-F8971B-0-120-normal 314 | 315 | 316 | highlight 317 | #poly-F8971B-0-120-highlight 318 | 319 | 320 |
321 |
-------------------------------------------------------------------------------- /Map/Area Divider/Distance Divider.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | "\n", 14 | "
\n", 15 | " \n", 16 | " Loading BokehJS ...\n", 17 | "
" 18 | ] 19 | }, 20 | "metadata": {}, 21 | "output_type": "display_data" 22 | }, 23 | { 24 | "data": { 25 | "application/javascript": [ 26 | "\n", 27 | "(function(global) {\n", 28 | " function now() {\n", 29 | " return new Date();\n", 30 | " }\n", 31 | "\n", 32 | " if (typeof (window._bokeh_onload_callbacks) === \"undefined\") {\n", 33 | " window._bokeh_onload_callbacks = [];\n", 34 | " }\n", 35 | "\n", 36 | " function run_callbacks() {\n", 37 | " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", 38 | " delete window._bokeh_onload_callbacks\n", 39 | " console.info(\"Bokeh: all callbacks have finished\");\n", 40 | " }\n", 41 | "\n", 42 | " function load_libs(js_urls, callback) {\n", 43 | " window._bokeh_onload_callbacks.push(callback);\n", 44 | " if (window._bokeh_is_loading > 0) {\n", 45 | " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", 46 | " return null;\n", 47 | " }\n", 48 | " if (js_urls == null || js_urls.length === 0) {\n", 49 | " run_callbacks();\n", 50 | " return null;\n", 51 | " }\n", 52 | " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", 53 | " window._bokeh_is_loading = js_urls.length;\n", 54 | " for (var i = 0; i < js_urls.length; i++) {\n", 55 | " var url = js_urls[i];\n", 56 | " var s = document.createElement('script');\n", 57 | " s.src = url;\n", 58 | " s.async = false;\n", 59 | " s.onreadystatechange = s.onload = function() {\n", 60 | " window._bokeh_is_loading--;\n", 61 | " if (window._bokeh_is_loading === 0) {\n", 62 | " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", 63 | " run_callbacks()\n", 64 | " }\n", 65 | " };\n", 66 | " s.onerror = function() {\n", 67 | " console.warn(\"failed to load library \" + url);\n", 68 | " };\n", 69 | " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", 70 | " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", 71 | " }\n", 72 | " };\n", 73 | "\n", 74 | " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-compiler-0.11.1.min.js'];\n", 75 | "\n", 76 | " var inline_js = [\n", 77 | " function(Bokeh) {\n", 78 | " Bokeh.set_log_level(\"info\");\n", 79 | " },\n", 80 | " \n", 81 | " function(Bokeh) {\n", 82 | " Bokeh.$(\"#7b27d152-d18d-4442-854a-a1cc6b1feb69\").text(\"BokehJS successfully loaded\");\n", 83 | " },\n", 84 | " function(Bokeh) {\n", 85 | " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", 86 | " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", 87 | " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", 88 | " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", 89 | " }\n", 90 | " ];\n", 91 | "\n", 92 | " function run_inline_js() {\n", 93 | " for (var i = 0; i < inline_js.length; i++) {\n", 94 | " inline_js[i](window.Bokeh);\n", 95 | " }\n", 96 | " }\n", 97 | "\n", 98 | " if (window._bokeh_is_loading === 0) {\n", 99 | " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", 100 | " run_inline_js();\n", 101 | " } else {\n", 102 | " load_libs(js_urls, function() {\n", 103 | " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", 104 | " run_inline_js();\n", 105 | " });\n", 106 | " }\n", 107 | "}(this));" 108 | ] 109 | }, 110 | 
"metadata": {}, 111 | "output_type": "display_data" 112 | } 113 | ], 114 | "source": [ 115 | "from bokeh.plotting import figure, output_file, show, ColumnDataSource\n", 116 | "from bokeh.models import HoverTool\n", 117 | "import pandas as pd\n", 118 | "from bokeh.io import output_notebook\n", 119 | "import numpy as np\n", 120 | "from bokeh.models import (\n", 121 | " GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool\n", 122 | ")\n", 123 | "from bokeh.io import output_notebook\n", 124 | "output_notebook()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 2, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "df = pd.read_csv(\"final_no_outlier\")\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "# Data for each layer\n", 147 | "\n", 148 | "D1 = df[df[\"edge_distance\"] <= (700/5280)]\n", 149 | "D2 = df[(df[\"edge_distance\"] <= (700*2/5280)) & (df[\"edge_distance\"] >= (700/5280))]\n", 150 | "D3 = df[(df[\"edge_distance\"] <= (700*3/5280)) & (df[\"edge_distance\"] >= (700*2/5280))]\n", 151 | "D4 = df[(df[\"edge_distance\"] <= (700*4/5280)) & (df[\"edge_distance\"] >= (700*3/5280))]\n", 152 | "D5 = df[(df[\"edge_distance\"] <= (700*5/5280)) & (df[\"edge_distance\"] >= (700*4/5280))]\n", 153 | "D6 = df[(df[\"edge_distance\"] <= (700*6/5280)) & (df[\"edge_distance\"] >= (700*5/5280))]\n", 154 | "D7 = df[(df[\"edge_distance\"] <= (700*7/5280)) & (df[\"edge_distance\"] >= (700*6/5280))]\n", 155 | "D8 = df[(df[\"edge_distance\"] <= (700*8/5280)) & (df[\"edge_distance\"] >= (700*7/5280))]\n", 156 | "D9 = df[(df[\"edge_distance\"] <= (700*9/5280)) & (df[\"edge_distance\"] >= (700*8/5280))]\n", 157 | "D10 = df[(df[\"edge_distance\"] >= (700*9/5280)) & (df[\"edge_distance\"] <= 2.06)]\n", 158 | "\n", 159 | "p = (D10[\"price per room\"].median()+D9[\"price per room\"].median()+D8[\"price per room\"].median()+D7[\"price per room\"].median()+D6[\"price per room\"].median()+D5[\"price per room\"].median()+D4[\"price per room\"].median()+D3[\"price per room\"].median()+D2[\"price per room\"].median()+D1[\"price per room\"].median())/10\n", 160 | "p1 = df[\"price per room\"].median()\n", 161 | "p2 = df[\"price per room\"].mean()\n", 162 | "p3 = (D10[\"price per room\"].mean()+D9[\"price per room\"].mean()+D8[\"price per room\"].mean()+D7[\"price per room\"].mean()+D6[\"price per room\"].mean()+D5[\"price per room\"].mean()+D4[\"price per room\"].mean()+D3[\"price per room\"].mean()+D2[\"price per room\"].mean()+D1[\"price per room\"].mean())/10" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 4, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "* Room Price: $ 1785.0\n", 177 | "($17 less than the Berkeley average)\n", 178 | "* Room Size: 62.4 sqft\n", 179 | "* Number of Bedrooms: 0.41\n", 180 | "* Restaurant Rating: 4.02 out of 5 stars\n", 181 | "* 2.59 Cafes\n", 182 | "* 1.35 Pubs\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "#D1\n", 188 | "print(\"* Room Price: $\", D1[\"price per room\"].median())\n", 189 | "print(\"($17 less than the Berkeley average)\")\n", 190 | "print(\"* Room Size: \", round(D1[\"sqft per room\"].mean(), 2), \"sqft\")\n", 191 | "print(\"* Number of Bedrooms: \", 
round(D1[\"bedrooms\"].mean(), 2))\n", 192 | "print(\"* Restaurant Rating: \", round(D1[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 193 | "print(\"*\", round(D1[\"#cafes\"].mean(), 2), 'Cafes')\n", 194 | "print(\"*\", round(D1[\"#pubs\"].mean(), 2), 'Pubs')" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 17, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "3 17.0\n", 209 | "2 23.0\n", 210 | "6 -113.0\n", 211 | "4 -16.0\n", 212 | "10 -190.5\n", 213 | "9 -148.0\n", 214 | "5 -74.5\n", 215 | "7 -125.0\n", 216 | "8 -126.0\n", 217 | "1 52.0\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "print('3',p1 - D1[\"price per room\"].median())\n", 223 | "print('2',p1 - D2[\"price per room\"].median())\n", 224 | "print('6',p1 - D3[\"price per room\"].median())\n", 225 | "print('4',p1 - D4[\"price per room\"].median())\n", 226 | "print('10',p1 - D5[\"price per room\"].median())\n", 227 | "print('9',p1 - D6[\"price per room\"].median())\n", 228 | "print('5',p1 - D7[\"price per room\"].median())\n", 229 | "print('7',p1 - D8[\"price per room\"].median())\n", 230 | "print('8',p1 - D9[\"price per room\"].median())\n", 231 | "print('1',p1 - D10[\"price per room\"].median())" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 6, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "* Room Price: $ 1779.0\n", 246 | "($23 less than the Berkeley average)\n", 247 | "* Room Size: 114.9 sqft\n", 248 | "* Number of Bedrooms: 0.73\n", 249 | "* Restaurant Rating: 4.07 out of 5 stars\n", 250 | "* 3.59 Cafes\n", 251 | "* 0.51 Pubs\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "#D2\n", 257 | "print(\"* Room Price: $\", D2[\"price per room\"].median())\n", 258 | "print(\"($23 less than the Berkeley average)\")\n", 259 | "print(\"* Room Size: \", round(D2[\"sqft per room\"].mean(), 2), \"sqft\")\n", 260 | "print(\"* Number of Bedrooms: \", round(D2[\"bedrooms\"].mean(), 2))\n", 261 | "print(\"* Restaurant Rating: \", round(D2[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 262 | "print(\"*\", round(D2[\"#cafes\"].mean(), 2), 'Cafes')\n", 263 | "print(\"*\", round(D2[\"#pubs\"].mean(), 2), 'Pubs')" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 11, 269 | "metadata": { 270 | "collapsed": false 271 | }, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "* Room Price: $ 1915.0\n", 278 | "($113 more than the Berkeley average)\n", 279 | "* Room Size: 239.42 sqft\n", 280 | "* Number of Bedrooms: 0.36\n", 281 | "* Restaurant Rating: 4.05 out of 5 stars\n", 282 | "* 3.63 Cafes\n", 283 | "* 0.51 Pubs\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "#D3\n", 289 | "print(\"* Room Price: $\", D3[\"price per room\"].median())\n", 290 | "print(\"($113 more than the Berkeley average)\")\n", 291 | "print(\"* Room Size: \", round(D3[\"sqft per room\"].mean(), 2), \"sqft\")\n", 292 | "print(\"* Number of Bedrooms: \", round(D3[\"bedrooms\"].mean(), 2))\n", 293 | "print(\"* Restaurant Rating: \", round(D3[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 294 | "print(\"*\", round(D3[\"#cafes\"].mean(), 2), 'Cafes')\n", 295 | "print(\"*\", round(D3[\"#pubs\"].mean(), 2), 'Pubs')" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 8, 301 | 
"metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "* Room Price: $ 1818.0\n", 310 | "($16 more than the Berkeley average)\n", 311 | "* Room Size: 278.33 sqft\n", 312 | "* Number of Bedrooms: 0.47\n", 313 | "* Restaurant Rating: 4.04 out of 5 stars\n", 314 | "* 2.31 Cafes\n", 315 | "* 0.46 Pubs\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "#D4\n", 321 | "print(\"* Room Price: $\", D4[\"price per room\"].median())\n", 322 | "print(\"($16 more than the Berkeley average)\")\n", 323 | "print(\"* Room Size: \", round(D4[\"sqft per room\"].mean(), 2), \"sqft\")\n", 324 | "print(\"* Number of Bedrooms: \", round(D4[\"bedrooms\"].mean(), 2))\n", 325 | "print(\"* Restaurant Rating: \", round(D4[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 326 | "print(\"*\", round(D4[\"#cafes\"].mean(), 2), 'Cafes')\n", 327 | "print(\"*\", round(D4[\"#pubs\"].mean(), 2), 'Pubs')" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 9, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "* Room Price: $ 1992.5\n", 342 | "($190.5 more than the Berkeley average)\n", 343 | "* Room Size: 287.95 sqft\n", 344 | "* Number of Bedrooms: 0.39\n", 345 | "* Restaurant Rating: 4.04 out of 5 stars\n", 346 | "* 1.98 Cafes\n", 347 | "* 0.56 Pubs\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "#D5\n", 353 | "print(\"* Room Price: $\", D5[\"price per room\"].median())\n", 354 | "print(\"($190.5 more than the Berkeley average)\")\n", 355 | "print(\"* Room Size: \", round(D5[\"sqft per room\"].mean(), 2), \"sqft\")\n", 356 | "print(\"* Number of Bedrooms: \", round(D5[\"bedrooms\"].mean(), 2))\n", 357 | "print(\"* Restaurant Rating: \", round(D5[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 358 | "print(\"*\", round(D5[\"#cafes\"].mean(), 2), 'Cafes')\n", 359 | "print(\"*\", round(D5[\"#pubs\"].mean(), 2), 'Pubs')" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 12, 365 | "metadata": { 366 | "collapsed": false 367 | }, 368 | "outputs": [ 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "* Room Price: $ 1950.0\n", 374 | "($148 more than the Berkeley average)\n", 375 | "* Room Size: 202.18 sqft\n", 376 | "* Number of Bedrooms: 0.48\n", 377 | "* Restaurant Rating: 4.03 out of 5 stars\n", 378 | "* 1.84 Cafes\n", 379 | "* 0.69 Pubs\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "#D6\n", 385 | "print(\"* Room Price: $\", D6[\"price per room\"].median())\n", 386 | "print(\"($148 more than the Berkeley average)\")\n", 387 | "print(\"* Room Size: \", round(D6[\"sqft per room\"].mean(), 2), \"sqft\")\n", 388 | "print(\"* Number of Bedrooms: \", round(D6[\"bedrooms\"].mean(), 2))\n", 389 | "print(\"* Restaurant Rating: \", round(D6[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 390 | "print(\"*\", round(D6[\"#cafes\"].mean(), 2), 'Cafes')\n", 391 | "print(\"*\", round(D6[\"#pubs\"].mean(), 2), 'Pubs')" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 13, 397 | "metadata": { 398 | "collapsed": false, 399 | "scrolled": true 400 | }, 401 | "outputs": [ 402 | { 403 | "name": "stdout", 404 | "output_type": "stream", 405 | "text": [ 406 | "* Room Price: $ 1876.5\n", 407 | "($74.5 more than the Berkeley average)\n", 408 | "* Room Size: 189.48 sqft\n", 409 | "* Number of Bedrooms: 0.29\n", 410 | "* 
Restaurant Rating: 4.03 out of 5 stars\n", 411 | "* 2.16 Cafes\n", 412 | "* 0.75 Pubs\n" 413 | ] 414 | } 415 | ], 416 | "source": [ 417 | "#D7\n", 418 | "print(\"* Room Price: $\", D7[\"price per room\"].median())\n", 419 | "print(\"($74.5 more than the Berkeley average)\")\n", 420 | "print(\"* Room Size: \", round(D7[\"sqft per room\"].mean(), 2), \"sqft\")\n", 421 | "print(\"* Number of Bedrooms: \", round(D7[\"bedrooms\"].mean(), 2))\n", 422 | "print(\"* Restaurant Rating: \", round(D7[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 423 | "print(\"*\", round(D7[\"#cafes\"].mean(), 2), 'Cafes')\n", 424 | "print(\"*\", round(D7[\"#pubs\"].mean(), 2), 'Pubs')" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 14, 430 | "metadata": { 431 | "collapsed": false, 432 | "scrolled": true 433 | }, 434 | "outputs": [ 435 | { 436 | "name": "stdout", 437 | "output_type": "stream", 438 | "text": [ 439 | "* Room Price: $ 1927.0\n", 440 | "($125 more than the Berkeley average)\n", 441 | "* Room Size: 209.43 sqft\n", 442 | "* Number of Bedrooms: 0.37\n", 443 | "* Restaurant Rating: 4.02 out of 5 stars\n", 444 | "* 2.53 Cafes\n", 445 | "* 0.69 Pubs\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "#D8\n", 451 | "print(\"* Room Price: $\", D8[\"price per room\"].median())\n", 452 | "print(\"($125 more than the Berkeley average)\")\n", 453 | "print(\"* Room Size: \", round(D8[\"sqft per room\"].mean(), 2), \"sqft\")\n", 454 | "print(\"* Number of Bedrooms: \", round(D8[\"bedrooms\"].mean(), 2))\n", 455 | "print(\"* Restaurant Rating: \", round(D8[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 456 | "print(\"*\", round(D8[\"#cafes\"].mean(), 2), 'Cafes')\n", 457 | "print(\"*\", round(D8[\"#pubs\"].mean(), 2), 'Pubs')" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 15, 463 | "metadata": { 464 | "collapsed": false 465 | }, 466 | "outputs": [ 467 | { 468 | "name": "stdout", 469 | "output_type": "stream", 470 | "text": [ 471 | "* Room Price: $ 1928.0\n", 472 | "($126 more than the Berkeley average)\n", 473 | "* Room Size: 150.0 sqft\n", 474 | "* Number of Bedrooms: 0.31\n", 475 | "* Restaurant Rating: 4.02 out of 5 stars\n", 476 | "* 2.77 Cafes\n", 477 | "* 0.63 Pubs\n" 478 | ] 479 | } 480 | ], 481 | "source": [ 482 | "#D9\n", 483 | "print(\"* Room Price: $\", D9[\"price per room\"].median())\n", 484 | "print(\"($126 more than the Berkeley average)\")\n", 485 | "print(\"* Room Size: \", round(D9[\"sqft per room\"].mean(), 2), \"sqft\")\n", 486 | "print(\"* Number of Bedrooms: \", round(D9[\"bedrooms\"].mean(), 2))\n", 487 | "print(\"* Restaurant Rating: \", round(D9[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 488 | "print(\"*\", round(D9[\"#cafes\"].mean(), 2), 'Cafes')\n", 489 | "print(\"*\", round(D9[\"#pubs\"].mean(), 2), 'Pubs')" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 16, 495 | "metadata": { 496 | "collapsed": false 497 | }, 498 | "outputs": [ 499 | { 500 | "name": "stdout", 501 | "output_type": "stream", 502 | "text": [ 503 | "* Room Price: $ 1750.0\n", 504 | "($52 more than the Berkeley average)\n", 505 | "* Room Size: 257.66 sqft\n", 506 | "* Number of Bedrooms: 0.34\n", 507 | "* Restaurant Rating: 4.03 out of 5 stars\n", 508 | "* 3.1 Cafes\n", 509 | "* 0.85 Pubs\n" 510 | ] 511 | } 512 | ], 513 | "source": [ 514 | "#D10\n", 515 | "print(\"* Room Price: $\", D10[\"price per room\"].median())\n", 516 | "print(\"($52 more than the Berkeley average)\")\n", 517 | "print(\"* Room Size: \", 
round(D10[\"sqft per room\"].mean(), 2), \"sqft\")\n", 518 | "print(\"* Number of Bedrooms: \", round(D10[\"bedrooms\"].mean(), 2))\n", 519 | "print(\"* Restaurant Rating: \", round(D10[\"avg_rating\"].mean(), 2), \"out of 5 stars\")\n", 520 | "print(\"*\", round(D10[\"#cafes\"].mean(), 2), 'Cafes')\n", 521 | "print(\"*\", round(D10[\"#pubs\"].mean(), 2), 'Pubs')" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": { 528 | "collapsed": true 529 | }, 530 | "outputs": [], 531 | "source": [] 532 | } 533 | ], 534 | "metadata": { 535 | "kernelspec": { 536 | "display_name": "Python 3", 537 | "language": "python", 538 | "name": "python3" 539 | }, 540 | "language_info": { 541 | "codemirror_mode": { 542 | "name": "ipython", 543 | "version": 3 544 | }, 545 | "file_extension": ".py", 546 | "mimetype": "text/x-python", 547 | "name": "python", 548 | "nbconvert_exporter": "python", 549 | "pygments_lexer": "ipython3", 550 | "version": "3.5.1" 551 | } 552 | }, 553 | "nbformat": 4, 554 | "nbformat_minor": 0 555 | } 556 | -------------------------------------------------------------------------------- /Map/Area Divider/Distance Divider.kml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Distance Divider 5 | 6 | Campus bound 7 | #line-000000-1-nodesc 8 | 9 | 1 10 | -122.25568770000001,37.8756664,0.0 -122.2660303,37.8740404,0.0 -122.2653437,37.8679764,0.0 -122.25195410000002,37.8698058,0.0 11 | 12 | 13 | 14 | < 0.13 miles to Campus 15 | ($17 less than the Berkeley average)
* Room Size: 62.4 sqft
* Number of Bedrooms: 0.41
* Restaurant Rating: 4.02 out of 5 stars
* 2.59 Cafes
* 1.35 Pubs]]>
16 | #poly-F4EB37-0-112 17 | 18 | 19 | 20 | 1 21 | -122.25618120000001,37.8775634,0.0 -122.26890560000001,37.8756495,0.0 -122.2686481,37.8756833,0.0 -122.2677255,37.8657234,0.0 -122.2500014,37.8680102,0.0 -122.25195410000002,37.86980580000001,0.0 -122.2653437,37.86797640000001,0.0 -122.2660303,37.8740404,0.0 -122.25568770000001,37.8756664,0.0 -122.25618120000001,37.8775634,0.0 22 | 23 | 24 | 25 |
26 | 27 | < 0.27 miles from Campus  28 | ($23 less than the Berkeley average)
* Room Size: 114.9 sqft
* Number of Bedrooms: 0.73
* Restaurant Rating: 4.07 out of 5 stars
* 3.59 Cafes
* 0.51 Pubs]]>
29 | #poly-F4EB37-1-255 30 | 31 | 32 | 33 | 1 34 | -122.25667479999998,37.8794603,0.0 -122.2711802,37.8773263,0.0 -122.2701502,37.8634704,0.0 -122.24800590000001,37.8662824,0.0 -122.2500014,37.8680102,0.0 -122.2678757,37.865570999999996,0.0 -122.2686481,37.8756833,0.0 -122.25618120000001,37.8775634,0.0 -122.25667479999998,37.8794603,0.0 35 | 36 | 37 | 38 |
39 | 40 | < 0.39 miles from Campus 41 | ($113 more than the Berkeley average)
* Room Size: 239.42 sqft
* Number of Bedrooms: 0.36
* Restaurant Rating: 4.05 out of 5 stars
* 3.63 Cafes
* 0.51 Pubs]]>
42 | #poly-F4B400-1-130 43 | 44 | 45 | 46 | 1 47 | -122.2569966,37.881408,0.0 -122.27379800000001,37.8790538,0.0 -122.27272510000002,37.8612003,0.0 -122.24590300000001,37.8643513,0.0 -122.24800590000001,37.8662824,0.0 -122.2701502,37.8634704,0.0 -122.2711802,37.8773263,0.0 -122.25667479999998,37.8794603,0.0 -122.2569966,37.881408,0.0 48 | 49 | 50 | 51 |
52 | 53 | < 0.53 miles from Campus 54 | ($16 more than the Berkeley average)
* Room Size: 278.33 sqft
* Number of Bedrooms: 0.47
* Restaurant Rating: 4.04 out of 5 stars
* 2.31 Cafes
* 0.46 Pubs]]>
55 | #poly-F4EB37-1-191 56 | 57 | 58 | 59 | 1 60 | -122.2604513,37.8608615,0.0 -122.24448680000002,37.8626911,0.0 -122.24590300000001,37.8643513,0.0 -122.27272510000002,37.8612003,0.0 -122.27379800000001,37.8790538,0.0 -122.2569966,37.881408,0.0 -122.25761890000001,37.88327090000001,0.0 -122.27615830000002,37.8807136,0.0 -122.2750854,37.8589641,0.0 -122.2604513,37.8608615,0.0 61 | 62 | 63 | 64 |
65 | 66 | < 0.66 miles from Campus 67 | ($190.5 more than the Berkeley average)
* Room Size: 287.95 sqft
* Number of Bedrooms: 0.39
* Restaurant Rating: 4.04 out of 5 stars
* 1.98 Cafes
* 0.56 Pubs]]>
68 | #poly-DB4436-0-201 69 | 70 | 71 | 72 | 1 73 | -122.25761890000001,37.88327090000001,0.0 -122.258091,37.8850153,0.0 -122.278862,37.88210240000001,0.0 -122.277832,37.8567617,0.0 -122.2428989,37.8608954,0.0 -122.24448680000002,37.8626911,0.0 -122.2750854,37.8589641,0.0 -122.27615830000002,37.8807136,0.0 -122.25761890000001,37.88327090000001,0.0 74 | 75 | 76 | 77 |
78 | 79 | < 0.80 miles from Campus 80 | ($148 more than the Berkeley average)
* Room Size: 202.18 sqft
* Number of Bedrooms: 0.48
* Restaurant Rating: 4.03 out of 5 stars
* 1.84 Cafes
* 0.69 Pubs]]>
81 | #poly-DB4436-0-130 82 | 83 | 84 | 85 | 1 86 | -122.258091,37.8850153,0.0 -122.2586918,37.8870475,0.0 -122.28143690000002,37.8839314,0.0 -122.28023530000002,37.8542543,0.0 -122.24101070000002,37.8588625,0.0 -122.2428989,37.8608954,0.0 -122.277832,37.8567617,0.0 -122.278862,37.88210240000001,0.0 -122.258091,37.8850153,0.0 87 | 88 | 89 | 90 |
91 | 92 | < 0.93 miles from Campus 93 | ($74.5 more than the Berkeley average)
* Room Size: 189.48 sqft
* Number of Bedrooms: 0.29
* Restaurant Rating: 4.03 out of 5 stars
* 2.16 Cafes
* 0.75 Pubs]]>
94 | #poly-F8971B-1-150 95 | 96 | 97 | 98 | 1 99 | -122.2593784,37.888741,0.0 -122.2841835,37.8854895,0.0 -122.28349690000002,37.8855572,0.0 -122.28246690000002,37.852288900000005,0.0 -122.2397232,37.8572361,0.0 -122.24101070000002,37.8588625,0.0 -122.28023530000002,37.8542543,0.0 -122.28143690000002,37.8839314,0.0 -122.2586918,37.8870475,0.0 -122.2593784,37.888741,0.0 100 | 101 | 102 | 103 |
104 | 105 | < 1.06 miles from Campus 106 | ($125 more than the Berkeley average)
* Room Size: 209.43 sqft
* Number of Bedrooms: 0.37
* Restaurant Rating: 4.02 out of 5 stars
* 2.53 Cafes
* 0.69 Pubs]]>
107 | #poly-F8971B-0-171 108 | 109 | 110 | 111 | 1 112 | -122.2603226,37.8906376,0.0 -122.2860718,37.8875217,0.0 -122.28495600000001,37.8500524,0.0 -122.23792080000001,37.8552708,0.0 -122.2397232,37.8572361,0.0 -122.28246690000002,37.852288900000005,0.0 -122.28349690000002,37.8855572,0.0 -122.2593784,37.888741,0.0 -122.2603226,37.8906376,0.0 113 | 114 | 115 | 116 |
117 | 118 | < 1.19 miles from Campus 119 | ($126 more than the Berkeley average)
* Room Size: 150.0 sqft
* Number of Bedrooms: 0.31
* Restaurant Rating: 4.02 out of 5 stars
* 2.77 Cafes
* 0.63 Pubs]]>
120 | #poly-DB4436-0-74 121 | 122 | 123 | 124 | 1 125 | -122.2885609,37.8875894,0.0 -122.2875309,37.8480192,0.0 -122.23611829999999,37.853441,0.0 -122.23792080000001,37.8552708,0.0 -122.28495600000001,37.8500524,0.0 -122.2860718,37.8875217,0.0 -122.2603226,37.8906376,0.0 -122.26083759999999,37.8926697,0.0 -122.2887325,37.8895538,0.0 -122.2885609,37.8875894,0.0 126 | 127 | 128 | 129 |
130 | 131 | Further than 1.19 miles away 132 | ($52 less than the Berkeley average)
* Room Size: 257.66 sqft
* Number of Bedrooms: 0.34
* Restaurant Rating: 4.03 out of 5 stars
* 3.1 Cafes
* 0.85 Pubs]]>
133 | #poly-F9F7A6-1-110 134 | 135 | 136 | 137 | 1 138 | -122.30564120000001,37.8978851,0.0 -122.3051262,37.834734,0.0 -122.2234154,37.8431392,0.0 -122.23611829999999,37.853441,0.0 -122.2875309,37.8480192,0.0 -122.2887325,37.8895538,0.0 -122.26083759999999,37.8926697,0.0 -122.2649574,37.9024905,0.0 -122.30564120000001,37.8978851,0.0 139 | 140 | 141 | 142 |
143 | 152 | 161 | 162 | 163 | normal 164 | #line-000000-1-nodesc-normal 165 | 166 | 167 | highlight 168 | #line-000000-1-nodesc-highlight 169 | 170 | 171 | 182 | 193 | 194 | 195 | normal 196 | #poly-DB4436-0-130-normal 197 | 198 | 199 | highlight 200 | #poly-DB4436-0-130-highlight 201 | 202 | 203 | 214 | 225 | 226 | 227 | normal 228 | #poly-DB4436-0-201-normal 229 | 230 | 231 | highlight 232 | #poly-DB4436-0-201-highlight 233 | 234 | 235 | 246 | 257 | 258 | 259 | normal 260 | #poly-DB4436-0-74-normal 261 | 262 | 263 | highlight 264 | #poly-DB4436-0-74-highlight 265 | 266 | 267 | 278 | 289 | 290 | 291 | normal 292 | #poly-F4B400-1-130-normal 293 | 294 | 295 | highlight 296 | #poly-F4B400-1-130-highlight 297 | 298 | 299 | 310 | 321 | 322 | 323 | normal 324 | #poly-F4EB37-0-112-normal 325 | 326 | 327 | highlight 328 | #poly-F4EB37-0-112-highlight 329 | 330 | 331 | 342 | 353 | 354 | 355 | normal 356 | #poly-F4EB37-1-191-normal 357 | 358 | 359 | highlight 360 | #poly-F4EB37-1-191-highlight 361 | 362 | 363 | 374 | 385 | 386 | 387 | normal 388 | #poly-F4EB37-1-255-normal 389 | 390 | 391 | highlight 392 | #poly-F4EB37-1-255-highlight 393 | 394 | 395 | 406 | 417 | 418 | 419 | normal 420 | #poly-F8971B-0-171-normal 421 | 422 | 423 | highlight 424 | #poly-F8971B-0-171-highlight 425 | 426 | 427 | 438 | 449 | 450 | 451 | normal 452 | #poly-F8971B-1-150-normal 453 | 454 | 455 | highlight 456 | #poly-F8971B-1-150-highlight 457 | 458 | 459 | 470 | 481 | 482 | 483 | normal 484 | #poly-F9F7A6-1-110-normal 485 | 486 | 487 | highlight 488 | #poly-F9F7A6-1-110-highlight 489 | 490 | 491 |
492 |
-------------------------------------------------------------------------------- /Map/AreaMap.txt: -------------------------------------------------------------------------------- 1 | 2 | 2 Area Divider Map: 3 | 4 | https://www.google.com/maps/d/edit?hl=en_US&app=mp&mid=zPokVQ1ZcjGc.kyW71epnUY7s 5 | 6 | 7 | To embed this map in your own website, copy the following HTML and paste it in the source code for your page: 8 | 9 | -------------------------------------------------------------------------------- /Map/HeatMap.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from bokeh.plotting import figure, output_file, show, ColumnDataSource 4 | from bokeh.models import HoverTool, CustomJS 5 | import pandas as pd 6 | from bokeh.io import output_notebook 7 | import numpy as np 8 | from bokeh.models import ( 9 | GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool, ResetTool, RedoTool, UndoTool 10 | ) 11 | from bokeh.models import GeoJSONDataSource 12 | output_notebook() 13 | 14 | 15 | #Get Data 16 | df = pd.read_csv("final_no_outlier") 17 | 18 | latitude = df['latitude'].tolist() 19 | longitude = df['longitude'].tolist() 20 | price = df['price per room'].tolist() 21 | r = [0.0003] * len(price) 22 | address = df['address'].tolist() 23 | distance = df['edge_distance'].tolist() 24 | sqft = df['sqft per room'].tolist() 25 | food = df['avg_rating'].tolist() 26 | cafes = df['#cafes'].tolist() 27 | pubs = df['#pubs'].tolist() 28 | 29 | 30 | # Create color span based on price 31 | colors = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"] 32 | num_color = len(colors) 33 | # create a price range list correspond to the color range list 34 | max_price = max(price) 35 | min_price = min(price) 36 | interval = (max_price - min_price) / num_color 37 | price_range = [] 38 | for i in range(num_color + 1): 39 | price_range.append(min_price + i * interval) 40 | # give color to each location 41 | loc_color = [] 42 | for p in price: 43 | for i in range(num_color): 44 | if p >= price_range[i] and p <= price_range[i + 1]: 45 | loc_color.append(colors[i]) 46 | break 47 | 48 | 49 | # Plot and google map 50 | map_options = GMapOptions(lat=37.87, lng=-122.27, map_type="roadmap", zoom=14) 51 | 52 | plot = GMapPlot( 53 | x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options, title="Austin", plot_width=1200, plot_height=600, 54 | webgl=True 55 | ) 56 | 57 | source = ColumnDataSource( 58 | data=dict( 59 | lat = latitude, 60 | lon = longitude, 61 | color = loc_color, 62 | rprice = price, 63 | dis = distance, 64 | addr = address, 65 | restaurant = food, 66 | cafe = cafes, 67 | pub = pubs, 68 | size = sqft, 69 | ) 70 | ) 71 | circle = Circle(x = 'lon', y = 'lat', fill_color = 'color', size = 10, fill_alpha=0.6, line_color=None) 72 | plot.add_glyph(source, circle) 73 | 74 | # Hover 75 | hover = HoverTool() 76 | hover.point_policy = "follow_mouse" 77 | hover.tooltips = [ 78 | ("Address", "@addr"), 79 | ("Price per Room", "@rprice"), 80 | ("Distance to Campus", "@dis"), 81 | ("Food Quality", "@restaurant"), 82 | ("Number of cafes", "@cafe"), 83 | ("Number of pubs", "@pub"), 84 | ("Room size", "@size"), 85 | ] 86 | 87 | tools = [PanTool(), WheelZoomTool(), hover] 88 | plot.add_tools(*tools) 89 | 90 | output_file("gmap_plot.html") 91 | show(plot) -------------------------------------------------------------------------------- /Map/MISC/Heat Model.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from six.moves import zip\n", 13 | "\n", 14 | "from bokeh.plotting import figure, show, output_file\n", 15 | "\n", 16 | "N = 4000\n", 17 | "\n", 18 | "x = np.random.random(size=N) * 100\n", 19 | "y = np.random.random(size=N) * 100\n", 20 | "radii = np.random.random(size=N) * 1.5\n", 21 | "colors = [\"#F1EEF6\"] * len(x)\n", 22 | "# colors = [\"#%02x%02x%02x\" % (r, g, 150) for r, g in zip(np.floor(50+2*x), np.floor(30+2*y))]\n", 23 | "\n", 24 | "TOOLS=\"resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select\"\n", 25 | "\n", 26 | "output_file(\"color_scatter.html\", title=\"color_scatter.py example\")\n", 27 | "\n", 28 | "p = figure(tools=TOOLS)\n", 29 | "p.scatter(x, y, radius=radii, fill_color=colors, fill_alpha=0.6, line_color=None)\n", 30 | "\n", 31 | "show(p) # open a browser" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "array([ 1.3351431 , 0.19617915, 0.01596329, ..., 0.51595789,\n", 45 | " 0.79269787, 1.26001538])" 46 | ] 47 | }, 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "radii" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 6, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "4000" 68 | ] 69 | }, 70 | "execution_count": 6, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "len(radii)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 10, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "array([ 0.59904922, 1.04101823, 1.20725021, ..., 1.21874155,\n", 90 | " 0.1902254 , 0.63196059])" 91 | ] 92 | }, 93 | "execution_count": 10, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "radii" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.5.1" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 0 133 | } 134 | -------------------------------------------------------------------------------- /Map/MISC/Screen Shot 2016-04-14 at 12.42.44 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Map/MISC/Screen Shot 2016-04-14 at 12.42.44 AM.png -------------------------------------------------------------------------------- /Map/MISC/Screen Shot 2016-04-14 at 12.44.11 AM.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Map/MISC/Screen Shot 2016-04-14 at 12.44.11 AM.png -------------------------------------------------------------------------------- /Map/MISC/Screen Shot 2016-04-14 at 12.45.47 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Map/MISC/Screen Shot 2016-04-14 at 12.45.47 AM.png -------------------------------------------------------------------------------- /README.rtfd/Screen Shot 2016-04-14 at 12.42.44 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/README.rtfd/Screen Shot 2016-04-14 at 12.42.44 AM.png -------------------------------------------------------------------------------- /README.rtfd/Screen Shot 2016-04-14 at 12.44.11 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/README.rtfd/Screen Shot 2016-04-14 at 12.44.11 AM.png -------------------------------------------------------------------------------- /README.rtfd/Screen Shot 2016-04-14 at 12.45.47 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/README.rtfd/Screen Shot 2016-04-14 at 12.45.47 AM.png -------------------------------------------------------------------------------- /README.rtfd/TXT.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/README.rtfd/TXT.rtf -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/CustomDupeFilter.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | from scrapy.dupefilter import RFPDupeFilter 3 | from scrapy.conf import settings 4 | 5 | 6 | class CustomDupeFilter(RFPDupeFilter): 7 | 8 | def __init__(self, path=None, other=None): 9 | inmem = [str(it['link']) for it in pymongo.MongoClient(settings['MONGODB_SERVER'], settings['MONGODB_PORT']).HousingListings.usedLinks.find()] 10 | self.already_seen = set(inmem) 11 | RFPDupeFilter.__init__(self, path, other) 12 | 13 | def request_seen(self, request): 14 | if request.url in self.already_seen: 15 | return True 16 | else: 17 | self.already_seen.add(request.url) 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Scraper/CraigslistHousingScraper/CraigslistHousingScraper/__init__.py -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/items.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy.item import Item, Field 9 | 10 | 11 | class CraigslistItem(Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | title = Field() 15 | address = Field() 16 | sqft = Field() 17 | numImages = Field() 18 | description = Field() 19 | price = Field() 20 | postingDate = Field() 21 | updateDate = Field() 22 | latitude = Field() 23 | longitude = Field() 24 | #reposts = Field() 25 | zipcode = Field() 26 | bedrooms = Field() 27 | bathrooms = Field() 28 | link = Field() 29 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import pymongo 8 | from scrapy.conf import settings 9 | 10 | 11 | 12 | class MongoPipe(object): 13 | 14 | def __init__(self): 15 | self.server = settings['MONGODB_SERVER'] 16 | self.port = settings['MONGODB_PORT'] 17 | self.client = pymongo.MongoClient(self.server, self.port) 18 | self.db = self.client.HousingListings 19 | self.itemCollection = self.db.listings 20 | self.repeatsCollection = self.db.usedLinks 21 | 22 | def process_item(self, item, spider): 23 | insertedId = self.itemCollection.insert(dict(item)) 24 | rep = self.repeatsCollection.insert({"link": item['link'], 'obj': insertedId}) 25 | return item 26 | 27 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for CraigslistHousingScraper project 4 | # 5 | # For simplicity, this file contains only the most important settings by 6 | # default. All the other settings are documented here: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # 10 | from fake_useragent import UserAgent 11 | 12 | BOT_NAME = 'CraigslistHousingScraper' 13 | 14 | MONGODB_SERVER = 'localhost' 15 | MONGODB_PORT = 27017 16 | 17 | SPIDER_MODULES = ['CraigslistHousingScraper.spiders'] 18 | NEWSPIDER_MODULE = 'CraigslistHousingScraper.spiders' 19 | 20 | ITEM_PIPELINES = ( 21 | 'CraigslistHousingScraper.pipelines.MongoPipe', 22 | ) 23 | 24 | DUPEFILTER_CLASS = 'CraigslistHousingScraper.CustomDupeFilter.CustomDupeFilter' 25 | 26 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 27 | ua = UserAgent() 28 | USER_AGENT = ua.random 29 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
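# (In this project the spiders package holds a single spider: HousingscraperSpider,
# defined in housingScraper.py below and registered under the name "housingScraper".)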
5 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/CraigslistHousingScraper/spiders/housingScraper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from scrapy.contrib.spiders import CrawlSpider, Rule 4 | from scrapy.contrib.linkextractors import LinkExtractor 5 | from CraigslistHousingScraper.items import CraigslistItem 6 | import datetime 7 | import time 8 | import pymongo 9 | from scrapy.conf import settings 10 | 11 | 12 | class HousingscraperSpider(CrawlSpider): 13 | name = "housingScraper" 14 | allowed_domains = ["sfbay.craigslist.org"] 15 | start_urls = [ 16 | 'http://sfbay.craigslist.org/search/apa?s=0&query=berkeley' 17 | ] 18 | 19 | rules = [ 20 | Rule(LinkExtractor(allow="http:\/\/sfbay\.craigslist\.org\/search\/", deny="http:\/\/sfbay\.craigslist\.org\/eby\/apa\/", \ 21 | restrict_xpaths=["//*[@id='searchform']/div[4]", "//*[@id='searchform']/div[5]/div[3]/span[2]/a[3]"]),\ 22 | follow=True), 23 | 24 | Rule(LinkExtractor(allow="http:\/\/sfbay\.craigslist\.org\/eby\/apa\/", \ 25 | deny=["http:\/\/sfbay\.craigslist\.org\/search\/"]), \ 26 | callback='parse_listing', follow=False) 27 | ] 28 | 29 | def parse_listing(self, response): 30 | item = CraigslistItem() 31 | 32 | #stolen (and modified) from some dude on the internet cause fuck xpath - https://github.com/jayfeng1/Craigslist-Pricing-Project/blob/master/craigslist/spiders/CraigSpyder.py 33 | # 34 | #temp = postings[i].xpath("span[@class='txt']") 35 | #info = temp.xpath("span[@class='pl']") 36 | #title of posting 37 | item["title"] = response.xpath("//*[@id='titletextonly']/text()").extract()[0] 38 | #date of posting 39 | curr = response.xpath("//*[@id='pagecontainer']/section/section/div[2]/p[2]/time/text()").extract()[0].split()[0] 40 | item["postingDate"] = int(time.mktime(datetime.datetime.strptime(curr, "%Y-%m-%d").timetuple())) 41 | #pre-processing for getting the price in the right format 42 | #item["area"] = ''.join(temp.xpath("span")[2].xpath("span[@class='pnr']").xpath("small/text()").extract()) 43 | item["price"] = int(response.xpath("//*[@id='pagecontainer']/section/h2/span[2]/span[1]/text()").extract()[0].replace("$",""))#price.replace("$","") 44 | item["link"] = response.url 45 | 46 | maplocation = response.xpath("//div[contains(@id,'map')]") 47 | latitude = ''.join(maplocation.xpath('@data-latitude').extract()) 48 | longitude = ''.join(maplocation.xpath('@data-longitude').extract()) 49 | tmp = response.xpath("//*[@id='pagecontainer']/section/section/div[1]/div[1]/div[2]/text()").extract() 50 | if len(tmp) > 0: 51 | item['address'] = tmp[0] 52 | if latitude: 53 | item['latitude'] = float(latitude) 54 | if longitude: 55 | item['longitude'] = float(longitude) 56 | attr = response.xpath("//p[@class='attrgroup']") 57 | 58 | chars_to_remove = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 59 | 60 | try: 61 | beds = str(response.xpath("//*[@id='pagecontainer']/section/section/div[1]/p[1]/span[1]/b[1]/text()").extract()[0]).translate(None, chars_to_remove) 62 | if beds == "": 63 | beds = "1" 64 | item["bedrooms"] = float(beds) 65 | except IndexError: 66 | pass 67 | try: 68 | baths = str(response.xpath("//*[@id='pagecontainer']/section/section/div[1]/p[1]/span[1]/b[2]/text()").extract()[0]).translate(None, chars_to_remove) 69 | if baths == "": 70 | baths = 1 71 | item["bathrooms"] = float(baths) 72 | except IndexError: 73 | pass 74 | try: 75 | item["sqft"] = 
int(''.join(attr.xpath("span")[1].xpath("b/text()").extract())) 76 | except: 77 | pass 78 | 79 | item['description'] = "".join(response.xpath("//section[@id='postingbody']").xpath("text()").extract()) 80 | item["numImages"] = len(response.xpath("//div[@id='thumbs']").xpath("a")) 81 | return item 82 | -------------------------------------------------------------------------------- /Scraper/CraigslistHousingScraper/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # http://doc.scrapy.org/en/latest/topics/scrapyd.html 5 | 6 | [settings] 7 | default = CraigslistHousingScraper.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = CraigslistHousingScraper 12 | -------------------------------------------------------------------------------- /Scraper/RentBoardDataCollection/dataCollection.js: -------------------------------------------------------------------------------- 1 | //http://www.cityofberkeley.info/RentBoardUnitSearch.aspx 2 | var GoogleMapsAPI = require("googlemaps"); 3 | var keyFile = require('./key'); 4 | var async = require('async'); 5 | 6 | var gmAPI = new GoogleMapsAPI({ 7 | key: keyFile.KEY, 8 | stagger_time: 1000, // for elevationPath 9 | encode_polylines: false, 10 | secure: true, // use https 11 | }); 12 | 13 | var genPoint = function(original_lat, original_lng, radius) { 14 | var r = radius/111300 // = 16000 meters ~ 10 miles 15 | , y0 = original_lat 16 | , x0 = original_lng 17 | , u = Math.random() 18 | , v = Math.random() 19 | , w = r * Math.sqrt(u) 20 | , t = 2 * Math.PI * v 21 | , x = w * Math.cos(t) 22 | , y1 = w * Math.sin(t) 23 | , x1 = x / Math.cos(y0) 24 | 25 | newY = y0 + y1; 26 | newX = x0 + x1; 27 | 28 | return {"lat": newX, "lon": newY} 29 | } 30 | 31 | function pointsGenner(numpts, radius, lat, lng) { 32 | l = []; 33 | for (var i = 0; i < numpts; i++) { 34 | l.push(genPoint(lat, lng, radius)); 35 | } 36 | return l; 37 | } 38 | 39 | function asyncRevGeo(numpts, radius, lat, lon) { 40 | async.eachSeries(pointsGenner(numpts, radius, lat, lon), function gmaps(item, cb) { 41 | gmAPI.reverseGeocode({ 42 | "latlng": item['lon'].toString() + ", " + item['lat'] //'37.872539, -122.263458', 43 | // "bounds": ['37.881475, -122.282017', '37.845395, -122.251977'] 44 | }, function (err, result) { 45 | console.log(result); 46 | setImmediate(cb); 47 | }); 48 | }, function donzo() { 49 | console.log("DONE"); 50 | }); 51 | } 52 | 53 | asyncRevGeo(25, 1500/*3218.69*/, 37.871419, -122.259707); //8k = five miles in meters 54 | 55 | 56 | -------------------------------------------------------------------------------- /Scraper/RentBoardDataCollection/scrapeDataFromApts.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import glob 3 | import json 4 | 5 | addressesFile = glob.glob("../../Data/Clean/addresses.json")[0] 6 | 7 | 8 | def doTheThings(addressesFilename): 9 | lem = [] 10 | jdict = json.loads(open(addressesFilename).read()) 11 | for address in jdict['addrs']: 12 | currstr = address[0] + "/" + address[1] 13 | res = requests.get("http://apartments.jerryuejio.com/map/br0123pr0-5000ls2014-01-01/api/address/" + currstr) 14 | lem.append(res.json()) 15 | ret = {"housing": lem} 16 | with open('rentCollectionData.json', 'w') as out: 17 | json.dump(ret, out, indent=4) 18 | 19 | doTheThings(addressesFile) 20 | 
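dataCollection.js above samples random points inside a fixed radius of campus and reverse-geocodes each one through the Google Maps API. The sampling math is the only subtle piece, so here is a rough, illustrative Python re-implementation (not repository code); note that the JS helper passes the latitude in degrees to Math.cos, which expects radians, whereas this sketch converts first.

# Illustrative Python equivalent of genPoint()/pointsGenner() in dataCollection.js.
# Not repository code; the constants mirror the asyncRevGeo(25, 1500, ...) call above.
import math
import random

def gen_point(lat0, lng0, radius_m):
    """Uniformly sample a point within radius_m metres of (lat0, lng0)."""
    r = radius_m / 111300.0                 # metres -> degrees of latitude
    u, v = random.random(), random.random()
    w = r * math.sqrt(u)                    # sqrt(u) keeps the points uniform over the disk
    t = 2 * math.pi * v
    d_lat = w * math.sin(t)
    d_lng = w * math.cos(t) / math.cos(math.radians(lat0))  # a degree of longitude shrinks with latitude
    return lat0 + d_lat, lng0 + d_lng

points = [gen_point(37.871419, -122.259707, 1500) for _ in range(25)]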
-------------------------------------------------------------------------------- /Scraper/YelpScraper/.ipynb_checkpoints/get-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /Scraper/YelpScraper/.ipynb_checkpoints/get_yelp-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /Scraper/YelpScraper/.python-version: -------------------------------------------------------------------------------- 1 | 2.7.5 2 | -------------------------------------------------------------------------------- /Scraper/YelpScraper/clean.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import csv 4 | 5 | 6 | with open('yelp.json') as d: 7 | RAW = d.read() 8 | RAW = '[' + RAW.replace('}{', '},{') + ']' 9 | rest = json.loads(RAW) 10 | 11 | # Clean1: get rid of extra info 12 | ## Target format: [{'latitude': xxx, 'longtitude': xxx,'businesses': [{'categories': xxx, 'name': xxx, 'rating': xxx, 'neighborhoods': xxx}, {}, {}...]}, {}, {} ...] 13 | 14 | new = [] 15 | num_loc = len(rest) 16 | for i in range(num_loc): 17 | one_loc = {} 18 | businesses = [] 19 | one_loc['latitude'] = rest[i]['region']['center']['latitude'] 20 | one_loc['longitude'] = rest[i]['region']['center']['longitude'] 21 | # businesses 22 | num_bus = len(rest[i]['businesses']) 23 | for j in range(num_bus): 24 | try: 25 | one_bus = rest[i]['businesses'][j] 26 | except: 27 | import pdb; pdb.set_trace() 28 | new_bus = {} 29 | new_bus['categories'] = one_bus['categories'][0][0] 30 | new_bus['name'] = one_bus['name'] 31 | new_bus['rating'] = one_bus['rating'] 32 | businesses.append(new_bus) 33 | one_loc['businesses'] = businesses 34 | new.append(one_loc) 35 | 36 | json_str = json.dumps(new) 37 | with open('yelp_clean1.json', 'w') as d: 38 | json.dump(json_str, d) 39 | 40 | 41 | # Clean2: Average rating & #Cafes & #Pubs 42 | analysis = [] 43 | for i in range(len(new)): 44 | loc = {} 45 | loc['latitude'] = new[i]['latitude'] 46 | loc['longitude'] = new[i]['longitude'] 47 | total = 0 48 | num_bus = len(new[i]['businesses']) 49 | for j in range(num_bus): 50 | total += new[i]['businesses'][j]['rating'] 51 | loc['avg_rating'] = float(total) / float(num_bus) 52 | 53 | cafe = 0 54 | pub = 0 55 | num_bus = len(new[i]['businesses']) 56 | for j in range(num_bus): 57 | if ('Coffee' in new[i]['businesses'][j]['categories']) or ('Cafe' in new[i]['businesses'][j]['name']): 58 | cafe += 1 59 | if ('Pubs' in new[i]['businesses'][j]['categories']) or ('Bar' in new[i]['businesses'][j]['name']): 60 | pub += 1 61 | loc['#cafes'] = cafe 62 | loc['#pubs'] = pub 63 | 64 | analysis.append(loc) 65 | 66 | summary = json.dumps(analysis) 67 | with open('yelp_clean2.json', 'w') as d: 68 | json.dump(summary, d) 69 | 70 | # convert to CSV 71 | x = json.loads(summary) 72 | f = csv.writer(open("yelp.csv", "wb+")) 73 | f.writerow(['latitude', 'longitude', 'avg_rating', '#cafes', '#pubs']) 74 | 75 | for x in x: 76 | f.writerow([x['latitude'], 77 | x['longitude'], 78 | x['avg_rating'], 79 | x['#cafes'], 80 | x['#pubs']]) 
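The '}{' replacement at the top of clean.py exists because scrape_loc.py repeatedly json.dump()s each Yelp response onto the end of yelp.json, so the file is a run of back-to-back JSON objects rather than a valid array. A minimal, self-contained sketch of that repair (toy data, not repository code) is below; it assumes the literal sequence '}{' never occurs inside a string value.

# Minimal illustration of the concatenated-JSON repair used at the top of clean.py.
import json

raw = '{"a": 1}{"b": 2}{"c": 3}'   # what repeated json.dump(..., fp) calls leave behind
records = json.loads('[' + raw.replace('}{', '},{') + ']')
assert records == [{"a": 1}, {"b": 2}, {"c": 3}]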
-------------------------------------------------------------------------------- /Scraper/YelpScraper/requirements.txt: -------------------------------------------------------------------------------- 1 | oauth2==1.0 2 | httplib2==0.9 -------------------------------------------------------------------------------- /Scraper/YelpScraper/sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | This program requires the Python oauth2 library, which you can install via: 3 | `pip install -r requirements.txt`. 4 | Sample usage of the program: 5 | `python sample.py --term="bars" --location="San Francisco, CA"` 6 | """ 7 | import argparse 8 | import json 9 | import pprint 10 | import sys 11 | import urllib 12 | import urllib2 13 | import oauth2 14 | import requests 15 | 16 | 17 | API_HOST = 'api.yelp.com' 18 | SEARCH_PATH = '/v2/search/' 19 | BUSINESS_PATH = '/v2/business/' 20 | 21 | # OAuth credential placeholders that must be filled in by users. 22 | CONSUMER_KEY = 'FmI5xWeT7qEqfRRrPRL8tQ' 23 | CONSUMER_SECRET = 'JGOdZj4uRtlzp3NMjId6VurcWBw' 24 | TOKEN = 'MeIvaQLkMa51m70HLpQw7OhxKBGc1F87' 25 | TOKEN_SECRET = '9ChRhFEPoZB0F-sMrdhwbJpSVz0' 26 | 27 | 28 | """ Prepares OAuth authentication and sends the request to the API. 29 | Args: 30 | host (str): The domain host of the API. 31 | path (str): The path of the API after the domain. 32 | url_params (dict): An optional set of query parameters in the request. 33 | Returns: 34 | dict: The JSON response from the request. 35 | Raises: 36 | urllib2.HTTPError: An error occurs from the HTTP request. 37 | """ 38 | def request(host, path, url_params=None): 39 | 40 | url_params = url_params or {} 41 | url = 'https://{0}{1}?'.format(host, urllib.quote(path.encode('utf8'))) 42 | 43 | consumer = oauth2.Consumer(CONSUMER_KEY, CONSUMER_SECRET) 44 | oauth_request = oauth2.Request( 45 | method="GET", url=url, parameters=url_params) 46 | 47 | oauth_request.update( 48 | { 49 | 'oauth_nonce': oauth2.generate_nonce(), 50 | 'oauth_timestamp': oauth2.generate_timestamp(), 51 | 'oauth_token': TOKEN, 52 | 'oauth_consumer_key': CONSUMER_KEY 53 | } 54 | ) 55 | token = oauth2.Token(TOKEN, TOKEN_SECRET) 56 | oauth_request.sign_request( 57 | oauth2.SignatureMethod_HMAC_SHA1(), consumer, token) 58 | signed_url = oauth_request.to_url() # the url to put online 59 | 60 | print u'Querying {0} ...'.format(url) 61 | print signed_url 62 | conn = urllib2.urlopen(signed_url, None) 63 | try: 64 | response = json.loads(conn.read()) 65 | finally: 66 | conn.close() 67 | 68 | return response 69 | 70 | 71 | """ Query the Search API by a search term and location. 72 | Returns: 73 | dict: The JSON response from the request. 74 | """ 75 | def search(term = None, sort = 1, limit = None, offset = None, r = None, ll = None, location = None, bounds = None): 76 | url_params = {'sort': 1} 77 | if offset != None: 78 | url_params['offset'] = offset 79 | if term != None: 80 | url_params['term'] = term.replace(' ', '+') 81 | if limit != None: 82 | url_params['limit'] = limit 83 | if r != None: 84 | url_params['radius_filter'] = r 85 | if ll != None: 86 | url_params['ll'] = ll.replace(' ', '+') 87 | if bounds != None: 88 | url_params['bounds'] = bounds 89 | if location != None: 90 | url_params['location'] = location.replace(' ', '+'), 91 | 92 | return request(API_HOST, SEARCH_PATH, url_params=url_params) 93 | 94 | 95 | """Query the Business API by a business ID. 96 | Args: 97 | business_id (str): The ID of the business to query. 
98 | Returns: 99 | dict: The JSON response from the request. 100 | """ 101 | def get_business(business_id): 102 | business_path = BUSINESS_PATH + business_id 103 | return request(API_HOST, business_path) 104 | 105 | 106 | 107 | """Queries the API by the input values from the user. 108 | Args: 109 | term (str): The search term to query. 110 | location (str): The location of the business to query. 111 | """ 112 | def query_api(term, location): 113 | response = search(term, location) 114 | businesses = response.get('businesses') 115 | 116 | if not businesses: 117 | print u'No businesses for {0} in {1} found.'.format(term, location) 118 | return 119 | 120 | business_id = businesses[0]['id'] 121 | 122 | print u'{0} businesses found, querying business info ' \ 123 | 'for the top result "{1}" ...'.format( 124 | len(businesses), business_id) 125 | response = get_business(business_id) 126 | 127 | print u'Result for business "{0}" found:'.format(business_id) 128 | pprint.pprint(response, indent=2) 129 | 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | 134 | parser.add_argument('-q', '--term', dest='term', default=DEFAULT_TERM, 135 | type=str, help='Search term (default: %(default)s)') 136 | parser.add_argument('-l', '--location', dest='location', 137 | default=DEFAULT_LOCATION, type=str, 138 | help='Search location (default: %(default)s)') 139 | 140 | input_values = parser.parse_args() 141 | 142 | try: 143 | query_api(input_values.term, input_values.location) 144 | except urllib2.HTTPError as error: 145 | sys.exit( 146 | 'Encountered HTTP error {0}. Abort program.'.format(error.code)) 147 | 148 | 149 | if __name__ == '__main__': 150 | main() -------------------------------------------------------------------------------- /Scraper/YelpScraper/scrape_area.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sample as s 3 | 4 | def bound(southwest_lat, southwest_long, northeast_lat, northeast_long): 5 | return str(southwest_lat) +','+ str(southwest_long) +'|'+ str(northeast_lat) +','+ str(northeast_long) 6 | 7 | 8 | southwest_lat = 37.853727 9 | southwest_long = -122.278823 10 | northeast_lat = 37.884056 11 | northeast_long = -122.251139 12 | 13 | b = bound(southwest_lat, southwest_long, northeast_lat, northeast_long) 14 | 15 | request = s.search(term = 'food', limit = 30, bounds = b) 16 | with open('test.json', 'a') as fp: 17 | json.dump(request, fp) -------------------------------------------------------------------------------- /Scraper/YelpScraper/scrape_loc.py: -------------------------------------------------------------------------------- 1 | # source activate python2 2 | 3 | import sample as s 4 | import json 5 | import csv 6 | import pandas as pd 7 | 8 | """ Search Parameters 9 | limit = 1 10 | term = 'food' 11 | location = 'berkeley' 12 | offset = 0 #Offset the list of returned business results by this amount 13 | category_filter = "null" 14 | radius_filter = 0 #in meters 15 | southwest_lat = 37.853727 16 | southwest_long = -122.278823 17 | northeast_lat = 37.884056 18 | northeast_long = -122.251139 19 | bound = str(southwest_lat) +','+ str(southwest_long) +'|'+ str(northeast_lat) +','+ str(northeast_long) 20 | latitude = 37.853727 21 | longitude = -122.278823 22 | cll = str(latitude) + ',' + str(longitude) 23 | """ 24 | 25 | 26 | def loc(latitude, longitude): 27 | return str(latitude) + ',' + str(longitude) 28 | 29 | # Pull data for every apartment 30 | # term = None, limit = None, r = None, ll = None, location 
= None, bounds = None 31 | # f = open("summary.csv") 32 | # data = json.loads(f.read())['addrs'] 33 | # size = len(data) 34 | 35 | # for i in range(size): 36 | # ll = loc(data[i][2], data[i][3]) 37 | # request = s.search(term = 'food', limit = 20, ll = ll) 38 | # with open('restaurants.json', 'a') as fp: 39 | # json.dump(request, fp) 40 | 41 | df = pd.read_csv('summary.csv') 42 | size = len(df) 43 | for i in range(1178, 2262): 44 | ll = loc(df['latitude'][i], df['longitude'][i]) 45 | request = s.search(term = 'food', limit = 20, ll = ll) 46 | with open('yelp.json', 'a') as fp: 47 | json.dump(request, fp) 48 | 49 | 50 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/base.css: -------------------------------------------------------------------------------- 1 | /* ============================================================ 2 | RESET 3 | ============================================================ */ 4 | html, body, div, span, applet, object, iframe, 5 | h1, h2, h3, h4, h5, h6, p, blockquote, pre, 6 | a, abbr, acronym, address, big, cite, code, 7 | del, dfn, em, font, img, ins, kbd, q, s, samp, 8 | small, strike, strong, sub, sup, tt, var, 9 | b, u, i, center, 10 | dl, dt, dd, ol, ul, li, 11 | fieldset, form, label, legend, 12 | table, caption, tbody, tfoot, thead, tr, th, td { 13 | background: transparent; 14 | border: 0; 15 | margin: 0; 16 | padding: 0; 17 | vertical-align: baseline; 18 | } 19 | 20 | body { 21 | line-height: 1; 22 | } 23 | 24 | h1, h2, h3, h4, h5, h6 { 25 | clear: both; 26 | font-weight: normal; 27 | } 28 | 29 | ol, ul { 30 | list-style: none; 31 | } 32 | 33 | blockquote { 34 | quotes: none; 35 | } 36 | 37 | blockquote:before, blockquote:after { 38 | content: ''; 39 | content: none; 40 | } 41 | 42 | del { 43 | text-decoration: line-through; 44 | } 45 | 46 | /* tables still need 'cellspacing="0"' in the markup */ 47 | table { 48 | border-collapse: collapse; 49 | border-spacing: 0; 50 | } 51 | 52 | a img { 53 | border: none; 54 | } 55 | 56 | /* ============================================================ 57 | GLOBALS 58 | ============================================================ */ 59 | *, 60 | *:before, 61 | *:after { 62 | -webkit-box-sizing: border-box; 63 | -moz-box-sizing: border-box; 64 | box-sizing: border-box; 65 | } 66 | 67 | body { 68 | background-color: #dc2850; 69 | color: #505050; 70 | font-family: "Open Sans", sans-serif; 71 | font-weight: 400; 72 | font-size: 14px; 73 | line-height: 1.8; 74 | } 75 | 76 | /* Headings */ 77 | h1, h2, h3, h4, h5, h6 { 78 | line-height: 1; 79 | font-weight: 700; 80 | font-family: "Bitter", serif; 81 | } 82 | 83 | a { 84 | text-decoration: none; 85 | color: #dc2850; 86 | } 87 | 88 | a:hover { 89 | color: #9e1a37; 90 | } 91 | 92 | /* clear floated divs */ 93 | .clearfix:after { 94 | content: ""; 95 | display: table; 96 | clear: both; 97 | } 98 | 99 | /* ============================================================ 100 | TEMPLATE 101 | ============================================================ */ 102 | #wrapper { 103 | width: 100%; 104 | margin: 0 auto; 105 | } 106 | 107 | #main { 108 | background-color: #fff; 109 | padding: 30px 0; 110 | } 111 | 112 | .container { 113 | width: 100%; 114 | max-width: 1200px; 115 | margin: 0 auto; 116 | padding: 0 30px; 117 | } 118 | 119 | 120 | /* ============================================================ 121 | MEDIA QUERIES 122 | ============================================================ */ 123 | @media all and (max-width: 
960px) { 124 | /* footer */ 125 | footer aside { 126 | width: 100%; 127 | float: none; 128 | margin-bottom: 15px; 129 | } 130 | footer aside:last-child { 131 | margin-bottom: 0; 132 | } 133 | footer aside.logo { 134 | text-align: center; 135 | } 136 | footer ul { 137 | text-align: center; 138 | } 139 | footer ul li { 140 | display: inline-block; 141 | } 142 | footer ul li:after { 143 | content: "\2022"; 144 | } 145 | footer ul li:last-child:after { 146 | content: none; 147 | } 148 | footer ul li a { 149 | margin: 0 10px; 150 | } 151 | } 152 | @media all and (max-width: 400px) { 153 | header .branding { 154 | padding: 15px 0; 155 | } 156 | header .branding .container { 157 | padding: 0 30px; 158 | } 159 | header .social { 160 | float: none; 161 | width: 100%; 162 | text-align: center; 163 | line-height: 30px; 164 | height: 30px; 165 | } 166 | header .social a { 167 | margin: 0 5px; 168 | } 169 | header .logo { 170 | float: none; 171 | margin: 0 auto; 172 | margin-bottom: 15px; 173 | } 174 | } 175 | 176 | html, 177 | body, 178 | div, 179 | span, 180 | applet, 181 | object, 182 | iframe, 183 | h1, 184 | h2, 185 | h3, 186 | h4, 187 | h5, 188 | h6, 189 | p, 190 | blockquote, 191 | pre, 192 | a, 193 | abbr, 194 | acronym, 195 | address, 196 | big, 197 | cite, 198 | code, 199 | del, 200 | dfn, 201 | em, 202 | font, 203 | img, 204 | ins, 205 | kbd, 206 | q, 207 | s, 208 | samp, 209 | small, 210 | strike, 211 | strong, 212 | sub, 213 | sup, 214 | tt, 215 | var, 216 | b, 217 | u, 218 | i, 219 | center, 220 | dl, 221 | dt, 222 | dd, 223 | ol, 224 | ul, 225 | li, 226 | fieldset, 227 | form, 228 | label, 229 | legend, 230 | table, 231 | caption, 232 | tbody, 233 | tfoot, 234 | thead, 235 | tr, 236 | th, 237 | td { 238 | background: transparent; 239 | border: 0; 240 | margin: 0; 241 | padding: 0; 242 | vertical-align: baseline; 243 | } 244 | body { 245 | line-height: 1 246 | } 247 | h1, 248 | h2, 249 | h3, 250 | h4, 251 | h5, 252 | h6 { 253 | clear: both; 254 | font-weight: normal; 255 | } 256 | ol, 257 | ul { 258 | list-style: none 259 | } 260 | blockquote { 261 | quotes: none 262 | } 263 | blockquote:before, 264 | blockquote:after { 265 | content: ''; 266 | content: none; 267 | } 268 | del { 269 | text-decoration: line-through 270 | } 271 | table { 272 | border-collapse: collapse; 273 | border-spacing: 0; 274 | } 275 | a img { 276 | border: none 277 | } 278 | *, 279 | *:before, 280 | *:after { 281 | -webkit-box-sizing: border-box; 282 | -moz-box-sizing: border-box; 283 | box-sizing: border-box; 284 | } 285 | body { 286 | background-color: #6496c8; 287 | color: #505050; 288 | font-family: "Open Sans",sans-serif; 289 | font-weight: 400; 290 | font-size: 14px; 291 | line-height: 1.8; 292 | } 293 | h1, 294 | h2, 295 | h3, 296 | h4, 297 | h5, 298 | h6 { 299 | line-height: 1; 300 | font-weight: 700; 301 | font-family: "Bitter",serif; 302 | } 303 | a { 304 | text-decoration: none; 305 | color: #6496c8; 306 | } 307 | a:hover { 308 | color: #3b70a5 309 | } 310 | .clearfix:after { 311 | content: ""; 312 | display: table; 313 | clear: both; 314 | } 315 | #wrapper { 316 | width: 100%; 317 | margin: 0 auto; 318 | } 319 | #main { 320 | background-color: #fff; 321 | padding: 30px 0; 322 | } 323 | .container { 324 | width: 100%; 325 | max-width: 1200px; 326 | margin: 0 auto; 327 | padding: 0 30px; 328 | } 329 | 330 | 331 | 332 | 333 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/font-awesome/FontAwesome.otf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/css/font-awesome/FontAwesome.otf -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.eot -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/css/font-awesome/fontawesome-webfont.woff -------------------------------------------------------------------------------- /Server/input-text-styles-source/css/style.css: -------------------------------------------------------------------------------- 1 | /* ============================================================ 2 | COMMON 3 | ============================================================ */ 4 | /* sections */ 5 | section { 6 | margin-bottom: 60px; 7 | padding: 30px; 8 | background-color: #efefef; 9 | } 10 | section:last-child { 11 | margin-bottom: 0; 12 | } 13 | 14 | /* headings */ 15 | h2 { 16 | margin-bottom: 30px; 17 | } 18 | 19 | /* form elements */ 20 | label { 21 | display: block; 22 | } 23 | 24 | input[type="text"] { 25 | display: block; 26 | margin: 0; 27 | width: 100%; 28 | font-family: "Open Sans", sans-serif; 29 | font-size: 18px; 30 | -webkit-appearance: none; 31 | -moz-appearance: none; 32 | appearance: none; 33 | -webkit-box-shadow: none; 34 | -moz-box-shadow: none; 35 | box-shadow: none; 36 | -webkit-border-radius: none; 37 | -moz-border-radius: none; 38 | -ms-border-radius: none; 39 | -o-border-radius: none; 40 | border-radius: none; 41 | } 42 | input[type="text"]:focus { 43 | outline: none; 44 | } 45 | 46 | /* lists */ 47 | ul.input-list { 48 | list-style: none; 49 | margin: 0 -10px; 50 | padding: 0; 51 | } 52 | ul.input-list li { 53 | display: block; 54 | padding: 0 10px; 55 | width: 50%; 56 | float: left; 57 | } 58 | 59 | @media all and (max-width: 800px) { 60 | ul.input-list { 61 | margin: 0; 62 | } 63 | ul.input-list li { 64 | padding: 0; 65 | width: 100%; 66 | float: none; 67 | margin-bottom: 10px; 68 | } 69 | } 70 | /* ============================================================ 71 | STYLE 1 72 | ============================================================ */ 73 | .style-1 input[type="text"] { 74 | padding: 10px; 75 | border: solid 1px gainsboro; 76 | -webkit-transition: box-shadow 0.3s, border 0.3s; 77 | -moz-transition: box-shadow 0.3s, border 0.3s; 78 | -o-transition: box-shadow 0.3s, border 0.3s; 79 | transition: 
box-shadow 0.3s, border 0.3s; 80 | } 81 | .style-1 input[type="text"]:focus, .style-1 input[type="text"].focus { 82 | border: solid 1px #707070; 83 | -webkit-box-shadow: 0 0 5px 1px #969696; 84 | -moz-box-shadow: 0 0 5px 1px #969696; 85 | box-shadow: 0 0 5px 1px #969696; 86 | } 87 | 88 | section { 89 | margin-bottom: 60px 90 | } 91 | section:last-child { 92 | margin-bottom: 0 93 | } 94 | section h2 { 95 | margin-bottom: 30px 96 | } 97 | button { 98 | display: inline-block; 99 | margin: 0 10px 0 0; 100 | padding: 5px 25px; 101 | font-size: 18px; 102 | font-family: "Bitter",serif; 103 | line-height: 1.8; 104 | vertical-align: bottom; 105 | -webkit-appearance: none; 106 | -moz-appearance: none; 107 | appearance: none; 108 | -webkit-box-shadow: none; 109 | -moz-box-shadow: none; 110 | box-shadow: none; 111 | -webkit-border-radius: 0; 112 | -moz-border-radius: 0; 113 | border-radius: 0; 114 | } 115 | button:focus { 116 | outline: none 117 | } 118 | section.flat button { 119 | color: #fff; 120 | background-color: #6496c8; 121 | text-shadow: -1px 1px #417cb8; 122 | border: none; 123 | } 124 | section.flat button:hover, 125 | section.flat button.hover { 126 | background-color: #346392; 127 | text-shadow: -1px 1px #27496d; 128 | } 129 | section.flat button:active, 130 | section.flat button.active { 131 | background-color: #27496d; 132 | text-shadow: -1px 1px #193047; 133 | } 134 | section.border button { 135 | color: #6496c8; 136 | background: rgba(0,0,0,0); 137 | border: solid 5px #6496c8; 138 | } 139 | section.border button:hover, 140 | section.border button.hover { 141 | border-color: #346392; 142 | color: #346392; 143 | } 144 | section.border button:active, 145 | section.border button.active { 146 | border-color: #27496d; 147 | color: #27496d; 148 | } 149 | section.gradient button { 150 | color: #fff; 151 | text-shadow: -2px 2px #346392; 152 | background-color: #ff9664; 153 | background-image: -webkit-gradient(linear, left top, left bottom, from(#6496c8), to(#346392)); 154 | background-image: -webkit-linear-gradient(top, #6496c8, #346392); 155 | background-image: -moz-linear-gradient(top, #6496c8, #346392); 156 | background-image: -o-linear-gradient(top, #6496c8, #346392); 157 | background-image: -ms-linear-gradient(top, #6496c8, #346392); 158 | background-image: linear-gradient(top, #6496c8, #346392); 159 | filter: progid:DXImageTransform.Microsoft.gradient(GradientType=0,StartColorStr='#ff9664', EndColorStr='#c86432'); 160 | -webkit-box-shadow: inset 0 0 0 1px #27496d; 161 | -moz-box-shadow: inset 0 0 0 1px #27496d; 162 | box-shadow: inset 0 0 0 1px #27496d; 163 | border: none; 164 | -webkit-border-radius: 15px; 165 | -moz-border-radius: 15px; 166 | border-radius: 15px; 167 | } 168 | section.gradient button:hover, 169 | section.gradient button.hover { 170 | -webkit-box-shadow: inset 0 0 0 1px #27496d,0 5px 15px #193047; 171 | -moz-box-shadow: inset 0 0 0 1px #27496d,0 5px 15px #193047; 172 | box-shadow: inset 0 0 0 1px #27496d,0 5px 15px #193047; 173 | } 174 | section.gradient button:active, 175 | section.gradient button.active { 176 | -webkit-box-shadow: inset 0 0 0 1px #27496d,inset 0 5px 30px #193047; 177 | -moz-box-shadow: inset 0 0 0 1px #27496d,inset 0 5px 30px #193047; 178 | box-shadow: inset 0 0 0 1px #27496d,inset 0 5px 30px #193047; 179 | } 180 | section.press button { 181 | color: #fff; 182 | background-color: #6496c8; 183 | border: none; 184 | -webkit-border-radius: 15px; 185 | -moz-border-radius: 15px; 186 | border-radius: 15px; 187 | -webkit-box-shadow: 0 10px #27496d; 188 | 
-moz-box-shadow: 0 10px #27496d; 189 | box-shadow: 0 10px #27496d; 190 | } 191 | section.press button:hover, 192 | section.press button.hover { 193 | background-color: #417cb8 194 | } 195 | section.press button:active, 196 | section.press button.active { 197 | background-color: #417cb8; 198 | -webkit-box-shadow: 0 5px #27496d; 199 | -moz-box-shadow: 0 5px #27496d; 200 | box-shadow: 0 5px #27496d; 201 | -webkit-transform: translateY(5px); 202 | -moz-transform: translateY(5px); 203 | -ms-transform: translateY(5px); 204 | -o-transform: translateY(5px); 205 | transform: translateY(5px); 206 | } 207 | @media all and (max-width: 960px) { 208 | button { 209 | font-size: 18px 210 | } 211 | } 212 | @media all and (max-width: 720px) { 213 | button { 214 | font-size: 18px; 215 | } 216 | } 217 | @media all and (max-width: 540px) { 218 | section { 219 | text-align: center 220 | } 221 | button { 222 | font-size: 18px; 223 | } 224 | } 225 | 226 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/logo-alt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/img/core/logo-alt.png -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/logo-alt.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 11 | 13 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/img/core/logo.png -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 11 | 13 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/social-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/img/core/social-dark.png -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/social-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 15 | 16 | 17 | 20 | 31 | 48 | 53 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/social-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logicx24/DailyCalHousingAnalysis/52d962a33271c6c6332c04cd49abef9b729d4962/Server/input-text-styles-source/img/core/social-light.png -------------------------------------------------------------------------------- /Server/input-text-styles-source/img/core/social-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 15 
| 16 | 17 | 20 | 31 | 48 | 53 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Various CSS Input Text Styles 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
[index.html lines 26-45: body markup stripped during extraction; the visible page text is a "Price Predictor" heading followed by a bulleted list of form inputs.]
46 | 47 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /Server/input-text-styles-source/js/html5shiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv prev3.7.1 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=s.elements;return"string"==typeof a?a.split(" "):a}function e(a){var b=r[a[p]];return b||(b={},q++,a[p]=q,r[q]=b),b}function f(a,c,d){if(c||(c=b),k)return c.createElement(a);d||(d=e(c));var f;return f=d.cache[a]?d.cache[a].cloneNode():o.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!f.canHaveChildren||n.test(a)||f.tagUrn?f:d.frag.appendChild(f)}function g(a,c){if(a||(a=b),k)return a.createDocumentFragment();c=c||e(a);for(var f=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)f.createElement(h[g]);return f}function h(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return s.shivMethods?f(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(s,b.frag)}function i(a){a||(a=b);var d=e(a);return!s.shivCSS||j||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),k||h(a,d),a}var j,k,l="3.7.0",m=a.html5||{},n=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,o=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,p="_html5shiv",q=0,r={};!function(){try{var a=b.createElement("a");a.innerHTML="",j="hidden"in a,k=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){j=!0,k=!0}}();var s={elements:m.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output progress section summary template time video",version:l,shivCSS:m.shivCSS!==!1,supportsUnknownElements:k,shivMethods:m.shivMethods!==!1,type:"default",shivDocument:i,createElement:f,createDocumentFragment:g};a.html5=s,i(b)}(this,document); -------------------------------------------------------------------------------- /Server/listingPrediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pymongo 3 | from geopy.distance import vincenty 4 | import datetime 5 | 6 | from math import cos 7 | 8 | #Choose one. 
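# (Several regressors are imported here for experimentation; train() and
# cross_validate() below actually fit sklearn.linear_model.Ridge.)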
9 | from sklearn.svm import SVR 10 | from sklearn.linear_model import Ridge 11 | from sklearn.kernel_ridge import KernelRidge 12 | from sklearn.linear_model import LogisticRegression 13 | 14 | from sklearn.preprocessing import Imputer 15 | from sklearn import cross_validation 16 | 17 | import requests 18 | from lxml import html 19 | import time 20 | import json 21 | import csv 22 | 23 | def mongoConnection(): 24 | return pymongo.MongoClient().HousingListings.listings 25 | 26 | def jsonDump(): 27 | with open("listings.json", 'w') as out: 28 | listings = list(mongoConnection().find({ 29 | "latitude": {"$exists": True}, 30 | "longitude": {"$exists": True}, 31 | "numImages": {"$exists": True}, 32 | "description": {"$exists": True}, 33 | })) 34 | print(len(listings)) 35 | for i in range(len(listings)): 36 | listings[i].pop("_id") 37 | json.dump({"listings" :listings}, out, ensure_ascii=True) 38 | 39 | class Classifier(object): 40 | 41 | def __init__(self, training_data_csv="training_data.csv", json_dump_reader="listings.json"): 42 | self.training_data_csv = training_data_csv 43 | self.json_dump_reader = json_dump_reader 44 | self.now = datetime.datetime.now() 45 | 46 | def featurize(self, listing): 47 | if "bedrooms" in listing: 48 | bedrooms = listing["bedrooms"] 49 | else: 50 | bedrooms = np.nan 51 | 52 | if "bathrooms" in listing: 53 | bathrooms = listing["bathrooms"] 54 | else: 55 | bathrooms = np.nan 56 | 57 | if "sqft" in listing: 58 | sqft = listing["sqft"] 59 | else: 60 | sqft = np.nan 61 | 62 | if "latitude" in listing and "longitude" in listing: 63 | #distance = self.get_distance(listing['latitude'], listing['longitude']) 64 | distance = self.bounding_dist(listing['latitude'], listing['longitude']) 65 | else: 66 | distance = np.nan 67 | 68 | images = listing['numImages'] 69 | uniqueWords = len(listing['description'].split(" ")) 70 | 71 | time_up = (self.now - datetime.datetime.fromtimestamp(listing['postingDate'])).days 72 | 73 | return np.array([bedrooms, bathrooms, sqft, distance, images, uniqueWords, time_up]) 74 | 75 | def cols(self): 76 | return ['bedrooms', 'bathrooms', 'sqft', 'distance_to_campus', 'num_images', 'unique_words', 'postingDate', 'price'] 77 | 78 | def get_distance(self, lat, lon): 79 | center = (37.872105, -122.259470) 80 | return vincenty(center, (lat, lon)).miles 81 | 82 | def bounding_dist(self, lat, lon): 83 | 84 | top_left_t = (37.874185, -122.266381) 85 | bot_right_t = (37.869671, -122.252347) 86 | 87 | def long_dist_func(lat): 88 | return cos(lat) * lat_dist_func() 89 | 90 | def lat_dist_func(): 91 | return 69.172 92 | 93 | lat_dist = min(abs(lat - top_left_t[0])*lat_dist_func(), abs(lat-bot_right_t[0])*lat_dist_func()) 94 | lon_dist = min(long_dist_func(lat)*abs(top_left_t[1] - lon), long_dist_func(lat)*abs(bot_right_t[1] - lon)) 95 | return (lat_dist**2 + lon_dist**2)**0.5 96 | 97 | def featurized(self, cursor): 98 | x = [] 99 | y = [] 100 | for listing in cursor: 101 | x.append(self.featurize(listing)) 102 | y.append(listing['price']) 103 | return np.array(x), np.array(y) 104 | 105 | def pullListings(self): 106 | return json.loads(open(self.json_dump_reader).read())['listings'] 107 | 108 | def csvToArray(self): 109 | x = [] 110 | y = [] 111 | with open(self.training_data_csv) as fin: 112 | reader = csv.reader(fin) 113 | reader.next() 114 | for row in reader: 115 | y.append(float(row[-1])) 116 | x.append([float(val) for val in row[:-1]]) 117 | x = np.array(x) 118 | y = np.array(y) 119 | imp, x_imp = self.genImputer(x, "median") 120 | 121 | self.feats = 
x_imp 122 | self.labels = y 123 | self.imp = imp 124 | 125 | 126 | def genImputer(self, arr, strat): 127 | imp = Imputer(missing_values="NaN", strategy=strat, axis=0, copy=True) 128 | return imp, imp.fit_transform(arr) 129 | 130 | def csvDump(self): 131 | listings = self.pullListings() 132 | x, y = self.featurized(listings) 133 | appd = np.hstack((x, y[np.newaxis].T)) 134 | with open(self.training_data_csv, 'w') as out: 135 | writer = csv.writer(out) 136 | writer.writerow(self.cols()) 137 | for i in range(len(appd)): 138 | writer.writerow(list(appd[i])) 139 | 140 | def linkToVector(self, link): 141 | r = requests.get(link) 142 | response = tree = html.fromstring(r.text) 143 | 144 | item = {} 145 | curr = response.xpath("//*[@id='pagecontainer']/section/section/div[2]/p[2]/time/text()")[0].split()[0] 146 | item["postingDate"] = int(time.mktime(datetime.datetime.strptime(curr, "%Y-%m-%d").timetuple())) 147 | item["price"] = int(response.xpath("//*[@id='pagecontainer']/section/h2/span[2]/span[1]/text()")[0].replace("$","")) 148 | maplocation = response.xpath("//div[contains(@id,'map')]") 149 | latitude = ''.join(maplocation[0].xpath('@data-latitude')) 150 | longitude = ''.join(maplocation[0].xpath('@data-longitude')) 151 | tmp = response.xpath("//*[@id='pagecontainer']/section/section/div[1]/div[1]/div[2]/text()") 152 | 153 | chars_to_remove = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 154 | 155 | if len(tmp) > 0: 156 | item['address'] = tmp[0] 157 | if latitude: 158 | item['latitude'] = float(latitude) 159 | if longitude: 160 | item['longitude'] = float(longitude) 161 | try: 162 | item["bedrooms"] = float(response.xpath("//*[@id='pagecontainer']/section/section/div[1]/p[1]/span[1]/b[1]/text()")[0].translate(None, chars_to_remove)) 163 | except IndexError: 164 | pass 165 | try: 166 | item["sqft"] = float(response.xpath("//*[@id='pagecontainer']/section/section/div[1]/p[1]/span[2]/b/text()")[0].translate(None, chars_to_remove)) 167 | except IndexError: 168 | pass 169 | try: 170 | item["bathrooms"] = float(response.xpath("//*[@id='pagecontainer']/section/section/div[1]/p[1]/span[1]/b[2]/text()")[0].translate(None, chars_to_remove)) 171 | except IndexError: 172 | pass 173 | item['description'] = "".join(response.xpath("//section[@id='postingbody']/text()")) 174 | item["numImages"] = len(response.xpath("//div[@id='thumbs']/a")) 175 | return self.featurize(item) 176 | 177 | def train(self): 178 | final = Ridge(alpha=.1) 179 | self.csvToArray() 180 | final.fit(self.feats, self.labels) 181 | self.model = final 182 | 183 | def predictionFromLink(self, link): 184 | vector = self.imp.transform(self.linkToVector(link)) 185 | return self.model.predict(vector) 186 | 187 | def predict(self, vector): 188 | return self.model.predict(self.imp.transform(vector)) 189 | 190 | def cross_validate(self, alpha=.1): 191 | final = Ridge(alpha=alpha) 192 | self.csvToArray() 193 | return cross_validation.cross_val_score(final, self.feats, self.labels, cv=10) 194 | 195 | if __name__ == "__main__": 196 | jsonDump() 197 | classifier = Classifier() 198 | classifier.csvDump() 199 | classifier.train() 200 | for alpha in [0.001, 0.01, 0.1, 1, 10, 0.8]: 201 | print "-------------------------------------------" 202 | print alpha 203 | scores = classifier.cross_validate(alpha) 204 | print scores 205 | print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) 206 | print "-------------------------------------------" 207 | # classifier.train() 208 | print 
classifier.predictionFromLink("http://sfbay.craigslist.org/eby/apa/5849230189.html") 209 | 210 | 211 | -------------------------------------------------------------------------------- /Server/predictionServer.py: -------------------------------------------------------------------------------- 1 | from flask.ext.api import FlaskAPI 2 | from flask import request, current_app 3 | import listingPrediction 4 | app = FlaskAPI(__name__) 5 | 6 | 7 | classifier = listingPrediction.Classifier() 8 | 9 | @app.route("/classify", methods=['POST']) 10 | def classify(): 11 | toClassify = request.args.get('link')#request.data['link'] 12 | return '{0}({1})'.format("resp", classifier.predictionFromLink(toClassify)[0]) 13 | if __name__ == "__main__": 14 | listingPrediction.jsonDump() 15 | classifier.csvDump() 16 | classifier.train() 17 | app.run(debug=True) 18 | --------------------------------------------------------------------------------
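For reference, a minimal client call against predictionServer.py as written: the server listens on Flask's default http://127.0.0.1:5000 and reads the Craigslist URL from the link query parameter. This is a sketch only, not repository code; the example listing URL is the one hard-coded in listingPrediction.py and has likely expired.

# Hypothetical client for the /classify endpoint in predictionServer.py.
import requests

resp = requests.post(
    "http://127.0.0.1:5000/classify",
    params={"link": "http://sfbay.craigslist.org/eby/apa/5849230189.html"},
)
print(resp.text)   # body of the form resp(<predicted monthly price>)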