# -*- coding: utf-8 -*-
# Repository digest for the "geologger" celery task package (Python 2).
#
# ├── geologger
# │   ├── __init__.py
# │   ├── util.py
# │   └── geologger.py
# ├── requirements.txt
# └── Dockerfile
#
# --------------------------------------------------------------------------------
# /geologger/__init__.py:
# --------------------------------------------------------------------------------
# (empty)
#
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# pandas
# python-dateutil==1.5
# amqp
# anyjson
# billiard==2.7.3.12
# celery==3.0.5
# geojson==1.0.1
# kombu==2.3.2
# ordereddict==1.1
# pymongo==2.1
#
# --------------------------------------------------------------------------------
# /Dockerfile:
# --------------------------------------------------------------------------------
# FROM ubuntu
#
# RUN apt-get update && apt-get install -y r-base python-dev python-pip
# RUN apt-get install -y python-rpy2 python-pymongo supervisor
# RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
# RUN echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
# RUN apt-get update && sudo apt-get install -y mongodb-org-mongos
#
# # Install GeoLight
# RUN echo "install.packages('GeoLight', repos=\"http://cran.rstudio.com/\")" > packages.R
# RUN Rscript packages.R
#
# # Get working dir ready for celery
# RUN mkdir /opt/celeryq
# WORKDIR /opt/celeryq
# ADD requirements.txt /opt/celeryq/requirements.txt
# ADD geologger /opt/celeryq/geologger
# RUN pip install -r /opt/celeryq/requirements.txt

# --------------------------------------------------------------------------------
# /geologger/util.py:
# --------------------------------------------------------------------------------
import pymongo
import csv
import tempfile
import pandas
import urllib
import urlparse
import os
import datetime
from StringIO import StringIO

TASK_DB = "cybercom_queue"
TASK_COLLECTION = "task_log"


def _tempname():
    """ Create an empty, persistent temporary file and return its path """
    handle = tempfile.NamedTemporaryFile(mode="wb+", delete=False)
    # Close right away: callers reopen the file by name.
    handle.close()
    return handle.name


def csv2json(fname, dateformat=None, fromstring=False):
    """ Convert CSV file to JSON document

    - fname: path of a CSV file, or the CSV text itself when fromstring is set
    - dateformat: optional strptime format applied to each row's 'datetime'
    - fromstring: treat fname as CSV content instead of a file name

    Returns a list with one dictionary per CSV row.
    """
    if fromstring:
        reader = csv.DictReader(StringIO(fname), dialect="excel")
        return [convertdate(row, dateformat) for row in reader]
    # Read inside "with" so the file handle is released deterministically.
    with open(fname, 'rU') as csvfile:
        return [convertdate(row, dateformat) for row in csv.DictReader(csvfile)]


def convertdate(data, dtformat=None):
    """ Manipulate datetime into correct format

    When dtformat is given, data['datetime'] is parsed with it and replaced,
    in place, by its ISO 8601 representation. The (same) dict is returned.
    """
    if dtformat:
        parsed = datetime.datetime.strptime(data['datetime'], dtformat)
        data['datetime'] = parsed.isoformat()
    return data


def stringsave(instring):
    """ Write instring (a string or an iterable of strings) to a new
    temporary file and return the file name. The file is not deleted
    automatically; use cleanup() when done.
    """
    outfile = _tempname()
    with open(outfile, 'w') as fout:
        for line in instring:
            fout.writelines(line)
    return outfile


def cleanup(files):
    """ Delete every path in files and return a summary string """
    for path in files:
        os.remove(path)
    return "Deleted %s" % files


def dict2csv(data, outfile=None, subkey=None):
    """ Convert regular structured list of dictionaries to CSV
        - If outfile is not specified a temporary file is created and its name returned
        - Subkey will select a subkey of the returned JSON to generate the CSV from:
        Example:
            data = {"data": [ { "date": "2011-15-10T12:00:00Z", "light": "10" } ],
                    "location": [ "a", "b" ], "tagname": "PABU"
            }
            dict2csv(data, subkey="data")

        The header is taken from the keys of the first row; every row is
        written in that same column order (missing keys become empty cells).
        Raises IndexError when the selected list is empty.
    """
    if subkey:
        data = data[subkey]
    if not outfile:
        outfile = _tempname()
    fieldnames = list(data[0].keys())
    with open(outfile, 'wb') as fout:
        writer = csv.writer(fout)
        writer.writerow(fieldnames)
        for item in data:
            # Index by header key rather than relying on item.values() order,
            # which is not guaranteed to match data[0].keys() for every row.
            writer.writerow([item.get(key) for key in fieldnames])
    return outfile


def df2csv(data, outfile=None, subkey=None):
    """ Deserializes a JSON representation of an R Data frame converted using RJSONIO toJSON

    The data (optionally data[subkey]) is loaded into a pandas DataFrame and
    written as CSV to outfile, or to a new temporary file whose name is returned.
    """
    if subkey:
        data = data[subkey]
    if not outfile:
        outfile = _tempname()
    pandas.DataFrame(data).to_csv(outfile)
    return outfile


def pandasdf(data):
    """ Wrap data in a pandas DataFrame """
    return pandas.DataFrame(data)


def url_fix(s, charset='utf-8'):
    """ Replace unsafe characters in URLs

    Unicode input is first encoded with charset (unencodable characters are
    dropped); the path and query string are then percent-quoted.
    """
    if isinstance(s, unicode):
        s = s.encode(charset, 'ignore')
    scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
    path = urllib.quote(path, '/%')
    qs = urllib.quote_plus(qs, ':&=')
    return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))


def mongoconnect(db, col):
    """ Connect to Mongo and return connection object, assumes localhost to
        force installation of mongos on host
    """
    return pymongo.Connection()[db][col]


# --------------------------------------------------------------------------------
# /geologger/geologger.py:
# --------------------------------------------------------------------------------
from celery import task
import subprocess
import csv
import simplejson as json
import pymongo
import tempfile
import datetime, time
import urlparse, urllib
import pandas
import rpy2.robjects as robjects
import os
from util import *
import geojson

template = """
library(lattice)
library(ggplot2)
library(plyr)
%(version)s
"""

EMPTY_RESULT = {"error": "Empty result"}


def _loadjson(data):
    """ Return data parsed from JSON when it is a string, otherwise unchanged """
    # basestring covers both str and unicode; the former
    # "isinstance(data, unicode or str)" only ever tested for unicode.
    if isinstance(data, basestring):
        return json.loads(data)
    return data


def _findurl(col, tagname, user_id):
    """ Build the db_find URL for a tag/user in a geologger collection """
    return ('http://test.cybercommons.org/mongo/db_find/geologger/%s/'
            '{"spec":{"tagname":"%s","user_id":"%s"}}' % (col, tagname, user_id))


def runR(datain,
         script,
         outformat,
         saveoutput=False,
         savedisplay=False,
         saverdata=False):
    """ Helper function to make running R scripted tasks easier

    - script: R source evaluated through rpy2
    - saveoutput: run the script with a fresh temporary directory as R's
      working directory
    - savedisplay: send graphics to "Rplots.pdf" inside that directory
    - saverdata: with saveoutput, persist the workspace via save.image()
    - datain / outformat: accepted for interface compatibility, currently unused

    Returns the temporary directory path, or None when none was created.
    The directory is left in place so the caller can collect the output.
    """
    r = robjects.r
    tempdir = None
    if saveoutput or savedisplay:
        tempdir = tempfile.mkdtemp()
    if saveoutput:
        r('setwd("%s")' % tempdir)
    if savedisplay:
        r('pdf("%s")' % os.path.join(tempdir, "Rplots.pdf"))
    try:
        r(script)
    finally:
        if savedisplay:
            # Close the PDF device so the file is flushed to disk.
            r('dev.off()')
    # Optionally store and persist .RData to disk
    if saverdata and saveoutput:
        r('save.image()')
    return tempdir


def getTagData(tagname, user_id="guest", db="geologger", col="lightlogs"):
    """ Get light level data for a tag

    Returns the first matching document, or {"error": "Empty result"} when
    the service answers with an empty list.
    """
    url = "http://test.cybercommons.org/mongo/db_find/%s/%s/{'spec':{'tagname':'%s','user_id':'%s'}}" % (db, col, tagname, user_id)
    url_get = urllib.urlopen(url_fix(url)).read()
    if url_get == "[]":
        return {"error": "Empty result"}
    return json.loads(url_get)[0]


@task
def importTagData_manual(uploadloc, tagname, notes, location, dateformat=None, task_id=None, user_id=None):
    """ Import a geologger tag to mongodb

    Reads the CSV at uploadloc and stores it with its metadata in the
    geologger.lightlogs collection. Returns the db_find URL of the record,
    or an error string when saving fails.
    """
    data = {
        "tagname": tagname,
        "notes": notes,
        "release_location": location,
        "user_id": user_id,
        # The trailing "Z" denotes UTC, so the timestamp must be UTC as well.
        "timestamp": "%sZ" % datetime.datetime.utcnow().isoformat(),
        "task_id": task_id
    }
    data['data'] = csv2json(uploadloc, dateformat)
    try:
        mongoconnect('geologger', 'lightlogs').insert(data)
    except Exception:
        return "Error saving to mongodb"
    return url_fix(_findurl('lightlogs', tagname, user_id))


@task
def importTagData(data=None, task_id=None, user_id=None):
    """ A task for importing geologger tag data

    data may be a JSON string or a dictionary providing the keys
    'data', 'tagname', 'notes' and 'species'. Returns the db_find URL of the
    stored record, or an error string when saving fails.
    """
    datain = _loadjson(data)

    dataout = {
        "data": datain['data'],
        "tagname": datain['tagname'],
        "notes": datain['notes'],
        "species": datain['species'],
        # The trailing "Z" denotes UTC, so the timestamp must be UTC as well.
        "timestamp": "%sZ" % datetime.datetime.utcnow().isoformat(),
        "user_id": user_id,
        "task_id": task_id
    }
    try:
        mongoconnect('geologger', 'lightlogs').insert(dataout)
    except Exception:
        return "Error saving to mongo"
    return url_fix(_findurl('lightlogs', dataout['tagname'], dataout['user_id']))


@task
def twilightCalc(tagname=None, threshold=None, task_id=None, user_id=None):
    """ Python wrapper for GeoLight twilightCalc()

    Fetches the light log of tagname, runs twilightCalc() with threshold as
    LightThreshold and stores the result in geologger.twilights. Returns the
    db_find URL of the result, or a message when no light log was found.
    """
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    tagdata = getTagData(tagname, user_id)
    if tagdata == EMPTY_RESULT:
        return "Had a problem finding lightlog data"

    ligdata = dict2csv(tagdata, subkey="data")
    try:
        r('lig <- read.csv("%s", header=T)' % ligdata)
        r('trans <- twilightCalc(lig$datetime, lig$light, LightThreshold=%s, ask=F)' % threshold)
        data = {
            "data": json.loads(r('toJSON(trans)')[0]),
            "tagname": tagname,
            "user_id": user_id,
            "threshold": threshold,
            "timestamp": datetime.datetime.now().isoformat(),
            "format": "RJSONIO",
            "task_id": task_id
        }
        mongoconnect('geologger', 'twilights').insert(data)
    finally:
        # Remove the temporary CSV even when R or Mongo fails.
        cleanup([ligdata])
    return _findurl('twilights', tagname, user_id)


@task
def twilightInsert(tagname=None, data=None, threshold=None, task_id=None, user_id=None):
    """ Take twilight data from web interface

    data is a JSON string; it is parsed and saved with its metadata in
    geologger.twilights. Returns the db_find URL of the record.
    """
    record = {
        "data": json.loads(data),
        "tagname": tagname,
        "user_id": user_id,
        "threshold": threshold,
        "timestamp": datetime.datetime.now().isoformat(),
        "format": "JSON-list",
        "task_id": task_id
    }
    mongoconnect('geologger', 'twilights').save(record)
    return _findurl('twilights', tagname, user_id)


@task
def deleteTag(tagname=None, user_id=None):
    """ Remove a tag's documents from the lightlogs, twilights and coord collections """
    spec = {"tagname": tagname, "user_id": user_id}
    for col in ('lightlogs', 'twilights', 'coord'):
        mongoconnect('geologger', col).remove(spec)


@task
def changeLight(tagname=None, riseprob=None, setprob=None, days=None, task_id=None, user_id=None):
    """ Python wrapper for GeoLight changeLight()

    Uses the stored twilights of tagname; the result is saved in
    geologger.changelight. Returns the db_find URL of the result, or a
    message when no twilight data is available yet.
    """
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    tagdata = getTagData(tagname=tagname, user_id=user_id, col="twilights")
    # Check the fetched document itself; the previous test measured the
    # length of a temp-file name and therefore never triggered.
    if tagdata == EMPTY_RESULT or not tagdata.get("data"):
        return "Twilights have not yet been calculated, please compute twilight events and then try again"

    twilight = df2csv(tagdata, subkey="data")
    try:
        r('twilight <- read.csv("%s", header=T)' % twilight)
        r('twilight$tFirst <- as.POSIXlt(twilight$tFirst, origin="1970-01-01")')  # Convert to R Datetime
        # tSecond must be derived from tSecond (was copied from tFirst).
        r('twilight$tSecond <- as.POSIXlt(twilight$tSecond, origin="1970-01-01")')  # Convert to R Datetime
        r('change <- changeLight(twilight$tFirst, twilight$tSecond, twilight$type, rise.prob=%s, set.prob=%s, days=%s,plot=F)' % (riseprob, setprob, days))
        # Hack to get "." out of variable names so json can be stored in MongoDB
        # see: "http://docs.mongodb.org/manual/reference/limits/#Restrictions on Field Names"
        r('names(change)[3] <- "rise_prob"')
        r('names(change)[4] <- "set_prob"')
        r('names(change$setProb)[2] <- "prob_y"')
        r('names(change$riseProb)[2] <- "prob_y"')
        r('names(change$migTable)[5] <- "P_start"')
        r('names(change$migTable)[6] <- "P_end"')
        data = {
            "data": json.loads(r('toJSON(change)')[0]),
            "params": {"riseprob": riseprob, "setprob": setprob, "days": days},
            "user_id": user_id,
            "tagname": tagname,
            "timestamp": datetime.datetime.now().isoformat(),
            "task_id": task_id
        }
        mongoconnect('geologger', 'changelight').insert(data)
    finally:
        cleanup([twilight])
    return _findurl('changelight', tagname, user_id)


@task
def distanceFilter(transdata, elevation, distance, task_id=None, user_id=None):
    """ Python wrapper for GeoLight distanceFilter() """
    pass


@task
def coord(data=None, task_id=None, user_id=None):
    """ Python wrapper for GeoLight coord()
        expects data like:
        data = {
            "tagname": "PABU_test",
            "sunelevation": -4.5,
            "computed": True,
            "threshold": 4.5,
            "twilights": [{
                "tFirst": "2011-07-30T15:21:24.000Z",
                "tSecond": "2011-07-31T15:21:24.000Z",
                "type": "sunrise",
                "active": True
            }, {
                "tFirst": "2011-07-30T15:21:24.000Z",
                "tSecond": "2011-07-31T15:21:24.000Z",
                "type": "sunrise",
                "active": True
            }],
            "calibperiod": ["2011-07-30T15:21:24.000Z", "2011-07-30T15:21:24.000Z"]

        }

        Data can be provided as JSON string or as a python dictionary.

        The input is saved to geologger.twilights, the computed track is
        stored as a GeoJSON FeatureCollection in geologger.coord, and the
        db_find URL of that result is returned.
    """
    datain = _loadjson(data)

    datain['user_id'] = user_id
    datain['timestamp'] = datetime.datetime.now().isoformat()
    tagname = datain['tagname']
    sunelevation = datain['sunelevation']
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    # Save input twilights from UI
    mongoconnect('geologger', 'twilights').save(datain)
    # Convert input to csv for reading in R
    twilight = df2csv(datain, subkey="twilights")
    try:
        r('twilights <- read.csv("%s", header=T)' % (twilight))
        # Filter actives
        r('twilights <- subset(twilights, twilights$active == "True")')
        # Convert sunrise/sunset to 1,2
        r('twilights$typecat[twilights$type == "sunrise"] <- 1')
        r('twilights$typecat[twilights$type == "sunset"] <- 2')
        # Convert datetimes
        r('twilights$tFirst <- as.POSIXct(strptime(twilights$tFirst, format="%Y-%m-%dT%H:%M:%OSZ", tz="GMT"))')
        r('twilights$tSecond <- as.POSIXct(strptime(twilights$tSecond, format="%Y-%m-%dT%H:%M:%OSZ", tz="GMT"))')
        r('coord <- coord(twilights$tFirst, twilights$tSecond, twilights$typecat, degElevation = %s)' % sunelevation)
        r('coord <- as.data.frame(cbind(as.data.frame(coord), twilights$tFirst, twilights$tSecond))')

        r('names(coord) <- c("x","y","tFirst","tSecond")')
        r('coord <- subset(coord, !is.na(y) & !is.na(x))')
        r('coord$tFirst <- as.character(strftime(coord$tFirst, "%Y-%m-%dT%H:%M:%SZ"))')
        r('coord$tSecond <- as.character(strftime(coord$tSecond, "%Y-%m-%dT%H:%M:%SZ"))')

        # Serialize the R data frame once and reuse the parsed result.
        fromr = json.loads(r('toJSON(coord)')[0])
        df = pandasdf(fromr)
        # One {column: value} dictionary per data frame row.
        track = [dict(zip(df.columns, row)) for row in df.values]

        mongoconnect('geologger', 'debug').insert({"dataframe": df.to_string(), "fromR": fromr})

        features = [
            geojson.Feature(
                geometry=geojson.Point([item['x'], item['y']]),
                properties={"tFirst": item['tFirst'], "tSecond": item['tSecond']}
            )
            for item in track
        ]
        # Round-trip through geojson.dumps to obtain plain dicts for MongoDB.
        dataout = json.loads(geojson.dumps(geojson.FeatureCollection(features)))
        dataout['properties'] = {
            "sunelevation": sunelevation,
            "tagname": tagname,
            "user_id": user_id,
            "timestamp": datetime.datetime.now().isoformat(),
            "task_id": task_id
        }
        mongoconnect('geologger', 'coord').insert(dataout)
    finally:
        cleanup([twilight])
    return _findurl('coord', tagname, user_id)


@task
def getElevation(data=None, task_id=None, user_id=None):
    """
    Wrapper for GeoLight getElevation
    Expects data like:
    data = {
        "twilights": [
            {
              "active": true,
              "tSecond": "2011-07-30T16:21:30.000Z",
              "tFirst": "2011-07-30T06:58:15.000Z",
              "type": "sunset"
            },
            {
              "active": true,
              "tSecond": "2011-07-31T06:53:08.181Z",
              "tFirst": "2011-07-30T16:21:30.000Z",
              "type": "sunrise"
            },
            {
              "active": true,
              "tSecond": "2011-07-31T16:25:39.230Z",
              "tFirst": "2011-07-31T06:53:08.181Z",
              "type": "sunset"
            }
        ],
        "tagname": "Pabu_test",
        "release_location": [
            35.1,
            -97.0
        ],
        "threshold": 5.5
    }

    Data can be provided as JSON string or as a python dictionary.
    Returns a dictionary with the computed "sunelevation" plus
    task_id, user_id, tagname and a timestamp.
    """
    datain = _loadjson(data)

    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    lat, lon = datain['release_location']
    tagname = datain['tagname']
    twjson = dict2csv(datain, subkey="twilights")
    try:
        r('twilights <- read.csv("%s", header=T)' % twjson)
        r('twilights$tFirst <- strptime(twilights$tFirst, format="%Y-%m-%dT%H:%M:%OSZ")')
        r('twilights$tSecond <- strptime(twilights$tSecond, format="%Y-%m-%dT%H:%M:%OSZ")')
        r('paste(levels(twilights$type))')
        r('levels(twilights$type) <- c(1,2)')
        r('twilights <- subset(twilights, twilights$active == "True")')
        # known.coord is passed as (lon, lat)
        r('elev <- getElevation(twilights$tFirst, twilights$tSecond, twilights$type, known.coord=c(%s,%s), plot=F)' % (lon, lat))
        elev = r('elev')
    finally:
        # The temporary CSV was previously never removed.
        cleanup([twjson])
    dataout = {
        "task_id": task_id,
        "user_id": user_id,
        "sunelevation": elev[0],
        "timestamp": datetime.datetime.now().isoformat(),
        "tagname": tagname
    }
    return dataout