├── geologger
    ├── __init__.py
    ├── util.py
    └── geologger.py
├── requirements.txt
└── Dockerfile


/geologger/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | pandas
 2 | python-dateutil==1.5
 3 | amqp
 4 | anyjson
 5 | billiard==2.7.3.12
 6 | celery==3.0.5
 7 | geojson==1.0.1
 8 | kombu==2.3.2
 9 | ordereddict==1.1
10 | pymongo==2.1
11 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu
 2 | 
 3 | RUN apt-get update && apt-get install -y r-base python-dev python-pip 
 4 | RUN apt-get install -y python-rpy2 python-pymongo supervisor 
 5 | RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
 6 | RUN echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
 7 | RUN apt-get update && sudo apt-get install -y mongodb-org-mongos
 8 | 
 9 | # Install GeoLight
10 | RUN echo "install.packages('GeoLight', repos=\"http://cran.rstudio.com/\")" > packages.R
11 | RUN Rscript  packages.R
12 | 
13 | # Get working dir ready for celery
14 | RUN mkdir /opt/celeryq
15 | WORKDIR /opt/celeryq
16 | ADD requirements.txt /opt/celeryq/requirements.txt
17 | ADD geologger /opt/celeryq/geologger
18 | RUN pip install -r /opt/celeryq/requirements.txt  
19 | 
20 | 


--------------------------------------------------------------------------------
/geologger/util.py:
--------------------------------------------------------------------------------
 1 | import pymongo
 2 | import csv
 3 | import tempfile
 4 | import pandas
 5 | import urllib
 6 | import urlparse
 7 | import os
 8 | import datetime
 9 | from StringIO import StringIO
10 | 
# Database / collection name constants.
# NOTE(review): presumably the MongoDB location of the task queue log; nothing
# in the visible code reads them -- confirm against other modules.
TASK_DB = "cybercom_queue"
TASK_COLLECTION = "task_log"
13 | 
def csv2json(fname, dateformat=None, fromstring=False):
    """ Convert CSV input to a list of row dictionaries

        - fname: path of a CSV file, or the CSV text itself when fromstring is true
        - dateformat: optional strptime format, handed to convertdate() with every row
        - fromstring: treat fname as CSV content instead of a file path

        Returns a list with one dictionary per CSV row, keyed by the header row.
    """
    if fromstring:
        reader = csv.DictReader(StringIO(fname), dialect="excel")
        return [convertdate(row, dateformat) for row in reader]
    # Read every row inside the ``with`` block so the file is closed
    # deterministically instead of being left to the garbage collector.
    with open(fname, 'rU') as csvfile:
        return [convertdate(row, dateformat) for row in csv.DictReader(csvfile)]
23 | 
def convertdate(data,dtformat=None):
    """ Rewrite data['datetime'] as an ISO-8601 string.

        When dtformat is given, data['datetime'] is parsed with that strptime
        format and replaced in place by its isoformat() text. Without a format
        the row is left untouched. The same dictionary is returned either way.
    """
    if not dtformat:
        return data
    parsed = datetime.datetime.strptime(data['datetime'], dtformat)
    data['datetime'] = parsed.isoformat()
    return data
32 | 
def stringsave(instring):
    """ Write instring to a new temporary file and return the file's path

        instring is iterated and every item is written with writelines(), so
        both a plain string and a list of lines are accepted. The file is not
        deleted automatically; the caller owns it (see cleanup()).
    """
    # Only the unique name is needed from NamedTemporaryFile; close its handle
    # right away so a single handle writes to the file.
    tmp = tempfile.NamedTemporaryFile(mode="wb+", delete=False)
    tmp.close()
    # ``with`` guarantees the content is flushed and the handle closed before
    # the path is handed back to the caller.
    with open(tmp.name, 'w') as fout:
        for line in instring:
            fout.writelines(line)
    return tmp.name
39 |     
40 | 
def cleanup( files ):
    """ Delete every path in files and return a "Deleted ..." summary string

        Raises OSError (from os.remove) if a path cannot be removed.
    """
    for path in files:
        os.remove(path)
    # Wrap in a 1-tuple so a tuple argument is formatted as a single value
    # instead of being unpacked as %-format arguments (which raised TypeError
    # for two or more paths).
    return "Deleted %s" % (files,)
45 | 
46 | 
def dict2csv(data, outfile=None, subkey=None):
    """ Convert regular structured list of dictionaries to CSV 
        - If outfile is not specified a temporary file is created and its name returned
        - Subkey will select a subkey of the returned JSON to generate the CSV from:
            Example: 
            data = {"data": [ { "date": "2011-15-10T12:00:00Z", "light": "10" } ],
                    "location": [ "a", "b" ], "tagname": "PABU"
                }
            subkey = "data"
            dict2csv(data, subkey=subkey)
        - The header row is the key list of the first dictionary; every row is
          written in that column order and missing keys become empty cells
        - Empty data yields an empty file

        Returns the path of the CSV file that was written.
    """
    import sys  # local import: only used to choose the csv file mode below

    if subkey:
        data = data[subkey]
    if not outfile:
        # Only the unique name is needed; close the handle immediately.
        tmp = tempfile.NamedTemporaryFile(mode="wb+", delete=False)
        tmp.close()
        outfile = tmp.name
    # The csv module needs a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(outfile, 'wb')
    else:
        handle = open(outfile, 'w', newline='')
    with handle:
        if data:
            header = list(data[0].keys())
            writer = csv.writer(handle)
            writer.writerow(header)
            for item in data:
                # Look values up by header key: the iteration order of
                # item.values() is not guaranteed to match the header.
                writer.writerow([item.get(key, "") for key in header])
    return outfile
68 | 
def df2csv(data, outfile=None, subkey=None):
    """ Write data to CSV through a pandas DataFrame
        (used for JSON representations of R data frames converted with RJSONIO toJSON)
        - subkey selects data[subkey] as the table to write
        - If outfile is not specified a temporary file is created and its name returned

        Returns the path of the CSV file; the DataFrame index is written as the first column.
    """
    if subkey:
        data = data[subkey]
    if not outfile:
        # Only the unique name is needed; close the handle so pandas is the
        # sole writer and no descriptor is left open.
        tmp = tempfile.NamedTemporaryFile(mode="wb+", delete=False)
        tmp.close()
        outfile = tmp.name
    pandas.DataFrame(data).to_csv(outfile)
    return outfile
77 | 
def pandasdf(data):
    """ Wrap data in a pandas DataFrame and return the frame """
    frame = pandas.DataFrame(data)
    return frame
81 | 
82 | def url_fix(s, charset='utf-8'):
83 |     """ Replace unsafe characters in URLs """ 
84 |     if isinstance(s, unicode):
85 |         s = s.encode(charset, 'ignore')
86 |     scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
87 |     path = urllib.quote(path,'/%')
88 |     qs = urllib.quote_plus(qs, ':&=')
89 |     return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
90 | 
def mongoconnect(db,col):
    """ Open a pymongo connection with default settings and return collection
    col of database db. No host is passed, so pymongo's default (localhost)
    applies, which forces installation of mongos on the host.
    """ 
    connection = pymongo.Connection()
    database = connection[db]
    return database[col]
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/geologger/geologger.py:
--------------------------------------------------------------------------------
  1 | from celery import task
  2 | import subprocess
  3 | import csv
  4 | import simplejson as json
  5 | import pymongo
  6 | import tempfile
  7 | import datetime, time
  8 | import urlparse, urllib
  9 | import pandas
 10 | import rpy2.robjects as robjects
 11 | import os
 12 | from util import *
 13 | import geojson
 14 | 
# R preamble that loads lattice, ggplot2 and plyr. The %(version)s placeholder
# is meant to be filled by %-formatting with a mapping holding a "version" key.
# NOTE(review): nothing in the visible code formats or runs this template --
# confirm whether it is still needed.
template = """
library(lattice)
library(ggplot2)
library(plyr)
%(version)s
"""
 21 | 
 22 | def runR(datain, 
 23 |             script,
 24 |             outformat, 
 25 |             saveoutput=False,
 26 |             savedisplay=False,
 27 |             saverdata=False):
 28 |     """ Helper function to make running R scripted tasks easier"""
 29 |     # Set current work directory to a tmp dir for R script, gather up all output from there when done.
 30 |     r = robjects.r
 31 |     if saveoutput | savedisplay:
 32 |         tempdir = ""# create temporary directory 
 33 |     if saveoutput:
 34 |         r('setwd("%s")' % tempdir )
 35 |     # Optionally store and persist .RData to disk
 36 |     # PDF Grabbing - grab PDF output and place in sensible location
 37 |     if savedisplay:
 38 |         r('pdf("%s")' % tempdir )
 39 |     r(script)
 40 |     # cleanup temp directory
 41 |     if saverdata & saveoutput:
 42 |         r('save.image()')
 43 |     return 
 44 | 
 45 | def getTagData(tagname, user_id="guest", db="geologger", col="lightlogs"):
 46 |     """ Get light level data for a tag """ 
 47 |     url = "http://test.cybercommons.org/mongo/db_find/%s/%s/{'spec':{'tagname':'%s','user_id':'%s'}}" %(db,col,tagname, user_id)
 48 |     url_get = urllib.urlopen(url_fix(url)).read()
 49 |     if url_get == "[]":
 50 |         return {"error": "Empty result"}
 51 |     else:
 52 |         return json.loads(url_get)[0]
 53 | 
 54 | @task
 55 | def importTagData_manual( uploadloc, tagname, notes, location, dateformat=None , task_id=None, user_id=None):
 56 |     """ Import a geologger tag to mongodb """ 
 57 |     data = {
 58 |             "tagname":tagname, 
 59 |             "notes": notes, 
 60 |             "release_location": location, 
 61 |             "user_id": user_id, 
 62 |             "timestamp": "%sZ" % datetime.datetime.now().isoformat(),
 63 |             "task_id": task_id
 64 |            }
 65 |     data['data'] = csv2json(uploadloc, dateformat)
 66 |     try:
 67 |         c = mongoconnect('geologger','lightlogs')
 68 |         c.insert( data )
 69 |         return url_fix('http://test.cybercommons.org/mongo/db_find/geologger/lightlogs/{"spec":{"tagname":"%s","user_id":"%s"}}' % (tagname,user_id))
 70 |     except:
 71 |         return "Error saving to mongodb"
 72 | @task
 73 | def importTagData( data=None, task_id=None, user_id=None ):
 74 |     """ A task for importing geologger tag data """
 75 |     if isinstance(data,unicode or str):
 76 |         datain = json.loads(data)
 77 |     else:
 78 |         datain = data
 79 | 
 80 |     dataout = { "data": datain['data'],
 81 |                 "tagname": datain['tagname'],
 82 |                 "notes": datain['notes'],
 83 |                 "species": datain['species'],
 84 |                 "timestamp": "%sZ" % datetime.datetime.now().isoformat(),
 85 |                 "user_id": user_id,
 86 |                 "task_id": task_id
 87 |               }
 88 |     try: 
 89 |         c = mongoconnect('geologger','lightlogs')
 90 |         c.insert(dataout)
 91 |         return url_fix('http://test.cybercommons.org/mongo/db_find/geologger/lightlogs/{"spec":{"tagname":"%s","user_id":"%s"}}' % (dataout['tagname'],dataout['user_id']))
 92 |     except:
 93 |         return "Error saving to mongo"
 94 | 
 95 | 
 96 | @task
 97 | def twilightCalc( tagname=None, threshold=None, task_id=None, user_id=None):
 98 |     """ Python wrapper for GeoLight twilightCalc() """
 99 |     r = robjects.r
100 |     r.library('GeoLight')
101 |     r.library('RJSONIO')
102 |     tagdata = getTagData(tagname,user_id)
103 |     if tagdata != {"error": "Empty result"}:
104 |         ligdata = dict2csv(tagdata,subkey="data")
105 |         r('lig <- read.csv("%s", header=T)' % ligdata)
106 |         r('trans <- twilightCalc(lig$datetime, lig$light, LightThreshold=%s, ask=F)' % threshold)
107 |         c = mongoconnect('geologger','twilights') 
108 |         data = { 
109 |                 "data":json.loads(r('toJSON(trans)')[0]), 
110 |                 "tagname": tagname, 
111 |                 "user_id": user_id, 
112 |                 "threshold": threshold, 
113 |                 "timestamp": datetime.datetime.now().isoformat(),
114 |                 "format": "RJSONIO",
115 |                 "task_id": task_id
116 |                 }
117 |         c.insert(data)
118 |         cleanup([ligdata])
119 |         return 'http://test.cybercommons.org/mongo/db_find/geologger/twilights/{"spec":{"tagname":"%s","user_id":"%s"}}' % (tagname, user_id)
120 |     else:
121 |         return "Had a problem finding lightlog data"
122 | 
@task
def twilightInsert(tagname=None, data=None, threshold=None, task_id=None,user_id=None):
    """ Store twilight events submitted by the web interface

        data is a JSON string; it is decoded and saved to the 'twilights'
        collection together with the tag name, user, threshold, task id and
        the current timestamp. Returns the db_find URL for the stored twilights.
    """
    collection = mongoconnect('geologger','twilights')

    record = {}
    record["data"] = json.loads( data )
    record["tagname"] = tagname
    record["user_id"] = user_id
    record["threshold"] = threshold
    record["timestamp"] = datetime.datetime.now().isoformat()
    record["format"] = "JSON-list"
    record["task_id"] = task_id
    collection.save(record)

    link = 'http://test.cybercommons.org/mongo/db_find/geologger/twilights/{"spec":{"tagname":"%s","user_id":"%s"}}'
    return link % (tagname, user_id)
139 | 
@task
def deleteTag(tagname=None, user_id=None):
    """ Remove every document of tagname/user_id from the lightlogs,
        twilights and coord collections of the geologger database """
    for colname in ('lightlogs', 'twilights', 'coord'):
        mongoconnect('geologger', colname).remove({"tagname":tagname,"user_id":user_id})
148 | 
@task
def changeLight( tagname=None, riseprob=None, setprob=None, days=None, task_id=None, user_id=None):
    """ Python wrapper for GeoLight changeLight()

        Fetches the stored twilights of tagname/user_id, runs changeLight() in R
        with rise.prob=riseprob, set.prob=setprob and days=days, and stores the
        result in the 'changelight' collection.

        Returns the db_find URL of the stored result, or a message string when
        no twilight data is available for the tag.
    """
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    tagdata = getTagData(tagname=tagname, user_id=user_id, col="twilights")
    # Check the fetched document itself. The previous test measured the length
    # of the temporary file *path*, so it never fired, and a missing "data"
    # key surfaced as a KeyError instead of this message.
    if not tagdata.get("data"):
        return "Twilights have not yet been calculated, please compute twilight events and then try again"
    twilight = df2csv(tagdata, subkey="data")
    try:
        r('twilight <- read.csv("%s", header=T)' % twilight)
        r('twilight$tFirst <- as.POSIXlt(twilight$tFirst, origin="1970-01-01")') # Convert to R Datetime
        # tSecond must be converted from its own column (was copied from tFirst).
        r('twilight$tSecond <- as.POSIXlt(twilight$tSecond, origin="1970-01-01")') # Convert to R Datetime
        r('change <- changeLight(twilight$tFirst, twilight$tSecond, twilight$type, rise.prob=%s, set.prob=%s, days=%s,plot=F)' % (riseprob,setprob,days))
        # Hack to get "." out of variable names so json can be stored in MongoDB
        #   see: "http://docs.mongodb.org/manual/reference/limits/#Restrictions on Field Names"
        r('names(change)[3] <- "rise_prob"')
        r('names(change)[4] <- "set_prob"')
        r('names(change$setProb)[2] <- "prob_y"')
        r('names(change$riseProb)[2] <- "prob_y"')
        r('names(change$migTable)[5] <- "P_start"')
        r('names(change$migTable)[6] <- "P_end"')
        c = mongoconnect('geologger','changelight')
        data = { 
                "data": json.loads(r('toJSON(change)')[0]), 
                "params": { "riseprob": riseprob, "setprob":setprob,"days":days },
                "user_id": user_id, 
                "tagname": tagname,
                "timestamp": datetime.datetime.now().isoformat(),
                "task_id": task_id
                }
        c.insert(data)
    finally:
        # Remove the temporary CSV even when R or MongoDB raises.
        cleanup([twilight])
    return 'http://test.cybercommons.org/mongo/db_find/geologger/changelight/{"spec":{"tagname":"%s","user_id":"%s"}}' % (tagname, user_id)
182 | 
@task
def distanceFilter( transdata, elevation, distance, task_id=None, user_id=None ):
    """ Python wrapper for GeoLight distanceFilter()

        Not implemented yet: the body is a placeholder, so the task ignores
        all of its arguments and returns None.
    """
    pass
187 | 
@task
def coord( data=None, task_id=None, user_id=None ):
    """ Python wrapper for GeoLight coord() 
        expects data like:
        data = { 
                         "tagname": "PABU_test",
                         "sunelevation": -4.5,
                         "computed": True,
                         "threshold": 4.5,
                         "twilights": [{ 
                            "tFirst": "2011-07-30T15:21:24.000Z", 
                            "tSecond": "2011-07-31T15:21:24.000Z",
                            "type": "sunrise",
                            "active": True
                            }, { 
                            "tFirst": "2011-07-30T15:21:24.000Z", 
                            "tSecond": "2011-07-31T15:21:24.000Z",
                            "type": "sunrise",
                            "active": True
                            }],
                          "calibperiod": ["2011-07-30T15:21:24.000Z", "2011-07-30T15:21:24.000Z"]
                    
                }

        Data can be provided as JSON string or as a python dictionary.

        The input is saved to the 'twilights' collection, the active twilights
        are passed to GeoLight coord() in R, and the resulting positions are
        stored in the 'coord' collection as a GeoJSON FeatureCollection.
        Returns the db_find URL of the stored coordinates.
    """
    # ``unicode or str`` evaluates to just ``unicode``, so plain byte strings
    # were never decoded; test against both string types.
    if isinstance(data, (str, unicode)):
        datain = json.loads(data)
    else:
        datain = data

    datain['user_id'] = user_id
    datain['timestamp'] = datetime.datetime.now().isoformat()
    tagname = datain['tagname']
    sunelevation = datain['sunelevation']
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    # Save input twilights from UI
    t = mongoconnect('geologger','twilights')
    t.save(datain)
    # Convert input to csv for reading in R
    twilight = df2csv(datain, subkey="twilights")
    try:
        r('twilights <- read.csv("%s", header=T)' % (twilight))
        # Filter actives
        r('twilights <- subset(twilights, twilights$active == "True")')
        # Convert sunrise/sunset to 1,2
        r('twilights$typecat[twilights$type == "sunrise"] <- 1')
        r('twilights$typecat[twilights$type == "sunset"] <- 2')
        # Convert datetimes
        r('twilights$tFirst <- as.POSIXct(strptime(twilights$tFirst, format="%Y-%m-%dT%H:%M:%OSZ", tz="GMT"))')
        r('twilights$tSecond <- as.POSIXct(strptime(twilights$tSecond, format="%Y-%m-%dT%H:%M:%OSZ", tz="GMT"))')
        r('coord <- coord(twilights$tFirst, twilights$tSecond, twilights$typecat, degElevation = %s)'% sunelevation) 
        r('coord <- as.data.frame(cbind(as.data.frame(coord), twilights$tFirst, twilights$tSecond))' ) 

        r('names(coord) <- c("x","y","tFirst","tSecond")')
        # Drop positions where either coordinate could not be computed
        r('coord <- subset(coord, !is.na(y) & !is.na(x))')
        r('coord$tFirst <- as.character(strftime(coord$tFirst, "%Y-%m-%dT%H:%M:%SZ"))')
        r('coord$tSecond <- as.character(strftime(coord$tSecond, "%Y-%m-%dT%H:%M:%SZ"))')
        d = mongoconnect('geologger', 'debug')
        c = mongoconnect('geologger','coord')

        # Serialise the R data frame once and reuse the decoded value for both
        # the debug record and the track.
        coordjson = json.loads(r('toJSON(coord)')[0])
        df = pandasdf(coordjson)
        # One dictionary per row, keyed by column name
        track = [ dict(zip(df.columns, row)) for row in df.values ]

        d.insert({"dataframe": df.to_string(), "fromR": coordjson})

        features = [
            geojson.Feature(
                geometry=geojson.Point([item['x'],item['y']]),
                properties={"tFirst": item['tFirst'], "tSecond": item['tSecond']}
            )
            for item in track
        ]
        # Round-trip through geojson.dumps to get plain dicts/lists for MongoDB
        dataout = json.loads(geojson.dumps(geojson.FeatureCollection(features)))
        dataout['properties'] = {  
                "sunelevation": sunelevation, 
                "tagname": tagname, 
                "user_id": user_id,
                "timestamp": datetime.datetime.now().isoformat(),
                "task_id": task_id   
            }
        c.insert(dataout)
    finally:
        # Remove the temporary CSV even when R or MongoDB raises.
        cleanup([twilight])
    return 'http://test.cybercommons.org/mongo/db_find/geologger/coord/{"spec":{"tagname":"%s","user_id":"%s"}}' % (tagname,user_id)
284 | 
285 | 
@task
def getElevation( data=None, task_id=None, user_id=None):
    """ 
    Wrapper for GeoLight getElevation 
    Expects data like:
     data =  {
         "twilights": [
          {
           "active": true,
           "tSecond": "2011-07-30T16:21:30.000Z",
           "tFirst": "2011-07-30T06:58:15.000Z",
           "type": "sunset"
          },
          {
           "active": true,
           "tSecond": "2011-07-31T06:53:08.181Z",
           "tFirst": "2011-07-30T16:21:30.000Z",
           "type": "sunrise"
          },
          {
           "active": true,
           "tSecond": "2011-07-31T16:25:39.230Z",
           "tFirst": "2011-07-31T06:53:08.181Z",
           "type": "sunset"
          }
         ],
         "tagname": "Pabu_test",
         "release_location": [
          35.1,
          -97.0
         ],
        "threshold": 5.5
        }

    Data can be provided as JSON string or as a python dictionary.
    release_location is read as [lat, lon] and passed to R as known.coord=c(lon, lat).

    Returns a dictionary with task_id, user_id, sunelevation (first element of
    the R result), timestamp and tagname.
    """ 
    # ``unicode or str`` evaluates to just ``unicode``, so plain byte strings
    # were never decoded; test against both string types.
    if isinstance(data, (str, unicode)):
        datain = json.loads(data)
    else:
        datain = data
   
    r = robjects.r
    r.library('GeoLight')
    r.library('RJSONIO')
    lat, lon = datain['release_location']
    tagname = datain['tagname']
    twjson = dict2csv(datain, subkey="twilights")
    try:
        r('twilights <- read.csv("%s", header=T)' % twjson)
        r('twilights$tFirst <- strptime(twilights$tFirst, format="%Y-%m-%dT%H:%M:%OSZ")')
        r('twilights$tSecond <- strptime(twilights$tSecond, format="%Y-%m-%dT%H:%M:%OSZ")')
        r('paste(levels(twilights$type))')
        # Recode the factor levels of twilights$type as 1 and 2
        r('levels(twilights$type) <- c(1,2)')
        r('twilights <- subset(twilights, twilights$active == "True")')
        r('elev <- getElevation(twilights$tFirst, twilights$tSecond, twilights$type, known.coord=c(%s,%s), plot=F)' %(lon, lat) )
        elev = r('elev')
    finally:
        # The temporary CSV was previously never removed.
        cleanup([twjson])
    dataout = { "task_id": task_id, "user_id": user_id, "sunelevation": elev[0], "timestamp": datetime.datetime.now().isoformat() , "tagname": tagname }
    return dataout
341 |     
342 | 
343 | 


--------------------------------------------------------------------------------